21721: Remove tests_require=mock
[arvados.git] / sdk / python / tests / test_http.py
1 # Copyright (C) The Arvados Authors. All rights reserved.
2 #
3 # SPDX-License-Identifier: Apache-2.0
4
5 from future import standard_library
6 standard_library.install_aliases()
7
8 import copy
9 import io
10 import functools
11 import hashlib
12 import json
13 import logging
14 import sys
15 import unittest
16 import datetime
17
18 from unittest import mock
19
20 import arvados
21 import arvados.collection
22 import arvados.keep
23 import pycurl
24
25 from arvados.http_to_keep import http_to_keep
26
27 import ruamel.yaml as yaml
28
29 # Turns out there was already "FakeCurl" that serves the same purpose, but
30 # I wrote this before I knew that.  Whoops.
class CurlMock:
    """Minimal stand-in for a ``pycurl.Curl`` handle used by these tests.

    It records the options given to :meth:`setopt` and, when
    :meth:`perform` is called, replays a canned status line, the
    configured response headers and (for a successful GET) the body
    stored in ``chunk`` through the registered callbacks.

    Attributes set by tests:
        chunk: body passed to the write callback on a 200 GET.
        head_response: status code reported for HEAD requests.
        get_response: status code reported for GET requests.
    """

    def __init__(self, headers=None):
        """Create a mock handle.

        Args:
            headers: optional mapping of response header name to value
                that :meth:`perform` feeds to the header callback.
        """
        self.perform_was_called = False
        # Build a fresh dict per instance; a ``{}`` default in the
        # signature would be shared (and could leak) between tests.
        self.headers = {} if headers is None else headers
        self.get_response = 200
        self.head_response = 200
        self.req_headers = []
        # Defaults so that perform()/getinfo() behave like a plain GET
        # even when the corresponding option was never set.
        self.head = False
        self.chunk = None
        self.url = None

    def setopt(self, op, *args):
        """Record the subset of curl options the tests care about."""
        if op == pycurl.URL:
            self.url = args[0]
        elif op == pycurl.WRITEFUNCTION:
            self.writefn = args[0]
        elif op == pycurl.HEADERFUNCTION:
            self.headerfn = args[0]
        elif op == pycurl.NOBODY:
            # NOBODY switches the handle to a HEAD request.
            self.head = True
        elif op == pycurl.HTTPGET:
            self.head = False
        elif op == pycurl.HTTPHEADER:
            self.req_headers = args[0]

    def getinfo(self, op):
        """Return the canned status code for ``pycurl.RESPONSE_CODE``.

        Any other ``op`` yields ``None``.
        """
        if op == pycurl.RESPONSE_CODE:
            return self.head_response if self.head else self.get_response
        return None

    def perform(self):
        """Replay the canned response through the registered callbacks."""
        self.perform_was_called = True

        status = self.head_response if self.head else self.get_response
        self.headerfn("HTTP/1.1 {} Status\r\n".format(status))

        for k, v in self.headers.items():
            self.headerfn("%s: %s" % (k, v))

        # Only a successful GET delivers a body.
        if not self.head and self.get_response == 200:
            self.writefn(self.chunk)
73
74
75 class TestHttpToKeep(unittest.TestCase):
76
77     @mock.patch("pycurl.Curl")
78     @mock.patch("arvados.collection.Collection")
79     def test_http_get(self, collectionmock, curlmock):
80         api = mock.MagicMock()
81
82         api.collections().list().execute.return_value = {
83             "items": []
84         }
85
86         cm = mock.MagicMock()
87         cm.manifest_locator.return_value = "zzzzz-4zz18-zzzzzzzzzzzzzz3"
88         cm.portable_data_hash.return_value = "99999999999999999999999999999998+99"
89         collectionmock.return_value = cm
90
91         mockobj = CurlMock()
92         mockobj.chunk = b'abc'
93         def init():
94             return mockobj
95         curlmock.side_effect = init
96
97         utcnow = mock.MagicMock()
98         utcnow.return_value = datetime.datetime(2018, 5, 15)
99
100         r = http_to_keep(api, None, "http://example.com/file1.txt", utcnow=utcnow)
101         self.assertEqual(r, ("99999999999999999999999999999998+99", "file1.txt",
102                              'zzzzz-4zz18-zzzzzzzzzzzzzz3', 'http://example.com/file1.txt',
103                              datetime.datetime(2018, 5, 15, 0, 0)))
104
105         assert mockobj.url == b"http://example.com/file1.txt"
106         assert mockobj.perform_was_called is True
107
108         cm.open.assert_called_with("file1.txt", "wb")
109         cm.save_new.assert_called_with(name="Downloaded from http%3A%2F%2Fexample.com%2Ffile1.txt",
110                                        owner_uuid=None, ensure_unique_name=True)
111
112         api.collections().update.assert_has_calls([
113             mock.call(uuid=cm.manifest_locator(),
114                       body={"collection":{"properties": {'http://example.com/file1.txt': {'Date': 'Tue, 15 May 2018 00:00:00 GMT'}}}})
115         ])
116
117
118     @mock.patch("pycurl.Curl")
119     @mock.patch("arvados.collection.CollectionReader")
120     def test_http_expires(self, collectionmock, curlmock):
121         api = mock.MagicMock()
122
123         api.collections().list().execute.return_value = {
124             "items": [{
125                 "uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz3",
126                 "portable_data_hash": "99999999999999999999999999999998+99",
127                 "properties": {
128                     'http://example.com/file1.txt': {
129                         'Date': 'Tue, 15 May 2018 00:00:00 GMT',
130                         'Expires': 'Tue, 17 May 2018 00:00:00 GMT'
131                     }
132                 }
133             }]
134         }
135
136         cm = mock.MagicMock()
137         cm.manifest_locator.return_value = "zzzzz-4zz18-zzzzzzzzzzzzzz3"
138         cm.portable_data_hash.return_value = "99999999999999999999999999999998+99"
139         cm.keys.return_value = ["file1.txt"]
140         collectionmock.return_value = cm
141
142         mockobj = CurlMock()
143         mockobj.chunk = b'abc'
144         def init():
145             return mockobj
146         curlmock.side_effect = init
147
148         utcnow = mock.MagicMock()
149         utcnow.return_value = datetime.datetime(2018, 5, 16)
150
151         r = http_to_keep(api, None, "http://example.com/file1.txt", utcnow=utcnow)
152         self.assertEqual(r, ("99999999999999999999999999999998+99", "file1.txt",
153                              'zzzzz-4zz18-zzzzzzzzzzzzzz3', 'http://example.com/file1.txt',
154                              datetime.datetime(2018, 5, 16, 0, 0)))
155
156         assert mockobj.perform_was_called is False
157
158
159     @mock.patch("pycurl.Curl")
160     @mock.patch("arvados.collection.CollectionReader")
161     def test_http_cache_control(self, collectionmock, curlmock):
162         api = mock.MagicMock()
163
164         api.collections().list().execute.return_value = {
165             "items": [{
166                 "uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz3",
167                 "portable_data_hash": "99999999999999999999999999999998+99",
168                 "properties": {
169                     'http://example.com/file1.txt': {
170                         'Date': 'Tue, 15 May 2018 00:00:00 GMT',
171                         'Cache-Control': 'max-age=172800'
172                     }
173                 }
174             }]
175         }
176
177         cm = mock.MagicMock()
178         cm.manifest_locator.return_value = "zzzzz-4zz18-zzzzzzzzzzzzzz3"
179         cm.portable_data_hash.return_value = "99999999999999999999999999999998+99"
180         cm.keys.return_value = ["file1.txt"]
181         collectionmock.return_value = cm
182
183         mockobj = CurlMock()
184         mockobj.chunk = b'abc'
185         def init():
186             return mockobj
187         curlmock.side_effect = init
188
189         utcnow = mock.MagicMock()
190         utcnow.return_value = datetime.datetime(2018, 5, 16)
191
192         r = http_to_keep(api, None, "http://example.com/file1.txt", utcnow=utcnow)
193         self.assertEqual(r, ("99999999999999999999999999999998+99", "file1.txt", 'zzzzz-4zz18-zzzzzzzzzzzzzz3',
194                              'http://example.com/file1.txt', datetime.datetime(2018, 5, 16, 0, 0)))
195
196         assert mockobj.perform_was_called is False
197
198
199     @mock.patch("pycurl.Curl")
200     @mock.patch("arvados.collection.Collection")
201     def test_http_expired(self, collectionmock, curlmock):
202         api = mock.MagicMock()
203
204         api.collections().list().execute.return_value = {
205             "items": [{
206                 "uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz3",
207                 "portable_data_hash": "99999999999999999999999999999998+99",
208                 "properties": {
209                     'http://example.com/file1.txt': {
210                         'Date': 'Tue, 15 May 2018 00:00:00 GMT',
211                         'Expires': 'Wed, 16 May 2018 00:00:00 GMT'
212                     }
213                 }
214             }]
215         }
216
217         cm = mock.MagicMock()
218         cm.manifest_locator.return_value = "zzzzz-4zz18-zzzzzzzzzzzzzz4"
219         cm.portable_data_hash.return_value = "99999999999999999999999999999997+99"
220         cm.keys.return_value = ["file1.txt"]
221         collectionmock.return_value = cm
222
223         mockobj = CurlMock({'Date': 'Thu, 17 May 2018 00:00:00 GMT'})
224         mockobj.chunk = b'def'
225         def init():
226             return mockobj
227         curlmock.side_effect = init
228
229         utcnow = mock.MagicMock()
230         utcnow.return_value = datetime.datetime(2018, 5, 17)
231
232         r = http_to_keep(api, None, "http://example.com/file1.txt", utcnow=utcnow)
233         self.assertEqual(r, ("99999999999999999999999999999997+99", "file1.txt",
234                              'zzzzz-4zz18-zzzzzzzzzzzzzz4',
235                              'http://example.com/file1.txt', datetime.datetime(2018, 5, 17, 0, 0)))
236
237
238         assert mockobj.url == b"http://example.com/file1.txt"
239         assert mockobj.perform_was_called is True
240
241         cm.open.assert_called_with("file1.txt", "wb")
242         cm.save_new.assert_called_with(name="Downloaded from http%3A%2F%2Fexample.com%2Ffile1.txt",
243                                        owner_uuid=None, ensure_unique_name=True)
244
245         api.collections().update.assert_has_calls([
246             mock.call(uuid=cm.manifest_locator(),
247                       body={"collection":{"properties": {'http://example.com/file1.txt': {'Date': 'Thu, 17 May 2018 00:00:00 GMT'}}}})
248         ])
249
250
251     @mock.patch("pycurl.Curl")
252     @mock.patch("arvados.collection.CollectionReader")
253     def test_http_etag(self, collectionmock, curlmock):
254         api = mock.MagicMock()
255
256         api.collections().list().execute.return_value = {
257             "items": [{
258                 "uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz3",
259                 "portable_data_hash": "99999999999999999999999999999998+99",
260                 "properties": {
261                     'http://example.com/file1.txt': {
262                         'Date': 'Tue, 15 May 2018 00:00:00 GMT',
263                         'Expires': 'Wed, 16 May 2018 00:00:00 GMT',
264                         'Etag': '"123456"'
265                     }
266                 }
267             }]
268         }
269
270         cm = mock.MagicMock()
271         cm.manifest_locator.return_value = "zzzzz-4zz18-zzzzzzzzzzzzzz3"
272         cm.portable_data_hash.return_value = "99999999999999999999999999999998+99"
273         cm.keys.return_value = ["file1.txt"]
274         collectionmock.return_value = cm
275
276         mockobj = CurlMock({
277             'Date': 'Thu, 17 May 2018 00:00:00 GMT',
278             'Expires': 'Sat, 19 May 2018 00:00:00 GMT',
279             'Etag': '"123456"'
280         })
281         mockobj.chunk = None
282         def init():
283             return mockobj
284         curlmock.side_effect = init
285
286         utcnow = mock.MagicMock()
287         utcnow.return_value = datetime.datetime(2018, 5, 17)
288
289         r = http_to_keep(api, None, "http://example.com/file1.txt", utcnow=utcnow)
290         self.assertEqual(r, ("99999999999999999999999999999998+99", "file1.txt",
291                              'zzzzz-4zz18-zzzzzzzzzzzzzz3', 'http://example.com/file1.txt',
292                              datetime.datetime(2018, 5, 17, 0, 0)))
293
294         cm.open.assert_not_called()
295
296         api.collections().update.assert_has_calls([
297             mock.call(uuid=cm.manifest_locator(),
298                       body={"collection":{"properties": {'http://example.com/file1.txt': {
299                           'Date': 'Thu, 17 May 2018 00:00:00 GMT',
300                           'Expires': 'Sat, 19 May 2018 00:00:00 GMT',
301                           'Etag': '"123456"'
302                       }}}})
303                       ])
304
305     @mock.patch("pycurl.Curl")
306     @mock.patch("arvados.collection.Collection")
307     def test_http_content_disp(self, collectionmock, curlmock):
308         api = mock.MagicMock()
309
310         api.collections().list().execute.return_value = {
311             "items": []
312         }
313
314         cm = mock.MagicMock()
315         cm.manifest_locator.return_value = "zzzzz-4zz18-zzzzzzzzzzzzzz3"
316         cm.portable_data_hash.return_value = "99999999999999999999999999999998+99"
317         collectionmock.return_value = cm
318
319         mockobj = CurlMock({"Content-Disposition": "attachment; filename=file1.txt"})
320         mockobj.chunk = "abc"
321         def init():
322             return mockobj
323         curlmock.side_effect = init
324
325         utcnow = mock.MagicMock()
326         utcnow.return_value = datetime.datetime(2018, 5, 15)
327
328         r = http_to_keep(api, None, "http://example.com/download?fn=/file1.txt", utcnow=utcnow)
329         self.assertEqual(r, ("99999999999999999999999999999998+99", "file1.txt",
330                              'zzzzz-4zz18-zzzzzzzzzzzzzz3',
331                              'http://example.com/download?fn=/file1.txt',
332                              datetime.datetime(2018, 5, 15, 0, 0)))
333
334         assert mockobj.url == b"http://example.com/download?fn=/file1.txt"
335
336         cm.open.assert_called_with("file1.txt", "wb")
337         cm.save_new.assert_called_with(name="Downloaded from http%3A%2F%2Fexample.com%2Fdownload%3Ffn%3D%2Ffile1.txt",
338                                        owner_uuid=None, ensure_unique_name=True)
339
340         api.collections().update.assert_has_calls([
341             mock.call(uuid=cm.manifest_locator(),
342                       body={"collection":{"properties": {"http://example.com/download?fn=/file1.txt": {'Date': 'Tue, 15 May 2018 00:00:00 GMT'}}}})
343         ])
344
345     @mock.patch("pycurl.Curl")
346     @mock.patch("arvados.collection.CollectionReader")
347     def test_http_etag_if_none_match(self, collectionmock, curlmock):
348         api = mock.MagicMock()
349
350         api.collections().list().execute.return_value = {
351             "items": [{
352                 "uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz3",
353                 "portable_data_hash": "99999999999999999999999999999998+99",
354                 "properties": {
355                     'http://example.com/file1.txt': {
356                         'Date': 'Tue, 15 May 2018 00:00:00 GMT',
357                         'Expires': 'Tue, 16 May 2018 00:00:00 GMT',
358                         'Etag': '"123456"'
359                     }
360                 }
361             }]
362         }
363
364         cm = mock.MagicMock()
365         cm.manifest_locator.return_value = "zzzzz-4zz18-zzzzzzzzzzzzzz3"
366         cm.portable_data_hash.return_value = "99999999999999999999999999999998+99"
367         cm.keys.return_value = ["file1.txt"]
368         collectionmock.return_value = cm
369
370         mockobj = CurlMock({
371             'Date': 'Tue, 17 May 2018 00:00:00 GMT',
372             'Expires': 'Tue, 19 May 2018 00:00:00 GMT',
373             'Etag': '"123456"'
374         })
375         mockobj.chunk = None
376         mockobj.head_response = 403
377         mockobj.get_response = 304
378         def init():
379             return mockobj
380         curlmock.side_effect = init
381
382         utcnow = mock.MagicMock()
383         utcnow.return_value = datetime.datetime(2018, 5, 17)
384
385         r = http_to_keep(api, None, "http://example.com/file1.txt", utcnow=utcnow)
386         self.assertEqual(r, ("99999999999999999999999999999998+99", "file1.txt",
387                              'zzzzz-4zz18-zzzzzzzzzzzzzz3', 'http://example.com/file1.txt',
388                              datetime.datetime(2018, 5, 17, 0, 0)))
389
390         print(mockobj.req_headers)
391         assert mockobj.req_headers == ["Accept: application/octet-stream", "If-None-Match: \"123456\""]
392         cm.open.assert_not_called()
393
394         api.collections().update.assert_has_calls([
395             mock.call(uuid=cm.manifest_locator(),
396                       body={"collection":{"properties": {'http://example.com/file1.txt': {
397                           'Date': 'Tue, 17 May 2018 00:00:00 GMT',
398                           'Expires': 'Tue, 19 May 2018 00:00:00 GMT',
399                           'Etag': '"123456"'
400                       }}}})
401                       ])
402
403     @mock.patch("pycurl.Curl")
404     @mock.patch("arvados.collection.CollectionReader")
405     def test_http_prefer_cached_downloads(self, collectionmock, curlmock):
406         api = mock.MagicMock()
407
408         api.collections().list().execute.return_value = {
409             "items": [{
410                 "uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz3",
411                 "portable_data_hash": "99999999999999999999999999999998+99",
412                 "properties": {
413                     'http://example.com/file1.txt': {
414                         'Date': 'Tue, 15 May 2018 00:00:00 GMT',
415                         'Expires': 'Tue, 16 May 2018 00:00:00 GMT',
416                         'Etag': '"123456"'
417                     }
418                 }
419             }]
420         }
421
422         cm = mock.MagicMock()
423         cm.manifest_locator.return_value = "zzzzz-4zz18-zzzzzzzzzzzzzz3"
424         cm.portable_data_hash.return_value = "99999999999999999999999999999998+99"
425         cm.keys.return_value = ["file1.txt"]
426         collectionmock.return_value = cm
427
428         mockobj = CurlMock()
429         def init():
430             return mockobj
431         curlmock.side_effect = init
432
433         utcnow = mock.MagicMock()
434         utcnow.return_value = datetime.datetime(2018, 5, 17)
435
436         r = http_to_keep(api, None, "http://example.com/file1.txt", utcnow=utcnow, prefer_cached_downloads=True)
437         self.assertEqual(r, ("99999999999999999999999999999998+99", "file1.txt", 'zzzzz-4zz18-zzzzzzzzzzzzzz3',
438                              'http://example.com/file1.txt', datetime.datetime(2018, 5, 17, 0, 0)))
439
440         assert mockobj.perform_was_called is False
441         cm.open.assert_not_called()
442         api.collections().update.assert_not_called()
443
444     @mock.patch("pycurl.Curl")
445     @mock.patch("arvados.collection.CollectionReader")
446     def test_http_varying_url_params(self, collectionmock, curlmock):
447         for prurl in ("http://example.com/file1.txt", "http://example.com/file1.txt?KeyId=123&Signature=456&Expires=789"):
448             api = mock.MagicMock()
449
450             api.collections().list().execute.return_value = {
451                 "items": [{
452                     "uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz3",
453                     "portable_data_hash": "99999999999999999999999999999998+99",
454                     "properties": {
455                         prurl: {
456                             'Date': 'Tue, 15 May 2018 00:00:00 GMT',
457                             'Expires': 'Tue, 16 May 2018 00:00:00 GMT',
458                             'Etag': '"123456"'
459                         }
460                     }
461                 }]
462             }
463
464             cm = mock.MagicMock()
465             cm.manifest_locator.return_value = "zzzzz-4zz18-zzzzzzzzzzzzzz3"
466             cm.portable_data_hash.return_value = "99999999999999999999999999999998+99"
467             cm.keys.return_value = ["file1.txt"]
468             collectionmock.return_value = cm
469
470             mockobj = CurlMock({
471                 'Date': 'Tue, 17 May 2018 00:00:00 GMT',
472                 'Expires': 'Tue, 19 May 2018 00:00:00 GMT',
473                 'Etag': '"123456"'
474             })
475             mockobj.chunk = None
476             def init():
477                 return mockobj
478             curlmock.side_effect = init
479
480             utcnow = mock.MagicMock()
481             utcnow.return_value = datetime.datetime(2018, 5, 17)
482
483             r = http_to_keep(api, None, "http://example.com/file1.txt?KeyId=123&Signature=456&Expires=789",
484                                               utcnow=utcnow, varying_url_params="KeyId,Signature,Expires")
485             self.assertEqual(r, ("99999999999999999999999999999998+99", "file1.txt", 'zzzzz-4zz18-zzzzzzzzzzzzzz3',
486                                  'http://example.com/file1.txt', datetime.datetime(2018, 5, 17, 0, 0)))
487
488             assert mockobj.perform_was_called is True
489             cm.open.assert_not_called()
490
491             api.collections().update.assert_has_calls([
492                 mock.call(uuid=cm.manifest_locator(),
493                           body={"collection":{"properties": {'http://example.com/file1.txt': {
494                               'Date': 'Tue, 17 May 2018 00:00:00 GMT',
495                               'Expires': 'Tue, 19 May 2018 00:00:00 GMT',
496                               'Etag': '"123456"'
497                           }}}})
498                           ])