Merge branch '21356-clean-imports'
[arvados.git] / sdk / python / tests / test_http.py
1 # Copyright (C) The Arvados Authors. All rights reserved.
2 #
3 # SPDX-License-Identifier: Apache-2.0
4
5 import copy
6 import io
7 import functools
8 import hashlib
9 import json
10 import logging
11 import sys
12 import unittest
13 import datetime
14
15 from unittest import mock
16
17 import arvados
18 import arvados.collection
19 import arvados.keep
20 import pycurl
21
22 from arvados.http_to_keep import http_to_keep
23
24 # Turns out there was already "FakeCurl" that serves the same purpose, but
25 # I wrote this before I knew that.  Whoops.
class CurlMock:
    """Minimal stand-in for a ``pycurl.Curl`` handle used by these tests.

    The mock records the options given to ``setopt()`` and, when
    ``perform()`` is called, replays a canned response through the
    registered header and write callbacks.

    Attributes a test may adjust after construction:

    * ``get_response`` / ``head_response`` -- status codes reported for
      GET and HEAD style requests (both default to 200).
    * ``chunk`` -- the body handed to the write callback on a 200 GET.
    * ``headers`` -- mapping of response header name to value.

    Attributes a test may inspect afterwards:

    * ``perform_was_called`` -- whether ``perform()`` ran at least once.
    * ``url`` -- the value last passed with ``pycurl.URL``.
    * ``req_headers`` -- the value last passed with ``pycurl.HTTPHEADER``.
    """

    def __init__(self, headers=None):
        """Create the mock.

        ``headers`` is the mapping of response headers to replay; when
        omitted, each instance gets its own empty dict (a shared ``{}``
        default would leak state between tests).
        """
        self.perform_was_called = False
        self.headers = {} if headers is None else headers
        self.get_response = 200
        self.head_response = 200
        self.req_headers = []
        # Defaults so getinfo()/perform() are usable even if the code under
        # test never sets NOBODY/HTTPGET and the test never sets a body.
        self.head = False
        self.chunk = None

    def setopt(self, op, *args):
        """Record the subset of curl options that the tests care about.

        Unrecognised options are silently ignored.
        """
        if op == pycurl.URL:
            self.url = args[0]
        elif op == pycurl.WRITEFUNCTION:
            self.writefn = args[0]
        elif op == pycurl.HEADERFUNCTION:
            self.headerfn = args[0]
        elif op == pycurl.NOBODY:
            # Setting NOBODY at all switches the mock to HEAD mode; the
            # option's value is not inspected.
            self.head = True
        elif op == pycurl.HTTPGET:
            self.head = False
        elif op == pycurl.HTTPHEADER:
            self.req_headers = args[0]

    def getinfo(self, op):
        """Return the canned status for ``pycurl.RESPONSE_CODE``.

        Any other info key yields ``None``.
        """
        if op != pycurl.RESPONSE_CODE:
            return None
        return self.head_response if self.head else self.get_response

    def perform(self):
        """Replay the canned response through the registered callbacks.

        Sends the status line, then every entry of ``headers``, and
        finally -- only for a GET whose status is 200 -- the body chunk.
        """
        self.perform_was_called = True

        status = self.head_response if self.head else self.get_response
        self.headerfn("HTTP/1.1 {} Status\r\n".format(status))

        for name, value in self.headers.items():
            self.headerfn("%s: %s" % (name, value))

        if not self.head and self.get_response == 200:
            self.writefn(self.chunk)
68
69
70 class TestHttpToKeep(unittest.TestCase):
71
72     @mock.patch("pycurl.Curl")
73     @mock.patch("arvados.collection.Collection")
74     def test_http_get(self, collectionmock, curlmock):
75         api = mock.MagicMock()
76
77         api.collections().list().execute.return_value = {
78             "items": []
79         }
80
81         cm = mock.MagicMock()
82         cm.manifest_locator.return_value = "zzzzz-4zz18-zzzzzzzzzzzzzz3"
83         cm.portable_data_hash.return_value = "99999999999999999999999999999998+99"
84         collectionmock.return_value = cm
85
86         mockobj = CurlMock()
87         mockobj.chunk = b'abc'
88         def init():
89             return mockobj
90         curlmock.side_effect = init
91
92         utcnow = mock.MagicMock()
93         utcnow.return_value = datetime.datetime(2018, 5, 15)
94
95         r = http_to_keep(api, None, "http://example.com/file1.txt", utcnow=utcnow)
96         self.assertEqual(r, ("99999999999999999999999999999998+99", "file1.txt",
97                              'zzzzz-4zz18-zzzzzzzzzzzzzz3', 'http://example.com/file1.txt',
98                              datetime.datetime(2018, 5, 15, 0, 0)))
99
100         assert mockobj.url == b"http://example.com/file1.txt"
101         assert mockobj.perform_was_called is True
102
103         cm.open.assert_called_with("file1.txt", "wb")
104         cm.save_new.assert_called_with(name="Downloaded from http%3A%2F%2Fexample.com%2Ffile1.txt",
105                                        owner_uuid=None, ensure_unique_name=True)
106
107         api.collections().update.assert_has_calls([
108             mock.call(uuid=cm.manifest_locator(),
109                       body={"collection":{"properties": {'http://example.com/file1.txt': {'Date': 'Tue, 15 May 2018 00:00:00 GMT'}}}})
110         ])
111
112
113     @mock.patch("pycurl.Curl")
114     @mock.patch("arvados.collection.CollectionReader")
115     def test_http_expires(self, collectionmock, curlmock):
116         api = mock.MagicMock()
117
118         api.collections().list().execute.return_value = {
119             "items": [{
120                 "uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz3",
121                 "portable_data_hash": "99999999999999999999999999999998+99",
122                 "properties": {
123                     'http://example.com/file1.txt': {
124                         'Date': 'Tue, 15 May 2018 00:00:00 GMT',
125                         'Expires': 'Tue, 17 May 2018 00:00:00 GMT'
126                     }
127                 }
128             }]
129         }
130
131         cm = mock.MagicMock()
132         cm.manifest_locator.return_value = "zzzzz-4zz18-zzzzzzzzzzzzzz3"
133         cm.portable_data_hash.return_value = "99999999999999999999999999999998+99"
134         cm.keys.return_value = ["file1.txt"]
135         collectionmock.return_value = cm
136
137         mockobj = CurlMock()
138         mockobj.chunk = b'abc'
139         def init():
140             return mockobj
141         curlmock.side_effect = init
142
143         utcnow = mock.MagicMock()
144         utcnow.return_value = datetime.datetime(2018, 5, 16)
145
146         r = http_to_keep(api, None, "http://example.com/file1.txt", utcnow=utcnow)
147         self.assertEqual(r, ("99999999999999999999999999999998+99", "file1.txt",
148                              'zzzzz-4zz18-zzzzzzzzzzzzzz3', 'http://example.com/file1.txt',
149                              datetime.datetime(2018, 5, 16, 0, 0)))
150
151         assert mockobj.perform_was_called is False
152
153
154     @mock.patch("pycurl.Curl")
155     @mock.patch("arvados.collection.CollectionReader")
156     def test_http_cache_control(self, collectionmock, curlmock):
157         api = mock.MagicMock()
158
159         api.collections().list().execute.return_value = {
160             "items": [{
161                 "uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz3",
162                 "portable_data_hash": "99999999999999999999999999999998+99",
163                 "properties": {
164                     'http://example.com/file1.txt': {
165                         'Date': 'Tue, 15 May 2018 00:00:00 GMT',
166                         'Cache-Control': 'max-age=172800'
167                     }
168                 }
169             }]
170         }
171
172         cm = mock.MagicMock()
173         cm.manifest_locator.return_value = "zzzzz-4zz18-zzzzzzzzzzzzzz3"
174         cm.portable_data_hash.return_value = "99999999999999999999999999999998+99"
175         cm.keys.return_value = ["file1.txt"]
176         collectionmock.return_value = cm
177
178         mockobj = CurlMock()
179         mockobj.chunk = b'abc'
180         def init():
181             return mockobj
182         curlmock.side_effect = init
183
184         utcnow = mock.MagicMock()
185         utcnow.return_value = datetime.datetime(2018, 5, 16)
186
187         r = http_to_keep(api, None, "http://example.com/file1.txt", utcnow=utcnow)
188         self.assertEqual(r, ("99999999999999999999999999999998+99", "file1.txt", 'zzzzz-4zz18-zzzzzzzzzzzzzz3',
189                              'http://example.com/file1.txt', datetime.datetime(2018, 5, 16, 0, 0)))
190
191         assert mockobj.perform_was_called is False
192
193
194     @mock.patch("pycurl.Curl")
195     @mock.patch("arvados.collection.Collection")
196     def test_http_expired(self, collectionmock, curlmock):
197         api = mock.MagicMock()
198
199         api.collections().list().execute.return_value = {
200             "items": [{
201                 "uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz3",
202                 "portable_data_hash": "99999999999999999999999999999998+99",
203                 "properties": {
204                     'http://example.com/file1.txt': {
205                         'Date': 'Tue, 15 May 2018 00:00:00 GMT',
206                         'Expires': 'Wed, 16 May 2018 00:00:00 GMT'
207                     }
208                 }
209             }]
210         }
211
212         cm = mock.MagicMock()
213         cm.manifest_locator.return_value = "zzzzz-4zz18-zzzzzzzzzzzzzz4"
214         cm.portable_data_hash.return_value = "99999999999999999999999999999997+99"
215         cm.keys.return_value = ["file1.txt"]
216         collectionmock.return_value = cm
217
218         mockobj = CurlMock({'Date': 'Thu, 17 May 2018 00:00:00 GMT'})
219         mockobj.chunk = b'def'
220         def init():
221             return mockobj
222         curlmock.side_effect = init
223
224         utcnow = mock.MagicMock()
225         utcnow.return_value = datetime.datetime(2018, 5, 17)
226
227         r = http_to_keep(api, None, "http://example.com/file1.txt", utcnow=utcnow)
228         self.assertEqual(r, ("99999999999999999999999999999997+99", "file1.txt",
229                              'zzzzz-4zz18-zzzzzzzzzzzzzz4',
230                              'http://example.com/file1.txt', datetime.datetime(2018, 5, 17, 0, 0)))
231
232
233         assert mockobj.url == b"http://example.com/file1.txt"
234         assert mockobj.perform_was_called is True
235
236         cm.open.assert_called_with("file1.txt", "wb")
237         cm.save_new.assert_called_with(name="Downloaded from http%3A%2F%2Fexample.com%2Ffile1.txt",
238                                        owner_uuid=None, ensure_unique_name=True)
239
240         api.collections().update.assert_has_calls([
241             mock.call(uuid=cm.manifest_locator(),
242                       body={"collection":{"properties": {'http://example.com/file1.txt': {'Date': 'Thu, 17 May 2018 00:00:00 GMT'}}}})
243         ])
244
245
246     @mock.patch("pycurl.Curl")
247     @mock.patch("arvados.collection.CollectionReader")
248     def test_http_etag(self, collectionmock, curlmock):
249         api = mock.MagicMock()
250
251         api.collections().list().execute.return_value = {
252             "items": [{
253                 "uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz3",
254                 "portable_data_hash": "99999999999999999999999999999998+99",
255                 "properties": {
256                     'http://example.com/file1.txt': {
257                         'Date': 'Tue, 15 May 2018 00:00:00 GMT',
258                         'Expires': 'Wed, 16 May 2018 00:00:00 GMT',
259                         'Etag': '"123456"'
260                     }
261                 }
262             }]
263         }
264
265         cm = mock.MagicMock()
266         cm.manifest_locator.return_value = "zzzzz-4zz18-zzzzzzzzzzzzzz3"
267         cm.portable_data_hash.return_value = "99999999999999999999999999999998+99"
268         cm.keys.return_value = ["file1.txt"]
269         collectionmock.return_value = cm
270
271         mockobj = CurlMock({
272             'Date': 'Thu, 17 May 2018 00:00:00 GMT',
273             'Expires': 'Sat, 19 May 2018 00:00:00 GMT',
274             'Etag': '"123456"'
275         })
276         mockobj.chunk = None
277         def init():
278             return mockobj
279         curlmock.side_effect = init
280
281         utcnow = mock.MagicMock()
282         utcnow.return_value = datetime.datetime(2018, 5, 17)
283
284         r = http_to_keep(api, None, "http://example.com/file1.txt", utcnow=utcnow)
285         self.assertEqual(r, ("99999999999999999999999999999998+99", "file1.txt",
286                              'zzzzz-4zz18-zzzzzzzzzzzzzz3', 'http://example.com/file1.txt',
287                              datetime.datetime(2018, 5, 17, 0, 0)))
288
289         cm.open.assert_not_called()
290
291         api.collections().update.assert_has_calls([
292             mock.call(uuid=cm.manifest_locator(),
293                       body={"collection":{"properties": {'http://example.com/file1.txt': {
294                           'Date': 'Thu, 17 May 2018 00:00:00 GMT',
295                           'Expires': 'Sat, 19 May 2018 00:00:00 GMT',
296                           'Etag': '"123456"'
297                       }}}})
298                       ])
299
300     @mock.patch("pycurl.Curl")
301     @mock.patch("arvados.collection.Collection")
302     def test_http_content_disp(self, collectionmock, curlmock):
303         api = mock.MagicMock()
304
305         api.collections().list().execute.return_value = {
306             "items": []
307         }
308
309         cm = mock.MagicMock()
310         cm.manifest_locator.return_value = "zzzzz-4zz18-zzzzzzzzzzzzzz3"
311         cm.portable_data_hash.return_value = "99999999999999999999999999999998+99"
312         collectionmock.return_value = cm
313
314         mockobj = CurlMock({"Content-Disposition": "attachment; filename=file1.txt"})
315         mockobj.chunk = "abc"
316         def init():
317             return mockobj
318         curlmock.side_effect = init
319
320         utcnow = mock.MagicMock()
321         utcnow.return_value = datetime.datetime(2018, 5, 15)
322
323         r = http_to_keep(api, None, "http://example.com/download?fn=/file1.txt", utcnow=utcnow)
324         self.assertEqual(r, ("99999999999999999999999999999998+99", "file1.txt",
325                              'zzzzz-4zz18-zzzzzzzzzzzzzz3',
326                              'http://example.com/download?fn=/file1.txt',
327                              datetime.datetime(2018, 5, 15, 0, 0)))
328
329         assert mockobj.url == b"http://example.com/download?fn=/file1.txt"
330
331         cm.open.assert_called_with("file1.txt", "wb")
332         cm.save_new.assert_called_with(name="Downloaded from http%3A%2F%2Fexample.com%2Fdownload%3Ffn%3D%2Ffile1.txt",
333                                        owner_uuid=None, ensure_unique_name=True)
334
335         api.collections().update.assert_has_calls([
336             mock.call(uuid=cm.manifest_locator(),
337                       body={"collection":{"properties": {"http://example.com/download?fn=/file1.txt": {'Date': 'Tue, 15 May 2018 00:00:00 GMT'}}}})
338         ])
339
340     @mock.patch("pycurl.Curl")
341     @mock.patch("arvados.collection.CollectionReader")
342     def test_http_etag_if_none_match(self, collectionmock, curlmock):
343         api = mock.MagicMock()
344
345         api.collections().list().execute.return_value = {
346             "items": [{
347                 "uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz3",
348                 "portable_data_hash": "99999999999999999999999999999998+99",
349                 "properties": {
350                     'http://example.com/file1.txt': {
351                         'Date': 'Tue, 15 May 2018 00:00:00 GMT',
352                         'Expires': 'Tue, 16 May 2018 00:00:00 GMT',
353                         'Etag': '"123456"'
354                     }
355                 }
356             }]
357         }
358
359         cm = mock.MagicMock()
360         cm.manifest_locator.return_value = "zzzzz-4zz18-zzzzzzzzzzzzzz3"
361         cm.portable_data_hash.return_value = "99999999999999999999999999999998+99"
362         cm.keys.return_value = ["file1.txt"]
363         collectionmock.return_value = cm
364
365         mockobj = CurlMock({
366             'Date': 'Tue, 17 May 2018 00:00:00 GMT',
367             'Expires': 'Tue, 19 May 2018 00:00:00 GMT',
368             'Etag': '"123456"'
369         })
370         mockobj.chunk = None
371         mockobj.head_response = 403
372         mockobj.get_response = 304
373         def init():
374             return mockobj
375         curlmock.side_effect = init
376
377         utcnow = mock.MagicMock()
378         utcnow.return_value = datetime.datetime(2018, 5, 17)
379
380         r = http_to_keep(api, None, "http://example.com/file1.txt", utcnow=utcnow)
381         self.assertEqual(r, ("99999999999999999999999999999998+99", "file1.txt",
382                              'zzzzz-4zz18-zzzzzzzzzzzzzz3', 'http://example.com/file1.txt',
383                              datetime.datetime(2018, 5, 17, 0, 0)))
384
385         print(mockobj.req_headers)
386         assert mockobj.req_headers == ["Accept: application/octet-stream", "If-None-Match: \"123456\""]
387         cm.open.assert_not_called()
388
389         api.collections().update.assert_has_calls([
390             mock.call(uuid=cm.manifest_locator(),
391                       body={"collection":{"properties": {'http://example.com/file1.txt': {
392                           'Date': 'Tue, 17 May 2018 00:00:00 GMT',
393                           'Expires': 'Tue, 19 May 2018 00:00:00 GMT',
394                           'Etag': '"123456"'
395                       }}}})
396                       ])
397
398     @mock.patch("pycurl.Curl")
399     @mock.patch("arvados.collection.CollectionReader")
400     def test_http_prefer_cached_downloads(self, collectionmock, curlmock):
401         api = mock.MagicMock()
402
403         api.collections().list().execute.return_value = {
404             "items": [{
405                 "uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz3",
406                 "portable_data_hash": "99999999999999999999999999999998+99",
407                 "properties": {
408                     'http://example.com/file1.txt': {
409                         'Date': 'Tue, 15 May 2018 00:00:00 GMT',
410                         'Expires': 'Tue, 16 May 2018 00:00:00 GMT',
411                         'Etag': '"123456"'
412                     }
413                 }
414             }]
415         }
416
417         cm = mock.MagicMock()
418         cm.manifest_locator.return_value = "zzzzz-4zz18-zzzzzzzzzzzzzz3"
419         cm.portable_data_hash.return_value = "99999999999999999999999999999998+99"
420         cm.keys.return_value = ["file1.txt"]
421         collectionmock.return_value = cm
422
423         mockobj = CurlMock()
424         def init():
425             return mockobj
426         curlmock.side_effect = init
427
428         utcnow = mock.MagicMock()
429         utcnow.return_value = datetime.datetime(2018, 5, 17)
430
431         r = http_to_keep(api, None, "http://example.com/file1.txt", utcnow=utcnow, prefer_cached_downloads=True)
432         self.assertEqual(r, ("99999999999999999999999999999998+99", "file1.txt", 'zzzzz-4zz18-zzzzzzzzzzzzzz3',
433                              'http://example.com/file1.txt', datetime.datetime(2018, 5, 17, 0, 0)))
434
435         assert mockobj.perform_was_called is False
436         cm.open.assert_not_called()
437         api.collections().update.assert_not_called()
438
439     @mock.patch("pycurl.Curl")
440     @mock.patch("arvados.collection.CollectionReader")
441     def test_http_varying_url_params(self, collectionmock, curlmock):
442         for prurl in ("http://example.com/file1.txt", "http://example.com/file1.txt?KeyId=123&Signature=456&Expires=789"):
443             api = mock.MagicMock()
444
445             api.collections().list().execute.return_value = {
446                 "items": [{
447                     "uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz3",
448                     "portable_data_hash": "99999999999999999999999999999998+99",
449                     "properties": {
450                         prurl: {
451                             'Date': 'Tue, 15 May 2018 00:00:00 GMT',
452                             'Expires': 'Tue, 16 May 2018 00:00:00 GMT',
453                             'Etag': '"123456"'
454                         }
455                     }
456                 }]
457             }
458
459             cm = mock.MagicMock()
460             cm.manifest_locator.return_value = "zzzzz-4zz18-zzzzzzzzzzzzzz3"
461             cm.portable_data_hash.return_value = "99999999999999999999999999999998+99"
462             cm.keys.return_value = ["file1.txt"]
463             collectionmock.return_value = cm
464
465             mockobj = CurlMock({
466                 'Date': 'Tue, 17 May 2018 00:00:00 GMT',
467                 'Expires': 'Tue, 19 May 2018 00:00:00 GMT',
468                 'Etag': '"123456"'
469             })
470             mockobj.chunk = None
471             def init():
472                 return mockobj
473             curlmock.side_effect = init
474
475             utcnow = mock.MagicMock()
476             utcnow.return_value = datetime.datetime(2018, 5, 17)
477
478             r = http_to_keep(api, None, "http://example.com/file1.txt?KeyId=123&Signature=456&Expires=789",
479                                               utcnow=utcnow, varying_url_params="KeyId,Signature,Expires")
480             self.assertEqual(r, ("99999999999999999999999999999998+99", "file1.txt", 'zzzzz-4zz18-zzzzzzzzzzzzzz3',
481                                  'http://example.com/file1.txt', datetime.datetime(2018, 5, 17, 0, 0)))
482
483             assert mockobj.perform_was_called is True
484             cm.open.assert_not_called()
485
486             api.collections().update.assert_has_calls([
487                 mock.call(uuid=cm.manifest_locator(),
488                           body={"collection":{"properties": {'http://example.com/file1.txt': {
489                               'Date': 'Tue, 17 May 2018 00:00:00 GMT',
490                               'Expires': 'Tue, 19 May 2018 00:00:00 GMT',
491                               'Etag': '"123456"'
492                           }}}})
493                           ])