21700: Install Bundler system-wide in Rails postinst
[arvados.git] / sdk / python / tests / test_http.py
1 # Copyright (C) The Arvados Authors. All rights reserved.
2 #
3 # SPDX-License-Identifier: Apache-2.0
4
5 from future import standard_library
6 standard_library.install_aliases()
7
8 import copy
9 import io
10 import functools
11 import hashlib
12 import json
13 import logging
14 import sys
15 import unittest
16 import datetime
17
18 from unittest import mock
19
20 import arvados
21 import arvados.collection
22 import arvados.keep
23 import pycurl
24
25 from arvados.http_to_keep import http_to_keep
26
27 # Turns out there was already "FakeCurl" that serves the same purpose, but
28 # I wrote this before I knew that.  Whoops.
class CurlMock:
    """Minimal stand-in for ``pycurl.Curl`` used by the tests in this file.

    ``setopt()`` records the options the code under test passes in, and
    ``perform()`` replays a canned response through the registered header
    and write callbacks.

    Attributes tests may set or inspect:
      headers            -- dict of response headers replayed by perform()
      get_response       -- status code reported for GET requests (default 200)
      head_response      -- status code reported when NOBODY is set (default 200)
      chunk              -- body handed to the write callback on a 200 GET
      url, req_headers   -- values captured from setopt()
      perform_was_called -- True once perform() has run
    """

    def __init__(self, headers=None):
        self.perform_was_called = False
        # A fresh dict per instance: a shared ``{}`` default would leak
        # headers from one test into the next if anything mutated it.
        self.headers = {} if headers is None else headers
        self.get_response = 200
        self.head_response = 200
        self.req_headers = []
        # Give every attribute read by getinfo()/perform() a defined value so
        # that using the mock before the matching setopt() call does not
        # raise AttributeError.  A request is treated as a GET until NOBODY
        # is set.
        self.head = False
        self.url = None
        self.chunk = None
        self.writefn = None
        self.headerfn = None

    def setopt(self, op, *args):
        """Record the subset of curl options the tests care about.

        Options other than the ones listed below are accepted and ignored.
        """
        if op == pycurl.URL:
            self.url = args[0]
        if op == pycurl.WRITEFUNCTION:
            self.writefn = args[0]
        if op == pycurl.HEADERFUNCTION:
            self.headerfn = args[0]
        if op == pycurl.NOBODY:
            self.head = True
        if op == pycurl.HTTPGET:
            self.head = False
        if op == pycurl.HTTPHEADER:
            self.req_headers = args[0]

    def getinfo(self, op):
        """Return the canned status code for RESPONSE_CODE, else None."""
        if op == pycurl.RESPONSE_CODE:
            if self.head:
                return self.head_response
            else:
                return self.get_response

    def perform(self):
        """Replay the canned response through the registered callbacks.

        The status line and each configured header are sent to the header
        callback; the body chunk is sent to the write callback only for a
        GET whose status is 200.  A callback that was never registered is
        skipped.
        """
        self.perform_was_called = True

        status = self.head_response if self.head else self.get_response
        if self.headerfn is not None:
            self.headerfn("HTTP/1.1 {} Status\r\n".format(status))
            for k, v in self.headers.items():
                self.headerfn("%s: %s" % (k, v))

        if not self.head and self.get_response == 200 and self.writefn is not None:
            self.writefn(self.chunk)
71
72
73 class TestHttpToKeep(unittest.TestCase):
74
75     @mock.patch("pycurl.Curl")
76     @mock.patch("arvados.collection.Collection")
77     def test_http_get(self, collectionmock, curlmock):
78         api = mock.MagicMock()
79
80         api.collections().list().execute.return_value = {
81             "items": []
82         }
83
84         cm = mock.MagicMock()
85         cm.manifest_locator.return_value = "zzzzz-4zz18-zzzzzzzzzzzzzz3"
86         cm.portable_data_hash.return_value = "99999999999999999999999999999998+99"
87         collectionmock.return_value = cm
88
89         mockobj = CurlMock()
90         mockobj.chunk = b'abc'
91         def init():
92             return mockobj
93         curlmock.side_effect = init
94
95         utcnow = mock.MagicMock()
96         utcnow.return_value = datetime.datetime(2018, 5, 15)
97
98         r = http_to_keep(api, None, "http://example.com/file1.txt", utcnow=utcnow)
99         self.assertEqual(r, ("99999999999999999999999999999998+99", "file1.txt",
100                              'zzzzz-4zz18-zzzzzzzzzzzzzz3', 'http://example.com/file1.txt',
101                              datetime.datetime(2018, 5, 15, 0, 0)))
102
103         assert mockobj.url == b"http://example.com/file1.txt"
104         assert mockobj.perform_was_called is True
105
106         cm.open.assert_called_with("file1.txt", "wb")
107         cm.save_new.assert_called_with(name="Downloaded from http%3A%2F%2Fexample.com%2Ffile1.txt",
108                                        owner_uuid=None, ensure_unique_name=True)
109
110         api.collections().update.assert_has_calls([
111             mock.call(uuid=cm.manifest_locator(),
112                       body={"collection":{"properties": {'http://example.com/file1.txt': {'Date': 'Tue, 15 May 2018 00:00:00 GMT'}}}})
113         ])
114
115
116     @mock.patch("pycurl.Curl")
117     @mock.patch("arvados.collection.CollectionReader")
118     def test_http_expires(self, collectionmock, curlmock):
119         api = mock.MagicMock()
120
121         api.collections().list().execute.return_value = {
122             "items": [{
123                 "uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz3",
124                 "portable_data_hash": "99999999999999999999999999999998+99",
125                 "properties": {
126                     'http://example.com/file1.txt': {
127                         'Date': 'Tue, 15 May 2018 00:00:00 GMT',
128                         'Expires': 'Tue, 17 May 2018 00:00:00 GMT'
129                     }
130                 }
131             }]
132         }
133
134         cm = mock.MagicMock()
135         cm.manifest_locator.return_value = "zzzzz-4zz18-zzzzzzzzzzzzzz3"
136         cm.portable_data_hash.return_value = "99999999999999999999999999999998+99"
137         cm.keys.return_value = ["file1.txt"]
138         collectionmock.return_value = cm
139
140         mockobj = CurlMock()
141         mockobj.chunk = b'abc'
142         def init():
143             return mockobj
144         curlmock.side_effect = init
145
146         utcnow = mock.MagicMock()
147         utcnow.return_value = datetime.datetime(2018, 5, 16)
148
149         r = http_to_keep(api, None, "http://example.com/file1.txt", utcnow=utcnow)
150         self.assertEqual(r, ("99999999999999999999999999999998+99", "file1.txt",
151                              'zzzzz-4zz18-zzzzzzzzzzzzzz3', 'http://example.com/file1.txt',
152                              datetime.datetime(2018, 5, 16, 0, 0)))
153
154         assert mockobj.perform_was_called is False
155
156
157     @mock.patch("pycurl.Curl")
158     @mock.patch("arvados.collection.CollectionReader")
159     def test_http_cache_control(self, collectionmock, curlmock):
160         api = mock.MagicMock()
161
162         api.collections().list().execute.return_value = {
163             "items": [{
164                 "uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz3",
165                 "portable_data_hash": "99999999999999999999999999999998+99",
166                 "properties": {
167                     'http://example.com/file1.txt': {
168                         'Date': 'Tue, 15 May 2018 00:00:00 GMT',
169                         'Cache-Control': 'max-age=172800'
170                     }
171                 }
172             }]
173         }
174
175         cm = mock.MagicMock()
176         cm.manifest_locator.return_value = "zzzzz-4zz18-zzzzzzzzzzzzzz3"
177         cm.portable_data_hash.return_value = "99999999999999999999999999999998+99"
178         cm.keys.return_value = ["file1.txt"]
179         collectionmock.return_value = cm
180
181         mockobj = CurlMock()
182         mockobj.chunk = b'abc'
183         def init():
184             return mockobj
185         curlmock.side_effect = init
186
187         utcnow = mock.MagicMock()
188         utcnow.return_value = datetime.datetime(2018, 5, 16)
189
190         r = http_to_keep(api, None, "http://example.com/file1.txt", utcnow=utcnow)
191         self.assertEqual(r, ("99999999999999999999999999999998+99", "file1.txt", 'zzzzz-4zz18-zzzzzzzzzzzzzz3',
192                              'http://example.com/file1.txt', datetime.datetime(2018, 5, 16, 0, 0)))
193
194         assert mockobj.perform_was_called is False
195
196
197     @mock.patch("pycurl.Curl")
198     @mock.patch("arvados.collection.Collection")
199     def test_http_expired(self, collectionmock, curlmock):
200         api = mock.MagicMock()
201
202         api.collections().list().execute.return_value = {
203             "items": [{
204                 "uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz3",
205                 "portable_data_hash": "99999999999999999999999999999998+99",
206                 "properties": {
207                     'http://example.com/file1.txt': {
208                         'Date': 'Tue, 15 May 2018 00:00:00 GMT',
209                         'Expires': 'Wed, 16 May 2018 00:00:00 GMT'
210                     }
211                 }
212             }]
213         }
214
215         cm = mock.MagicMock()
216         cm.manifest_locator.return_value = "zzzzz-4zz18-zzzzzzzzzzzzzz4"
217         cm.portable_data_hash.return_value = "99999999999999999999999999999997+99"
218         cm.keys.return_value = ["file1.txt"]
219         collectionmock.return_value = cm
220
221         mockobj = CurlMock({'Date': 'Thu, 17 May 2018 00:00:00 GMT'})
222         mockobj.chunk = b'def'
223         def init():
224             return mockobj
225         curlmock.side_effect = init
226
227         utcnow = mock.MagicMock()
228         utcnow.return_value = datetime.datetime(2018, 5, 17)
229
230         r = http_to_keep(api, None, "http://example.com/file1.txt", utcnow=utcnow)
231         self.assertEqual(r, ("99999999999999999999999999999997+99", "file1.txt",
232                              'zzzzz-4zz18-zzzzzzzzzzzzzz4',
233                              'http://example.com/file1.txt', datetime.datetime(2018, 5, 17, 0, 0)))
234
235
236         assert mockobj.url == b"http://example.com/file1.txt"
237         assert mockobj.perform_was_called is True
238
239         cm.open.assert_called_with("file1.txt", "wb")
240         cm.save_new.assert_called_with(name="Downloaded from http%3A%2F%2Fexample.com%2Ffile1.txt",
241                                        owner_uuid=None, ensure_unique_name=True)
242
243         api.collections().update.assert_has_calls([
244             mock.call(uuid=cm.manifest_locator(),
245                       body={"collection":{"properties": {'http://example.com/file1.txt': {'Date': 'Thu, 17 May 2018 00:00:00 GMT'}}}})
246         ])
247
248
249     @mock.patch("pycurl.Curl")
250     @mock.patch("arvados.collection.CollectionReader")
251     def test_http_etag(self, collectionmock, curlmock):
252         api = mock.MagicMock()
253
254         api.collections().list().execute.return_value = {
255             "items": [{
256                 "uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz3",
257                 "portable_data_hash": "99999999999999999999999999999998+99",
258                 "properties": {
259                     'http://example.com/file1.txt': {
260                         'Date': 'Tue, 15 May 2018 00:00:00 GMT',
261                         'Expires': 'Wed, 16 May 2018 00:00:00 GMT',
262                         'Etag': '"123456"'
263                     }
264                 }
265             }]
266         }
267
268         cm = mock.MagicMock()
269         cm.manifest_locator.return_value = "zzzzz-4zz18-zzzzzzzzzzzzzz3"
270         cm.portable_data_hash.return_value = "99999999999999999999999999999998+99"
271         cm.keys.return_value = ["file1.txt"]
272         collectionmock.return_value = cm
273
274         mockobj = CurlMock({
275             'Date': 'Thu, 17 May 2018 00:00:00 GMT',
276             'Expires': 'Sat, 19 May 2018 00:00:00 GMT',
277             'Etag': '"123456"'
278         })
279         mockobj.chunk = None
280         def init():
281             return mockobj
282         curlmock.side_effect = init
283
284         utcnow = mock.MagicMock()
285         utcnow.return_value = datetime.datetime(2018, 5, 17)
286
287         r = http_to_keep(api, None, "http://example.com/file1.txt", utcnow=utcnow)
288         self.assertEqual(r, ("99999999999999999999999999999998+99", "file1.txt",
289                              'zzzzz-4zz18-zzzzzzzzzzzzzz3', 'http://example.com/file1.txt',
290                              datetime.datetime(2018, 5, 17, 0, 0)))
291
292         cm.open.assert_not_called()
293
294         api.collections().update.assert_has_calls([
295             mock.call(uuid=cm.manifest_locator(),
296                       body={"collection":{"properties": {'http://example.com/file1.txt': {
297                           'Date': 'Thu, 17 May 2018 00:00:00 GMT',
298                           'Expires': 'Sat, 19 May 2018 00:00:00 GMT',
299                           'Etag': '"123456"'
300                       }}}})
301                       ])
302
303     @mock.patch("pycurl.Curl")
304     @mock.patch("arvados.collection.Collection")
305     def test_http_content_disp(self, collectionmock, curlmock):
306         api = mock.MagicMock()
307
308         api.collections().list().execute.return_value = {
309             "items": []
310         }
311
312         cm = mock.MagicMock()
313         cm.manifest_locator.return_value = "zzzzz-4zz18-zzzzzzzzzzzzzz3"
314         cm.portable_data_hash.return_value = "99999999999999999999999999999998+99"
315         collectionmock.return_value = cm
316
317         mockobj = CurlMock({"Content-Disposition": "attachment; filename=file1.txt"})
318         mockobj.chunk = "abc"
319         def init():
320             return mockobj
321         curlmock.side_effect = init
322
323         utcnow = mock.MagicMock()
324         utcnow.return_value = datetime.datetime(2018, 5, 15)
325
326         r = http_to_keep(api, None, "http://example.com/download?fn=/file1.txt", utcnow=utcnow)
327         self.assertEqual(r, ("99999999999999999999999999999998+99", "file1.txt",
328                              'zzzzz-4zz18-zzzzzzzzzzzzzz3',
329                              'http://example.com/download?fn=/file1.txt',
330                              datetime.datetime(2018, 5, 15, 0, 0)))
331
332         assert mockobj.url == b"http://example.com/download?fn=/file1.txt"
333
334         cm.open.assert_called_with("file1.txt", "wb")
335         cm.save_new.assert_called_with(name="Downloaded from http%3A%2F%2Fexample.com%2Fdownload%3Ffn%3D%2Ffile1.txt",
336                                        owner_uuid=None, ensure_unique_name=True)
337
338         api.collections().update.assert_has_calls([
339             mock.call(uuid=cm.manifest_locator(),
340                       body={"collection":{"properties": {"http://example.com/download?fn=/file1.txt": {'Date': 'Tue, 15 May 2018 00:00:00 GMT'}}}})
341         ])
342
343     @mock.patch("pycurl.Curl")
344     @mock.patch("arvados.collection.CollectionReader")
345     def test_http_etag_if_none_match(self, collectionmock, curlmock):
346         api = mock.MagicMock()
347
348         api.collections().list().execute.return_value = {
349             "items": [{
350                 "uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz3",
351                 "portable_data_hash": "99999999999999999999999999999998+99",
352                 "properties": {
353                     'http://example.com/file1.txt': {
354                         'Date': 'Tue, 15 May 2018 00:00:00 GMT',
355                         'Expires': 'Tue, 16 May 2018 00:00:00 GMT',
356                         'Etag': '"123456"'
357                     }
358                 }
359             }]
360         }
361
362         cm = mock.MagicMock()
363         cm.manifest_locator.return_value = "zzzzz-4zz18-zzzzzzzzzzzzzz3"
364         cm.portable_data_hash.return_value = "99999999999999999999999999999998+99"
365         cm.keys.return_value = ["file1.txt"]
366         collectionmock.return_value = cm
367
368         mockobj = CurlMock({
369             'Date': 'Tue, 17 May 2018 00:00:00 GMT',
370             'Expires': 'Tue, 19 May 2018 00:00:00 GMT',
371             'Etag': '"123456"'
372         })
373         mockobj.chunk = None
374         mockobj.head_response = 403
375         mockobj.get_response = 304
376         def init():
377             return mockobj
378         curlmock.side_effect = init
379
380         utcnow = mock.MagicMock()
381         utcnow.return_value = datetime.datetime(2018, 5, 17)
382
383         r = http_to_keep(api, None, "http://example.com/file1.txt", utcnow=utcnow)
384         self.assertEqual(r, ("99999999999999999999999999999998+99", "file1.txt",
385                              'zzzzz-4zz18-zzzzzzzzzzzzzz3', 'http://example.com/file1.txt',
386                              datetime.datetime(2018, 5, 17, 0, 0)))
387
388         print(mockobj.req_headers)
389         assert mockobj.req_headers == ["Accept: application/octet-stream", "If-None-Match: \"123456\""]
390         cm.open.assert_not_called()
391
392         api.collections().update.assert_has_calls([
393             mock.call(uuid=cm.manifest_locator(),
394                       body={"collection":{"properties": {'http://example.com/file1.txt': {
395                           'Date': 'Tue, 17 May 2018 00:00:00 GMT',
396                           'Expires': 'Tue, 19 May 2018 00:00:00 GMT',
397                           'Etag': '"123456"'
398                       }}}})
399                       ])
400
401     @mock.patch("pycurl.Curl")
402     @mock.patch("arvados.collection.CollectionReader")
403     def test_http_prefer_cached_downloads(self, collectionmock, curlmock):
404         api = mock.MagicMock()
405
406         api.collections().list().execute.return_value = {
407             "items": [{
408                 "uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz3",
409                 "portable_data_hash": "99999999999999999999999999999998+99",
410                 "properties": {
411                     'http://example.com/file1.txt': {
412                         'Date': 'Tue, 15 May 2018 00:00:00 GMT',
413                         'Expires': 'Tue, 16 May 2018 00:00:00 GMT',
414                         'Etag': '"123456"'
415                     }
416                 }
417             }]
418         }
419
420         cm = mock.MagicMock()
421         cm.manifest_locator.return_value = "zzzzz-4zz18-zzzzzzzzzzzzzz3"
422         cm.portable_data_hash.return_value = "99999999999999999999999999999998+99"
423         cm.keys.return_value = ["file1.txt"]
424         collectionmock.return_value = cm
425
426         mockobj = CurlMock()
427         def init():
428             return mockobj
429         curlmock.side_effect = init
430
431         utcnow = mock.MagicMock()
432         utcnow.return_value = datetime.datetime(2018, 5, 17)
433
434         r = http_to_keep(api, None, "http://example.com/file1.txt", utcnow=utcnow, prefer_cached_downloads=True)
435         self.assertEqual(r, ("99999999999999999999999999999998+99", "file1.txt", 'zzzzz-4zz18-zzzzzzzzzzzzzz3',
436                              'http://example.com/file1.txt', datetime.datetime(2018, 5, 17, 0, 0)))
437
438         assert mockobj.perform_was_called is False
439         cm.open.assert_not_called()
440         api.collections().update.assert_not_called()
441
442     @mock.patch("pycurl.Curl")
443     @mock.patch("arvados.collection.CollectionReader")
444     def test_http_varying_url_params(self, collectionmock, curlmock):
445         for prurl in ("http://example.com/file1.txt", "http://example.com/file1.txt?KeyId=123&Signature=456&Expires=789"):
446             api = mock.MagicMock()
447
448             api.collections().list().execute.return_value = {
449                 "items": [{
450                     "uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz3",
451                     "portable_data_hash": "99999999999999999999999999999998+99",
452                     "properties": {
453                         prurl: {
454                             'Date': 'Tue, 15 May 2018 00:00:00 GMT',
455                             'Expires': 'Tue, 16 May 2018 00:00:00 GMT',
456                             'Etag': '"123456"'
457                         }
458                     }
459                 }]
460             }
461
462             cm = mock.MagicMock()
463             cm.manifest_locator.return_value = "zzzzz-4zz18-zzzzzzzzzzzzzz3"
464             cm.portable_data_hash.return_value = "99999999999999999999999999999998+99"
465             cm.keys.return_value = ["file1.txt"]
466             collectionmock.return_value = cm
467
468             mockobj = CurlMock({
469                 'Date': 'Tue, 17 May 2018 00:00:00 GMT',
470                 'Expires': 'Tue, 19 May 2018 00:00:00 GMT',
471                 'Etag': '"123456"'
472             })
473             mockobj.chunk = None
474             def init():
475                 return mockobj
476             curlmock.side_effect = init
477
478             utcnow = mock.MagicMock()
479             utcnow.return_value = datetime.datetime(2018, 5, 17)
480
481             r = http_to_keep(api, None, "http://example.com/file1.txt?KeyId=123&Signature=456&Expires=789",
482                                               utcnow=utcnow, varying_url_params="KeyId,Signature,Expires")
483             self.assertEqual(r, ("99999999999999999999999999999998+99", "file1.txt", 'zzzzz-4zz18-zzzzzzzzzzzzzz3',
484                                  'http://example.com/file1.txt', datetime.datetime(2018, 5, 17, 0, 0)))
485
486             assert mockobj.perform_was_called is True
487             cm.open.assert_not_called()
488
489             api.collections().update.assert_has_calls([
490                 mock.call(uuid=cm.manifest_locator(),
491                           body={"collection":{"properties": {'http://example.com/file1.txt': {
492                               'Date': 'Tue, 17 May 2018 00:00:00 GMT',
493                               'Expires': 'Tue, 19 May 2018 00:00:00 GMT',
494                               'Etag': '"123456"'
495                           }}}})
496                           ])