Merge branch '21278-search-redirect' refs #21278
[arvados.git] / sdk / python / tests / test_http.py
1 # Copyright (C) The Arvados Authors. All rights reserved.
2 #
3 # SPDX-License-Identifier: Apache-2.0
4
5 from future import standard_library
6 standard_library.install_aliases()
7
8 import copy
9 import io
10 import functools
11 import hashlib
12 import json
13 import logging
14 import mock
15 import sys
16 import unittest
17 import datetime
18
19 import arvados
20 import arvados.collection
21 import arvados.keep
22 import pycurl
23
24 from arvados.http_to_keep import http_to_keep
25
26 import ruamel.yaml as yaml
27
28 # Turns out there was already "FakeCurl" that serves the same purpose, but
29 # I wrote this before I knew that.  Whoops.
class CurlMock:
    """Minimal stand-in for a ``pycurl.Curl`` handle used by these tests.

    ``setopt`` records the options the tests care about, and ``perform``
    replays a canned response through the registered header and write
    callbacks.

    Tests may adjust these attributes after construction:

    * ``head_response`` / ``get_response`` -- status code reported while
      the handle is in HEAD / GET mode (both default to 200).
    * ``chunk`` -- payload passed to the write callback by ``perform``
      for a GET whose ``get_response`` is 200.  It is not set by
      ``__init__``; a test must assign it before such a ``perform``.
    """

    def __init__(self, headers=None):
        """Create the mock.

        headers -- optional dict of response headers that ``perform``
                   replays through the header callback.
        """
        self.perform_was_called = False
        # Build a fresh dict per instance; a ``{}`` default argument would
        # be shared by every CurlMock created without explicit headers.
        self.headers = {} if headers is None else headers
        self.get_response = 200
        self.head_response = 200
        self.req_headers = []
        # Start in GET mode so getinfo()/perform() work even when neither
        # NOBODY nor HTTPGET has been set yet.
        self.head = False

    def setopt(self, op, *args):
        """Record the option values that the tests inspect.

        Options other than the ones listed below are silently ignored.
        """
        if op == pycurl.URL:
            self.url = args[0]
        elif op == pycurl.WRITEFUNCTION:
            self.writefn = args[0]
        elif op == pycurl.HEADERFUNCTION:
            self.headerfn = args[0]
        elif op == pycurl.NOBODY:
            # Any NOBODY option switches the mock to HEAD mode; the
            # option's value is not examined.
            self.head = True
        elif op == pycurl.HTTPGET:
            self.head = False
        elif op == pycurl.HTTPHEADER:
            self.req_headers = args[0]

    def getinfo(self, op):
        """Return the canned status code for ``pycurl.RESPONSE_CODE``.

        Any other ``op`` yields ``None``.
        """
        if op == pycurl.RESPONSE_CODE:
            return self.head_response if self.head else self.get_response
        return None

    def perform(self):
        """Replay the canned response through the registered callbacks."""
        self.perform_was_called = True

        status = self.head_response if self.head else self.get_response
        self.headerfn("HTTP/1.1 {} Status\r\n".format(status))

        for k, v in self.headers.items():
            self.headerfn("%s: %s" % (k, v))

        # A body is only delivered for a successful GET.
        if not self.head and self.get_response == 200:
            self.writefn(self.chunk)
72
73
74 class TestHttpToKeep(unittest.TestCase):
75
76     @mock.patch("pycurl.Curl")
77     @mock.patch("arvados.collection.Collection")
78     def test_http_get(self, collectionmock, curlmock):
79         api = mock.MagicMock()
80
81         api.collections().list().execute.return_value = {
82             "items": []
83         }
84
85         cm = mock.MagicMock()
86         cm.manifest_locator.return_value = "zzzzz-4zz18-zzzzzzzzzzzzzz3"
87         cm.portable_data_hash.return_value = "99999999999999999999999999999998+99"
88         collectionmock.return_value = cm
89
90         mockobj = CurlMock()
91         mockobj.chunk = b'abc'
92         def init():
93             return mockobj
94         curlmock.side_effect = init
95
96         utcnow = mock.MagicMock()
97         utcnow.return_value = datetime.datetime(2018, 5, 15)
98
99         r = http_to_keep(api, None, "http://example.com/file1.txt", utcnow=utcnow)
100         self.assertEqual(r, ("99999999999999999999999999999998+99", "file1.txt",
101                              'zzzzz-4zz18-zzzzzzzzzzzzzz3', 'http://example.com/file1.txt',
102                              datetime.datetime(2018, 5, 15, 0, 0)))
103
104         assert mockobj.url == b"http://example.com/file1.txt"
105         assert mockobj.perform_was_called is True
106
107         cm.open.assert_called_with("file1.txt", "wb")
108         cm.save_new.assert_called_with(name="Downloaded from http%3A%2F%2Fexample.com%2Ffile1.txt",
109                                        owner_uuid=None, ensure_unique_name=True)
110
111         api.collections().update.assert_has_calls([
112             mock.call(uuid=cm.manifest_locator(),
113                       body={"collection":{"properties": {'http://example.com/file1.txt': {'Date': 'Tue, 15 May 2018 00:00:00 GMT'}}}})
114         ])
115
116
117     @mock.patch("pycurl.Curl")
118     @mock.patch("arvados.collection.CollectionReader")
119     def test_http_expires(self, collectionmock, curlmock):
120         api = mock.MagicMock()
121
122         api.collections().list().execute.return_value = {
123             "items": [{
124                 "uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz3",
125                 "portable_data_hash": "99999999999999999999999999999998+99",
126                 "properties": {
127                     'http://example.com/file1.txt': {
128                         'Date': 'Tue, 15 May 2018 00:00:00 GMT',
129                         'Expires': 'Tue, 17 May 2018 00:00:00 GMT'
130                     }
131                 }
132             }]
133         }
134
135         cm = mock.MagicMock()
136         cm.manifest_locator.return_value = "zzzzz-4zz18-zzzzzzzzzzzzzz3"
137         cm.portable_data_hash.return_value = "99999999999999999999999999999998+99"
138         cm.keys.return_value = ["file1.txt"]
139         collectionmock.return_value = cm
140
141         mockobj = CurlMock()
142         mockobj.chunk = b'abc'
143         def init():
144             return mockobj
145         curlmock.side_effect = init
146
147         utcnow = mock.MagicMock()
148         utcnow.return_value = datetime.datetime(2018, 5, 16)
149
150         r = http_to_keep(api, None, "http://example.com/file1.txt", utcnow=utcnow)
151         self.assertEqual(r, ("99999999999999999999999999999998+99", "file1.txt",
152                              'zzzzz-4zz18-zzzzzzzzzzzzzz3', 'http://example.com/file1.txt',
153                              datetime.datetime(2018, 5, 16, 0, 0)))
154
155         assert mockobj.perform_was_called is False
156
157
158     @mock.patch("pycurl.Curl")
159     @mock.patch("arvados.collection.CollectionReader")
160     def test_http_cache_control(self, collectionmock, curlmock):
161         api = mock.MagicMock()
162
163         api.collections().list().execute.return_value = {
164             "items": [{
165                 "uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz3",
166                 "portable_data_hash": "99999999999999999999999999999998+99",
167                 "properties": {
168                     'http://example.com/file1.txt': {
169                         'Date': 'Tue, 15 May 2018 00:00:00 GMT',
170                         'Cache-Control': 'max-age=172800'
171                     }
172                 }
173             }]
174         }
175
176         cm = mock.MagicMock()
177         cm.manifest_locator.return_value = "zzzzz-4zz18-zzzzzzzzzzzzzz3"
178         cm.portable_data_hash.return_value = "99999999999999999999999999999998+99"
179         cm.keys.return_value = ["file1.txt"]
180         collectionmock.return_value = cm
181
182         mockobj = CurlMock()
183         mockobj.chunk = b'abc'
184         def init():
185             return mockobj
186         curlmock.side_effect = init
187
188         utcnow = mock.MagicMock()
189         utcnow.return_value = datetime.datetime(2018, 5, 16)
190
191         r = http_to_keep(api, None, "http://example.com/file1.txt", utcnow=utcnow)
192         self.assertEqual(r, ("99999999999999999999999999999998+99", "file1.txt", 'zzzzz-4zz18-zzzzzzzzzzzzzz3',
193                              'http://example.com/file1.txt', datetime.datetime(2018, 5, 16, 0, 0)))
194
195         assert mockobj.perform_was_called is False
196
197
198     @mock.patch("pycurl.Curl")
199     @mock.patch("arvados.collection.Collection")
200     def test_http_expired(self, collectionmock, curlmock):
201         api = mock.MagicMock()
202
203         api.collections().list().execute.return_value = {
204             "items": [{
205                 "uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz3",
206                 "portable_data_hash": "99999999999999999999999999999998+99",
207                 "properties": {
208                     'http://example.com/file1.txt': {
209                         'Date': 'Tue, 15 May 2018 00:00:00 GMT',
210                         'Expires': 'Wed, 16 May 2018 00:00:00 GMT'
211                     }
212                 }
213             }]
214         }
215
216         cm = mock.MagicMock()
217         cm.manifest_locator.return_value = "zzzzz-4zz18-zzzzzzzzzzzzzz4"
218         cm.portable_data_hash.return_value = "99999999999999999999999999999997+99"
219         cm.keys.return_value = ["file1.txt"]
220         collectionmock.return_value = cm
221
222         mockobj = CurlMock({'Date': 'Thu, 17 May 2018 00:00:00 GMT'})
223         mockobj.chunk = b'def'
224         def init():
225             return mockobj
226         curlmock.side_effect = init
227
228         utcnow = mock.MagicMock()
229         utcnow.return_value = datetime.datetime(2018, 5, 17)
230
231         r = http_to_keep(api, None, "http://example.com/file1.txt", utcnow=utcnow)
232         self.assertEqual(r, ("99999999999999999999999999999997+99", "file1.txt",
233                              'zzzzz-4zz18-zzzzzzzzzzzzzz4',
234                              'http://example.com/file1.txt', datetime.datetime(2018, 5, 17, 0, 0)))
235
236
237         assert mockobj.url == b"http://example.com/file1.txt"
238         assert mockobj.perform_was_called is True
239
240         cm.open.assert_called_with("file1.txt", "wb")
241         cm.save_new.assert_called_with(name="Downloaded from http%3A%2F%2Fexample.com%2Ffile1.txt",
242                                        owner_uuid=None, ensure_unique_name=True)
243
244         api.collections().update.assert_has_calls([
245             mock.call(uuid=cm.manifest_locator(),
246                       body={"collection":{"properties": {'http://example.com/file1.txt': {'Date': 'Thu, 17 May 2018 00:00:00 GMT'}}}})
247         ])
248
249
250     @mock.patch("pycurl.Curl")
251     @mock.patch("arvados.collection.CollectionReader")
252     def test_http_etag(self, collectionmock, curlmock):
253         api = mock.MagicMock()
254
255         api.collections().list().execute.return_value = {
256             "items": [{
257                 "uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz3",
258                 "portable_data_hash": "99999999999999999999999999999998+99",
259                 "properties": {
260                     'http://example.com/file1.txt': {
261                         'Date': 'Tue, 15 May 2018 00:00:00 GMT',
262                         'Expires': 'Wed, 16 May 2018 00:00:00 GMT',
263                         'Etag': '"123456"'
264                     }
265                 }
266             }]
267         }
268
269         cm = mock.MagicMock()
270         cm.manifest_locator.return_value = "zzzzz-4zz18-zzzzzzzzzzzzzz3"
271         cm.portable_data_hash.return_value = "99999999999999999999999999999998+99"
272         cm.keys.return_value = ["file1.txt"]
273         collectionmock.return_value = cm
274
275         mockobj = CurlMock({
276             'Date': 'Thu, 17 May 2018 00:00:00 GMT',
277             'Expires': 'Sat, 19 May 2018 00:00:00 GMT',
278             'Etag': '"123456"'
279         })
280         mockobj.chunk = None
281         def init():
282             return mockobj
283         curlmock.side_effect = init
284
285         utcnow = mock.MagicMock()
286         utcnow.return_value = datetime.datetime(2018, 5, 17)
287
288         r = http_to_keep(api, None, "http://example.com/file1.txt", utcnow=utcnow)
289         self.assertEqual(r, ("99999999999999999999999999999998+99", "file1.txt",
290                              'zzzzz-4zz18-zzzzzzzzzzzzzz3', 'http://example.com/file1.txt',
291                              datetime.datetime(2018, 5, 17, 0, 0)))
292
293         cm.open.assert_not_called()
294
295         api.collections().update.assert_has_calls([
296             mock.call(uuid=cm.manifest_locator(),
297                       body={"collection":{"properties": {'http://example.com/file1.txt': {
298                           'Date': 'Thu, 17 May 2018 00:00:00 GMT',
299                           'Expires': 'Sat, 19 May 2018 00:00:00 GMT',
300                           'Etag': '"123456"'
301                       }}}})
302                       ])
303
304     @mock.patch("pycurl.Curl")
305     @mock.patch("arvados.collection.Collection")
306     def test_http_content_disp(self, collectionmock, curlmock):
307         api = mock.MagicMock()
308
309         api.collections().list().execute.return_value = {
310             "items": []
311         }
312
313         cm = mock.MagicMock()
314         cm.manifest_locator.return_value = "zzzzz-4zz18-zzzzzzzzzzzzzz3"
315         cm.portable_data_hash.return_value = "99999999999999999999999999999998+99"
316         collectionmock.return_value = cm
317
318         mockobj = CurlMock({"Content-Disposition": "attachment; filename=file1.txt"})
319         mockobj.chunk = "abc"
320         def init():
321             return mockobj
322         curlmock.side_effect = init
323
324         utcnow = mock.MagicMock()
325         utcnow.return_value = datetime.datetime(2018, 5, 15)
326
327         r = http_to_keep(api, None, "http://example.com/download?fn=/file1.txt", utcnow=utcnow)
328         self.assertEqual(r, ("99999999999999999999999999999998+99", "file1.txt",
329                              'zzzzz-4zz18-zzzzzzzzzzzzzz3',
330                              'http://example.com/download?fn=/file1.txt',
331                              datetime.datetime(2018, 5, 15, 0, 0)))
332
333         assert mockobj.url == b"http://example.com/download?fn=/file1.txt"
334
335         cm.open.assert_called_with("file1.txt", "wb")
336         cm.save_new.assert_called_with(name="Downloaded from http%3A%2F%2Fexample.com%2Fdownload%3Ffn%3D%2Ffile1.txt",
337                                        owner_uuid=None, ensure_unique_name=True)
338
339         api.collections().update.assert_has_calls([
340             mock.call(uuid=cm.manifest_locator(),
341                       body={"collection":{"properties": {"http://example.com/download?fn=/file1.txt": {'Date': 'Tue, 15 May 2018 00:00:00 GMT'}}}})
342         ])
343
344     @mock.patch("pycurl.Curl")
345     @mock.patch("arvados.collection.CollectionReader")
346     def test_http_etag_if_none_match(self, collectionmock, curlmock):
347         api = mock.MagicMock()
348
349         api.collections().list().execute.return_value = {
350             "items": [{
351                 "uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz3",
352                 "portable_data_hash": "99999999999999999999999999999998+99",
353                 "properties": {
354                     'http://example.com/file1.txt': {
355                         'Date': 'Tue, 15 May 2018 00:00:00 GMT',
356                         'Expires': 'Tue, 16 May 2018 00:00:00 GMT',
357                         'Etag': '"123456"'
358                     }
359                 }
360             }]
361         }
362
363         cm = mock.MagicMock()
364         cm.manifest_locator.return_value = "zzzzz-4zz18-zzzzzzzzzzzzzz3"
365         cm.portable_data_hash.return_value = "99999999999999999999999999999998+99"
366         cm.keys.return_value = ["file1.txt"]
367         collectionmock.return_value = cm
368
369         mockobj = CurlMock({
370             'Date': 'Tue, 17 May 2018 00:00:00 GMT',
371             'Expires': 'Tue, 19 May 2018 00:00:00 GMT',
372             'Etag': '"123456"'
373         })
374         mockobj.chunk = None
375         mockobj.head_response = 403
376         mockobj.get_response = 304
377         def init():
378             return mockobj
379         curlmock.side_effect = init
380
381         utcnow = mock.MagicMock()
382         utcnow.return_value = datetime.datetime(2018, 5, 17)
383
384         r = http_to_keep(api, None, "http://example.com/file1.txt", utcnow=utcnow)
385         self.assertEqual(r, ("99999999999999999999999999999998+99", "file1.txt",
386                              'zzzzz-4zz18-zzzzzzzzzzzzzz3', 'http://example.com/file1.txt',
387                              datetime.datetime(2018, 5, 17, 0, 0)))
388
389         print(mockobj.req_headers)
390         assert mockobj.req_headers == ["Accept: application/octet-stream", "If-None-Match: \"123456\""]
391         cm.open.assert_not_called()
392
393         api.collections().update.assert_has_calls([
394             mock.call(uuid=cm.manifest_locator(),
395                       body={"collection":{"properties": {'http://example.com/file1.txt': {
396                           'Date': 'Tue, 17 May 2018 00:00:00 GMT',
397                           'Expires': 'Tue, 19 May 2018 00:00:00 GMT',
398                           'Etag': '"123456"'
399                       }}}})
400                       ])
401
402     @mock.patch("pycurl.Curl")
403     @mock.patch("arvados.collection.CollectionReader")
404     def test_http_prefer_cached_downloads(self, collectionmock, curlmock):
405         api = mock.MagicMock()
406
407         api.collections().list().execute.return_value = {
408             "items": [{
409                 "uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz3",
410                 "portable_data_hash": "99999999999999999999999999999998+99",
411                 "properties": {
412                     'http://example.com/file1.txt': {
413                         'Date': 'Tue, 15 May 2018 00:00:00 GMT',
414                         'Expires': 'Tue, 16 May 2018 00:00:00 GMT',
415                         'Etag': '"123456"'
416                     }
417                 }
418             }]
419         }
420
421         cm = mock.MagicMock()
422         cm.manifest_locator.return_value = "zzzzz-4zz18-zzzzzzzzzzzzzz3"
423         cm.portable_data_hash.return_value = "99999999999999999999999999999998+99"
424         cm.keys.return_value = ["file1.txt"]
425         collectionmock.return_value = cm
426
427         mockobj = CurlMock()
428         def init():
429             return mockobj
430         curlmock.side_effect = init
431
432         utcnow = mock.MagicMock()
433         utcnow.return_value = datetime.datetime(2018, 5, 17)
434
435         r = http_to_keep(api, None, "http://example.com/file1.txt", utcnow=utcnow, prefer_cached_downloads=True)
436         self.assertEqual(r, ("99999999999999999999999999999998+99", "file1.txt", 'zzzzz-4zz18-zzzzzzzzzzzzzz3',
437                              'http://example.com/file1.txt', datetime.datetime(2018, 5, 17, 0, 0)))
438
439         assert mockobj.perform_was_called is False
440         cm.open.assert_not_called()
441         api.collections().update.assert_not_called()
442
443     @mock.patch("pycurl.Curl")
444     @mock.patch("arvados.collection.CollectionReader")
445     def test_http_varying_url_params(self, collectionmock, curlmock):
446         for prurl in ("http://example.com/file1.txt", "http://example.com/file1.txt?KeyId=123&Signature=456&Expires=789"):
447             api = mock.MagicMock()
448
449             api.collections().list().execute.return_value = {
450                 "items": [{
451                     "uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz3",
452                     "portable_data_hash": "99999999999999999999999999999998+99",
453                     "properties": {
454                         prurl: {
455                             'Date': 'Tue, 15 May 2018 00:00:00 GMT',
456                             'Expires': 'Tue, 16 May 2018 00:00:00 GMT',
457                             'Etag': '"123456"'
458                         }
459                     }
460                 }]
461             }
462
463             cm = mock.MagicMock()
464             cm.manifest_locator.return_value = "zzzzz-4zz18-zzzzzzzzzzzzzz3"
465             cm.portable_data_hash.return_value = "99999999999999999999999999999998+99"
466             cm.keys.return_value = ["file1.txt"]
467             collectionmock.return_value = cm
468
469             mockobj = CurlMock({
470                 'Date': 'Tue, 17 May 2018 00:00:00 GMT',
471                 'Expires': 'Tue, 19 May 2018 00:00:00 GMT',
472                 'Etag': '"123456"'
473             })
474             mockobj.chunk = None
475             def init():
476                 return mockobj
477             curlmock.side_effect = init
478
479             utcnow = mock.MagicMock()
480             utcnow.return_value = datetime.datetime(2018, 5, 17)
481
482             r = http_to_keep(api, None, "http://example.com/file1.txt?KeyId=123&Signature=456&Expires=789",
483                                               utcnow=utcnow, varying_url_params="KeyId,Signature,Expires")
484             self.assertEqual(r, ("99999999999999999999999999999998+99", "file1.txt", 'zzzzz-4zz18-zzzzzzzzzzzzzz3',
485                                  'http://example.com/file1.txt', datetime.datetime(2018, 5, 17, 0, 0)))
486
487             assert mockobj.perform_was_called is True
488             cm.open.assert_not_called()
489
490             api.collections().update.assert_has_calls([
491                 mock.call(uuid=cm.manifest_locator(),
492                           body={"collection":{"properties": {'http://example.com/file1.txt': {
493                               'Date': 'Tue, 17 May 2018 00:00:00 GMT',
494                               'Expires': 'Tue, 19 May 2018 00:00:00 GMT',
495                               'Etag': '"123456"'
496                           }}}})
497                           ])