1 # Copyright (C) The Arvados Authors. All rights reserved.
3 # SPDX-License-Identifier: Apache-2.0
5 from future import standard_library
6 standard_library.install_aliases()
18 from unittest import mock
21 import arvados.collection
25 from arvados.http_to_keep import http_to_keep
27 # Turns out there was already "FakeCurl" that serves the same purpose, but
28 # I wrote this before I knew that. Whoops.
def __init__(self, headers=None):
    """Set up the fake curl handle with canned response data.

    headers: optional mapping of response header name to value. It is
        stored on the instance as-is; when omitted, each instance gets
        its own new empty dict.
    """
    # Flipped to True once the transfer is performed; tests assert on it.
    self.perform_was_called = False
    # A literal ``{}`` default is created once and shared by every instance
    # constructed without headers, so build a fresh dict per call instead.
    self.headers = {} if headers is None else headers
    # Status codes used when emitting the "HTTP/1.1 <code> Status" line.
    self.get_response = 200
    self.head_response = 200
# Mock of the curl handle's setopt(): remembers the options the tests care
# about. The first positional value after `op` is what gets stored.
#   WRITEFUNCTION  -> self.writefn  (body callback)
#   HEADERFUNCTION -> self.headerfn (header-line callback)
#   HTTPHEADER     -> self.req_headers (request headers sent by the caller)
# NOTE(review): the bodies of the NOBODY and HTTPGET branches are not visible
# in this listing -- presumably they toggle the HEAD/GET flag read as
# self.head elsewhere in the mock; confirm.
37 def setopt(self, op, *args):
40 if op == pycurl.WRITEFUNCTION:
41 self.writefn = args[0]
42 if op == pycurl.HEADERFUNCTION:
43 self.headerfn = args[0]
44 if op == pycurl.NOBODY:
46 if op == pycurl.HTTPGET:
48 if op == pycurl.HTTPHEADER:
49 self.req_headers = args[0]
# Mock of the curl handle's getinfo(): for RESPONSE_CODE it answers with one
# of the two configured status codes, head_response or get_response.
# NOTE(review): the condition that selects between the two returns is not
# visible in this listing -- presumably it is the HEAD/GET flag; confirm.
51 def getinfo(self, op):
52 if op == pycurl.RESPONSE_CODE:
54 return self.head_response
56 return self.get_response
59 self.perform_was_called = True
62 self.headerfn("HTTP/1.1 {} Status\r\n".format(self.head_response))
64 self.headerfn("HTTP/1.1 {} Status\r\n".format(self.get_response))
66 for k,v in self.headers.items():
67 self.headerfn("%s: %s" % (k,v))
69 if not self.head and self.get_response == 200:
70 self.writefn(self.chunk)
73 class TestHttpToKeep(unittest.TestCase):
# Download path: http_to_keep fetches the URL and stores it in a new
# collection. pycurl.Curl and arvados.collection.Collection are both patched.
75 @mock.patch("pycurl.Curl")
76 @mock.patch("arvados.collection.Collection")
77 def test_http_get(self, collectionmock, curlmock):
78 api = mock.MagicMock()
# Canned result for the collections().list() lookup.
80 api.collections().list().execute.return_value = {
# The patched Collection class hands back `cm`, which reports a fixed
# UUID and portable data hash.
85 cm.manifest_locator.return_value = "zzzzz-4zz18-zzzzzzzzzzzzzz3"
86 cm.portable_data_hash.return_value = "99999999999999999999999999999998+99"
87 collectionmock.return_value = cm
# Body bytes the curl mock passes to the write callback.
90 mockobj.chunk = b'abc'
93 curlmock.side_effect = init
# Injected clock, frozen at 2018-05-15.
95 utcnow = mock.MagicMock()
96 utcnow.return_value = datetime.datetime(2018, 5, 15)
# Expected tuple: (portable data hash, file name, collection UUID, URL,
# timestamp equal to the injected clock value).
98 r = http_to_keep(api, None, "http://example.com/file1.txt", utcnow=utcnow)
99 self.assertEqual(r, ("99999999999999999999999999999998+99", "file1.txt",
100 'zzzzz-4zz18-zzzzzzzzzzzzzz3', 'http://example.com/file1.txt',
101 datetime.datetime(2018, 5, 15, 0, 0)))
# The URL must reach curl as bytes, and the transfer must have run.
103 assert mockobj.url == b"http://example.com/file1.txt"
104 assert mockobj.perform_was_called is True
# The file is written in binary mode and the collection is saved under a
# name containing the percent-encoded URL.
106 cm.open.assert_called_with("file1.txt", "wb")
107 cm.save_new.assert_called_with(name="Downloaded from http%3A%2F%2Fexample.com%2Ffile1.txt",
108 owner_uuid=None, ensure_unique_name=True)
# The collection properties are updated with a Date entry keyed by the URL.
110 api.collections().update.assert_has_calls([
111 mock.call(uuid=cm.manifest_locator(),
112 body={"collection":{"properties": {'http://example.com/file1.txt': {'Date': 'Tue, 15 May 2018 00:00:00 GMT'}}}})
# Cache hit via Expires: the stored properties give an Expires of 17 May 2018
# and the injected clock reads 16 May 2018, so the existing collection is
# expected back with no curl transfer.
# NOTE(review): 'Tue, 17 May 2018' pairs the wrong weekday with the date
# (15 May 2018 is the Tuesday used elsewhere in this file); confirm the date
# parser in use ignores the weekday name.
116 @mock.patch("pycurl.Curl")
117 @mock.patch("arvados.collection.CollectionReader")
118 def test_http_expires(self, collectionmock, curlmock):
119 api = mock.MagicMock()
# Canned list() result: one collection whose properties record the headers
# previously seen for this URL.
121 api.collections().list().execute.return_value = {
123 "uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz3",
124 "portable_data_hash": "99999999999999999999999999999998+99",
126 'http://example.com/file1.txt': {
127 'Date': 'Tue, 15 May 2018 00:00:00 GMT',
128 'Expires': 'Tue, 17 May 2018 00:00:00 GMT'
# The patched CollectionReader returns `cm`, which lists a single file.
134 cm = mock.MagicMock()
135 cm.manifest_locator.return_value = "zzzzz-4zz18-zzzzzzzzzzzzzz3"
136 cm.portable_data_hash.return_value = "99999999999999999999999999999998+99"
137 cm.keys.return_value = ["file1.txt"]
138 collectionmock.return_value = cm
141 mockobj.chunk = b'abc'
144 curlmock.side_effect = init
# Injected clock, frozen one day before the stored Expires value.
146 utcnow = mock.MagicMock()
147 utcnow.return_value = datetime.datetime(2018, 5, 16)
149 r = http_to_keep(api, None, "http://example.com/file1.txt", utcnow=utcnow)
150 self.assertEqual(r, ("99999999999999999999999999999998+99", "file1.txt",
151 'zzzzz-4zz18-zzzzzzzzzzzzzz3', 'http://example.com/file1.txt',
152 datetime.datetime(2018, 5, 16, 0, 0)))
# No transfer may have been performed.
154 assert mockobj.perform_was_called is False
# Cache hit via Cache-Control: the stored properties carry a Date of
# 15 May 2018 and max-age=172800 (48 hours). The injected clock reads
# 16 May 2018, so the existing collection is expected back with no curl
# transfer.
157 @mock.patch("pycurl.Curl")
158 @mock.patch("arvados.collection.CollectionReader")
159 def test_http_cache_control(self, collectionmock, curlmock):
160 api = mock.MagicMock()
# Canned list() result: one collection whose properties record the headers
# previously seen for this URL.
162 api.collections().list().execute.return_value = {
164 "uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz3",
165 "portable_data_hash": "99999999999999999999999999999998+99",
167 'http://example.com/file1.txt': {
168 'Date': 'Tue, 15 May 2018 00:00:00 GMT',
169 'Cache-Control': 'max-age=172800'
# The patched CollectionReader returns `cm`, which lists a single file.
175 cm = mock.MagicMock()
176 cm.manifest_locator.return_value = "zzzzz-4zz18-zzzzzzzzzzzzzz3"
177 cm.portable_data_hash.return_value = "99999999999999999999999999999998+99"
178 cm.keys.return_value = ["file1.txt"]
179 collectionmock.return_value = cm
182 mockobj.chunk = b'abc'
185 curlmock.side_effect = init
# Injected clock, frozen one day after the stored Date.
187 utcnow = mock.MagicMock()
188 utcnow.return_value = datetime.datetime(2018, 5, 16)
190 r = http_to_keep(api, None, "http://example.com/file1.txt", utcnow=utcnow)
191 self.assertEqual(r, ("99999999999999999999999999999998+99", "file1.txt", 'zzzzz-4zz18-zzzzzzzzzzzzzz3',
192 'http://example.com/file1.txt', datetime.datetime(2018, 5, 16, 0, 0)))
# No transfer may have been performed.
194 assert mockobj.perform_was_called is False
# Cache miss: the stored Expires (16 May 2018) is earlier than the injected
# clock (17 May 2018). A download is expected, and the result must describe
# the newly written collection (UUID ...zz4, hash ...97+99) rather than the
# one returned by the list() lookup (UUID ...zz3, hash ...98+99).
197 @mock.patch("pycurl.Curl")
198 @mock.patch("arvados.collection.Collection")
199 def test_http_expired(self, collectionmock, curlmock):
200 api = mock.MagicMock()
# Canned list() result: a previously stored collection for this URL.
202 api.collections().list().execute.return_value = {
204 "uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz3",
205 "portable_data_hash": "99999999999999999999999999999998+99",
207 'http://example.com/file1.txt': {
208 'Date': 'Tue, 15 May 2018 00:00:00 GMT',
209 'Expires': 'Wed, 16 May 2018 00:00:00 GMT'
# The patched Collection class returns `cm`, which reports different
# identifiers from the stored collection above.
215 cm = mock.MagicMock()
216 cm.manifest_locator.return_value = "zzzzz-4zz18-zzzzzzzzzzzzzz4"
217 cm.portable_data_hash.return_value = "99999999999999999999999999999997+99"
218 cm.keys.return_value = ["file1.txt"]
219 collectionmock.return_value = cm
# The curl mock answers with a fresh Date header and new body bytes.
221 mockobj = CurlMock({'Date': 'Thu, 17 May 2018 00:00:00 GMT'})
222 mockobj.chunk = b'def'
225 curlmock.side_effect = init
# Injected clock, frozen one day after the stored Expires value.
227 utcnow = mock.MagicMock()
228 utcnow.return_value = datetime.datetime(2018, 5, 17)
230 r = http_to_keep(api, None, "http://example.com/file1.txt", utcnow=utcnow)
231 self.assertEqual(r, ("99999999999999999999999999999997+99", "file1.txt",
232 'zzzzz-4zz18-zzzzzzzzzzzzzz4',
233 'http://example.com/file1.txt', datetime.datetime(2018, 5, 17, 0, 0)))
# The URL must reach curl as bytes, and the transfer must have run.
236 assert mockobj.url == b"http://example.com/file1.txt"
237 assert mockobj.perform_was_called is True
# The file is written in binary mode and the collection is saved under a
# name containing the percent-encoded URL.
239 cm.open.assert_called_with("file1.txt", "wb")
240 cm.save_new.assert_called_with(name="Downloaded from http%3A%2F%2Fexample.com%2Ffile1.txt",
241 owner_uuid=None, ensure_unique_name=True)
# The stored properties are replaced with the Date header from the response.
243 api.collections().update.assert_has_calls([
244 mock.call(uuid=cm.manifest_locator(),
245 body={"collection":{"properties": {'http://example.com/file1.txt': {'Date': 'Thu, 17 May 2018 00:00:00 GMT'}}}})
# Revalidation of an expired cache entry: the stored Expires (16 May 2018) is
# earlier than the injected clock (17 May 2018), and the curl mock answers
# with newer Date/Expires headers. The original collection is expected back
# with nothing rewritten (cm.open is never called) and its properties updated
# to the newer header values.
# NOTE(review): the header that lets the cached copy be matched is not
# visible in this listing -- presumably an Etag, going by the test name;
# confirm.
249 @mock.patch("pycurl.Curl")
250 @mock.patch("arvados.collection.CollectionReader")
251 def test_http_etag(self, collectionmock, curlmock):
252 api = mock.MagicMock()
# Canned list() result: a previously stored collection for this URL.
254 api.collections().list().execute.return_value = {
256 "uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz3",
257 "portable_data_hash": "99999999999999999999999999999998+99",
259 'http://example.com/file1.txt': {
260 'Date': 'Tue, 15 May 2018 00:00:00 GMT',
261 'Expires': 'Wed, 16 May 2018 00:00:00 GMT',
# The patched CollectionReader returns `cm`, which lists a single file.
268 cm = mock.MagicMock()
269 cm.manifest_locator.return_value = "zzzzz-4zz18-zzzzzzzzzzzzzz3"
270 cm.portable_data_hash.return_value = "99999999999999999999999999999998+99"
271 cm.keys.return_value = ["file1.txt"]
272 collectionmock.return_value = cm
275 'Date': 'Thu, 17 May 2018 00:00:00 GMT',
276 'Expires': 'Sat, 19 May 2018 00:00:00 GMT',
282 curlmock.side_effect = init
# Injected clock, frozen one day after the stored Expires value.
284 utcnow = mock.MagicMock()
285 utcnow.return_value = datetime.datetime(2018, 5, 17)
287 r = http_to_keep(api, None, "http://example.com/file1.txt", utcnow=utcnow)
288 self.assertEqual(r, ("99999999999999999999999999999998+99", "file1.txt",
289 'zzzzz-4zz18-zzzzzzzzzzzzzz3', 'http://example.com/file1.txt',
290 datetime.datetime(2018, 5, 17, 0, 0)))
# Nothing may have been written into the collection.
292 cm.open.assert_not_called()
# The properties are refreshed with the newer header values.
294 api.collections().update.assert_has_calls([
295 mock.call(uuid=cm.manifest_locator(),
296 body={"collection":{"properties": {'http://example.com/file1.txt': {
297 'Date': 'Thu, 17 May 2018 00:00:00 GMT',
298 'Expires': 'Sat, 19 May 2018 00:00:00 GMT',
# File name from Content-Disposition: the URL ends in a query string
# ("download?fn=/file1.txt") and the response carries
# "attachment; filename=file1.txt". The stored file is expected to be named
# "file1.txt".
303 @mock.patch("pycurl.Curl")
304 @mock.patch("arvados.collection.Collection")
305 def test_http_content_disp(self, collectionmock, curlmock):
306 api = mock.MagicMock()
# Canned result for the collections().list() lookup.
308 api.collections().list().execute.return_value = {
# The patched Collection class returns `cm`.
312 cm = mock.MagicMock()
313 cm.manifest_locator.return_value = "zzzzz-4zz18-zzzzzzzzzzzzzz3"
314 cm.portable_data_hash.return_value = "99999999999999999999999999999998+99"
315 collectionmock.return_value = cm
# NOTE(review): the chunk is a str here, while the sibling tests use bytes
# (b'abc') and the file is opened in "wb" mode -- confirm this is intended.
317 mockobj = CurlMock({"Content-Disposition": "attachment; filename=file1.txt"})
318 mockobj.chunk = "abc"
321 curlmock.side_effect = init
# Injected clock, frozen at 2018-05-15.
323 utcnow = mock.MagicMock()
324 utcnow.return_value = datetime.datetime(2018, 5, 15)
326 r = http_to_keep(api, None, "http://example.com/download?fn=/file1.txt", utcnow=utcnow)
327 self.assertEqual(r, ("99999999999999999999999999999998+99", "file1.txt",
328 'zzzzz-4zz18-zzzzzzzzzzzzzz3',
329 'http://example.com/download?fn=/file1.txt',
330 datetime.datetime(2018, 5, 15, 0, 0)))
# The full URL, query string included, must reach curl as bytes.
332 assert mockobj.url == b"http://example.com/download?fn=/file1.txt"
# The file name comes from the header; the collection name embeds the
# percent-encoded URL.
334 cm.open.assert_called_with("file1.txt", "wb")
335 cm.save_new.assert_called_with(name="Downloaded from http%3A%2F%2Fexample.com%2Fdownload%3Ffn%3D%2Ffile1.txt",
336 owner_uuid=None, ensure_unique_name=True)
# The properties are keyed by the full URL.
338 api.collections().update.assert_has_calls([
339 mock.call(uuid=cm.manifest_locator(),
340 body={"collection":{"properties": {"http://example.com/download?fn=/file1.txt": {'Date': 'Tue, 15 May 2018 00:00:00 GMT'}}}})
# Conditional GET: the curl mock is configured with head_response 403 and
# get_response 304. The request headers must include
# If-None-Match: "123456", the cached collection is expected back with
# nothing rewritten, and its properties are updated with the newer headers.
# NOTE(review): several date strings below pair 'Tue' with 16, 17 and
# 19 May 2018, which are not Tuesdays (15 May 2018 is) -- confirm the date
# parser in use ignores the weekday name.
343 @mock.patch("pycurl.Curl")
344 @mock.patch("arvados.collection.CollectionReader")
345 def test_http_etag_if_none_match(self, collectionmock, curlmock):
346 api = mock.MagicMock()
# Canned list() result: a previously stored collection for this URL.
348 api.collections().list().execute.return_value = {
350 "uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz3",
351 "portable_data_hash": "99999999999999999999999999999998+99",
353 'http://example.com/file1.txt': {
354 'Date': 'Tue, 15 May 2018 00:00:00 GMT',
355 'Expires': 'Tue, 16 May 2018 00:00:00 GMT',
# The patched CollectionReader returns `cm`, which lists a single file.
362 cm = mock.MagicMock()
363 cm.manifest_locator.return_value = "zzzzz-4zz18-zzzzzzzzzzzzzz3"
364 cm.portable_data_hash.return_value = "99999999999999999999999999999998+99"
365 cm.keys.return_value = ["file1.txt"]
366 collectionmock.return_value = cm
369 'Date': 'Tue, 17 May 2018 00:00:00 GMT',
370 'Expires': 'Tue, 19 May 2018 00:00:00 GMT',
# The HEAD request is refused (403); the GET answers 304 Not Modified.
374 mockobj.head_response = 403
375 mockobj.get_response = 304
378 curlmock.side_effect = init
# Injected clock, frozen at 2018-05-17, after the stored Expires value.
380 utcnow = mock.MagicMock()
381 utcnow.return_value = datetime.datetime(2018, 5, 17)
383 r = http_to_keep(api, None, "http://example.com/file1.txt", utcnow=utcnow)
384 self.assertEqual(r, ("99999999999999999999999999999998+99", "file1.txt",
385 'zzzzz-4zz18-zzzzzzzzzzzzzz3', 'http://example.com/file1.txt',
386 datetime.datetime(2018, 5, 17, 0, 0)))
# NOTE(review): this print looks like leftover debugging output.
388 print(mockobj.req_headers)
389 assert mockobj.req_headers == ["Accept: application/octet-stream", "If-None-Match: \"123456\""]
# Nothing may have been written into the collection.
390 cm.open.assert_not_called()
# The properties are refreshed with the newer header values.
392 api.collections().update.assert_has_calls([
393 mock.call(uuid=cm.manifest_locator(),
394 body={"collection":{"properties": {'http://example.com/file1.txt': {
395 'Date': 'Tue, 17 May 2018 00:00:00 GMT',
396 'Expires': 'Tue, 19 May 2018 00:00:00 GMT',
# prefer_cached_downloads=True: the stored Expires (16 May 2018) is earlier
# than the injected clock (17 May 2018), yet the cached collection is
# expected back with no curl transfer, no file write and no property update.
401 @mock.patch("pycurl.Curl")
402 @mock.patch("arvados.collection.CollectionReader")
403 def test_http_prefer_cached_downloads(self, collectionmock, curlmock):
404 api = mock.MagicMock()
# Canned list() result: a previously stored collection for this URL.
406 api.collections().list().execute.return_value = {
408 "uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz3",
409 "portable_data_hash": "99999999999999999999999999999998+99",
411 'http://example.com/file1.txt': {
412 'Date': 'Tue, 15 May 2018 00:00:00 GMT',
413 'Expires': 'Tue, 16 May 2018 00:00:00 GMT',
# The patched CollectionReader returns `cm`, which lists a single file.
420 cm = mock.MagicMock()
421 cm.manifest_locator.return_value = "zzzzz-4zz18-zzzzzzzzzzzzzz3"
422 cm.portable_data_hash.return_value = "99999999999999999999999999999998+99"
423 cm.keys.return_value = ["file1.txt"]
424 collectionmock.return_value = cm
429 curlmock.side_effect = init
# Injected clock, frozen one day after the stored Expires value.
431 utcnow = mock.MagicMock()
432 utcnow.return_value = datetime.datetime(2018, 5, 17)
434 r = http_to_keep(api, None, "http://example.com/file1.txt", utcnow=utcnow, prefer_cached_downloads=True)
435 self.assertEqual(r, ("99999999999999999999999999999998+99", "file1.txt", 'zzzzz-4zz18-zzzzzzzzzzzzzz3',
436 'http://example.com/file1.txt', datetime.datetime(2018, 5, 17, 0, 0)))
# No network, write or metadata activity may have happened.
438 assert mockobj.perform_was_called is False
439 cm.open.assert_not_called()
440 api.collections().update.assert_not_called()
# varying_url_params: http_to_keep is called with a URL carrying KeyId,
# Signature and Expires query parameters, and those names are passed as
# varying_url_params. The returned URL is expected to be the plain
# 'http://example.com/file1.txt'. A transfer runs, nothing is rewritten, and
# the properties are updated under the plain URL.
# NOTE(review): the loop variable `prurl` is not used on any line visible in
# this listing -- presumably it keys the stored properties; confirm.
442 @mock.patch("pycurl.Curl")
443 @mock.patch("arvados.collection.CollectionReader")
444 def test_http_varying_url_params(self, collectionmock, curlmock):
# Run the scenario once per candidate URL form.
445 for prurl in ("http://example.com/file1.txt", "http://example.com/file1.txt?KeyId=123&Signature=456&Expires=789"):
446 api = mock.MagicMock()
# Canned list() result: a previously stored collection.
448 api.collections().list().execute.return_value = {
450 "uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz3",
451 "portable_data_hash": "99999999999999999999999999999998+99",
454 'Date': 'Tue, 15 May 2018 00:00:00 GMT',
455 'Expires': 'Tue, 16 May 2018 00:00:00 GMT',
# The patched CollectionReader returns `cm`, which lists a single file.
462 cm = mock.MagicMock()
463 cm.manifest_locator.return_value = "zzzzz-4zz18-zzzzzzzzzzzzzz3"
464 cm.portable_data_hash.return_value = "99999999999999999999999999999998+99"
465 cm.keys.return_value = ["file1.txt"]
466 collectionmock.return_value = cm
469 'Date': 'Tue, 17 May 2018 00:00:00 GMT',
470 'Expires': 'Tue, 19 May 2018 00:00:00 GMT',
476 curlmock.side_effect = init
# Injected clock, frozen at 2018-05-17, after the stored Expires value.
478 utcnow = mock.MagicMock()
479 utcnow.return_value = datetime.datetime(2018, 5, 17)
# The request always uses the signed URL, whichever form this iteration holds.
481 r = http_to_keep(api, None, "http://example.com/file1.txt?KeyId=123&Signature=456&Expires=789",
482 utcnow=utcnow, varying_url_params="KeyId,Signature,Expires")
483 self.assertEqual(r, ("99999999999999999999999999999998+99", "file1.txt", 'zzzzz-4zz18-zzzzzzzzzzzzzz3',
484 'http://example.com/file1.txt', datetime.datetime(2018, 5, 17, 0, 0)))
# A transfer must have run, but nothing may have been rewritten.
486 assert mockobj.perform_was_called is True
487 cm.open.assert_not_called()
# The properties are refreshed under the URL without the varying parameters.
489 api.collections().update.assert_has_calls([
490 mock.call(uuid=cm.manifest_locator(),
491 body={"collection":{"properties": {'http://example.com/file1.txt': {
492 'Date': 'Tue, 17 May 2018 00:00:00 GMT',
493 'Expires': 'Tue, 19 May 2018 00:00:00 GMT',