1 # Copyright (C) The Arvados Authors. All rights reserved.
3 # SPDX-License-Identifier: Apache-2.0
15 from unittest import mock
18 import arvados.collection
22 from arvados.http_to_keep import http_to_keep
24 # Turns out there was already "FakeCurl" that serves the same purpose, but
25 # I wrote this before I knew that. Whoops.
def __init__(self, headers=None):
    """Set up a fake curl handle that replays a canned HTTP response.

    headers -- optional mapping of response header names to values that
               the mock feeds to the header callback.  When omitted, a
               fresh empty dict is created for this instance.  A dict
               supplied by the caller is stored as-is (not copied).
    """
    # A literal `{}` default is evaluated once at definition time and would
    # be shared by every instance built without arguments; create a new
    # dict per instance instead.
    self.headers = {} if headers is None else headers
    # Flipped to True once the simulated transfer runs; tests assert on it.
    self.perform_was_called = False
    # Status codes used when building the response; tests override them.
    self.get_response = 200
    self.head_response = 200
# Record the pycurl options that are set on this mock handle.
#
# op   -- a pycurl option constant
# args -- the option value(s); only args[0] is read
#
# The WRITEFUNCTION and HEADERFUNCTION callbacks are kept so the mock can
# feed them a canned response later, and HTTPHEADER keeps the outgoing
# request headers so a test can inspect them (test_http_etag_if_none_match
# asserts on req_headers).
# NOTE(review): the bodies of the NOBODY and HTTPGET branches are not visible
# in this excerpt; presumably they toggle self.head, which the response code
# reads -- confirm against the full file.
34 def setopt(self, op, *args):
37 if op == pycurl.WRITEFUNCTION:
38 self.writefn = args[0]
39 if op == pycurl.HEADERFUNCTION:
40 self.headerfn = args[0]
41 if op == pycurl.NOBODY:
43 if op == pycurl.HTTPGET:
45 if op == pycurl.HTTPHEADER:
46 self.req_headers = args[0]
# Answer pycurl getinfo() queries; RESPONSE_CODE is the only option handled
# in the visible lines, and it yields head_response or get_response.
# NOTE(review): the condition selecting between the two returns is not
# visible in this excerpt; presumably it is self.head -- confirm.
48 def getinfo(self, op):
49 if op == pycurl.RESPONSE_CODE:
51 return self.head_response
53 return self.get_response
# NOTE(review): the def line for this body is not visible in this excerpt;
# judging by perform_was_called it is presumably the mock's perform() --
# confirm against the full file.
# Simulates the transfer: records that it ran, passes a status line built
# from head_response or get_response to the stored header callback, then
# replays each canned header as "Name: value".
56 self.perform_was_called = True
59 self.headerfn("HTTP/1.1 {} Status\r\n".format(self.head_response))
61 self.headerfn("HTTP/1.1 {} Status\r\n".format(self.get_response))
63 for k,v in self.headers.items():
64 self.headerfn("%s: %s" % (k,v))
# The body chunk is only delivered when self.head is false and the GET
# status is 200.
66 if not self.head and self.get_response == 200:
67 self.writefn(self.chunk)
70 class TestHttpToKeep(unittest.TestCase):
# Download path: http_to_keep goes through the mocked curl handle and saves
# the result through the mocked collection.
# With the clock pinned to 2018-05-15 this checks that:
#   * the result is (portable data hash, "file1.txt", collection UUID, URL,
#     pinned datetime), using the values configured on cm;
#   * mockobj.url holds the URL as bytes and the simulated transfer ran;
#   * the file was opened as "file1.txt" in "wb" mode and save_new() got a
#     name built from the percent-encoded URL;
#   * collections().update() was called with a Date property for the URL.
# Decorators apply bottom-up: collectionmock is the patched
# arvados.collection.Collection, curlmock is the patched pycurl.Curl.
72 @mock.patch("pycurl.Curl")
73 @mock.patch("arvados.collection.Collection")
74 def test_http_get(self, collectionmock, curlmock):
75 api = mock.MagicMock()
77 api.collections().list().execute.return_value = {
82 cm.manifest_locator.return_value = "zzzzz-4zz18-zzzzzzzzzzzzzz3"
83 cm.portable_data_hash.return_value = "99999999999999999999999999999998+99"
84 collectionmock.return_value = cm
87 mockobj.chunk = b'abc'
90 curlmock.side_effect = init
# Pin "now" so the Date written to the collection properties is predictable.
92 utcnow = mock.MagicMock()
93 utcnow.return_value = datetime.datetime(2018, 5, 15)
95 r = http_to_keep(api, None, "http://example.com/file1.txt", utcnow=utcnow)
96 self.assertEqual(r, ("99999999999999999999999999999998+99", "file1.txt",
97 'zzzzz-4zz18-zzzzzzzzzzzzzz3', 'http://example.com/file1.txt',
98 datetime.datetime(2018, 5, 15, 0, 0)))
100 assert mockobj.url == b"http://example.com/file1.txt"
101 assert mockobj.perform_was_called is True
103 cm.open.assert_called_with("file1.txt", "wb")
104 cm.save_new.assert_called_with(name="Downloaded from http%3A%2F%2Fexample.com%2Ffile1.txt",
105 owner_uuid=None, ensure_unique_name=True)
107 api.collections().update.assert_has_calls([
108 mock.call(uuid=cm.manifest_locator(),
109 body={"collection":{"properties": {'http://example.com/file1.txt': {'Date': 'Tue, 15 May 2018 00:00:00 GMT'}}}})
# Cache hit via Expires: the mocked collections().list() result carries an
# entry keyed by the URL with Date 15 May 2018 and Expires 17 May 2018, and
# the clock is pinned to 16 May 2018 (before the expiry).
# Checks that the listed collection's hash/UUID are returned and that the
# simulated curl transfer never ran.
# collectionmock here is the patched arvados.collection.CollectionReader.
# NOTE(review): the Expires literal reads 'Tue, 17 May 2018' while
# test_http_expired labels the same date 'Thu' -- the weekday looks wrong;
# confirm the date parser ignores it.
113 @mock.patch("pycurl.Curl")
114 @mock.patch("arvados.collection.CollectionReader")
115 def test_http_expires(self, collectionmock, curlmock):
116 api = mock.MagicMock()
118 api.collections().list().execute.return_value = {
120 "uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz3",
121 "portable_data_hash": "99999999999999999999999999999998+99",
123 'http://example.com/file1.txt': {
124 'Date': 'Tue, 15 May 2018 00:00:00 GMT',
125 'Expires': 'Tue, 17 May 2018 00:00:00 GMT'
131 cm = mock.MagicMock()
132 cm.manifest_locator.return_value = "zzzzz-4zz18-zzzzzzzzzzzzzz3"
133 cm.portable_data_hash.return_value = "99999999999999999999999999999998+99"
134 cm.keys.return_value = ["file1.txt"]
135 collectionmock.return_value = cm
138 mockobj.chunk = b'abc'
141 curlmock.side_effect = init
# "Now" is one day after Date and one day before Expires.
143 utcnow = mock.MagicMock()
144 utcnow.return_value = datetime.datetime(2018, 5, 16)
146 r = http_to_keep(api, None, "http://example.com/file1.txt", utcnow=utcnow)
147 self.assertEqual(r, ("99999999999999999999999999999998+99", "file1.txt",
148 'zzzzz-4zz18-zzzzzzzzzzzzzz3', 'http://example.com/file1.txt',
149 datetime.datetime(2018, 5, 16, 0, 0)))
151 assert mockobj.perform_was_called is False
# Cache hit via Cache-Control: the listed entry for the URL has Date
# 15 May 2018 and 'max-age=172800' (172800 s = 2 days), and the clock is
# pinned to 16 May 2018, i.e. inside that window.
# Checks that the listed collection's hash/UUID are returned and that the
# simulated curl transfer never ran.
# collectionmock here is the patched arvados.collection.CollectionReader.
154 @mock.patch("pycurl.Curl")
155 @mock.patch("arvados.collection.CollectionReader")
156 def test_http_cache_control(self, collectionmock, curlmock):
157 api = mock.MagicMock()
159 api.collections().list().execute.return_value = {
161 "uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz3",
162 "portable_data_hash": "99999999999999999999999999999998+99",
164 'http://example.com/file1.txt': {
165 'Date': 'Tue, 15 May 2018 00:00:00 GMT',
166 'Cache-Control': 'max-age=172800'
172 cm = mock.MagicMock()
173 cm.manifest_locator.return_value = "zzzzz-4zz18-zzzzzzzzzzzzzz3"
174 cm.portable_data_hash.return_value = "99999999999999999999999999999998+99"
175 cm.keys.return_value = ["file1.txt"]
176 collectionmock.return_value = cm
179 mockobj.chunk = b'abc'
182 curlmock.side_effect = init
# "Now" is one day after the recorded Date.
184 utcnow = mock.MagicMock()
185 utcnow.return_value = datetime.datetime(2018, 5, 16)
187 r = http_to_keep(api, None, "http://example.com/file1.txt", utcnow=utcnow)
188 self.assertEqual(r, ("99999999999999999999999999999998+99", "file1.txt", 'zzzzz-4zz18-zzzzzzzzzzzzzz3',
189 'http://example.com/file1.txt', datetime.datetime(2018, 5, 16, 0, 0)))
191 assert mockobj.perform_was_called is False
# Expired cache entry: the listed entry for the URL has Expires 16 May 2018
# but the clock is pinned to 17 May 2018, so the URL is fetched again.
# The curl mock answers with a Date header of 17 May 2018 and body b'def';
# the patched arvados.collection.Collection returns a collection with a
# different UUID (...zz4) and hash (...97+99) than the listed one.
# Checks that the result uses the new collection's values, the simulated
# transfer ran, the file was opened as "file1.txt"/"wb", save_new() was
# called, and the properties update carries the new Date.
194 @mock.patch("pycurl.Curl")
195 @mock.patch("arvados.collection.Collection")
196 def test_http_expired(self, collectionmock, curlmock):
197 api = mock.MagicMock()
199 api.collections().list().execute.return_value = {
201 "uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz3",
202 "portable_data_hash": "99999999999999999999999999999998+99",
204 'http://example.com/file1.txt': {
205 'Date': 'Tue, 15 May 2018 00:00:00 GMT',
206 'Expires': 'Wed, 16 May 2018 00:00:00 GMT'
212 cm = mock.MagicMock()
213 cm.manifest_locator.return_value = "zzzzz-4zz18-zzzzzzzzzzzzzz4"
214 cm.portable_data_hash.return_value = "99999999999999999999999999999997+99"
215 cm.keys.return_value = ["file1.txt"]
216 collectionmock.return_value = cm
218 mockobj = CurlMock({'Date': 'Thu, 17 May 2018 00:00:00 GMT'})
219 mockobj.chunk = b'def'
222 curlmock.side_effect = init
# "Now" is one day past the recorded Expires.
224 utcnow = mock.MagicMock()
225 utcnow.return_value = datetime.datetime(2018, 5, 17)
227 r = http_to_keep(api, None, "http://example.com/file1.txt", utcnow=utcnow)
228 self.assertEqual(r, ("99999999999999999999999999999997+99", "file1.txt",
229 'zzzzz-4zz18-zzzzzzzzzzzzzz4',
230 'http://example.com/file1.txt', datetime.datetime(2018, 5, 17, 0, 0)))
233 assert mockobj.url == b"http://example.com/file1.txt"
234 assert mockobj.perform_was_called is True
236 cm.open.assert_called_with("file1.txt", "wb")
237 cm.save_new.assert_called_with(name="Downloaded from http%3A%2F%2Fexample.com%2Ffile1.txt",
238 owner_uuid=None, ensure_unique_name=True)
240 api.collections().update.assert_has_calls([
241 mock.call(uuid=cm.manifest_locator(),
242 body={"collection":{"properties": {'http://example.com/file1.txt': {'Date': 'Thu, 17 May 2018 00:00:00 GMT'}}}})
# Stale entry that is revalidated instead of re-downloaded: the listed entry
# has Expires 16 May 2018 and the clock is pinned to 17 May 2018.
# Checks that the originally listed collection (UUID ...zz3, hash ...98+99)
# is returned, that no file was opened for writing, and that the properties
# update carries the later Date (17 May) and Expires (19 May) values.
# NOTE(review): the ETag values the test name refers to, and the line that
# builds the curl mock around the second Date/Expires pair, are not visible
# in this excerpt; presumably that pair is the canned response headers --
# confirm against the full file.
246 @mock.patch("pycurl.Curl")
247 @mock.patch("arvados.collection.CollectionReader")
248 def test_http_etag(self, collectionmock, curlmock):
249 api = mock.MagicMock()
251 api.collections().list().execute.return_value = {
253 "uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz3",
254 "portable_data_hash": "99999999999999999999999999999998+99",
256 'http://example.com/file1.txt': {
257 'Date': 'Tue, 15 May 2018 00:00:00 GMT',
258 'Expires': 'Wed, 16 May 2018 00:00:00 GMT',
265 cm = mock.MagicMock()
266 cm.manifest_locator.return_value = "zzzzz-4zz18-zzzzzzzzzzzzzz3"
267 cm.portable_data_hash.return_value = "99999999999999999999999999999998+99"
268 cm.keys.return_value = ["file1.txt"]
269 collectionmock.return_value = cm
272 'Date': 'Thu, 17 May 2018 00:00:00 GMT',
273 'Expires': 'Sat, 19 May 2018 00:00:00 GMT',
279 curlmock.side_effect = init
# "Now" is one day past the recorded Expires.
281 utcnow = mock.MagicMock()
282 utcnow.return_value = datetime.datetime(2018, 5, 17)
284 r = http_to_keep(api, None, "http://example.com/file1.txt", utcnow=utcnow)
285 self.assertEqual(r, ("99999999999999999999999999999998+99", "file1.txt",
286 'zzzzz-4zz18-zzzzzzzzzzzzzz3', 'http://example.com/file1.txt',
287 datetime.datetime(2018, 5, 17, 0, 0)))
# Nothing should have been written: the existing content is reused.
289 cm.open.assert_not_called()
291 api.collections().update.assert_has_calls([
292 mock.call(uuid=cm.manifest_locator(),
293 body={"collection":{"properties": {'http://example.com/file1.txt': {
294 'Date': 'Thu, 17 May 2018 00:00:00 GMT',
295 'Expires': 'Sat, 19 May 2018 00:00:00 GMT',
# File naming with a Content-Disposition header: the curl mock answers with
# "attachment; filename=file1.txt" for a URL whose path is /download.
# Checks that the returned file name and the name passed to cm.open() are
# "file1.txt" (presumably taken from the header -- confirm), that
# save_new() gets a name built from the percent-encoded full URL, and that
# the properties update is keyed by the full URL including its query string.
# NOTE(review): chunk is a str ("abc") here, whereas the other tests use
# bytes (b'abc') and the file is opened in "wb" mode; with a mocked
# collection this is not exercised, but bytes would be consistent.
300 @mock.patch("pycurl.Curl")
301 @mock.patch("arvados.collection.Collection")
302 def test_http_content_disp(self, collectionmock, curlmock):
303 api = mock.MagicMock()
305 api.collections().list().execute.return_value = {
309 cm = mock.MagicMock()
310 cm.manifest_locator.return_value = "zzzzz-4zz18-zzzzzzzzzzzzzz3"
311 cm.portable_data_hash.return_value = "99999999999999999999999999999998+99"
312 collectionmock.return_value = cm
314 mockobj = CurlMock({"Content-Disposition": "attachment; filename=file1.txt"})
315 mockobj.chunk = "abc"
318 curlmock.side_effect = init
# Pin "now" so the Date written to the collection properties is predictable.
320 utcnow = mock.MagicMock()
321 utcnow.return_value = datetime.datetime(2018, 5, 15)
323 r = http_to_keep(api, None, "http://example.com/download?fn=/file1.txt", utcnow=utcnow)
324 self.assertEqual(r, ("99999999999999999999999999999998+99", "file1.txt",
325 'zzzzz-4zz18-zzzzzzzzzzzzzz3',
326 'http://example.com/download?fn=/file1.txt',
327 datetime.datetime(2018, 5, 15, 0, 0)))
329 assert mockobj.url == b"http://example.com/download?fn=/file1.txt"
331 cm.open.assert_called_with("file1.txt", "wb")
332 cm.save_new.assert_called_with(name="Downloaded from http%3A%2F%2Fexample.com%2Fdownload%3Ffn%3D%2Ffile1.txt",
333 owner_uuid=None, ensure_unique_name=True)
335 api.collections().update.assert_has_calls([
336 mock.call(uuid=cm.manifest_locator(),
337 body={"collection":{"properties": {"http://example.com/download?fn=/file1.txt": {'Date': 'Tue, 15 May 2018 00:00:00 GMT'}}}})
# Conditional GET: the listed entry has Expires 16 May 2018, the clock is
# pinned to 17 May 2018, and the curl mock is set to answer HEAD with 403
# and GET with 304.
# Checks that the request headers sent were exactly an Accept header plus
# If-None-Match: "123456", that no file was opened for writing, that the
# originally listed collection is returned, and that the properties update
# carries the later Date/Expires values.
# NOTE(review): the weekday labels in this test's date literals ('Tue, 16
# May', 'Tue, 17 May', 'Tue, 19 May') disagree with test_http_expired and
# test_http_etag, which label the same dates Wed/Thu/Sat -- confirm the
# date parser ignores the weekday.
340 @mock.patch("pycurl.Curl")
341 @mock.patch("arvados.collection.CollectionReader")
342 def test_http_etag_if_none_match(self, collectionmock, curlmock):
343 api = mock.MagicMock()
345 api.collections().list().execute.return_value = {
347 "uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz3",
348 "portable_data_hash": "99999999999999999999999999999998+99",
350 'http://example.com/file1.txt': {
351 'Date': 'Tue, 15 May 2018 00:00:00 GMT',
352 'Expires': 'Tue, 16 May 2018 00:00:00 GMT',
359 cm = mock.MagicMock()
360 cm.manifest_locator.return_value = "zzzzz-4zz18-zzzzzzzzzzzzzz3"
361 cm.portable_data_hash.return_value = "99999999999999999999999999999998+99"
362 cm.keys.return_value = ["file1.txt"]
363 collectionmock.return_value = cm
366 'Date': 'Tue, 17 May 2018 00:00:00 GMT',
367 'Expires': 'Tue, 19 May 2018 00:00:00 GMT',
# HEAD is answered with 403 and GET with 304.
371 mockobj.head_response = 403
372 mockobj.get_response = 304
375 curlmock.side_effect = init
377 utcnow = mock.MagicMock()
378 utcnow.return_value = datetime.datetime(2018, 5, 17)
380 r = http_to_keep(api, None, "http://example.com/file1.txt", utcnow=utcnow)
381 self.assertEqual(r, ("99999999999999999999999999999998+99", "file1.txt",
382 'zzzzz-4zz18-zzzzzzzzzzzzzz3', 'http://example.com/file1.txt',
383 datetime.datetime(2018, 5, 17, 0, 0)))
# NOTE(review): leftover debugging output; the assert below already reports
# the value on failure.
385 print(mockobj.req_headers)
386 assert mockobj.req_headers == ["Accept: application/octet-stream", "If-None-Match: \"123456\""]
387 cm.open.assert_not_called()
389 api.collections().update.assert_has_calls([
390 mock.call(uuid=cm.manifest_locator(),
391 body={"collection":{"properties": {'http://example.com/file1.txt': {
392 'Date': 'Tue, 17 May 2018 00:00:00 GMT',
393 'Expires': 'Tue, 19 May 2018 00:00:00 GMT',
# prefer_cached_downloads=True: the listed entry has Expires 16 May 2018 and
# the clock is pinned to 17 May 2018, yet the listed collection is returned
# as-is.
# Checks that the simulated curl transfer never ran, no file was opened for
# writing, and collections().update() was not called at all.
# NOTE(review): the Expires literal reads 'Tue, 16 May 2018' while
# test_http_expired labels the same date 'Wed' -- weekday looks wrong.
398 @mock.patch("pycurl.Curl")
399 @mock.patch("arvados.collection.CollectionReader")
400 def test_http_prefer_cached_downloads(self, collectionmock, curlmock):
401 api = mock.MagicMock()
403 api.collections().list().execute.return_value = {
405 "uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz3",
406 "portable_data_hash": "99999999999999999999999999999998+99",
408 'http://example.com/file1.txt': {
409 'Date': 'Tue, 15 May 2018 00:00:00 GMT',
410 'Expires': 'Tue, 16 May 2018 00:00:00 GMT',
417 cm = mock.MagicMock()
418 cm.manifest_locator.return_value = "zzzzz-4zz18-zzzzzzzzzzzzzz3"
419 cm.portable_data_hash.return_value = "99999999999999999999999999999998+99"
420 cm.keys.return_value = ["file1.txt"]
421 collectionmock.return_value = cm
426 curlmock.side_effect = init
# "Now" is one day past the recorded Expires.
428 utcnow = mock.MagicMock()
429 utcnow.return_value = datetime.datetime(2018, 5, 17)
431 r = http_to_keep(api, None, "http://example.com/file1.txt", utcnow=utcnow, prefer_cached_downloads=True)
432 self.assertEqual(r, ("99999999999999999999999999999998+99", "file1.txt", 'zzzzz-4zz18-zzzzzzzzzzzzzz3',
433 'http://example.com/file1.txt', datetime.datetime(2018, 5, 17, 0, 0)))
# No network activity and no metadata write are expected.
435 assert mockobj.perform_was_called is False
436 cm.open.assert_not_called()
437 api.collections().update.assert_not_called()
# varying_url_params: runs the scenario for two values of prurl (the plain
# URL and the URL with KeyId/Signature/Expires query parameters).
# http_to_keep is called with the parameterised URL and
# varying_url_params="KeyId,Signature,Expires"; the expected result and the
# expected properties update both use the URL without those parameters.
# Also checks that the simulated transfer ran and that no file was opened
# for writing.
# NOTE(review): the line where prurl is consumed, and the end of the final
# assert_has_calls(...) call, are not visible in this excerpt.
# NOTE(review): the weekday labels ('Tue, 16/17/19 May 2018') disagree with
# test_http_expired and test_http_etag (Wed/Thu/Sat for the same dates).
439 @mock.patch("pycurl.Curl")
440 @mock.patch("arvados.collection.CollectionReader")
441 def test_http_varying_url_params(self, collectionmock, curlmock):
442 for prurl in ("http://example.com/file1.txt", "http://example.com/file1.txt?KeyId=123&Signature=456&Expires=789"):
443 api = mock.MagicMock()
445 api.collections().list().execute.return_value = {
447 "uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz3",
448 "portable_data_hash": "99999999999999999999999999999998+99",
451 'Date': 'Tue, 15 May 2018 00:00:00 GMT',
452 'Expires': 'Tue, 16 May 2018 00:00:00 GMT',
459 cm = mock.MagicMock()
460 cm.manifest_locator.return_value = "zzzzz-4zz18-zzzzzzzzzzzzzz3"
461 cm.portable_data_hash.return_value = "99999999999999999999999999999998+99"
462 cm.keys.return_value = ["file1.txt"]
463 collectionmock.return_value = cm
466 'Date': 'Tue, 17 May 2018 00:00:00 GMT',
467 'Expires': 'Tue, 19 May 2018 00:00:00 GMT',
473 curlmock.side_effect = init
475 utcnow = mock.MagicMock()
476 utcnow.return_value = datetime.datetime(2018, 5, 17)
478 r = http_to_keep(api, None, "http://example.com/file1.txt?KeyId=123&Signature=456&Expires=789",
479 utcnow=utcnow, varying_url_params="KeyId,Signature,Expires")
480 self.assertEqual(r, ("99999999999999999999999999999998+99", "file1.txt", 'zzzzz-4zz18-zzzzzzzzzzzzzz3',
481 'http://example.com/file1.txt', datetime.datetime(2018, 5, 17, 0, 0)))
483 assert mockobj.perform_was_called is True
484 cm.open.assert_not_called()
486 api.collections().update.assert_has_calls([
487 mock.call(uuid=cm.manifest_locator(),
488 body={"collection":{"properties": {'http://example.com/file1.txt': {
489 'Date': 'Tue, 17 May 2018 00:00:00 GMT',
490 'Expires': 'Tue, 19 May 2018 00:00:00 GMT',