1 # Copyright (C) The Arvados Authors. All rights reserved.
3 # SPDX-License-Identifier: Apache-2.0
5 from future import standard_library
6 standard_library.install_aliases()
20 import arvados.collection
24 from arvados.http_to_keep import http_to_keep
26 import ruamel.yaml as yaml
# NOTE: an existing "FakeCurl" helper already serves the same purpose as this
# mock; this one was written before its author knew about it.
def __init__(self, headers=None):
    """Set up the mock's initial state.

    headers: optional mapping of response header names to values; it is
    stored on the instance as ``self.headers``. When omitted, each
    instance gets its own fresh empty dict. (A ``{}`` default would be
    evaluated once and shared by every instance created without an
    argument, so a mutation in one test could leak into another.)
    """
    # Tests in this file assert on this flag after calling http_to_keep.
    self.perform_was_called = False
    # Keep a caller-supplied mapping by reference, exactly as before;
    # only the "no argument" case changes, to a per-instance dict.
    self.headers = {} if headers is None else headers
    # Default status codes; individual tests override these attributes.
    self.get_response = 200
    self.head_response = 200
38 def setopt(self, op, *args):
41 if op == pycurl.WRITEFUNCTION:
42 self.writefn = args[0]
43 if op == pycurl.HEADERFUNCTION:
44 self.headerfn = args[0]
45 if op == pycurl.NOBODY:
47 if op == pycurl.HTTPGET:
49 if op == pycurl.HTTPHEADER:
50 self.req_headers = args[0]
52 def getinfo(self, op):
53 if op == pycurl.RESPONSE_CODE:
55 return self.head_response
57 return self.get_response
60 self.perform_was_called = True
63 self.headerfn("HTTP/1.1 {} Status\r\n".format(self.head_response))
65 self.headerfn("HTTP/1.1 {} Status\r\n".format(self.get_response))
67 for k,v in self.headers.items():
68 self.headerfn("%s: %s" % (k,v))
70 if not self.head and self.get_response == 200:
71 self.writefn(self.chunk)
74 class TestHttpToKeep(unittest.TestCase):
76 @mock.patch("pycurl.Curl")
77 @mock.patch("arvados.collection.Collection")
78 def test_http_get(self, collectionmock, curlmock):
79 api = mock.MagicMock()
81 api.collections().list().execute.return_value = {
86 cm.manifest_locator.return_value = "zzzzz-4zz18-zzzzzzzzzzzzzz3"
87 cm.portable_data_hash.return_value = "99999999999999999999999999999998+99"
88 collectionmock.return_value = cm
91 mockobj.chunk = b'abc'
94 curlmock.side_effect = init
96 utcnow = mock.MagicMock()
97 utcnow.return_value = datetime.datetime(2018, 5, 15)
99 r = http_to_keep(api, None, "http://example.com/file1.txt", utcnow=utcnow)
100 self.assertEqual(r, ("99999999999999999999999999999998+99", "file1.txt",
101 'zzzzz-4zz18-zzzzzzzzzzzzzz3', 'http://example.com/file1.txt',
102 datetime.datetime(2018, 5, 15, 0, 0)))
104 assert mockobj.url == b"http://example.com/file1.txt"
105 assert mockobj.perform_was_called is True
107 cm.open.assert_called_with("file1.txt", "wb")
108 cm.save_new.assert_called_with(name="Downloaded from http%3A%2F%2Fexample.com%2Ffile1.txt",
109 owner_uuid=None, ensure_unique_name=True)
111 api.collections().update.assert_has_calls([
112 mock.call(uuid=cm.manifest_locator(),
113 body={"collection":{"properties": {'http://example.com/file1.txt': {'Date': 'Tue, 15 May 2018 00:00:00 GMT'}}}})
117 @mock.patch("pycurl.Curl")
118 @mock.patch("arvados.collection.CollectionReader")
119 def test_http_expires(self, collectionmock, curlmock):
120 api = mock.MagicMock()
122 api.collections().list().execute.return_value = {
124 "uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz3",
125 "portable_data_hash": "99999999999999999999999999999998+99",
127 'http://example.com/file1.txt': {
128 'Date': 'Tue, 15 May 2018 00:00:00 GMT',
129 'Expires': 'Tue, 17 May 2018 00:00:00 GMT'
135 cm = mock.MagicMock()
136 cm.manifest_locator.return_value = "zzzzz-4zz18-zzzzzzzzzzzzzz3"
137 cm.portable_data_hash.return_value = "99999999999999999999999999999998+99"
138 cm.keys.return_value = ["file1.txt"]
139 collectionmock.return_value = cm
142 mockobj.chunk = b'abc'
145 curlmock.side_effect = init
147 utcnow = mock.MagicMock()
148 utcnow.return_value = datetime.datetime(2018, 5, 16)
150 r = http_to_keep(api, None, "http://example.com/file1.txt", utcnow=utcnow)
151 self.assertEqual(r, ("99999999999999999999999999999998+99", "file1.txt",
152 'zzzzz-4zz18-zzzzzzzzzzzzzz3', 'http://example.com/file1.txt',
153 datetime.datetime(2018, 5, 16, 0, 0)))
155 assert mockobj.perform_was_called is False
158 @mock.patch("pycurl.Curl")
159 @mock.patch("arvados.collection.CollectionReader")
160 def test_http_cache_control(self, collectionmock, curlmock):
161 api = mock.MagicMock()
163 api.collections().list().execute.return_value = {
165 "uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz3",
166 "portable_data_hash": "99999999999999999999999999999998+99",
168 'http://example.com/file1.txt': {
169 'Date': 'Tue, 15 May 2018 00:00:00 GMT',
170 'Cache-Control': 'max-age=172800'
176 cm = mock.MagicMock()
177 cm.manifest_locator.return_value = "zzzzz-4zz18-zzzzzzzzzzzzzz3"
178 cm.portable_data_hash.return_value = "99999999999999999999999999999998+99"
179 cm.keys.return_value = ["file1.txt"]
180 collectionmock.return_value = cm
183 mockobj.chunk = b'abc'
186 curlmock.side_effect = init
188 utcnow = mock.MagicMock()
189 utcnow.return_value = datetime.datetime(2018, 5, 16)
191 r = http_to_keep(api, None, "http://example.com/file1.txt", utcnow=utcnow)
192 self.assertEqual(r, ("99999999999999999999999999999998+99", "file1.txt", 'zzzzz-4zz18-zzzzzzzzzzzzzz3',
193 'http://example.com/file1.txt', datetime.datetime(2018, 5, 16, 0, 0)))
195 assert mockobj.perform_was_called is False
198 @mock.patch("pycurl.Curl")
199 @mock.patch("arvados.collection.Collection")
200 def test_http_expired(self, collectionmock, curlmock):
201 api = mock.MagicMock()
203 api.collections().list().execute.return_value = {
205 "uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz3",
206 "portable_data_hash": "99999999999999999999999999999998+99",
208 'http://example.com/file1.txt': {
209 'Date': 'Tue, 15 May 2018 00:00:00 GMT',
210 'Expires': 'Wed, 16 May 2018 00:00:00 GMT'
216 cm = mock.MagicMock()
217 cm.manifest_locator.return_value = "zzzzz-4zz18-zzzzzzzzzzzzzz4"
218 cm.portable_data_hash.return_value = "99999999999999999999999999999997+99"
219 cm.keys.return_value = ["file1.txt"]
220 collectionmock.return_value = cm
222 mockobj = CurlMock({'Date': 'Thu, 17 May 2018 00:00:00 GMT'})
223 mockobj.chunk = b'def'
226 curlmock.side_effect = init
228 utcnow = mock.MagicMock()
229 utcnow.return_value = datetime.datetime(2018, 5, 17)
231 r = http_to_keep(api, None, "http://example.com/file1.txt", utcnow=utcnow)
232 self.assertEqual(r, ("99999999999999999999999999999997+99", "file1.txt",
233 'zzzzz-4zz18-zzzzzzzzzzzzzz4',
234 'http://example.com/file1.txt', datetime.datetime(2018, 5, 17, 0, 0)))
237 assert mockobj.url == b"http://example.com/file1.txt"
238 assert mockobj.perform_was_called is True
240 cm.open.assert_called_with("file1.txt", "wb")
241 cm.save_new.assert_called_with(name="Downloaded from http%3A%2F%2Fexample.com%2Ffile1.txt",
242 owner_uuid=None, ensure_unique_name=True)
244 api.collections().update.assert_has_calls([
245 mock.call(uuid=cm.manifest_locator(),
246 body={"collection":{"properties": {'http://example.com/file1.txt': {'Date': 'Thu, 17 May 2018 00:00:00 GMT'}}}})
250 @mock.patch("pycurl.Curl")
251 @mock.patch("arvados.collection.CollectionReader")
252 def test_http_etag(self, collectionmock, curlmock):
253 api = mock.MagicMock()
255 api.collections().list().execute.return_value = {
257 "uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz3",
258 "portable_data_hash": "99999999999999999999999999999998+99",
260 'http://example.com/file1.txt': {
261 'Date': 'Tue, 15 May 2018 00:00:00 GMT',
262 'Expires': 'Wed, 16 May 2018 00:00:00 GMT',
269 cm = mock.MagicMock()
270 cm.manifest_locator.return_value = "zzzzz-4zz18-zzzzzzzzzzzzzz3"
271 cm.portable_data_hash.return_value = "99999999999999999999999999999998+99"
272 cm.keys.return_value = ["file1.txt"]
273 collectionmock.return_value = cm
276 'Date': 'Thu, 17 May 2018 00:00:00 GMT',
277 'Expires': 'Sat, 19 May 2018 00:00:00 GMT',
283 curlmock.side_effect = init
285 utcnow = mock.MagicMock()
286 utcnow.return_value = datetime.datetime(2018, 5, 17)
288 r = http_to_keep(api, None, "http://example.com/file1.txt", utcnow=utcnow)
289 self.assertEqual(r, ("99999999999999999999999999999998+99", "file1.txt",
290 'zzzzz-4zz18-zzzzzzzzzzzzzz3', 'http://example.com/file1.txt',
291 datetime.datetime(2018, 5, 17, 0, 0)))
293 cm.open.assert_not_called()
295 api.collections().update.assert_has_calls([
296 mock.call(uuid=cm.manifest_locator(),
297 body={"collection":{"properties": {'http://example.com/file1.txt': {
298 'Date': 'Thu, 17 May 2018 00:00:00 GMT',
299 'Expires': 'Sat, 19 May 2018 00:00:00 GMT',
304 @mock.patch("pycurl.Curl")
305 @mock.patch("arvados.collection.Collection")
306 def test_http_content_disp(self, collectionmock, curlmock):
307 api = mock.MagicMock()
309 api.collections().list().execute.return_value = {
313 cm = mock.MagicMock()
314 cm.manifest_locator.return_value = "zzzzz-4zz18-zzzzzzzzzzzzzz3"
315 cm.portable_data_hash.return_value = "99999999999999999999999999999998+99"
316 collectionmock.return_value = cm
318 mockobj = CurlMock({"Content-Disposition": "attachment; filename=file1.txt"})
319 mockobj.chunk = "abc"
322 curlmock.side_effect = init
324 utcnow = mock.MagicMock()
325 utcnow.return_value = datetime.datetime(2018, 5, 15)
327 r = http_to_keep(api, None, "http://example.com/download?fn=/file1.txt", utcnow=utcnow)
328 self.assertEqual(r, ("99999999999999999999999999999998+99", "file1.txt",
329 'zzzzz-4zz18-zzzzzzzzzzzzzz3',
330 'http://example.com/download?fn=/file1.txt',
331 datetime.datetime(2018, 5, 15, 0, 0)))
333 assert mockobj.url == b"http://example.com/download?fn=/file1.txt"
335 cm.open.assert_called_with("file1.txt", "wb")
336 cm.save_new.assert_called_with(name="Downloaded from http%3A%2F%2Fexample.com%2Fdownload%3Ffn%3D%2Ffile1.txt",
337 owner_uuid=None, ensure_unique_name=True)
339 api.collections().update.assert_has_calls([
340 mock.call(uuid=cm.manifest_locator(),
341 body={"collection":{"properties": {"http://example.com/download?fn=/file1.txt": {'Date': 'Tue, 15 May 2018 00:00:00 GMT'}}}})
344 @mock.patch("pycurl.Curl")
345 @mock.patch("arvados.collection.CollectionReader")
346 def test_http_etag_if_none_match(self, collectionmock, curlmock):
347 api = mock.MagicMock()
349 api.collections().list().execute.return_value = {
351 "uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz3",
352 "portable_data_hash": "99999999999999999999999999999998+99",
354 'http://example.com/file1.txt': {
355 'Date': 'Tue, 15 May 2018 00:00:00 GMT',
356 'Expires': 'Tue, 16 May 2018 00:00:00 GMT',
363 cm = mock.MagicMock()
364 cm.manifest_locator.return_value = "zzzzz-4zz18-zzzzzzzzzzzzzz3"
365 cm.portable_data_hash.return_value = "99999999999999999999999999999998+99"
366 cm.keys.return_value = ["file1.txt"]
367 collectionmock.return_value = cm
370 'Date': 'Tue, 17 May 2018 00:00:00 GMT',
371 'Expires': 'Tue, 19 May 2018 00:00:00 GMT',
375 mockobj.head_response = 403
376 mockobj.get_response = 304
379 curlmock.side_effect = init
381 utcnow = mock.MagicMock()
382 utcnow.return_value = datetime.datetime(2018, 5, 17)
384 r = http_to_keep(api, None, "http://example.com/file1.txt", utcnow=utcnow)
385 self.assertEqual(r, ("99999999999999999999999999999998+99", "file1.txt",
386 'zzzzz-4zz18-zzzzzzzzzzzzzz3', 'http://example.com/file1.txt',
387 datetime.datetime(2018, 5, 17, 0, 0)))
389 print(mockobj.req_headers)
390 assert mockobj.req_headers == ["Accept: application/octet-stream", "If-None-Match: \"123456\""]
391 cm.open.assert_not_called()
393 api.collections().update.assert_has_calls([
394 mock.call(uuid=cm.manifest_locator(),
395 body={"collection":{"properties": {'http://example.com/file1.txt': {
396 'Date': 'Tue, 17 May 2018 00:00:00 GMT',
397 'Expires': 'Tue, 19 May 2018 00:00:00 GMT',
402 @mock.patch("pycurl.Curl")
403 @mock.patch("arvados.collection.CollectionReader")
404 def test_http_prefer_cached_downloads(self, collectionmock, curlmock):
405 api = mock.MagicMock()
407 api.collections().list().execute.return_value = {
409 "uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz3",
410 "portable_data_hash": "99999999999999999999999999999998+99",
412 'http://example.com/file1.txt': {
413 'Date': 'Tue, 15 May 2018 00:00:00 GMT',
414 'Expires': 'Tue, 16 May 2018 00:00:00 GMT',
421 cm = mock.MagicMock()
422 cm.manifest_locator.return_value = "zzzzz-4zz18-zzzzzzzzzzzzzz3"
423 cm.portable_data_hash.return_value = "99999999999999999999999999999998+99"
424 cm.keys.return_value = ["file1.txt"]
425 collectionmock.return_value = cm
430 curlmock.side_effect = init
432 utcnow = mock.MagicMock()
433 utcnow.return_value = datetime.datetime(2018, 5, 17)
435 r = http_to_keep(api, None, "http://example.com/file1.txt", utcnow=utcnow, prefer_cached_downloads=True)
436 self.assertEqual(r, ("99999999999999999999999999999998+99", "file1.txt", 'zzzzz-4zz18-zzzzzzzzzzzzzz3',
437 'http://example.com/file1.txt', datetime.datetime(2018, 5, 17, 0, 0)))
439 assert mockobj.perform_was_called is False
440 cm.open.assert_not_called()
441 api.collections().update.assert_not_called()
443 @mock.patch("pycurl.Curl")
444 @mock.patch("arvados.collection.CollectionReader")
445 def test_http_varying_url_params(self, collectionmock, curlmock):
446 for prurl in ("http://example.com/file1.txt", "http://example.com/file1.txt?KeyId=123&Signature=456&Expires=789"):
447 api = mock.MagicMock()
449 api.collections().list().execute.return_value = {
451 "uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz3",
452 "portable_data_hash": "99999999999999999999999999999998+99",
455 'Date': 'Tue, 15 May 2018 00:00:00 GMT',
456 'Expires': 'Tue, 16 May 2018 00:00:00 GMT',
463 cm = mock.MagicMock()
464 cm.manifest_locator.return_value = "zzzzz-4zz18-zzzzzzzzzzzzzz3"
465 cm.portable_data_hash.return_value = "99999999999999999999999999999998+99"
466 cm.keys.return_value = ["file1.txt"]
467 collectionmock.return_value = cm
470 'Date': 'Tue, 17 May 2018 00:00:00 GMT',
471 'Expires': 'Tue, 19 May 2018 00:00:00 GMT',
477 curlmock.side_effect = init
479 utcnow = mock.MagicMock()
480 utcnow.return_value = datetime.datetime(2018, 5, 17)
482 r = http_to_keep(api, None, "http://example.com/file1.txt?KeyId=123&Signature=456&Expires=789",
483 utcnow=utcnow, varying_url_params="KeyId,Signature,Expires")
484 self.assertEqual(r, ("99999999999999999999999999999998+99", "file1.txt", 'zzzzz-4zz18-zzzzzzzzzzzzzz3',
485 'http://example.com/file1.txt', datetime.datetime(2018, 5, 17, 0, 0)))
487 assert mockobj.perform_was_called is True
488 cm.open.assert_not_called()
490 api.collections().update.assert_has_calls([
491 mock.call(uuid=cm.manifest_locator(),
492 body={"collection":{"properties": {'http://example.com/file1.txt': {
493 'Date': 'Tue, 17 May 2018 00:00:00 GMT',
494 'Expires': 'Tue, 19 May 2018 00:00:00 GMT',