1 # Copyright (C) The Arvados Authors. All rights reserved.
3 # SPDX-License-Identifier: Apache-2.0
5 from future import standard_library
6 standard_library.install_aliases()
18 from unittest import mock
21 import arvados.collection
25 from arvados.http_to_keep import http_to_keep
27 import ruamel.yaml as yaml
29 # Turns out there was already "FakeCurl" that serves the same purpose, but
30 # I wrote this before I knew that. Whoops.
def __init__(self, headers=None):
    """Initialise the fake curl handle with canned response data.

    headers: optional mapping of response header names to values.  Other
        code in this file iterates it with ``.items()`` and passes each
        pair to the registered header callback.  When omitted, every
        instance gets its own empty dict.
    """
    self.perform_was_called = False
    # A literal ``{}`` default would be created once and shared by every
    # instance constructed without arguments; build a fresh dict instead.
    self.headers = {} if headers is None else headers
    # Status codes reported for GET and HEAD requests respectively; tests
    # overwrite these attributes to simulate other responses.
    self.get_response = 200
    self.head_response = 200
39 def setopt(self, op, *args):
# Stand-in for the curl handle's setopt(): for the option constants the
# tests rely on, remember the first extra argument (the body-write
# callback, the header callback, or the list of request headers).
# NOTE(review): this extract omits short lines, so the statements under
# the NOBODY and HTTPGET checks are not visible here -- presumably they
# switch the `self.head` flag that is read further down in this class;
# confirm against the full file.
42 if op == pycurl.WRITEFUNCTION:
43 self.writefn = args[0]
44 if op == pycurl.HEADERFUNCTION:
45 self.headerfn = args[0]
46 if op == pycurl.NOBODY:
48 if op == pycurl.HTTPGET:
50 if op == pycurl.HTTPHEADER:
51 self.req_headers = args[0]
53 def getinfo(self, op):
# Stand-in for the curl handle's getinfo(): when asked for RESPONSE_CODE,
# report one of the two canned status codes set up in __init__.
# NOTE(review): the condition that chooses between head_response and
# get_response is on lines not visible in this extract -- presumably the
# `self.head` flag; confirm against the full file.
54 if op == pycurl.RESPONSE_CODE:
56 return self.head_response
58 return self.get_response
# NOTE(review): the `def` line for these statements is not visible in this
# extract; they look like the body of the mock's perform() method -- confirm.
# Visible behaviour: record that a transfer was attempted, emit a status
# line and every canned header through the header callback, then hand the
# canned body chunk to the write callback.
61 self.perform_was_called = True
# Status line: one variant uses the HEAD status code, the other the GET
# status code (the selecting condition is not visible here).
64 self.headerfn("HTTP/1.1 {} Status\r\n".format(self.head_response))
66 self.headerfn("HTTP/1.1 {} Status\r\n".format(self.get_response))
# Each canned header is passed to the callback as "Name: value".
68 for k,v in self.headers.items():
69 self.headerfn("%s: %s" % (k,v))
# A body is delivered only for a non-HEAD request answered with 200.
71 if not self.head and self.get_response == 200:
72 self.writefn(self.chunk)
75 class TestHttpToKeep(unittest.TestCase):
77 @mock.patch("pycurl.Curl")
78 @mock.patch("arvados.collection.Collection")
79 def test_http_get(self, collectionmock, curlmock):
# Plain download scenario.  The patch decorators apply bottom-up, so
# `collectionmock` replaces arvados.collection.Collection and `curlmock`
# replaces pycurl.Curl.
# Expected outcome, as asserted below: the URL is fetched through the curl
# mock, "file1.txt" is opened for binary writing in the mocked collection,
# the collection is saved under a "Downloaded from ..." name, and the
# collection properties are updated with a Date entry for the URL.
# NOTE(review): the value assigned to the collections().list() result and
# the construction of `cm`, `mockobj` and `init` are on lines not visible
# in this extract.
80 api = mock.MagicMock()
82 api.collections().list().execute.return_value = {
87 cm.manifest_locator.return_value = "zzzzz-4zz18-zzzzzzzzzzzzzz3"
88 cm.portable_data_hash.return_value = "99999999999999999999999999999998+99"
89 collectionmock.return_value = cm
92 mockobj.chunk = b'abc'
95 curlmock.side_effect = init
# Freeze "now" so the expected timestamp and Date property are deterministic.
97 utcnow = mock.MagicMock()
98 utcnow.return_value = datetime.datetime(2018, 5, 15)
100 r = http_to_keep(api, None, "http://example.com/file1.txt", utcnow=utcnow)
101 self.assertEqual(r, ("99999999999999999999999999999998+99", "file1.txt",
102 'zzzzz-4zz18-zzzzzzzzzzzzzz3', 'http://example.com/file1.txt',
103 datetime.datetime(2018, 5, 15, 0, 0)))
105 assert mockobj.url == b"http://example.com/file1.txt"
106 assert mockobj.perform_was_called is True
108 cm.open.assert_called_with("file1.txt", "wb")
109 cm.save_new.assert_called_with(name="Downloaded from http%3A%2F%2Fexample.com%2Ffile1.txt",
110 owner_uuid=None, ensure_unique_name=True)
112 api.collections().update.assert_has_calls([
113 mock.call(uuid=cm.manifest_locator(),
114 body={"collection":{"properties": {'http://example.com/file1.txt': {'Date': 'Tue, 15 May 2018 00:00:00 GMT'}}}})
118 @mock.patch("pycurl.Curl")
119 @mock.patch("arvados.collection.CollectionReader")
120 def test_http_expires(self, collectionmock, curlmock):
# Cached-copy scenario driven by an Expires header: the collection listing
# describes an existing collection whose stored headers for the URL carry
# Date = 15 May 2018 and Expires = 17 May 2018, and "now" is frozen at
# 16 May 2018.  The test asserts that the cached collection is returned
# and that the curl mock was never performed.
# NOTE(review): 'Tue, 17 May 2018' has a weekday that does not match the
# date (other tests in this file write 'Thu, 17 May 2018'); it is fixture
# data, so left untouched here.
121 api = mock.MagicMock()
123 api.collections().list().execute.return_value = {
125 "uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz3",
126 "portable_data_hash": "99999999999999999999999999999998+99",
128 'http://example.com/file1.txt': {
129 'Date': 'Tue, 15 May 2018 00:00:00 GMT',
130 'Expires': 'Tue, 17 May 2018 00:00:00 GMT'
136 cm = mock.MagicMock()
137 cm.manifest_locator.return_value = "zzzzz-4zz18-zzzzzzzzzzzzzz3"
138 cm.portable_data_hash.return_value = "99999999999999999999999999999998+99"
139 cm.keys.return_value = ["file1.txt"]
140 collectionmock.return_value = cm
143 mockobj.chunk = b'abc'
146 curlmock.side_effect = init
148 utcnow = mock.MagicMock()
149 utcnow.return_value = datetime.datetime(2018, 5, 16)
151 r = http_to_keep(api, None, "http://example.com/file1.txt", utcnow=utcnow)
152 self.assertEqual(r, ("99999999999999999999999999999998+99", "file1.txt",
153 'zzzzz-4zz18-zzzzzzzzzzzzzz3', 'http://example.com/file1.txt',
154 datetime.datetime(2018, 5, 16, 0, 0)))
# No HTTP request should have been made.
156 assert mockobj.perform_was_called is False
159 @mock.patch("pycurl.Curl")
160 @mock.patch("arvados.collection.CollectionReader")
161 def test_http_cache_control(self, collectionmock, curlmock):
# Cached-copy scenario driven by Cache-Control: the stored headers for the
# URL carry Date = 15 May 2018 and max-age=172800 (172800 s = 2 days), and
# "now" is frozen at 16 May 2018.  The test asserts that the cached
# collection is returned and that the curl mock was never performed.
162 api = mock.MagicMock()
164 api.collections().list().execute.return_value = {
166 "uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz3",
167 "portable_data_hash": "99999999999999999999999999999998+99",
169 'http://example.com/file1.txt': {
170 'Date': 'Tue, 15 May 2018 00:00:00 GMT',
171 'Cache-Control': 'max-age=172800'
177 cm = mock.MagicMock()
178 cm.manifest_locator.return_value = "zzzzz-4zz18-zzzzzzzzzzzzzz3"
179 cm.portable_data_hash.return_value = "99999999999999999999999999999998+99"
180 cm.keys.return_value = ["file1.txt"]
181 collectionmock.return_value = cm
184 mockobj.chunk = b'abc'
187 curlmock.side_effect = init
189 utcnow = mock.MagicMock()
190 utcnow.return_value = datetime.datetime(2018, 5, 16)
192 r = http_to_keep(api, None, "http://example.com/file1.txt", utcnow=utcnow)
193 self.assertEqual(r, ("99999999999999999999999999999998+99", "file1.txt", 'zzzzz-4zz18-zzzzzzzzzzzzzz3',
194 'http://example.com/file1.txt', datetime.datetime(2018, 5, 16, 0, 0)))
# No HTTP request should have been made.
196 assert mockobj.perform_was_called is False
199 @mock.patch("pycurl.Curl")
200 @mock.patch("arvados.collection.Collection")
201 def test_http_expired(self, collectionmock, curlmock):
# Expired-cache scenario: the stored headers for the URL carry
# Expires = 16 May 2018 while "now" is frozen at 17 May 2018.  The test
# asserts that a new download happens: the curl mock is performed,
# "file1.txt" is opened for binary writing, a new collection is saved, and
# the properties are updated with the Date header of the mocked response.
# The returned uuid / portable data hash are those of the collection mock
# (ending ...zz4 / ...97), not those of the listed cached collection.
202 api = mock.MagicMock()
204 api.collections().list().execute.return_value = {
206 "uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz3",
207 "portable_data_hash": "99999999999999999999999999999998+99",
209 'http://example.com/file1.txt': {
210 'Date': 'Tue, 15 May 2018 00:00:00 GMT',
211 'Expires': 'Wed, 16 May 2018 00:00:00 GMT'
217 cm = mock.MagicMock()
218 cm.manifest_locator.return_value = "zzzzz-4zz18-zzzzzzzzzzzzzz4"
219 cm.portable_data_hash.return_value = "99999999999999999999999999999997+99"
220 cm.keys.return_value = ["file1.txt"]
221 collectionmock.return_value = cm
# The mocked server answers with a fresh Date header and a new body chunk.
223 mockobj = CurlMock({'Date': 'Thu, 17 May 2018 00:00:00 GMT'})
224 mockobj.chunk = b'def'
227 curlmock.side_effect = init
229 utcnow = mock.MagicMock()
230 utcnow.return_value = datetime.datetime(2018, 5, 17)
232 r = http_to_keep(api, None, "http://example.com/file1.txt", utcnow=utcnow)
233 self.assertEqual(r, ("99999999999999999999999999999997+99", "file1.txt",
234 'zzzzz-4zz18-zzzzzzzzzzzzzz4',
235 'http://example.com/file1.txt', datetime.datetime(2018, 5, 17, 0, 0)))
238 assert mockobj.url == b"http://example.com/file1.txt"
239 assert mockobj.perform_was_called is True
241 cm.open.assert_called_with("file1.txt", "wb")
242 cm.save_new.assert_called_with(name="Downloaded from http%3A%2F%2Fexample.com%2Ffile1.txt",
243 owner_uuid=None, ensure_unique_name=True)
245 api.collections().update.assert_has_calls([
246 mock.call(uuid=cm.manifest_locator(),
247 body={"collection":{"properties": {'http://example.com/file1.txt': {'Date': 'Thu, 17 May 2018 00:00:00 GMT'}}}})
251 @mock.patch("pycurl.Curl")
252 @mock.patch("arvados.collection.CollectionReader")
253 def test_http_etag(self, collectionmock, curlmock):
# Revalidation scenario: the stored headers for the URL carry
# Expires = 16 May 2018 while "now" is frozen at 17 May 2018.  The test
# asserts that the existing collection is still returned (same uuid and
# portable data hash as the listing), that no file is opened for writing,
# and that the collection properties are updated with a newer
# Date / Expires pair.
# NOTE(review): the Etag entries implied by the test name, and the
# construction of `mockobj` around the second Date/Expires pair, are on
# lines not visible in this extract -- confirm against the full file.
254 api = mock.MagicMock()
256 api.collections().list().execute.return_value = {
258 "uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz3",
259 "portable_data_hash": "99999999999999999999999999999998+99",
261 'http://example.com/file1.txt': {
262 'Date': 'Tue, 15 May 2018 00:00:00 GMT',
263 'Expires': 'Wed, 16 May 2018 00:00:00 GMT',
270 cm = mock.MagicMock()
271 cm.manifest_locator.return_value = "zzzzz-4zz18-zzzzzzzzzzzzzz3"
272 cm.portable_data_hash.return_value = "99999999999999999999999999999998+99"
273 cm.keys.return_value = ["file1.txt"]
274 collectionmock.return_value = cm
277 'Date': 'Thu, 17 May 2018 00:00:00 GMT',
278 'Expires': 'Sat, 19 May 2018 00:00:00 GMT',
284 curlmock.side_effect = init
286 utcnow = mock.MagicMock()
287 utcnow.return_value = datetime.datetime(2018, 5, 17)
289 r = http_to_keep(api, None, "http://example.com/file1.txt", utcnow=utcnow)
290 self.assertEqual(r, ("99999999999999999999999999999998+99", "file1.txt",
291 'zzzzz-4zz18-zzzzzzzzzzzzzz3', 'http://example.com/file1.txt',
292 datetime.datetime(2018, 5, 17, 0, 0)))
# The cached file must not be rewritten.
294 cm.open.assert_not_called()
296 api.collections().update.assert_has_calls([
297 mock.call(uuid=cm.manifest_locator(),
298 body={"collection":{"properties": {'http://example.com/file1.txt': {
299 'Date': 'Thu, 17 May 2018 00:00:00 GMT',
300 'Expires': 'Sat, 19 May 2018 00:00:00 GMT',
305 @mock.patch("pycurl.Curl")
306 @mock.patch("arvados.collection.Collection")
307 def test_http_content_disp(self, collectionmock, curlmock):
# Content-Disposition scenario: the URL path is "/download" with a query
# string, and the mocked response carries
# "Content-Disposition: attachment; filename=file1.txt".  The test asserts
# that "file1.txt" is the file name both in the returned tuple and in the
# cm.open() call, while the full URL (query string included) is used for
# the collection name and as the properties key.
# NOTE(review): the value assigned to the collections().list() result is
# on lines not visible in this extract.
308 api = mock.MagicMock()
310 api.collections().list().execute.return_value = {
314 cm = mock.MagicMock()
315 cm.manifest_locator.return_value = "zzzzz-4zz18-zzzzzzzzzzzzzz3"
316 cm.portable_data_hash.return_value = "99999999999999999999999999999998+99"
317 collectionmock.return_value = cm
319 mockobj = CurlMock({"Content-Disposition": "attachment; filename=file1.txt"})
# NOTE(review): this chunk is a str, whereas the other tests in this file
# use bytes (b'abc' / b'def'); it only works because the collection is a
# MagicMock -- consider making it bytes for consistency.
320 mockobj.chunk = "abc"
323 curlmock.side_effect = init
325 utcnow = mock.MagicMock()
326 utcnow.return_value = datetime.datetime(2018, 5, 15)
328 r = http_to_keep(api, None, "http://example.com/download?fn=/file1.txt", utcnow=utcnow)
329 self.assertEqual(r, ("99999999999999999999999999999998+99", "file1.txt",
330 'zzzzz-4zz18-zzzzzzzzzzzzzz3',
331 'http://example.com/download?fn=/file1.txt',
332 datetime.datetime(2018, 5, 15, 0, 0)))
334 assert mockobj.url == b"http://example.com/download?fn=/file1.txt"
336 cm.open.assert_called_with("file1.txt", "wb")
337 cm.save_new.assert_called_with(name="Downloaded from http%3A%2F%2Fexample.com%2Fdownload%3Ffn%3D%2Ffile1.txt",
338 owner_uuid=None, ensure_unique_name=True)
340 api.collections().update.assert_has_calls([
341 mock.call(uuid=cm.manifest_locator(),
342 body={"collection":{"properties": {"http://example.com/download?fn=/file1.txt": {'Date': 'Tue, 15 May 2018 00:00:00 GMT'}}}})
345 @mock.patch("pycurl.Curl")
346 @mock.patch("arvados.collection.CollectionReader")
347 def test_http_etag_if_none_match(self, collectionmock, curlmock):
# Conditional-GET scenario: the curl mock answers HEAD requests with 403
# and GET requests with 304.  The test asserts that the request headers
# sent include If-None-Match: "123456", that the existing collection is
# returned without any file being opened for writing, and that the
# collection properties are updated with a newer Date / Expires pair.
# NOTE(review): the stored Etag value and the construction of `mockobj`
# are on lines not visible in this extract.
# NOTE(review): the weekday names in 'Tue, 16 May 2018', 'Tue, 17 May 2018'
# and 'Tue, 19 May 2018' do not match the dates (other tests in this file
# use Wed / Thu / Sat for them); they are fixture data, so left untouched.
348 api = mock.MagicMock()
350 api.collections().list().execute.return_value = {
352 "uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz3",
353 "portable_data_hash": "99999999999999999999999999999998+99",
355 'http://example.com/file1.txt': {
356 'Date': 'Tue, 15 May 2018 00:00:00 GMT',
357 'Expires': 'Tue, 16 May 2018 00:00:00 GMT',
364 cm = mock.MagicMock()
365 cm.manifest_locator.return_value = "zzzzz-4zz18-zzzzzzzzzzzzzz3"
366 cm.portable_data_hash.return_value = "99999999999999999999999999999998+99"
367 cm.keys.return_value = ["file1.txt"]
368 collectionmock.return_value = cm
371 'Date': 'Tue, 17 May 2018 00:00:00 GMT',
372 'Expires': 'Tue, 19 May 2018 00:00:00 GMT',
# HEAD is refused, GET reports "not modified".
376 mockobj.head_response = 403
377 mockobj.get_response = 304
380 curlmock.side_effect = init
382 utcnow = mock.MagicMock()
383 utcnow.return_value = datetime.datetime(2018, 5, 17)
385 r = http_to_keep(api, None, "http://example.com/file1.txt", utcnow=utcnow)
386 self.assertEqual(r, ("99999999999999999999999999999998+99", "file1.txt",
387 'zzzzz-4zz18-zzzzzzzzzzzzzz3', 'http://example.com/file1.txt',
388 datetime.datetime(2018, 5, 17, 0, 0)))
# NOTE(review): debugging print left in the test; consider removing it.
390 print(mockobj.req_headers)
391 assert mockobj.req_headers == ["Accept: application/octet-stream", "If-None-Match: \"123456\""]
392 cm.open.assert_not_called()
394 api.collections().update.assert_has_calls([
395 mock.call(uuid=cm.manifest_locator(),
396 body={"collection":{"properties": {'http://example.com/file1.txt': {
397 'Date': 'Tue, 17 May 2018 00:00:00 GMT',
398 'Expires': 'Tue, 19 May 2018 00:00:00 GMT',
403 @mock.patch("pycurl.Curl")
404 @mock.patch("arvados.collection.CollectionReader")
405 def test_http_prefer_cached_downloads(self, collectionmock, curlmock):
# prefer_cached_downloads=True scenario: the stored headers for the URL
# carry Expires = 16 May 2018 and "now" is frozen at 17 May 2018, yet the
# test asserts that the cached collection is returned with no HTTP
# request performed, no file opened, and no properties update issued.
# NOTE(review): the construction of `mockobj` and `init` is on lines not
# visible in this extract; the weekday in 'Tue, 16 May 2018' does not
# match the date (fixture data, left untouched).
406 api = mock.MagicMock()
408 api.collections().list().execute.return_value = {
410 "uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz3",
411 "portable_data_hash": "99999999999999999999999999999998+99",
413 'http://example.com/file1.txt': {
414 'Date': 'Tue, 15 May 2018 00:00:00 GMT',
415 'Expires': 'Tue, 16 May 2018 00:00:00 GMT',
422 cm = mock.MagicMock()
423 cm.manifest_locator.return_value = "zzzzz-4zz18-zzzzzzzzzzzzzz3"
424 cm.portable_data_hash.return_value = "99999999999999999999999999999998+99"
425 cm.keys.return_value = ["file1.txt"]
426 collectionmock.return_value = cm
431 curlmock.side_effect = init
433 utcnow = mock.MagicMock()
434 utcnow.return_value = datetime.datetime(2018, 5, 17)
436 r = http_to_keep(api, None, "http://example.com/file1.txt", utcnow=utcnow, prefer_cached_downloads=True)
437 self.assertEqual(r, ("99999999999999999999999999999998+99", "file1.txt", 'zzzzz-4zz18-zzzzzzzzzzzzzz3',
438 'http://example.com/file1.txt', datetime.datetime(2018, 5, 17, 0, 0)))
# Nothing should have touched the network, the collection, or the API.
440 assert mockobj.perform_was_called is False
441 cm.open.assert_not_called()
442 api.collections().update.assert_not_called()
444 @mock.patch("pycurl.Curl")
445 @mock.patch("arvados.collection.CollectionReader")
446 def test_http_varying_url_params(self, collectionmock, curlmock):
# varying_url_params scenario: http_to_keep is called with a URL carrying
# KeyId / Signature / Expires query parameters and with
# varying_url_params="KeyId,Signature,Expires".  The test asserts that the
# URL in the returned tuple, and the properties key in the update call, is
# the URL without those parameters, that the curl mock is performed, and
# that no file is opened for writing.
# The scenario is repeated for two values of `prurl` (without and with the
# query string).
# NOTE(review): where `prurl` is used is on a line not visible in this
# extract -- presumably as the key of the stored properties; the visible
# http_to_keep call always passes the literal signed URL.  Confirm against
# the full file.  The end of this method is also not visible here.
447 for prurl in ("http://example.com/file1.txt", "http://example.com/file1.txt?KeyId=123&Signature=456&Expires=789"):
448 api = mock.MagicMock()
450 api.collections().list().execute.return_value = {
452 "uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz3",
453 "portable_data_hash": "99999999999999999999999999999998+99",
456 'Date': 'Tue, 15 May 2018 00:00:00 GMT',
457 'Expires': 'Tue, 16 May 2018 00:00:00 GMT',
464 cm = mock.MagicMock()
465 cm.manifest_locator.return_value = "zzzzz-4zz18-zzzzzzzzzzzzzz3"
466 cm.portable_data_hash.return_value = "99999999999999999999999999999998+99"
467 cm.keys.return_value = ["file1.txt"]
468 collectionmock.return_value = cm
471 'Date': 'Tue, 17 May 2018 00:00:00 GMT',
472 'Expires': 'Tue, 19 May 2018 00:00:00 GMT',
478 curlmock.side_effect = init
480 utcnow = mock.MagicMock()
481 utcnow.return_value = datetime.datetime(2018, 5, 17)
483 r = http_to_keep(api, None, "http://example.com/file1.txt?KeyId=123&Signature=456&Expires=789",
484 utcnow=utcnow, varying_url_params="KeyId,Signature,Expires")
485 self.assertEqual(r, ("99999999999999999999999999999998+99", "file1.txt", 'zzzzz-4zz18-zzzzzzzzzzzzzz3',
486 'http://example.com/file1.txt', datetime.datetime(2018, 5, 17, 0, 0)))
488 assert mockobj.perform_was_called is True
489 cm.open.assert_not_called()
491 api.collections().update.assert_has_calls([
492 mock.call(uuid=cm.manifest_locator(),
493 body={"collection":{"properties": {'http://example.com/file1.txt': {
494 'Date': 'Tue, 17 May 2018 00:00:00 GMT',
495 'Expires': 'Tue, 19 May 2018 00:00:00 GMT',