1 # Copyright (C) The Arvados Authors. All rights reserved.
3 # SPDX-License-Identifier: Apache-2.0
5 from future import standard_library
6 standard_library.install_aliases()
20 import arvados.collection
24 from arvados.http_to_keep import http_to_keep
26 import ruamel.yaml as yaml
28 # Turns out there was already "FakeCurl" that serves the same purpose, but
29 # I wrote this before I knew that. Whoops.
31 def __init__(self, headers = {}):
# Set up the mock's initial state: no transfer performed yet, the given
# mapping kept as the headers to replay, and both the GET and HEAD status
# codes defaulting to 200.
# NOTE(review): `headers = {}` is a mutable default shared by every call that
# omits the argument. The visible code only reads it, but `headers=None` with
# a fresh dict per instance would be safer.
32 self.perform_was_called = False
33 self.headers = headers
34 self.get_response = 200
35 self.head_response = 200
38 def setopt(self, op, *args):
# Record the pycurl options the code under test sets on this mock handle.
# The first positional argument is stored for WRITEFUNCTION (as `writefn`),
# HEADERFUNCTION (as `headerfn`) and HTTPHEADER (as `req_headers`).
# NOTE(review): the bodies of the NOBODY and HTTPGET branches are not visible
# in this excerpt, so what they store is not documented here.
41 if op == pycurl.WRITEFUNCTION:
42 self.writefn = args[0]
43 if op == pycurl.HEADERFUNCTION:
44 self.headerfn = args[0]
45 if op == pycurl.NOBODY:
47 if op == pycurl.HTTPGET:
49 if op == pycurl.HTTPHEADER:
50 self.req_headers = args[0]
52 def getinfo(self, op):
# Answer a pycurl.RESPONSE_CODE query with one of the configured status
# codes: `head_response` or `get_response`.
# NOTE(review): the condition that chooses between the two returns is not
# visible in this excerpt; presumably it depends on whether the current
# request is a HEAD or a GET. Confirm against the full file.
53 if op == pycurl.RESPONSE_CODE:
55 return self.head_response
57 return self.get_response
60 self.perform_was_called = True
63 self.headerfn("HTTP/1.1 {} Status\r\n".format(self.head_response))
65 self.headerfn("HTTP/1.1 {} Status\r\n".format(self.get_response))
67 for k,v in self.headers.items():
68 self.headerfn("%s: %s" % (k,v))
70 if not self.head and self.get_response == 200:
71 self.writefn(self.chunk)
74 class TestHttpToKeep(unittest.TestCase):
76 @mock.patch("pycurl.Curl")
77 @mock.patch("arvados.collection.Collection")
78 def test_http_get(self, collectionmock, curlmock):
# Download path: http_to_keep is expected to perform the transfer, write
# "file1.txt" into a new collection, save it under a name derived from the
# URL, and record a Date property for the URL.
# pycurl.Curl and arvados.collection.Collection are both replaced by mocks.
# NOTE(review): the value returned by the mocked collections().list() call is
# not visible in this excerpt.
79 api = mock.MagicMock()
81 api.collections().list().execute.return_value = {
86 cm.manifest_locator.return_value = "zzzzz-4zz18-zzzzzzzzzzzzzz3"
87 cm.portable_data_hash.return_value = "99999999999999999999999999999998+99"
88 collectionmock.return_value = cm
91 mockobj.chunk = b'abc'
94 curlmock.side_effect = init
# Freeze "now" at 15 May 2018; the Date property asserted below carries the
# same day.
96 utcnow = mock.MagicMock()
97 utcnow.return_value = datetime.datetime(2018, 5, 15)
99 r = http_to_keep(api, None, "http://example.com/file1.txt", utcnow=utcnow)
100 self.assertEqual(r, ("99999999999999999999999999999998+99", "file1.txt"))
102 assert mockobj.url == b"http://example.com/file1.txt"
103 assert mockobj.perform_was_called is True
105 cm.open.assert_called_with("file1.txt", "wb")
106 cm.save_new.assert_called_with(name="Downloaded from http%3A%2F%2Fexample.com%2Ffile1.txt",
107 owner_uuid=None, ensure_unique_name=True)
109 api.collections().update.assert_has_calls([
110 mock.call(uuid=cm.manifest_locator(),
111 body={"collection":{"properties": {'http://example.com/file1.txt': {'Date': 'Tue, 15 May 2018 00:00:00 GMT'}}}})
115 @mock.patch("pycurl.Curl")
116 @mock.patch("arvados.collection.CollectionReader")
117 def test_http_expires(self, collectionmock, curlmock):
# Cache hit via Expires: the mocked collection listing carries properties for
# the URL with Date 15 May 2018 and Expires 17 May 2018, and "now" is frozen
# at 16 May 2018. The test expects the existing collection's portable data
# hash to be returned and no curl transfer to be performed.
118 api = mock.MagicMock()
120 api.collections().list().execute.return_value = {
122 "uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz3",
123 "portable_data_hash": "99999999999999999999999999999998+99",
125 'http://example.com/file1.txt': {
126 'Date': 'Tue, 15 May 2018 00:00:00 GMT',
127 'Expires': 'Tue, 17 May 2018 00:00:00 GMT'
133 cm = mock.MagicMock()
134 cm.manifest_locator.return_value = "zzzzz-4zz18-zzzzzzzzzzzzzz3"
135 cm.portable_data_hash.return_value = "99999999999999999999999999999998+99"
136 cm.keys.return_value = ["file1.txt"]
137 collectionmock.return_value = cm
140 mockobj.chunk = b'abc'
143 curlmock.side_effect = init
# Freeze "now" one day before the stored Expires value.
145 utcnow = mock.MagicMock()
146 utcnow.return_value = datetime.datetime(2018, 5, 16)
148 r = http_to_keep(api, None, "http://example.com/file1.txt", utcnow=utcnow)
149 self.assertEqual(r, ("99999999999999999999999999999998+99", "file1.txt"))
151 assert mockobj.perform_was_called is False
154 @mock.patch("pycurl.Curl")
155 @mock.patch("arvados.collection.CollectionReader")
156 def test_http_cache_control(self, collectionmock, curlmock):
# Cache hit via Cache-Control: the stored properties for the URL carry
# Date 15 May 2018 and 'max-age=172800' (172800 seconds is two days), and
# "now" is frozen at 16 May 2018. The test expects the existing collection's
# portable data hash to be returned and no curl transfer to be performed.
157 api = mock.MagicMock()
159 api.collections().list().execute.return_value = {
161 "uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz3",
162 "portable_data_hash": "99999999999999999999999999999998+99",
164 'http://example.com/file1.txt': {
165 'Date': 'Tue, 15 May 2018 00:00:00 GMT',
166 'Cache-Control': 'max-age=172800'
172 cm = mock.MagicMock()
173 cm.manifest_locator.return_value = "zzzzz-4zz18-zzzzzzzzzzzzzz3"
174 cm.portable_data_hash.return_value = "99999999999999999999999999999998+99"
175 cm.keys.return_value = ["file1.txt"]
176 collectionmock.return_value = cm
179 mockobj.chunk = b'abc'
182 curlmock.side_effect = init
# Freeze "now" one day after the stored Date, inside the two-day max-age.
184 utcnow = mock.MagicMock()
185 utcnow.return_value = datetime.datetime(2018, 5, 16)
187 r = http_to_keep(api, None, "http://example.com/file1.txt", utcnow=utcnow)
188 self.assertEqual(r, ("99999999999999999999999999999998+99", "file1.txt"))
190 assert mockobj.perform_was_called is False
193 @mock.patch("pycurl.Curl")
194 @mock.patch("arvados.collection.Collection")
195 def test_http_expired(self, collectionmock, curlmock):
# Expired cache entry: the stored properties say Expires 16 May 2018 while
# "now" is frozen at 17 May 2018. The test expects a fresh transfer, the new
# content written to "file1.txt" in a new collection (the mocked Collection,
# whose uuid ends in 4 and hash in 97), and a Date property taken from the
# mocked response header.
196 api = mock.MagicMock()
198 api.collections().list().execute.return_value = {
200 "uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz3",
201 "portable_data_hash": "99999999999999999999999999999998+99",
203 'http://example.com/file1.txt': {
204 'Date': 'Tue, 15 May 2018 00:00:00 GMT',
205 'Expires': 'Wed, 16 May 2018 00:00:00 GMT'
211 cm = mock.MagicMock()
212 cm.manifest_locator.return_value = "zzzzz-4zz18-zzzzzzzzzzzzzz4"
213 cm.portable_data_hash.return_value = "99999999999999999999999999999997+99"
214 cm.keys.return_value = ["file1.txt"]
215 collectionmock.return_value = cm
# The mocked server answers with a Date header one day past the stored Expires.
217 mockobj = CurlMock({'Date': 'Thu, 17 May 2018 00:00:00 GMT'})
218 mockobj.chunk = b'def'
221 curlmock.side_effect = init
223 utcnow = mock.MagicMock()
224 utcnow.return_value = datetime.datetime(2018, 5, 17)
226 r = http_to_keep(api, None, "http://example.com/file1.txt", utcnow=utcnow)
# The result must identify the newly saved collection, not the stale one.
227 self.assertEqual(r, ("99999999999999999999999999999997+99", "file1.txt"))
229 assert mockobj.url == b"http://example.com/file1.txt"
230 assert mockobj.perform_was_called is True
232 cm.open.assert_called_with("file1.txt", "wb")
233 cm.save_new.assert_called_with(name="Downloaded from http%3A%2F%2Fexample.com%2Ffile1.txt",
234 owner_uuid=None, ensure_unique_name=True)
236 api.collections().update.assert_has_calls([
237 mock.call(uuid=cm.manifest_locator(),
238 body={"collection":{"properties": {'http://example.com/file1.txt': {'Date': 'Thu, 17 May 2018 00:00:00 GMT'}}}})
242 @mock.patch("pycurl.Curl")
243 @mock.patch("arvados.collection.CollectionReader")
244 def test_http_etag(self, collectionmock, curlmock):
# Expired entry that is still current: the stored Expires (16 May 2018) is
# before the frozen "now" (17 May 2018), yet the test expects the existing
# collection's hash to be returned, no file to be opened for writing, and the
# collection properties to be updated with the new Date/Expires values.
# NOTE(review): the ETag values named by the test are on lines not visible in
# this excerpt; presumably the stored and returned ETags match. Confirm
# against the full file.
245 api = mock.MagicMock()
247 api.collections().list().execute.return_value = {
249 "uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz3",
250 "portable_data_hash": "99999999999999999999999999999998+99",
252 'http://example.com/file1.txt': {
253 'Date': 'Tue, 15 May 2018 00:00:00 GMT',
254 'Expires': 'Wed, 16 May 2018 00:00:00 GMT',
261 cm = mock.MagicMock()
262 cm.manifest_locator.return_value = "zzzzz-4zz18-zzzzzzzzzzzzzz3"
263 cm.portable_data_hash.return_value = "99999999999999999999999999999998+99"
264 cm.keys.return_value = ["file1.txt"]
265 collectionmock.return_value = cm
268 'Date': 'Thu, 17 May 2018 00:00:00 GMT',
269 'Expires': 'Sat, 19 May 2018 00:00:00 GMT',
275 curlmock.side_effect = init
277 utcnow = mock.MagicMock()
278 utcnow.return_value = datetime.datetime(2018, 5, 17)
280 r = http_to_keep(api, None, "http://example.com/file1.txt", utcnow=utcnow)
281 self.assertEqual(r, ("99999999999999999999999999999998+99", "file1.txt"))
# No new content may be written when the cached copy is reused.
283 cm.open.assert_not_called()
285 api.collections().update.assert_has_calls([
286 mock.call(uuid=cm.manifest_locator(),
287 body={"collection":{"properties": {'http://example.com/file1.txt': {
288 'Date': 'Thu, 17 May 2018 00:00:00 GMT',
289 'Expires': 'Sat, 19 May 2018 00:00:00 GMT',
294 @mock.patch("pycurl.Curl")
295 @mock.patch("arvados.collection.Collection")
296 def test_http_content_disp(self, collectionmock, curlmock):
# Filename from Content-Disposition: the URL is ".../download?fn=/file1.txt",
# and the mocked response carries
# "Content-Disposition: attachment; filename=file1.txt". The test expects the
# stored file to be named "file1.txt", while the collection name and the
# properties key are built from the full URL.
297 api = mock.MagicMock()
299 api.collections().list().execute.return_value = {
303 cm = mock.MagicMock()
304 cm.manifest_locator.return_value = "zzzzz-4zz18-zzzzzzzzzzzzzz3"
305 cm.portable_data_hash.return_value = "99999999999999999999999999999998+99"
306 collectionmock.return_value = cm
308 mockobj = CurlMock({"Content-Disposition": "attachment; filename=file1.txt"})
# NOTE(review): this chunk is a str, whereas the other tests in this file use
# bytes (b'abc' / b'def'). Confirm whether the difference is intentional.
309 mockobj.chunk = "abc"
312 curlmock.side_effect = init
314 utcnow = mock.MagicMock()
315 utcnow.return_value = datetime.datetime(2018, 5, 15)
317 r = http_to_keep(api, None, "http://example.com/download?fn=/file1.txt", utcnow=utcnow)
318 self.assertEqual(r, ("99999999999999999999999999999998+99", "file1.txt"))
320 assert mockobj.url == b"http://example.com/download?fn=/file1.txt"
322 cm.open.assert_called_with("file1.txt", "wb")
323 cm.save_new.assert_called_with(name="Downloaded from http%3A%2F%2Fexample.com%2Fdownload%3Ffn%3D%2Ffile1.txt",
324 owner_uuid=None, ensure_unique_name=True)
326 api.collections().update.assert_has_calls([
327 mock.call(uuid=cm.manifest_locator(),
328 body={"collection":{"properties": {"http://example.com/download?fn=/file1.txt": {'Date': 'Tue, 15 May 2018 00:00:00 GMT'}}}})
331 @mock.patch("pycurl.Curl")
332 @mock.patch("arvados.collection.CollectionReader")
333 def test_http_etag_if_none_match(self, collectionmock, curlmock):
# Conditional GET: the mock answers HEAD with 403 and GET with 304. The test
# expects the request headers to include If-None-Match with the value
# "123456", the existing collection's hash to be returned, no file to be
# opened for writing, and the properties to be updated with the new
# Date/Expires values.
# NOTE(review): the weekday names in these fixture dates ('Tue, 16 May',
# 'Tue, 17 May', 'Tue, 19 May') disagree with the same dates elsewhere in this
# file ('Wed, 16 May', 'Thu, 17 May', 'Sat, 19 May'). Confirm whether the date
# parser ignores the weekday.
334 api = mock.MagicMock()
336 api.collections().list().execute.return_value = {
338 "uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz3",
339 "portable_data_hash": "99999999999999999999999999999998+99",
341 'http://example.com/file1.txt': {
342 'Date': 'Tue, 15 May 2018 00:00:00 GMT',
343 'Expires': 'Tue, 16 May 2018 00:00:00 GMT',
350 cm = mock.MagicMock()
351 cm.manifest_locator.return_value = "zzzzz-4zz18-zzzzzzzzzzzzzz3"
352 cm.portable_data_hash.return_value = "99999999999999999999999999999998+99"
353 cm.keys.return_value = ["file1.txt"]
354 collectionmock.return_value = cm
357 'Date': 'Tue, 17 May 2018 00:00:00 GMT',
358 'Expires': 'Tue, 19 May 2018 00:00:00 GMT',
# HEAD is refused (403); the GET that follows reports 304.
362 mockobj.head_response = 403
363 mockobj.get_response = 304
366 curlmock.side_effect = init
368 utcnow = mock.MagicMock()
369 utcnow.return_value = datetime.datetime(2018, 5, 17)
371 r = http_to_keep(api, None, "http://example.com/file1.txt", utcnow=utcnow)
372 self.assertEqual(r, ("99999999999999999999999999999998+99", "file1.txt"))
# NOTE(review): this print looks like leftover debugging output.
374 print(mockobj.req_headers)
375 assert mockobj.req_headers == ["Accept: application/octet-stream", "If-None-Match: \"123456\""]
376 cm.open.assert_not_called()
378 api.collections().update.assert_has_calls([
379 mock.call(uuid=cm.manifest_locator(),
380 body={"collection":{"properties": {'http://example.com/file1.txt': {
381 'Date': 'Tue, 17 May 2018 00:00:00 GMT',
382 'Expires': 'Tue, 19 May 2018 00:00:00 GMT',
387 @mock.patch("pycurl.Curl")
388 @mock.patch("arvados.collection.CollectionReader")
389 def test_http_prefer_cached_downloads(self, collectionmock, curlmock):
# prefer_cached_downloads=True: the stored Expires (16 May 2018) is before
# the frozen "now" (17 May 2018), yet the test expects the cached collection
# to be returned with no curl transfer, no file opened, and no properties
# update.
390 api = mock.MagicMock()
392 api.collections().list().execute.return_value = {
394 "uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz3",
395 "portable_data_hash": "99999999999999999999999999999998+99",
397 'http://example.com/file1.txt': {
398 'Date': 'Tue, 15 May 2018 00:00:00 GMT',
399 'Expires': 'Tue, 16 May 2018 00:00:00 GMT',
406 cm = mock.MagicMock()
407 cm.manifest_locator.return_value = "zzzzz-4zz18-zzzzzzzzzzzzzz3"
408 cm.portable_data_hash.return_value = "99999999999999999999999999999998+99"
409 cm.keys.return_value = ["file1.txt"]
410 collectionmock.return_value = cm
415 curlmock.side_effect = init
417 utcnow = mock.MagicMock()
418 utcnow.return_value = datetime.datetime(2018, 5, 17)
420 r = http_to_keep(api, None, "http://example.com/file1.txt", utcnow=utcnow, prefer_cached_downloads=True)
421 self.assertEqual(r, ("99999999999999999999999999999998+99", "file1.txt"))
# With the cached copy preferred, nothing may touch the network or the API.
423 assert mockobj.perform_was_called is False
424 cm.open.assert_not_called()
425 api.collections().update.assert_not_called()
427 @mock.patch("pycurl.Curl")
428 @mock.patch("arvados.collection.CollectionReader")
429 def test_http_varying_url_params(self, collectionmock, curlmock):
430 for prurl in ("http://example.com/file1.txt", "http://example.com/file1.txt?KeyId=123&Signature=456&Expires=789"):
431 api = mock.MagicMock()
433 api.collections().list().execute.return_value = {
435 "uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz3",
436 "portable_data_hash": "99999999999999999999999999999998+99",
439 'Date': 'Tue, 15 May 2018 00:00:00 GMT',
440 'Expires': 'Tue, 16 May 2018 00:00:00 GMT',
447 cm = mock.MagicMock()
448 cm.manifest_locator.return_value = "zzzzz-4zz18-zzzzzzzzzzzzzz3"
449 cm.portable_data_hash.return_value = "99999999999999999999999999999998+99"
450 cm.keys.return_value = ["file1.txt"]
451 collectionmock.return_value = cm
454 'Date': 'Tue, 17 May 2018 00:00:00 GMT',
455 'Expires': 'Tue, 19 May 2018 00:00:00 GMT',
461 curlmock.side_effect = init
463 utcnow = mock.MagicMock()
464 utcnow.return_value = datetime.datetime(2018, 5, 17)
466 r = http_to_keep(api, None, "http://example.com/file1.txt?KeyId=123&Signature=456&Expires=789",
467 utcnow=utcnow, varying_url_params="KeyId,Signature,Expires")
468 self.assertEqual(r, ("99999999999999999999999999999998+99", "file1.txt"))
470 assert mockobj.perform_was_called is True
471 cm.open.assert_not_called()
473 api.collections().update.assert_has_calls([
474 mock.call(uuid=cm.manifest_locator(),
475 body={"collection":{"properties": {'http://example.com/file1.txt': {
476 'Date': 'Tue, 17 May 2018 00:00:00 GMT',
477 'Expires': 'Tue, 19 May 2018 00:00:00 GMT',