15 import arvados_testutil as tutil
17 import run_test_server
19 class KeepTestCase(run_test_server.TestCaseWithServers):
@classmethod
def setUpClass(cls):
    # Class-level fixture: authorize as "admin" and build one API client
    # and one KeepClient that every test in the class shares.
    super(KeepTestCase, cls).setUpClass()
    run_test_server.authorize_with("admin")
    cls.api_client = arvados.api('v1')
    cls.keep_client = arvados.KeepClient(api_client=cls.api_client,
                                         proxy='', local_store='')
def test_KeepBasicRWTest(self):
    # Round-trip 'foo' through Keep, checking the locator, the content
    # read back, and the upload/download byte counters.
    self.assertEqual(0, self.keep_client.upload_counter.get())
    foo_locator = self.keep_client.put('foo')
    # The locator must be the subject of the match; otherwise the pattern
    # would be compared against the failure message.
    self.assertRegexpMatches(
        foo_locator,
        r'^acbd18db4cc2f85cedef654fccc4a4d8\+3',
        'wrong md5 hash from Keep.put("foo"): ' + foo_locator)

    # 6 bytes because uploaded 2 copies
    self.assertEqual(6, self.keep_client.upload_counter.get())

    self.assertEqual(0, self.keep_client.download_counter.get())
    self.assertEqual(self.keep_client.get(foo_locator),
                     'foo',
                     'wrong content from Keep.get(md5("foo"))')
    self.assertEqual(3, self.keep_client.download_counter.get())
def test_KeepBinaryRWTest(self):
    # Round-trip a short blob containing non-ASCII and NUL bytes.
    blob_str = '\xff\xfe\xf7\x00\x01\x02'
    blob_locator = self.keep_client.put(blob_str)
    self.assertRegexpMatches(
        blob_locator,
        r'^7fc7c53b45e53926ba52821140fef396\+6',
        ('wrong locator from Keep.put(<binarydata>):' + blob_locator))
    self.assertEqual(self.keep_client.get(blob_locator),
                     blob_str,
                     'wrong content from Keep.get(md5(<binarydata>))')
def test_KeepLongBinaryRWTest(self):
    # Round-trip a large binary blob.
    blob_str = '\xff\xfe\xfd\xfc\x00\x01\x02\x03'
    # Doubling the 8-byte seed 23 times yields 8 * 2**23 == 67108864
    # bytes, the size hint expected in the locator below.
    for _ in range(23):
        blob_str = blob_str + blob_str
    blob_locator = self.keep_client.put(blob_str)
    self.assertRegexpMatches(
        blob_locator,
        r'^84d90fc0d8175dd5dcfab04b999bc956\+67108864',
        ('wrong locator from Keep.put(<binarydata>): ' + blob_locator))
    self.assertEqual(self.keep_client.get(blob_locator),
                     blob_str,
                     'wrong content from Keep.get(md5(<binarydata>))')
@unittest.skip("unreliable test - please fix and close #8752")
def test_KeepSingleCopyRWTest(self):
    # Round-trip an 8-byte blob while asking for a single stored copy.
    blob_str = '\xff\xfe\xfd\xfc\x00\x01\x02\x03'
    blob_locator = self.keep_client.put(blob_str, copies=1)
    self.assertRegexpMatches(
        blob_locator,
        r'^c902006bc98a3eb4a3663b65ab4a6fab\+8',
        ('wrong locator from Keep.put(<binarydata>): ' + blob_locator))
    self.assertEqual(self.keep_client.get(blob_locator),
                     blob_str,
                     'wrong content from Keep.get(md5(<binarydata>))')
def test_KeepEmptyCollectionTest(self):
    # Storing the empty string must yield the md5-of-nothing locator
    # with a +0 size hint.
    blob_locator = self.keep_client.put('', copies=1)
    self.assertRegexpMatches(
        blob_locator,
        r'^d41d8cd98f00b204e9800998ecf8427e\+0',
        ('wrong locator from Keep.put(""): ' + blob_locator))
def test_unicode_must_be_ascii(self):
    # If unicode type, must only consist of valid ASCII
    foo_locator = self.keep_client.put(u'foo')
    self.assertRegexpMatches(
        foo_locator,
        r'^acbd18db4cc2f85cedef654fccc4a4d8\+3',
        'wrong md5 hash from Keep.put("foo"): ' + foo_locator)

    with self.assertRaises(UnicodeEncodeError):
        # Error if it is not ASCII
        self.keep_client.put(u'\xe2')

    with self.assertRaises(arvados.errors.ArgumentError):
        # Must be a string type
        self.keep_client.put({})
def test_KeepHeadTest(self):
    # After a successful put, head() must report True and get() must
    # return the stored content.
    locator = self.keep_client.put('test_head')
    self.assertRegexpMatches(
        locator,
        r'^b9a772c7049325feb7130fff1f8333e9\+9',
        'wrong md5 hash from Keep.put for "test_head": ' + locator)
    self.assertEqual(True, self.keep_client.head(locator))
    self.assertEqual(self.keep_client.get(locator),
                     'test_head',
                     'wrong content from Keep.get for "test_head"')
118 class KeepPermissionTestCase(run_test_server.TestCaseWithServers):
120 KEEP_SERVER = {'blob_signing_key': 'abcdefghijk0123456789',
121 'enforce_permissions': True}
def test_KeepBasicRWTest(self):
    # With permission enforcement on, put() returns signed locators and
    # get() only succeeds for the signing user with a signed locator.
    run_test_server.authorize_with('active')
    keep_client = arvados.KeepClient()
    foo_locator = keep_client.put('foo')
    self.assertRegexpMatches(
        foo_locator,
        r'^acbd18db4cc2f85cedef654fccc4a4d8\+3\+A[a-f0-9]+@[a-f0-9]+$',
        'invalid locator from Keep.put("foo"): ' + foo_locator)
    self.assertEqual(keep_client.get(foo_locator),
                     'foo',
                     'wrong content from Keep.get(md5("foo"))')

    # GET with an unsigned locator => NotFound
    bar_locator = keep_client.put('bar')
    unsigned_bar_locator = "37b51d194a7513e45b56f6524f2d51f2+3"
    self.assertRegexpMatches(
        bar_locator,
        r'^37b51d194a7513e45b56f6524f2d51f2\+3\+A[a-f0-9]+@[a-f0-9]+$',
        'invalid locator from Keep.put("bar"): ' + bar_locator)
    self.assertRaises(arvados.errors.NotFoundError,
                      keep_client.get,
                      unsigned_bar_locator)

    # GET from a different user => NotFound
    run_test_server.authorize_with('spectator')
    # NOTE(review): a fresh client is built after switching users so the
    # request is made as 'spectator' -- confirm this matches the intended
    # way of issuing the request as another user.
    self.assertRaises(arvados.errors.NotFoundError,
                      arvados.KeepClient().get,
                      bar_locator)

    # Unauthenticated GET for a signed locator => NotFound
    # Unauthenticated GET for an unsigned locator => NotFound
    keep_client.api_token = ''
    self.assertRaises(arvados.errors.NotFoundError,
                      keep_client.get,
                      bar_locator)
    self.assertRaises(arvados.errors.NotFoundError,
                      keep_client.get,
                      unsigned_bar_locator)
163 # KeepOptionalPermission: starts Keep with --permission-key-file
164 # but not --enforce-permissions (i.e. generate signatures on PUT
165 # requests, but do not require them for GET requests)
167 # All of these requests should succeed when permissions are optional:
168 # * authenticated request, signed locator
169 # * authenticated request, unsigned locator
170 # * unauthenticated request, signed locator
171 # * unauthenticated request, unsigned locator
172 class KeepOptionalPermission(run_test_server.TestCaseWithServers):
174 KEEP_SERVER = {'blob_signing_key': 'abcdefghijk0123456789',
175 'enforce_permissions': False}
@classmethod
def setUpClass(cls):
    # Class-level fixture: authorize as "admin" and share one API client.
    super(KeepOptionalPermission, cls).setUpClass()
    run_test_server.authorize_with("admin")
    cls.api_client = arvados.api('v1')
def setUp(self):
    # A fresh KeepClient per test, so a test that blanks api_token does
    # not affect the next one.
    super(KeepOptionalPermission, self).setUp()
    self.keep_client = arvados.KeepClient(api_client=self.api_client,
                                          proxy='', local_store='')
def _put_foo_and_check(self):
    # Store 'foo', verify the returned locator carries a signature hint
    # (+A<hex>@<hex>), and return that signed locator.
    signed_locator = self.keep_client.put('foo')
    self.assertRegexpMatches(
        signed_locator,
        r'^acbd18db4cc2f85cedef654fccc4a4d8\+3\+A[a-f0-9]+@[a-f0-9]+$',
        'invalid locator from Keep.put("foo"): ' + signed_locator)
    return signed_locator
def test_KeepAuthenticatedSignedTest(self):
    # Authenticated request, signed locator.
    signed_locator = self._put_foo_and_check()
    self.assertEqual(self.keep_client.get(signed_locator),
                     'foo',
                     'wrong content from Keep.get(md5("foo"))')
def test_KeepAuthenticatedUnsignedTest(self):
    # Authenticated request, unsigned locator. The put is only needed to
    # make sure the block exists; its signed locator is not used.
    self._put_foo_and_check()
    self.assertEqual(self.keep_client.get("acbd18db4cc2f85cedef654fccc4a4d8"),
                     'foo',
                     'wrong content from Keep.get(md5("foo"))')
def test_KeepUnauthenticatedSignedTest(self):
    # Check that signed GET requests work even when permissions
    # enforcement is off.
    signed_locator = self._put_foo_and_check()
    self.keep_client.api_token = ''
    self.assertEqual(self.keep_client.get(signed_locator),
                     'foo',
                     'wrong content from Keep.get(md5("foo"))')
def test_KeepUnauthenticatedUnsignedTest(self):
    # Since --enforce-permissions is not in effect, GET requests
    # need not be authenticated.
    self._put_foo_and_check()
    self.keep_client.api_token = ''
    self.assertEqual(self.keep_client.get("acbd18db4cc2f85cedef654fccc4a4d8"),
                     'foo',
                     'wrong content from Keep.get(md5("foo"))')
227 class KeepProxyTestCase(run_test_server.TestCaseWithServers):
230 KEEP_PROXY_SERVER = {}
@classmethod
def setUpClass(cls):
    # Class-level fixture: authorize as 'active' and share one API client.
    super(KeepProxyTestCase, cls).setUpClass()
    run_test_server.authorize_with('active')
    cls.api_client = arvados.api('v1')
def tearDown(self):
    # Drop the external-client setting a test may have added, so it does
    # not leak into later tests, then run the base-class teardown.
    arvados.config.settings().pop('ARVADOS_EXTERNAL_CLIENT', None)
    super(KeepProxyTestCase, self).tearDown()
def test_KeepProxyTest1(self):
    # No proxy argument is passed: the client is expected to pick up the
    # ARVADOS_KEEP_PROXY environment variable (presumably set by the test
    # harness -- TODO confirm where).
    # NOTE(review): local_store='' mirrors the other KeepClient
    # constructions in this file; confirm it is the intended argument.
    keep_client = arvados.KeepClient(api_client=self.api_client,
                                     local_store='')
    baz_locator = keep_client.put('baz')
    self.assertRegexpMatches(
        baz_locator,
        r'^73feffa4b7f6bb68e44cf984c85f6e88\+3',
        'wrong md5 hash from Keep.put("baz"): ' + baz_locator)
    self.assertEqual(keep_client.get(baz_locator),
                     'baz',
                     'wrong content from Keep.get(md5("baz"))')
    self.assertTrue(keep_client.using_proxy)
def test_KeepProxyTest2(self):
    # Don't instantiate the proxy directly, but set the X-External-Client
    # header.  The API server should direct us to the proxy.
    arvados.config.settings()['ARVADOS_EXTERNAL_CLIENT'] = 'true'
    keep_client = arvados.KeepClient(api_client=self.api_client,
                                     proxy='', local_store='')
    baz_locator = keep_client.put('baz2')
    self.assertRegexpMatches(
        baz_locator,
        r'^91f372a266fe2bf2823cb8ec7fda31ce\+4',
        'wrong md5 hash from Keep.put("baz2"): ' + baz_locator)
    self.assertEqual(keep_client.get(baz_locator),
                     'baz2',
                     'wrong content from Keep.get(md5("baz2"))')
    self.assertTrue(keep_client.using_proxy)
274 class KeepClientServiceTestCase(unittest.TestCase, tutil.ApiClientMock):
def get_service_roots(self, api_client):
    # Build a client on the given API mock and return its service roots
    # for the all-zero locator, sorted and parsed into URL components.
    client = arvados.KeepClient(api_client=api_client)
    zero_locator = arvados.KeepLocator('0'*32)
    roots = client.weighted_service_roots(zero_locator)
    parsed = []
    for root in sorted(roots):
        parsed.append(urlparse.urlparse(root))
    return parsed
def test_ssl_flag_respected_in_roots(self):
    # The URL scheme of a service root must follow service_ssl_flag.
    for ssl_flag in [False, True]:
        services = self.get_service_roots(self.mock_keep_services(
            service_ssl_flag=ssl_flag))
        self.assertEqual(
            ('https' if ssl_flag else 'http'), services[0].scheme)
def test_correct_ports_with_ipv6_addresses(self):
    # An IPv6 service host must parse back into the same host and port.
    api_client = self.mock_keep_services(
        service_type='proxy', service_host='100::1', service_port=10, count=1)
    roots = self.get_service_roots(api_client)
    first = roots[0]
    self.assertEqual('100::1', first.hostname)
    self.assertEqual(10, first.port)
293 # test_*_timeout verify that KeepClient instructs pycurl to use
294 # the appropriate connection and read timeouts. They don't care
295 # whether pycurl actually exhibits the expected timeout behavior
296 # -- those tests are in the KeepClientTimeout test class.
def test_get_timeout(self):
    # A GET against a single mocked service must configure pycurl with
    # the values from KeepClient.DEFAULT_TIMEOUT.
    api_client = self.mock_keep_services(count=1)
    force_timeout = socket.timeout("timed out")
    # Named req_mock so the context value does not shadow the mock module.
    with tutil.mock_keep_responses(force_timeout, 0) as req_mock:
        keep_client = arvados.KeepClient(api_client=api_client)
        with self.assertRaises(arvados.errors.KeepReadError):
            keep_client.get('ffffffffffffffffffffffffffffffff')
        self.assertEqual(
            req_mock.responses[0].getopt(pycurl.CONNECTTIMEOUT_MS),
            int(arvados.KeepClient.DEFAULT_TIMEOUT[0]*1000))
        self.assertEqual(
            req_mock.responses[0].getopt(pycurl.LOW_SPEED_TIME),
            int(arvados.KeepClient.DEFAULT_TIMEOUT[1]))
        self.assertEqual(
            req_mock.responses[0].getopt(pycurl.LOW_SPEED_LIMIT),
            int(arvados.KeepClient.DEFAULT_TIMEOUT[2]))
def test_put_timeout(self):
    # A PUT against a single mocked service must configure pycurl with
    # the values from KeepClient.DEFAULT_TIMEOUT.
    api_client = self.mock_keep_services(count=1)
    force_timeout = socket.timeout("timed out")
    # Named req_mock so the context value does not shadow the mock module.
    with tutil.mock_keep_responses(force_timeout, 0) as req_mock:
        keep_client = arvados.KeepClient(api_client=api_client)
        with self.assertRaises(arvados.errors.KeepWriteError):
            keep_client.put('foo')
        self.assertEqual(
            req_mock.responses[0].getopt(pycurl.CONNECTTIMEOUT_MS),
            int(arvados.KeepClient.DEFAULT_TIMEOUT[0]*1000))
        self.assertEqual(
            req_mock.responses[0].getopt(pycurl.LOW_SPEED_TIME),
            int(arvados.KeepClient.DEFAULT_TIMEOUT[1]))
        self.assertEqual(
            req_mock.responses[0].getopt(pycurl.LOW_SPEED_LIMIT),
            int(arvados.KeepClient.DEFAULT_TIMEOUT[2]))
def test_head_timeout(self):
    # A HEAD against a single mocked service must configure pycurl with
    # the values from KeepClient.DEFAULT_TIMEOUT.
    api_client = self.mock_keep_services(count=1)
    force_timeout = socket.timeout("timed out")
    # Named req_mock so the context value does not shadow the mock module.
    with tutil.mock_keep_responses(force_timeout, 0) as req_mock:
        keep_client = arvados.KeepClient(api_client=api_client)
        with self.assertRaises(arvados.errors.KeepReadError):
            keep_client.head('ffffffffffffffffffffffffffffffff')
        self.assertEqual(
            req_mock.responses[0].getopt(pycurl.CONNECTTIMEOUT_MS),
            int(arvados.KeepClient.DEFAULT_TIMEOUT[0]*1000))
        self.assertEqual(
            req_mock.responses[0].getopt(pycurl.LOW_SPEED_TIME),
            int(arvados.KeepClient.DEFAULT_TIMEOUT[1]))
        self.assertEqual(
            req_mock.responses[0].getopt(pycurl.LOW_SPEED_LIMIT),
            int(arvados.KeepClient.DEFAULT_TIMEOUT[2]))
def test_proxy_get_timeout(self):
    # A GET through a mocked proxy service must configure pycurl with
    # the values from KeepClient.DEFAULT_PROXY_TIMEOUT.
    api_client = self.mock_keep_services(service_type='proxy', count=1)
    force_timeout = socket.timeout("timed out")
    # Named req_mock so the context value does not shadow the mock module.
    with tutil.mock_keep_responses(force_timeout, 0) as req_mock:
        keep_client = arvados.KeepClient(api_client=api_client)
        with self.assertRaises(arvados.errors.KeepReadError):
            keep_client.get('ffffffffffffffffffffffffffffffff')
        self.assertEqual(
            req_mock.responses[0].getopt(pycurl.CONNECTTIMEOUT_MS),
            int(arvados.KeepClient.DEFAULT_PROXY_TIMEOUT[0]*1000))
        self.assertEqual(
            req_mock.responses[0].getopt(pycurl.LOW_SPEED_TIME),
            int(arvados.KeepClient.DEFAULT_PROXY_TIMEOUT[1]))
        self.assertEqual(
            req_mock.responses[0].getopt(pycurl.LOW_SPEED_LIMIT),
            int(arvados.KeepClient.DEFAULT_PROXY_TIMEOUT[2]))
def test_proxy_head_timeout(self):
    # A HEAD through a mocked proxy service must configure pycurl with
    # the values from KeepClient.DEFAULT_PROXY_TIMEOUT.
    api_client = self.mock_keep_services(service_type='proxy', count=1)
    force_timeout = socket.timeout("timed out")
    # Named req_mock so the context value does not shadow the mock module.
    with tutil.mock_keep_responses(force_timeout, 0) as req_mock:
        keep_client = arvados.KeepClient(api_client=api_client)
        with self.assertRaises(arvados.errors.KeepReadError):
            keep_client.head('ffffffffffffffffffffffffffffffff')
        self.assertEqual(
            req_mock.responses[0].getopt(pycurl.CONNECTTIMEOUT_MS),
            int(arvados.KeepClient.DEFAULT_PROXY_TIMEOUT[0]*1000))
        self.assertEqual(
            req_mock.responses[0].getopt(pycurl.LOW_SPEED_TIME),
            int(arvados.KeepClient.DEFAULT_PROXY_TIMEOUT[1]))
        self.assertEqual(
            req_mock.responses[0].getopt(pycurl.LOW_SPEED_LIMIT),
            int(arvados.KeepClient.DEFAULT_PROXY_TIMEOUT[2]))
def test_proxy_put_timeout(self):
    # A PUT through a mocked proxy service must configure pycurl with
    # the values from KeepClient.DEFAULT_PROXY_TIMEOUT.
    api_client = self.mock_keep_services(service_type='proxy', count=1)
    force_timeout = socket.timeout("timed out")
    # Named req_mock so the context value does not shadow the mock module.
    with tutil.mock_keep_responses(force_timeout, 0) as req_mock:
        keep_client = arvados.KeepClient(api_client=api_client)
        with self.assertRaises(arvados.errors.KeepWriteError):
            keep_client.put('foo')
        self.assertEqual(
            req_mock.responses[0].getopt(pycurl.CONNECTTIMEOUT_MS),
            int(arvados.KeepClient.DEFAULT_PROXY_TIMEOUT[0]*1000))
        self.assertEqual(
            req_mock.responses[0].getopt(pycurl.LOW_SPEED_TIME),
            int(arvados.KeepClient.DEFAULT_PROXY_TIMEOUT[1]))
        self.assertEqual(
            req_mock.responses[0].getopt(pycurl.LOW_SPEED_LIMIT),
            int(arvados.KeepClient.DEFAULT_PROXY_TIMEOUT[2]))
def check_no_services_error(self, verb, exc_class):
    # Make the keep_services listing raise ApiError, then expect the
    # named client operation to raise exc_class without recording any
    # per-request errors.
    failing_api = mock.MagicMock(name='api_client')
    listing = failing_api.keep_services().accessible()
    listing.execute.side_effect = arvados.errors.ApiError
    client = arvados.KeepClient(api_client=failing_api)
    with self.assertRaises(exc_class) as caught:
        getattr(client, verb)('d41d8cd98f00b204e9800998ecf8427e+0')
    recorded = caught.exception.request_errors()
    self.assertEqual(0, len(recorded))
def test_get_error_with_no_services(self):
    # 'get' variant of the shared no-services check.
    self.check_no_services_error(
        verb='get', exc_class=arvados.errors.KeepReadError)
def test_head_error_with_no_services(self):
    # 'head' variant of the shared no-services check.
    self.check_no_services_error(
        verb='head', exc_class=arvados.errors.KeepReadError)
def test_put_error_with_no_services(self):
    # 'put' variant of the shared no-services check.
    self.check_no_services_error(
        verb='put', exc_class=arvados.errors.KeepWriteError)
def check_errors_from_last_retry(self, verb, exc_class):
    # Two services answer 500, 500 and then 403, 403.  The raised
    # exception must report only the errors from the last attempt.
    api_client = self.mock_keep_services(count=2)
    req_mock = tutil.mock_keep_responses(
        "retry error reporting test", 500, 500, 403, 403)
    with req_mock, tutil.skip_sleep, \
            self.assertRaises(exc_class) as err_check:
        keep_client = arvados.KeepClient(api_client=api_client)
        # NOTE(review): at least one retry is needed for the 403 responses
        # to be reached; the exact count of 3 should be confirmed.
        getattr(keep_client, verb)('d41d8cd98f00b204e9800998ecf8427e+0',
                                   num_retries=3)
    # .values() instead of the Python-2-only .itervalues().
    self.assertEqual([403, 403], [
        getattr(error, 'status_code', None)
        for error in err_check.exception.request_errors().values()])
def test_get_error_reflects_last_retry(self):
    # 'get' variant of the shared last-retry error check.
    self.check_errors_from_last_retry(
        verb='get', exc_class=arvados.errors.KeepReadError)
def test_head_error_reflects_last_retry(self):
    # 'head' variant of the shared last-retry error check.
    self.check_errors_from_last_retry(
        verb='head', exc_class=arvados.errors.KeepReadError)
def test_put_error_reflects_last_retry(self):
    # 'put' variant of the shared last-retry error check.
    self.check_errors_from_last_retry(
        verb='put', exc_class=arvados.errors.KeepWriteError)
def test_put_error_does_not_include_successful_puts(self):
    # With one 200 and two 500 responses from three services, the write
    # error must list exactly the two failed requests.
    payload = 'partial failure test'
    payload_loc = tutil.str_keep_locator(payload)
    api_client = self.mock_keep_services(count=3)
    with tutil.mock_keep_responses(payload_loc, 200, 500, 500):
        with self.assertRaises(arvados.errors.KeepWriteError) as caught:
            client = arvados.KeepClient(api_client=api_client)
            client.put(payload)
    failures = caught.exception.request_errors()
    self.assertEqual(2, len(failures))
def test_proxy_put_with_no_writable_services(self):
    # When the only service is a read-only proxy, put() must fail with a
    # "no Keep services available" error and no per-request errors.
    data = 'test with no writable services'
    data_loc = tutil.str_keep_locator(data)
    api_client = self.mock_keep_services(service_type='proxy', read_only=True, count=1)
    with tutil.mock_keep_responses(data_loc, 200, 500, 500), \
            self.assertRaises(arvados.errors.KeepWriteError) as exc_check:
        keep_client = arvados.KeepClient(api_client=api_client)
        keep_client.put(data)
    # assertIn shows the actual exception text on failure, which
    # assertEqual(True, ...) would hide.
    self.assertIn("no Keep services available", str(exc_check.exception))
    self.assertEqual(0, len(exc_check.exception.request_errors()))
def test_oddball_service_get(self):
    # A service of an unrecognized type must still be usable for GET.
    content = 'oddball service get'
    api_client = self.mock_keep_services(service_type='fancynewblobstore')
    with tutil.mock_keep_responses(content, 200):
        client = arvados.KeepClient(api_client=api_client)
        locator = tutil.str_keep_locator(content)
        fetched = client.get(locator)
    self.assertEqual(content, fetched)
def test_oddball_service_put(self):
    # A service of an unrecognized type must still be usable for PUT.
    content = 'oddball service put'
    expected_loc = tutil.str_keep_locator(content)
    api_client = self.mock_keep_services(service_type='fancynewblobstore')
    with tutil.mock_keep_responses(expected_loc, 200):
        client = arvados.KeepClient(api_client=api_client)
        returned_loc = client.put(content, copies=1)
    self.assertEqual(expected_loc, returned_loc)
def test_oddball_service_writer_count(self):
    # A single response reporting x-keep-replicas-stored: 3 satisfies
    # copies=2, so only one request may be made.
    body = 'oddball service writer count'
    pdh = tutil.str_keep_locator(body)
    # NOTE(review): count=4 matches the four mocked responses below;
    # confirm the intended number of services.
    api_client = self.mock_keep_services(service_type='fancynewblobstore',
                                         count=4)
    headers = {'x-keep-replicas-stored': 3}
    with tutil.mock_keep_responses(pdh, 200, 418, 418, 418,
                                   **headers) as req_mock:
        keep_client = arvados.KeepClient(api_client=api_client)
        actual = keep_client.put(body, copies=2)
    self.assertEqual(pdh, actual)
    self.assertEqual(1, req_mock.call_count)
493 class KeepClientRendezvousTestCase(unittest.TestCase, tutil.ApiClientMock):
496 # expected_order[i] is the probe order for
497 # hash=md5(sprintf("%064x",i)) where there are 16 services
498 # with uuid sprintf("anything-%015x",j) with j in 0..15. E.g.,
499 # the first probe for the block consisting of 64 "0"
500 # characters is the service whose uuid is
501 # "zzzzz-bi6l4-000000000000003", so expected_order[0][0]=='3'.
503 self.expected_order = [
504 list('3eab2d5fc9681074'),
505 list('097dba52e648f1c3'),
506 list('c5b4e023f8a7d691'),
507 list('9d81c02e76a3bf54'),
511 for x in range(len(self.expected_order))]
513 hashlib.md5(self.blocks[x]).hexdigest()
514 for x in range(len(self.expected_order))]
515 self.api_client = self.mock_keep_services(count=self.services)
516 self.keep_client = arvados.KeepClient(api_client=self.api_client)
def test_weighted_service_roots_against_reference_set(self):
    # Confirm weighted_service_roots() returns the correct order
    for i, block_hash in enumerate(self.hashes):
        roots = self.keep_client.weighted_service_roots(
            arvados.KeepLocator(block_hash))
        # Reduce each root URL to the hex suffix of its keep0x<hex> host
        # so it can be compared with the reference order.
        got_order = [
            re.search(r'//\[?keep0x([0-9a-f]+)', root).group(1)
            for root in roots]
        self.assertEqual(self.expected_order[i], got_order)
def test_get_probe_order_against_reference_set(self):
    # Run the shared probe-order check with a GET of the i-th hash.
    def fetch(i):
        return self.keep_client.get(self.hashes[i], num_retries=1)
    self._test_probe_order_against_reference_set(fetch)
def test_head_probe_order_against_reference_set(self):
    # Run the shared probe-order check with a HEAD of the i-th hash.
    def probe(i):
        return self.keep_client.head(self.hashes[i], num_retries=1)
    self._test_probe_order_against_reference_set(probe)
def test_put_probe_order_against_reference_set(self):
    # copies=1 prevents the test from being sensitive to races
    # between writer threads.
    def store(i):
        return self.keep_client.put(self.blocks[i], num_retries=1, copies=1)
    self._test_probe_order_against_reference_set(store)
def _test_probe_order_against_reference_set(self, op):
    # For every reference block, run op(i) against services that always
    # answer 500 and check that the requests followed the reference probe
    # order twice over.
    for i in range(len(self.blocks)):
        with tutil.mock_keep_responses('', *[500 for _ in range(self.services*2)]) as req_mock, \
                self.assertRaises(arvados.errors.KeepRequestError):
            op(i)
        # Reduce each requested URL to the hex suffix of its keep0x<hex>
        # host.
        got_order = [
            re.search(r'//\[?keep0x([0-9a-f]+)', resp.getopt(pycurl.URL)).group(1)
            for resp in req_mock.responses]
        self.assertEqual(self.expected_order[i]*2, got_order)
def test_put_probe_order_multiple_copies(self):
    # With several writer threads the request order may deviate from the
    # reference probe order, but only within the bound checked below.
    for copies in range(2, 4):
        for i in range(len(self.blocks)):
            with tutil.mock_keep_responses('', *[500 for _ in range(self.services*3)]) as req_mock, \
                    self.assertRaises(arvados.errors.KeepWriteError):
                self.keep_client.put(self.blocks[i], num_retries=2, copies=copies)
            got_order = [
                re.search(r'//\[?keep0x([0-9a-f]+)', resp.getopt(pycurl.URL)).group(1)
                for resp in req_mock.responses]
            # With T threads racing to make requests, the position
            # of a given server in the sequence of HTTP requests
            # (got_order) cannot be more than T-1 positions
            # earlier than that server's position in the reference
            # probe sequence (expected_order).
            #
            # Loop invariant: we have accounted for +pos+ expected
            # probes, either by seeing them in +got_order+ or by
            # putting them in +pending+ in the hope of seeing them
            # later. As long as +len(pending)<T+, we haven't
            # started a request too early.
            pending = []
            for pos, expected in enumerate(self.expected_order[i]*3):
                got = got_order[pos-len(pending)]
                while got in pending:
                    del pending[pending.index(got)]
                    got = got_order[pos-len(pending)]
                # NOTE(review): the mismatch guard is inferred from the
                # invariant described above -- confirm.
                if got != expected:
                    pending.append(expected)
                    self.assertLess(
                        len(pending), copies,
                        "pending={}, with copies={}, got {}, expected {}".format(
                            pending, copies, repr(got_order), repr(self.expected_order[i]*3)))
584 def test_probe_waste_adding_one_server(self):
586 hashlib.md5("{:064x}".format(x)).hexdigest() for x in range(100)]
587 initial_services = 12
588 self.api_client = self.mock_keep_services(count=initial_services)
589 self.keep_client = arvados.KeepClient(api_client=self.api_client)
591 self.keep_client.weighted_service_roots(arvados.KeepLocator(hash)) for hash in hashes]
592 for added_services in range(1, 12):
593 api_client = self.mock_keep_services(count=initial_services+added_services)
594 keep_client = arvados.KeepClient(api_client=api_client)
596 for hash_index in range(len(hashes)):
597 probe_after = keep_client.weighted_service_roots(
598 arvados.KeepLocator(hashes[hash_index]))
599 penalty = probe_after.index(probes_before[hash_index][0])
600 self.assertLessEqual(penalty, added_services)
601 total_penalty += penalty
602 # Average penalty per block should not exceed
603 # N(added)/N(orig) by more than 20%, and should get closer
604 # to the ideal as we add data points.
607 len(hashes) / initial_services)
610 (120 - added_services)/100)
612 expect_penalty * 8/10)
614 min_penalty <= total_penalty <= max_penalty,
615 "With {}+{} services, {} blocks, penalty {} but expected {}..{}".format(
def check_64_zeros_error_order(self, verb, exc_class):
    # Make every request time out and check that the errors reported by
    # the exception are keyed by service URL in the expected probe order.
    # NOTE(review): the block of 64 "0" characters is inferred from the
    # method name and the expected order below -- confirm.
    data = '0' * 64
    if verb == 'get':
        # get takes a locator rather than the data itself.
        data = tutil.str_keep_locator(data)
    # Arbitrary port number:
    aport = random.randint(1024,65535)
    api_client = self.mock_keep_services(service_port=aport, count=self.services)
    keep_client = arvados.KeepClient(api_client=api_client)
    with mock.patch('pycurl.Curl') as curl_mock, \
            self.assertRaises(exc_class) as err_check:
        curl_mock.return_value.side_effect = socket.timeout
        getattr(keep_client, verb)(data)
    urls = [urlparse.urlparse(url)
            for url in err_check.exception.request_errors()]
    self.assertEqual([('keep0x' + c, aport) for c in '3eab2d5fc9681074'],
                     [(url.hostname, url.port) for url in urls])
def test_get_error_shows_probe_order(self):
    # 'get' variant of the shared error-order check.
    self.check_64_zeros_error_order(
        verb='get', exc_class=arvados.errors.KeepReadError)
def test_put_error_shows_probe_order(self):
    # 'put' variant of the shared error-order check.
    self.check_64_zeros_error_order(
        verb='put', exc_class=arvados.errors.KeepWriteError)
647 class KeepClientTimeout(unittest.TestCase, tutil.ApiClientMock):
648 # BANDWIDTH_LOW_LIM must be less than len(DATA) so we can transfer
649 # 1s worth of data and then trigger bandwidth errors before running
652 BANDWIDTH_LOW_LIM = 1024
class assertTakesBetween(unittest.TestCase):
    # Context manager asserting that the enclosed block takes at least
    # tmin and at most tmax seconds of wall-clock time.
    def __init__(self, tmin, tmax):
        # Store the bounds; __exit__ reads them back.
        self.tmin = tmin
        self.tmax = tmax

    def __enter__(self):
        self.t0 = time.time()

    def __exit__(self, *args, **kwargs):
        # Round times to milliseconds, like CURL. Otherwise, we
        # fail when CURL reaches a 1s timeout at 0.9998s.
        delta = round(time.time() - self.t0, 3)
        self.assertGreaterEqual(delta, self.tmin)
        self.assertLessEqual(delta, self.tmax)
class assertTakesGreater(unittest.TestCase):
    # Context manager asserting that the enclosed block takes at least
    # tmin seconds of wall-clock time.
    def __init__(self, tmin):
        # Store the bound; __exit__ reads it back.
        self.tmin = tmin

    def __enter__(self):
        self.t0 = time.time()

    def __exit__(self, *args, **kwargs):
        # Rounded to milliseconds before comparing.
        delta = round(time.time() - self.t0, 3)
        self.assertGreaterEqual(delta, self.tmin)
682 sock = socket.socket()
683 sock.bind(('0.0.0.0', 0))
684 self.port = sock.getsockname()[1]
686 self.server = keepstub.Server(('0.0.0.0', self.port), keepstub.Handler)
687 self.thread = threading.Thread(target=self.server.serve_forever)
688 self.thread.daemon = True # Exit thread if main proc exits
690 self.api_client = self.mock_keep_services(
692 service_host='localhost',
693 service_port=self.port,
697 self.server.shutdown()
def keepClient(self, timeouts=(0.1, TIMEOUT_TIME, BANDWIDTH_LOW_LIM)):
    # Build a KeepClient on this test's mocked API client using the given
    # timeouts tuple.
    # NOTE(review): the keyword name `timeout` is assumed from
    # KeepClient.DEFAULT_TIMEOUT -- confirm against the constructor.
    return arvados.KeepClient(
        api_client=self.api_client,
        timeout=timeouts)
def test_timeout_slow_connect(self):
    # Can't simulate TCP delays with our own socket. Leave our
    # stub server running uselessly, and try to connect to an
    # unroutable IP address instead.
    # NOTE(review): count=1 is assumed (a single unreachable service) --
    # confirm.
    self.api_client = self.mock_keep_services(
        count=1,
        service_host='240.0.0.0',
    )
    with self.assertTakesBetween(0.1, 0.5):
        with self.assertRaises(arvados.errors.KeepWriteError):
            self.keepClient().put(self.DATA, copies=1, num_retries=0)
def test_low_bandwidth_no_delays_success(self):
    # At twice the low-bandwidth limit a put/get round trip must succeed.
    self.server.setbandwidth(2*self.BANDWIDTH_LOW_LIM)
    client = self.keepClient()
    locator = client.put(self.DATA, copies=1, num_retries=0)
    fetched = client.get(locator, num_retries=0)
    self.assertEqual(self.DATA, fetched)
def test_too_low_bandwidth_no_delays_failure(self):
    # Check that lessening bandwidth corresponds to failing
    client = self.keepClient()
    locator = client.put(self.DATA, copies=1, num_retries=0)
    self.server.setbandwidth(0.5*self.BANDWIDTH_LOW_LIM)
    # The read must fail, and only after at least TIMEOUT_TIME.
    with self.assertTakesGreater(self.TIMEOUT_TIME), \
            self.assertRaises(arvados.errors.KeepReadError):
        client.get(locator, num_retries=0)
    # Likewise for the write.
    with self.assertTakesGreater(self.TIMEOUT_TIME), \
            self.assertRaises(arvados.errors.KeepWriteError):
        client.put(self.DATA, copies=1, num_retries=0)
def test_low_bandwidth_with_server_response_delay_failure(self):
    # Bandwidth at the limit plus a response delay of TIMEOUT_TIME must
    # make get, put and head all fail after at least TIMEOUT_TIME.
    client = self.keepClient()
    locator = client.put(self.DATA, copies=1, num_retries=0)
    self.server.setbandwidth(self.BANDWIDTH_LOW_LIM)
    self.server.setdelays(response=self.TIMEOUT_TIME)
    with self.assertTakesGreater(self.TIMEOUT_TIME), \
            self.assertRaises(arvados.errors.KeepReadError):
        client.get(locator, num_retries=0)
    with self.assertTakesGreater(self.TIMEOUT_TIME), \
            self.assertRaises(arvados.errors.KeepWriteError):
        client.put(self.DATA, copies=1, num_retries=0)
    with self.assertTakesGreater(self.TIMEOUT_TIME), \
            self.assertRaises(arvados.errors.KeepReadError):
        client.head(locator, num_retries=0)
def test_low_bandwidth_with_server_mid_delay_failure(self):
    # Bandwidth at the limit plus mid-transfer delays of TIMEOUT_TIME
    # must make get and put fail after at least TIMEOUT_TIME.
    client = self.keepClient()
    locator = client.put(self.DATA, copies=1, num_retries=0)
    self.server.setbandwidth(self.BANDWIDTH_LOW_LIM)
    self.server.setdelays(mid_write=self.TIMEOUT_TIME, mid_read=self.TIMEOUT_TIME)
    with self.assertTakesGreater(self.TIMEOUT_TIME), \
            self.assertRaises(arvados.errors.KeepReadError):
        client.get(locator, num_retries=0)
    with self.assertTakesGreater(self.TIMEOUT_TIME), \
            self.assertRaises(arvados.errors.KeepWriteError):
        client.put(self.DATA, copies=1, num_retries=0)
def test_timeout_slow_request(self):
    # Store the block first, then run both timeout checks with the
    # stub server delaying the request phase.
    client = self.keepClient()
    locator = client.put(self.DATA, copies=1, num_retries=0)
    self.server.setdelays(request=.2)
    self._test_connect_timeout_under_200ms(locator)
    self.server.setdelays(request=2)
    self._test_response_timeout_under_2s(locator)
def test_timeout_slow_response(self):
    # Store the block first, then run both timeout checks with the
    # stub server delaying the response.
    client = self.keepClient()
    locator = client.put(self.DATA, copies=1, num_retries=0)
    self.server.setdelays(response=.2)
    self._test_connect_timeout_under_200ms(locator)
    self.server.setdelays(response=2)
    self._test_response_timeout_under_2s(locator)
def test_timeout_slow_response_body(self):
    # Store the block first, then run both timeout checks with the
    # stub server delaying the response body.
    client = self.keepClient()
    locator = client.put(self.DATA, copies=1, num_retries=0)
    self.server.setdelays(response_body=.2)
    self._test_connect_timeout_under_200ms(locator)
    self.server.setdelays(response_body=2)
    self._test_response_timeout_under_2s(locator)
def _test_connect_timeout_under_200ms(self, loc):
    # Allow 100ms to connect, then 1s for response. Everything
    # should work, and each operation should take between 200ms and
    # 300ms.
    client = self.keepClient(timeouts=(.1, 1))
    with self.assertTakesBetween(.2, .3):
        client.put(self.DATA, copies=1, num_retries=0)
    with self.assertTakesBetween(.2, .3):
        fetched = client.get(loc, num_retries=0)
        self.assertEqual(self.DATA, fetched)
def _test_response_timeout_under_2s(self, loc):
    # Allow 10s to connect, then 1s for response. Nothing should
    # work, and everything should take at least 1s to return.
    client = self.keepClient(timeouts=(10, 1))
    with self.assertTakesBetween(1, 1.9), \
            self.assertRaises(arvados.errors.KeepReadError):
        client.get(loc, num_retries=0)
    with self.assertTakesBetween(1, 1.9), \
            self.assertRaises(arvados.errors.KeepWriteError):
        client.put(self.DATA, copies=1, num_retries=0)
804 class KeepClientGatewayTestCase(unittest.TestCase, tutil.ApiClientMock):
def mock_disks_and_gateways(self, disks=3, gateways=1):
    # Populate self.gateways (service records), self.gateway_roots (their
    # https root URLs), self.api_client (a mocked API listing `disks`
    # regular services plus the gateways) and self.keepClient.
    self.gateways = [{
            'uuid': 'zzzzz-bi6l4-gateway{:08d}'.format(i),
            'owner_uuid': 'zzzzz-tpzed-000000000000000',
            'service_host': 'gatewayhost{}'.format(i),
            'service_port': 12345,
            'service_ssl_flag': True,
            'service_type': 'gateway:test',
    } for i in range(gateways)]
    self.gateway_roots = [
        "https://{service_host}:{service_port}/".format(**gw)
        for gw in self.gateways]
    self.api_client = self.mock_keep_services(
        count=disks, additional_services=self.gateways)
    self.keepClient = arvados.KeepClient(api_client=self.api_client)
@mock.patch('pycurl.Curl')
def test_get_with_gateway_hint_first(self, MockCurl):
    # A +K@<gateway uuid> hint must send the request to that gateway's
    # root URL.
    fake_curl = tutil.FakeCurl.make(
        code=200, body='foo', headers={'Content-Length': 3})
    MockCurl.return_value = fake_curl
    self.mock_disks_and_gateways()
    gateway_uuid = self.gateways[0]['uuid']
    locator = 'acbd18db4cc2f85cedef654fccc4a4d8+3+K@' + gateway_uuid
    self.assertEqual('foo', self.keepClient.get(locator))
    expected_url = self.gateway_roots[0]+locator
    self.assertEqual(expected_url,
                     MockCurl.return_value.getopt(pycurl.URL))
    self.assertEqual(True, self.keepClient.head(locator))
832 @mock.patch('pycurl.Curl')
833 def test_get_with_gateway_hints_in_order(self, MockCurl):
837 tutil.FakeCurl.make(code=404, body='')
838 for _ in range(gateways+disks)
840 MockCurl.side_effect = tutil.queue_with(mocks)
841 self.mock_disks_and_gateways(gateways=gateways, disks=disks)
842 locator = '+'.join(['acbd18db4cc2f85cedef654fccc4a4d8+3'] +
843 ['K@'+gw['uuid'] for gw in self.gateways])
844 with self.assertRaises(arvados.errors.NotFoundError):
845 self.keepClient.get(locator)
846 # Gateways are tried first, in the order given.
847 for i, root in enumerate(self.gateway_roots):
848 self.assertEqual(root+locator,
849 mocks[i].getopt(pycurl.URL))
850 # Disk services are tried next.
851 for i in range(gateways, gateways+disks):
852 self.assertRegexpMatches(
853 mocks[i].getopt(pycurl.URL),
# Checks the order in which head() contacts services when the locator
# carries one +K@<uuid> hint per gateway and every response is a 404.
# NOTE(review): the lines binding `gateways` and `disks`, the opening of
# the `mocks` list, and its closing bracket are not visible in this
# excerpt -- confirm against the full file before editing.
856 @mock.patch('pycurl.Curl')
857 def test_head_with_gateway_hints_in_order(self, MockCurl):
# One fake curl object (always answering 404) per gateway and per disk.
861 tutil.FakeCurl.make(code=404, body='')
862 for _ in range(gateways+disks)
# Presumably hands out the fakes one per pycurl.Curl() call, in list
# order -- verify against tutil.queue_with.
864 MockCurl.side_effect = tutil.queue_with(mocks)
865 self.mock_disks_and_gateways(gateways=gateways, disks=disks)
# Block hash+size followed by a K@ hint for each mocked gateway.
866 locator = '+'.join(['acbd18db4cc2f85cedef654fccc4a4d8+3'] +
867 ['K@'+gw['uuid'] for gw in self.gateways])
# Every mocked response is a 404, so head() must end in NotFoundError.
868 with self.assertRaises(arvados.errors.NotFoundError):
869 self.keepClient.head(locator)
870 # Gateways are tried first, in the order given.
871 for i, root in enumerate(self.gateway_roots):
872 self.assertEqual(root+locator,
873 mocks[i].getopt(pycurl.URL))
874 # Disk services are tried next.
875 for i in range(gateways, gateways+disks):
876 self.assertRegexpMatches(
877 mocks[i].getopt(pycurl.URL),
# NOTE(review): the expected URL pattern and the closing parenthesis of
# this assertion are not visible in this excerpt.
@mock.patch('pycurl.Curl')
def test_get_with_remote_proxy_hint(self, MockCurl):
    # A +K@xyzzy hint that matches no mocked gateway must be fetched
    # from https://keep.xyzzy.arvadosapi.com/.
    fake_curl = tutil.FakeCurl.make(
        code=200, body='foo', headers={'Content-Length': 3})
    MockCurl.return_value = fake_curl
    self.mock_disks_and_gateways()
    locator = 'acbd18db4cc2f85cedef654fccc4a4d8+3+K@xyzzy'
    fetched = self.keepClient.get(locator)
    self.assertEqual('foo', fetched)
    requested_url = fake_curl.getopt(pycurl.URL)
    self.assertEqual('https://keep.xyzzy.arvadosapi.com/'+locator,
                     requested_url)
@mock.patch('pycurl.Curl')
def test_head_with_remote_proxy_hint(self, MockCurl):
    # head() on a +K@xyzzy hinted locator must be sent to
    # https://keep.xyzzy.arvadosapi.com/ and report True.
    fake_curl = tutil.FakeCurl.make(
        code=200, body='foo', headers={'Content-Length': 3})
    MockCurl.return_value = fake_curl
    self.mock_disks_and_gateways()
    locator = 'acbd18db4cc2f85cedef654fccc4a4d8+3+K@xyzzy'
    found = self.keepClient.head(locator)
    self.assertEqual(True, found)
    requested_url = fake_curl.getopt(pycurl.URL)
    self.assertEqual('https://keep.xyzzy.arvadosapi.com/'+locator,
                     requested_url)
class KeepClientRetryTestMixin(object):
    # Testing with a local Keep store won't exercise the retry behavior.
    # Instead, our strategy is:
    # * Create a client with one proxy specified (pointed at a black
    #   hole), so there's no need to instantiate an API client, and
    #   all HTTP requests come from one place.
    # * Use TEST_PATCHER to replace the client's HTTP layer with
    #   simulated responses.
    # This lets us test the retry logic extensively without relying on any
    # supporting servers, and prevents side effects in case something hiccups.
    # To use this mixin, define DEFAULT_EXPECT, DEFAULT_EXCEPTION, and
    # run_method().
    #
    # Test classes must define TEST_PATCHER to a method that mocks
    # out appropriate methods in the client.

    PROXY_ADDR = 'http://[%s]:65535/' % (tutil.TEST_HOST,)
    TEST_DATA = 'testdata'
    # Locator that subclasses pair with TEST_DATA (hash + '+' + size).
    TEST_LOCATOR = 'ef654c40ab4f1747fc699915d4f70902+8'

    def setUp(self):
        # Rebuilt for every test, so a test may tweak the kwargs (e.g. add
        # num_retries) without leaking into other tests.
        self.client_kwargs = {'proxy': self.PROXY_ADDR, 'local_store': ''}

    def new_client(self, **caller_kwargs):
        # Build a KeepClient from the per-test defaults; keyword arguments
        # given by the caller override them.
        kwargs = self.client_kwargs.copy()
        kwargs.update(caller_kwargs)
        return arvados.KeepClient(**kwargs)

    def run_method(self, *args, **kwargs):
        # Subclasses perform the client operation under test here.
        raise NotImplementedError("test subclasses must define run_method")

    def check_success(self, expected=None, *args, **kwargs):
        # Assert that run_method(*args, **kwargs) returns `expected`,
        # falling back to DEFAULT_EXPECT only when no value was given
        # (mirrors check_exception below).
        if expected is None:
            expected = self.DEFAULT_EXPECT
        self.assertEqual(expected, self.run_method(*args, **kwargs))

    def check_exception(self, error_class=None, *args, **kwargs):
        # Assert that run_method(*args, **kwargs) raises `error_class`,
        # falling back to DEFAULT_EXCEPTION when none was given.
        if error_class is None:
            error_class = self.DEFAULT_EXCEPTION
        self.assertRaises(error_class, self.run_method, *args, **kwargs)

    def test_immediate_success(self):
        with self.TEST_PATCHER(self.DEFAULT_EXPECT, 200):
            self.check_success()

    def test_retry_then_success(self):
        with self.TEST_PATCHER(self.DEFAULT_EXPECT, 500, 200):
            self.check_success(num_retries=3)

    def test_exception_then_success(self):
        with self.TEST_PATCHER(self.DEFAULT_EXPECT, Exception('mock err'), 200):
            self.check_success(num_retries=3)

    def test_no_default_retry(self):
        # No num_retries is passed, so the first 500 must surface as an
        # error even though a 200 is queued behind it.
        with self.TEST_PATCHER(self.DEFAULT_EXPECT, 500, 200):
            self.check_exception()

    def test_no_retry_after_permanent_error(self):
        with self.TEST_PATCHER(self.DEFAULT_EXPECT, 403, 200):
            self.check_exception(num_retries=3)

    def test_error_after_retries_exhausted(self):
        with self.TEST_PATCHER(self.DEFAULT_EXPECT, 500, 500, 200):
            self.check_exception(num_retries=1)

    def test_num_retries_instance_fallback(self):
        # num_retries is supplied to the client constructor (through
        # client_kwargs) rather than to the individual call.
        self.client_kwargs['num_retries'] = 3
        with self.TEST_PATCHER(self.DEFAULT_EXPECT, 500, 200):
            self.check_success()
class KeepClientRetryGetTestCase(KeepClientRetryTestMixin, unittest.TestCase):
    # Runs the shared retry tests against KeepClient.get(), plus
    # get-specific cases that use a locator with a +K@ hint.
    DEFAULT_EXPECT = KeepClientRetryTestMixin.TEST_DATA
    DEFAULT_EXCEPTION = arvados.errors.KeepReadError
    HINTED_LOCATOR = KeepClientRetryTestMixin.TEST_LOCATOR + '+K@xyzzy'
    TEST_PATCHER = staticmethod(tutil.mock_keep_responses)

    def run_method(self, locator=KeepClientRetryTestMixin.TEST_LOCATOR,
                   *args, **kwargs):
        # Extra arguments (e.g. num_retries) are forwarded to get().
        return self.new_client().get(locator, *args, **kwargs)

    def test_specific_exception_when_not_found(self):
        with tutil.mock_keep_responses(self.DEFAULT_EXPECT, 404, 200):
            self.check_exception(arvados.errors.NotFoundError, num_retries=3)

    def test_general_exception_with_mixed_errors(self):
        # get should raise a NotFoundError if no server returns the block,
        # and a high threshold of servers report that it's not found.
        # This test rigs up 50/50 disagreement between two servers, and
        # checks that it does not become a NotFoundError.
        client = self.new_client()
        with tutil.mock_keep_responses(self.DEFAULT_EXPECT, 404, 500):
            with self.assertRaises(arvados.errors.KeepReadError) as exc_check:
                client.get(self.HINTED_LOCATOR)
        # Checked after the assertRaises block has captured the exception.
        self.assertNotIsInstance(
            exc_check.exception, arvados.errors.NotFoundError,
            "mixed errors raised NotFoundError")

    def test_hint_server_can_succeed_without_retries(self):
        with tutil.mock_keep_responses(self.DEFAULT_EXPECT, 404, 200, 500):
            self.check_success(locator=self.HINTED_LOCATOR)

    def test_try_next_server_after_timeout(self):
        with tutil.mock_keep_responses(
                (socket.timeout("timed out"), 200),
                (self.DEFAULT_EXPECT, 200)):
            self.check_success(locator=self.HINTED_LOCATOR)

    def test_retry_data_with_wrong_checksum(self):
        # The first response is a 200 whose body does not match the
        # locator; the client is expected to discard it and use the
        # second, correct response.
        with tutil.mock_keep_responses(
                ('baddata', 200),
                (self.DEFAULT_EXPECT, 200)):
            self.check_success(locator=self.HINTED_LOCATOR)
class KeepClientRetryHeadTestCase(KeepClientRetryTestMixin, unittest.TestCase):
    # Runs the shared retry tests against KeepClient.head(), which is
    # expected to return True on success.
    DEFAULT_EXPECT = True
    DEFAULT_EXCEPTION = arvados.errors.KeepReadError
    HINTED_LOCATOR = KeepClientRetryTestMixin.TEST_LOCATOR + '+K@xyzzy'
    TEST_PATCHER = staticmethod(tutil.mock_keep_responses)

    def run_method(self, locator=KeepClientRetryTestMixin.TEST_LOCATOR,
                   *args, **kwargs):
        # Extra arguments (e.g. num_retries) are forwarded to head().
        return self.new_client().head(locator, *args, **kwargs)

    def test_specific_exception_when_not_found(self):
        with tutil.mock_keep_responses(self.DEFAULT_EXPECT, 404, 200):
            self.check_exception(arvados.errors.NotFoundError, num_retries=3)

    def test_general_exception_with_mixed_errors(self):
        # head should raise a NotFoundError if no server returns the block,
        # and a high threshold of servers report that it's not found.
        # This test rigs up 50/50 disagreement between two servers, and
        # checks that it does not become a NotFoundError.
        client = self.new_client()
        with tutil.mock_keep_responses(self.DEFAULT_EXPECT, 404, 500):
            with self.assertRaises(arvados.errors.KeepReadError) as exc_check:
                client.head(self.HINTED_LOCATOR)
        # Checked after the assertRaises block has captured the exception.
        self.assertNotIsInstance(
            exc_check.exception, arvados.errors.NotFoundError,
            "mixed errors raised NotFoundError")

    def test_hint_server_can_succeed_without_retries(self):
        with tutil.mock_keep_responses(self.DEFAULT_EXPECT, 404, 200, 500):
            self.check_success(locator=self.HINTED_LOCATOR)

    def test_try_next_server_after_timeout(self):
        with tutil.mock_keep_responses(
                (socket.timeout("timed out"), 200),
                (self.DEFAULT_EXPECT, 200)):
            self.check_success(locator=self.HINTED_LOCATOR)
class KeepClientRetryPutTestCase(KeepClientRetryTestMixin, unittest.TestCase):
    # Runs the shared retry tests against KeepClient.put(); a successful
    # put is expected to return the locator of the stored data.
    DEFAULT_EXPECT = KeepClientRetryTestMixin.TEST_LOCATOR
    DEFAULT_EXCEPTION = arvados.errors.KeepWriteError
    TEST_PATCHER = staticmethod(tutil.mock_keep_responses)

    def run_method(self, data=KeepClientRetryTestMixin.TEST_DATA,
                   copies=1, *args, **kwargs):
        # Every call writes through a freshly built client.
        client = self.new_client()
        return client.put(data, copies, *args, **kwargs)

    def test_do_not_send_multiple_copies_to_same_server(self):
        # Asking for two copies must raise DEFAULT_EXCEPTION, even with
        # retries allowed.
        patched_responses = tutil.mock_keep_responses(self.DEFAULT_EXPECT, 200)
        with patched_responses:
            self.check_exception(copies=2, num_retries=3)
# Tests for arvados.KeepClient.KeepWriterThreadPool: fake services are
# queued on a pool and pool.done() is compared against `copies`.
# NOTE(review): many lines of this class (pool constructor arguments, the
# loops, the binding of `copies`, parts of FakeKeepService) are not visible
# in this excerpt -- read the full file before changing anything here.
1068 class KeepClientAvoidClientOverreplicationTestCase(unittest.TestCase, tutil.ApiClientMock):
# Stand-in for a Keep service object; it is handed to pool.add_task()
# by the tests below.
1071 class FakeKeepService(object):
1072 def __init__(self, delay, will_succeed, replicas=1):
1074 self.success = will_succeed
# Canned result: a replicas-stored header (stringified) and a fixed body.
1076 self._result['headers'] = {}
1077 self._result['headers']['x-keep-replicas-stored'] = str(replicas)
1078 self._result['body'] = 'foobar'
1080 def put(self, data_hash, data, timeout):
# Simulate a slow service by sleeping for the configured delay.
1081 time.sleep(self.delay)
1084 def last_result(self):
# All queued fake services are created with will_succeed=True.
1091 def test_only_write_enough_on_success(self):
1093 pool = arvados.KeepClient.KeepWriterThreadPool(
1095 data_hash = 'acbd18db4cc2f85cedef654fccc4a4d8+3',
1096 max_service_replicas = copies,
# The delay grows with `i`; presumably `i` is a loop index -- TODO confirm.
1100 ks = self.FakeKeepService(delay=i/10.0, will_succeed=True)
1101 pool.add_task(ks, None)
1103 self.assertEqual(pool.done(), copies)
# Queues a failing and a succeeding fake service with the same delay.
1105 def test_only_write_enough_on_partial_success(self):
1107 pool = arvados.KeepClient.KeepWriterThreadPool(
1109 data_hash = 'acbd18db4cc2f85cedef654fccc4a4d8+3',
1110 max_service_replicas = copies,
1114 ks = self.FakeKeepService(delay=i/10.0, will_succeed=False)
1115 pool.add_task(ks, None)
1116 ks = self.FakeKeepService(delay=i/10.0, will_succeed=True)
1117 pool.add_task(ks, None)
1119 self.assertEqual(pool.done(), copies)
class RetryNeedsMultipleServices(unittest.TestCase, tutil.ApiClientMock):
    # Test put()s that need two distinct servers to succeed, possibly
    # requiring multiple passes through the retry loop.

    def setUp(self):
        # Each test gets a fresh client backed by two mocked Keep services.
        self.api_client = self.mock_keep_services(count=2)
        self.keep_client = arvados.KeepClient(api_client=self.api_client)

    def test_success_after_exception(self):
        # One request raises, then two succeed: three requests in total.
        with tutil.mock_keep_responses(
                'acbd18db4cc2f85cedef654fccc4a4d8+3',
                Exception('mock err'), 200, 200) as req_mock:
            self.keep_client.put('foo', num_retries=1, copies=2)
        self.assertEqual(3, req_mock.call_count)

    def test_success_after_retryable_error(self):
        # One request gets a 500, then two succeed: three requests in total.
        with tutil.mock_keep_responses(
                'acbd18db4cc2f85cedef654fccc4a4d8+3',
                500, 200, 200) as req_mock:
            self.keep_client.put('foo', num_retries=1, copies=2)
        self.assertEqual(3, req_mock.call_count)

    def test_fail_after_final_error(self):
        # First retry loop gets a 200 (can't achieve replication by
        # storing again on that server) and a 400 (can't retry that
        # server at all), so we shouldn't try a third request.
        with tutil.mock_keep_responses(
                'acbd18db4cc2f85cedef654fccc4a4d8+3',
                200, 400, 200) as req_mock:
            with self.assertRaises(arvados.errors.KeepWriteError):
                self.keep_client.put('foo', num_retries=1, copies=2)
        # Must sit outside assertRaises, otherwise it would never run.
        self.assertEqual(2, req_mock.call_count)