+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+from __future__ import absolute_import
+from __future__ import division
+from future import standard_library
+standard_library.install_aliases()
+from builtins import str
+from builtins import range
+from builtins import object
import hashlib
import mock
import os
import random
import re
import socket
-import threading
+import sys
import time
import unittest
-import urlparse
+import urllib.parse
import arvados
import arvados.retry
-import arvados_testutil as tutil
-import keepstub
-import run_test_server
+import arvados.util
+from . import arvados_testutil as tutil
+from . import keepstub
+from . import run_test_server
class KeepTestCase(run_test_server.TestCaseWithServers):
MAIN_SERVER = {}
def test_KeepBasicRWTest(self):
self.assertEqual(0, self.keep_client.upload_counter.get())
foo_locator = self.keep_client.put('foo')
- self.assertRegexpMatches(
+ self.assertRegex(
foo_locator,
'^acbd18db4cc2f85cedef654fccc4a4d8\+3',
'wrong md5 hash from Keep.put("foo"): ' + foo_locator)
self.assertEqual(0, self.keep_client.download_counter.get())
self.assertEqual(self.keep_client.get(foo_locator),
- 'foo',
+ b'foo',
'wrong content from Keep.get(md5("foo"))')
self.assertEqual(3, self.keep_client.download_counter.get())
def test_KeepBinaryRWTest(self):
- blob_str = '\xff\xfe\xf7\x00\x01\x02'
+ blob_str = b'\xff\xfe\xf7\x00\x01\x02'
blob_locator = self.keep_client.put(blob_str)
- self.assertRegexpMatches(
+ self.assertRegex(
blob_locator,
'^7fc7c53b45e53926ba52821140fef396\+6',
('wrong locator from Keep.put(<binarydata>):' + blob_locator))
'wrong content from Keep.get(md5(<binarydata>))')
def test_KeepLongBinaryRWTest(self):
- blob_str = '\xff\xfe\xfd\xfc\x00\x01\x02\x03'
- for i in range(0,23):
- blob_str = blob_str + blob_str
- blob_locator = self.keep_client.put(blob_str)
- self.assertRegexpMatches(
+ blob_data = b'\xff\xfe\xfd\xfc\x00\x01\x02\x03'
+ for i in range(0, 23):
+ blob_data = blob_data + blob_data
+ blob_locator = self.keep_client.put(blob_data)
+ self.assertRegex(
blob_locator,
'^84d90fc0d8175dd5dcfab04b999bc956\+67108864',
('wrong locator from Keep.put(<binarydata>): ' + blob_locator))
self.assertEqual(self.keep_client.get(blob_locator),
- blob_str,
+ blob_data,
'wrong content from Keep.get(md5(<binarydata>))')
@unittest.skip("unreliable test - please fix and close #8752")
def test_KeepSingleCopyRWTest(self):
- blob_str = '\xff\xfe\xfd\xfc\x00\x01\x02\x03'
- blob_locator = self.keep_client.put(blob_str, copies=1)
- self.assertRegexpMatches(
+ blob_data = b'\xff\xfe\xfd\xfc\x00\x01\x02\x03'
+ blob_locator = self.keep_client.put(blob_data, copies=1)
+ self.assertRegex(
blob_locator,
'^c902006bc98a3eb4a3663b65ab4a6fab\+8',
('wrong locator from Keep.put(<binarydata>): ' + blob_locator))
self.assertEqual(self.keep_client.get(blob_locator),
- blob_str,
+ blob_data,
'wrong content from Keep.get(md5(<binarydata>))')
def test_KeepEmptyCollectionTest(self):
blob_locator = self.keep_client.put('', copies=1)
- self.assertRegexpMatches(
+ self.assertRegex(
blob_locator,
'^d41d8cd98f00b204e9800998ecf8427e\+0',
('wrong locator from Keep.put(""): ' + blob_locator))
def test_unicode_must_be_ascii(self):
# If unicode type, must only consist of valid ASCII
foo_locator = self.keep_client.put(u'foo')
- self.assertRegexpMatches(
+ self.assertRegex(
foo_locator,
'^acbd18db4cc2f85cedef654fccc4a4d8\+3',
'wrong md5 hash from Keep.put("foo"): ' + foo_locator)
- with self.assertRaises(UnicodeEncodeError):
- # Error if it is not ASCII
- self.keep_client.put(u'\xe2')
+ if sys.version_info < (3, 0):
+ with self.assertRaises(UnicodeEncodeError):
+ # Error if it is not ASCII
+ self.keep_client.put(u'\xe2')
- with self.assertRaises(arvados.errors.ArgumentError):
- # Must be a string type
+ with self.assertRaises(AttributeError):
+ # Must be bytes or have an encode() method
self.keep_client.put({})
def test_KeepHeadTest(self):
locator = self.keep_client.put('test_head')
- self.assertRegexpMatches(
+ self.assertRegex(
locator,
'^b9a772c7049325feb7130fff1f8333e9\+9',
'wrong md5 hash from Keep.put for "test_head": ' + locator)
self.assertEqual(True, self.keep_client.head(locator))
self.assertEqual(self.keep_client.get(locator),
- 'test_head',
+ b'test_head',
'wrong content from Keep.get for "test_head"')
class KeepPermissionTestCase(run_test_server.TestCaseWithServers):
MAIN_SERVER = {}
- KEEP_SERVER = {'blob_signing_key': 'abcdefghijk0123456789',
- 'enforce_permissions': True}
+ KEEP_SERVER = {'blob_signing': True}
def test_KeepBasicRWTest(self):
run_test_server.authorize_with('active')
keep_client = arvados.KeepClient()
foo_locator = keep_client.put('foo')
- self.assertRegexpMatches(
+ self.assertRegex(
foo_locator,
r'^acbd18db4cc2f85cedef654fccc4a4d8\+3\+A[a-f0-9]+@[a-f0-9]+$',
'invalid locator from Keep.put("foo"): ' + foo_locator)
self.assertEqual(keep_client.get(foo_locator),
- 'foo',
+ b'foo',
'wrong content from Keep.get(md5("foo"))')
# GET with an unsigned locator => NotFound
bar_locator = keep_client.put('bar')
unsigned_bar_locator = "37b51d194a7513e45b56f6524f2d51f2+3"
- self.assertRegexpMatches(
+ self.assertRegex(
bar_locator,
r'^37b51d194a7513e45b56f6524f2d51f2\+3\+A[a-f0-9]+@[a-f0-9]+$',
'invalid locator from Keep.put("bar"): ' + bar_locator)
unsigned_bar_locator)
-# KeepOptionalPermission: starts Keep with --permission-key-file
-# but not --enforce-permissions (i.e. generate signatures on PUT
-# requests, but do not require them for GET requests)
-#
-# All of these requests should succeed when permissions are optional:
-# * authenticated request, signed locator
-# * authenticated request, unsigned locator
-# * unauthenticated request, signed locator
-# * unauthenticated request, unsigned locator
-class KeepOptionalPermission(run_test_server.TestCaseWithServers):
- MAIN_SERVER = {}
- KEEP_SERVER = {'blob_signing_key': 'abcdefghijk0123456789',
- 'enforce_permissions': False}
-
- @classmethod
- def setUpClass(cls):
- super(KeepOptionalPermission, cls).setUpClass()
- run_test_server.authorize_with("admin")
- cls.api_client = arvados.api('v1')
-
- def setUp(self):
- super(KeepOptionalPermission, self).setUp()
- self.keep_client = arvados.KeepClient(api_client=self.api_client,
- proxy='', local_store='')
-
- def _put_foo_and_check(self):
- signed_locator = self.keep_client.put('foo')
- self.assertRegexpMatches(
- signed_locator,
- r'^acbd18db4cc2f85cedef654fccc4a4d8\+3\+A[a-f0-9]+@[a-f0-9]+$',
- 'invalid locator from Keep.put("foo"): ' + signed_locator)
- return signed_locator
-
- def test_KeepAuthenticatedSignedTest(self):
- signed_locator = self._put_foo_and_check()
- self.assertEqual(self.keep_client.get(signed_locator),
- 'foo',
- 'wrong content from Keep.get(md5("foo"))')
-
- def test_KeepAuthenticatedUnsignedTest(self):
- signed_locator = self._put_foo_and_check()
- self.assertEqual(self.keep_client.get("acbd18db4cc2f85cedef654fccc4a4d8"),
- 'foo',
- 'wrong content from Keep.get(md5("foo"))')
-
- def test_KeepUnauthenticatedSignedTest(self):
- # Check that signed GET requests work even when permissions
- # enforcement is off.
- signed_locator = self._put_foo_and_check()
- self.keep_client.api_token = ''
- self.assertEqual(self.keep_client.get(signed_locator),
- 'foo',
- 'wrong content from Keep.get(md5("foo"))')
-
- def test_KeepUnauthenticatedUnsignedTest(self):
- # Since --enforce-permissions is not in effect, GET requests
- # need not be authenticated.
- signed_locator = self._put_foo_and_check()
- self.keep_client.api_token = ''
- self.assertEqual(self.keep_client.get("acbd18db4cc2f85cedef654fccc4a4d8"),
- 'foo',
- 'wrong content from Keep.get(md5("foo"))')
-
-
class KeepProxyTestCase(run_test_server.TestCaseWithServers):
MAIN_SERVER = {}
KEEP_SERVER = {}
super(KeepProxyTestCase, self).tearDown()
def test_KeepProxyTest1(self):
- # Will use ARVADOS_KEEP_PROXY environment variable that is set by
- # setUpClass().
+ # Will use ARVADOS_KEEP_SERVICES environment variable that
+ # is set by setUpClass().
keep_client = arvados.KeepClient(api_client=self.api_client,
local_store='')
baz_locator = keep_client.put('baz')
- self.assertRegexpMatches(
+ self.assertRegex(
baz_locator,
'^73feffa4b7f6bb68e44cf984c85f6e88\+3',
'wrong md5 hash from Keep.put("baz"): ' + baz_locator)
self.assertEqual(keep_client.get(baz_locator),
- 'baz',
+ b'baz',
'wrong content from Keep.get(md5("baz"))')
self.assertTrue(keep_client.using_proxy)
keep_client = arvados.KeepClient(api_client=self.api_client,
proxy='', local_store='')
baz_locator = keep_client.put('baz2')
- self.assertRegexpMatches(
+ self.assertRegex(
baz_locator,
'^91f372a266fe2bf2823cb8ec7fda31ce\+4',
'wrong md5 hash from Keep.put("baz2"): ' + baz_locator)
self.assertEqual(keep_client.get(baz_locator),
- 'baz2',
+ b'baz2',
'wrong content from Keep.get(md5("baz2"))')
self.assertTrue(keep_client.using_proxy)
+    def test_KeepProxyTestMultipleURIs(self):
+        """ARVADOS_KEEP_SERVICES may list several space-separated proxy URIs.
+
+        The setting overrides any existing proxy setting, and each
+        listed URI is expected to show up as a service root, in order.
+        """
+        settings = arvados.config.settings()
+        settings['ARVADOS_KEEP_SERVICES'] = 'http://10.0.0.1 https://foo.example.org:1234/'
+        client = arvados.KeepClient(api_client=self.api_client, local_store='')
+        expected_roots = [
+            'http://10.0.0.1/',
+            'https://foo.example.org:1234/',
+        ]
+        actual_roots = [svc['_service_root'] for svc in client._keep_services]
+        self.assertEqual(actual_roots, expected_roots)
+
+    def test_KeepProxyTestInvalidURI(self):
+        """An ARVADOS_KEEP_SERVICES value of 'bad.uri.org' must be rejected.
+
+        Constructing a KeepClient with that setting is expected to raise
+        arvados.errors.ArgumentError.
+        """
+        arvados.config.settings()['ARVADOS_KEEP_SERVICES'] = 'bad.uri.org'
+        with self.assertRaises(arvados.errors.ArgumentError):
+            # Only the constructor call matters here; the instance is
+            # never used, so it is not bound to a name.
+            arvados.KeepClient(api_client=self.api_client,
+                               local_store='')
+
class KeepClientServiceTestCase(unittest.TestCase, tutil.ApiClientMock):
def get_service_roots(self, api_client):
keep_client = arvados.KeepClient(api_client=api_client)
services = keep_client.weighted_service_roots(arvados.KeepLocator('0'*32))
- return [urlparse.urlparse(url) for url in sorted(services)]
+ return [urllib.parse.urlparse(url) for url in sorted(services)]
def test_ssl_flag_respected_in_roots(self):
for ssl_flag in [False, True]:
self.assertEqual('100::1', service.hostname)
self.assertEqual(10, service.port)
+    def test_insecure_disables_tls_verify(self):
+        """SSL_VERIFYPEER is set to 0 only when the API client is insecure.
+
+        With api_client.insecure = True the curl handle must have
+        SSL_VERIFYPEER == 0; with insecure = False the option must be
+        left untouched.
+        """
+        api_client = self.mock_keep_services(count=1)
+
+        api_client.insecure = True
+        with tutil.mock_keep_responses(b'foo', 200) as mock:
+            keep_client = arvados.KeepClient(api_client=api_client)
+            keep_client.get('acbd18db4cc2f85cedef654fccc4a4d8+3')
+            self.assertEqual(
+                mock.responses[0].getopt(pycurl.SSL_VERIFYPEER),
+                0)
+
+        api_client.insecure = False
+        with tutil.mock_keep_responses(b'foo', 200) as mock:
+            keep_client = arvados.KeepClient(api_client=api_client)
+            keep_client.get('acbd18db4cc2f85cedef654fccc4a4d8+3')
+            # getopt()==None here means we didn't change the
+            # default. If we were using real pycurl instead of a mock,
+            # it would return the default value 1.
+            self.assertEqual(
+                mock.responses[0].getopt(pycurl.SSL_VERIFYPEER),
+                None)
+
+    def test_refresh_signature(self):
+        """refresh_signature() returns the translated locator via a HEAD request.
+
+        The stubbed response carries the locally signed locator in its
+        X-Keep-Locator header; the call made with the remote (+R)
+        locator must return that value.
+        """
+        digest = '6f5902ac237024bdd0c176cb93063dc4+11'
+        signature = 'da39a3ee5e6b4b0d3255bfef95601890afd80709@53bed294'
+        signed_local = '{}+A{}'.format(digest, signature)
+        signed_remote = '{}+R{}'.format(digest, signature)
+        services = self.mock_keep_services(count=1)
+        with tutil.mock_keep_responses('', 200, **{'X-Keep-Locator': signed_local}):
+            client = arvados.KeepClient(api_client=services)
+            # The translated locator is what the caller gets back.
+            refreshed = client.refresh_signature(signed_remote)
+            self.assertEqual(signed_local, refreshed)
+            # Swap in a mock for the request helper so the method and
+            # headers used by refresh_signature() can be inspected.
+            client._get_or_head = mock.MagicMock()
+            client.refresh_signature(signed_remote)
+            call_args, call_kwargs = client._get_or_head.call_args_list[0]
+            self.assertIn(signed_remote, call_args)
+            self.assertEqual("HEAD", call_kwargs['method'])
+            self.assertIn('X-Keep-Signature', call_kwargs['headers'])
+
# test_*_timeout verify that KeepClient instructs pycurl to use
# the appropriate connection and read timeouts. They don't care
# whether pycurl actually exhibits the expected timeout behavior
with tutil.mock_keep_responses(force_timeout, 0) as mock:
keep_client = arvados.KeepClient(api_client=api_client)
with self.assertRaises(arvados.errors.KeepWriteError):
- keep_client.put('foo')
+ keep_client.put(b'foo')
self.assertEqual(
mock.responses[0].getopt(pycurl.CONNECTTIMEOUT_MS),
int(arvados.KeepClient.DEFAULT_TIMEOUT[0]*1000))
int(arvados.KeepClient.DEFAULT_TIMEOUT[0]*1000))
self.assertEqual(
mock.responses[0].getopt(pycurl.LOW_SPEED_TIME),
- int(arvados.KeepClient.DEFAULT_TIMEOUT[1]))
+ None)
self.assertEqual(
mock.responses[0].getopt(pycurl.LOW_SPEED_LIMIT),
- int(arvados.KeepClient.DEFAULT_TIMEOUT[2]))
+ None)
def test_proxy_get_timeout(self):
api_client = self.mock_keep_services(service_type='proxy', count=1)
int(arvados.KeepClient.DEFAULT_PROXY_TIMEOUT[0]*1000))
self.assertEqual(
mock.responses[0].getopt(pycurl.LOW_SPEED_TIME),
- int(arvados.KeepClient.DEFAULT_PROXY_TIMEOUT[1]))
+ None)
self.assertEqual(
mock.responses[0].getopt(pycurl.LOW_SPEED_LIMIT),
- int(arvados.KeepClient.DEFAULT_PROXY_TIMEOUT[2]))
+ None)
def test_proxy_put_timeout(self):
api_client = self.mock_keep_services(service_type='proxy', count=1)
def check_errors_from_last_retry(self, verb, exc_class):
api_client = self.mock_keep_services(count=2)
req_mock = tutil.mock_keep_responses(
- "retry error reporting test", 500, 500, 403, 403)
+ "retry error reporting test", 500, 500, 500, 500, 500, 500, 502, 502)
with req_mock, tutil.skip_sleep, \
self.assertRaises(exc_class) as err_check:
keep_client = arvados.KeepClient(api_client=api_client)
getattr(keep_client, verb)('d41d8cd98f00b204e9800998ecf8427e+0',
num_retries=3)
- self.assertEqual([403, 403], [
+ self.assertEqual([502, 502], [
getattr(error, 'status_code', None)
- for error in err_check.exception.request_errors().itervalues()])
+ for error in err_check.exception.request_errors().values()])
+ self.assertRegex(str(err_check.exception), r'failed to (read|write) .* after 4 attempts')
def test_get_error_reflects_last_retry(self):
self.check_errors_from_last_retry('get', arvados.errors.KeepReadError)
self.assertEqual(0, len(exc_check.exception.request_errors()))
def test_oddball_service_get(self):
- body = 'oddball service get'
+ body = b'oddball service get'
api_client = self.mock_keep_services(service_type='fancynewblobstore')
with tutil.mock_keep_responses(body, 200):
keep_client = arvados.KeepClient(api_client=api_client)
self.assertEqual(body, actual)
def test_oddball_service_put(self):
- body = 'oddball service put'
+ body = b'oddball service put'
pdh = tutil.str_keep_locator(body)
api_client = self.mock_keep_services(service_type='fancynewblobstore')
with tutil.mock_keep_responses(pdh, 200):
self.assertEqual(pdh, actual)
def test_oddball_service_writer_count(self):
- body = 'oddball service writer count'
+ body = b'oddball service writer count'
pdh = tutil.str_keep_locator(body)
api_client = self.mock_keep_services(service_type='fancynewblobstore',
count=4)
self.assertEqual(1, req_mock.call_count)
+@tutil.skip_sleep
+class KeepClientCacheTestCase(unittest.TestCase, tutil.ApiClientMock):
+    """Check which KeepClient requests are served from its cache.
+
+    KeepService.get is patched in every test so the number of calls
+    that reach a Keep service can be counted.
+    """
+
+    def setUp(self):
+        """Build a KeepClient backed by two mocked Keep services."""
+        self.api_client = self.mock_keep_services(count=2)
+        self.keep_client = arvados.KeepClient(api_client=self.api_client)
+        self.data = b'xyzzy'
+        self.locator = '1271ed5ef305aadabc605b1609e24c52'
+
+    @mock.patch('arvados.KeepClient.KeepService.get')
+    def test_get_request_cache(self, get_mock):
+        """A repeated GET for the same locator reaches the service once."""
+        with tutil.mock_keep_responses(self.data, 200, 200):
+            self.keep_client.get(self.locator)
+            self.keep_client.get(self.locator)
+        # Request already cached, don't require more than one request.
+        # call_count is compared directly (as in the HEAD test below)
+        # instead of relying on Mock.assert_called_once(), which is not
+        # available in every mock release.
+        self.assertEqual(1, get_mock.call_count)
+
+    @mock.patch('arvados.KeepClient.KeepService.get')
+    def test_head_request_cache(self, get_mock):
+        """Every HEAD request reaches the service."""
+        with tutil.mock_keep_responses(self.data, 200, 200):
+            self.keep_client.head(self.locator)
+            self.keep_client.head(self.locator)
+        # Don't cache HEAD requests so that they're not confused with GET reqs
+        self.assertEqual(2, get_mock.call_count)
+
+    @mock.patch('arvados.KeepClient.KeepService.get')
+    def test_head_and_then_get_return_different_responses(self, get_mock):
+        """A GET issued after a HEAD must not be answered with the HEAD result."""
+        get_mock.side_effect = ['first response', 'second response']
+        with tutil.mock_keep_responses(self.data, 200, 200):
+            head_resp = self.keep_client.head(self.locator)
+            get_resp = self.keep_client.get(self.locator)
+        self.assertEqual('first response', head_resp)
+        # First response was not cached because it was from a HEAD request.
+        self.assertNotEqual(head_resp, get_resp)
+
+@tutil.skip_sleep
+class KeepStorageClassesTestCase(unittest.TestCase, tutil.ApiClientMock):
+    """Exercise the storage-class headers KeepClient.put() sends and reads.
+
+    Requests are inspected through the X-Keep-Storage-Classes header
+    recorded on the stubbed curl handles; responses are driven by the
+    x-keep-replicas-stored and x-keep-storage-classes-confirmed headers.
+    """
+
+    def setUp(self):
+        """Build a KeepClient backed by two mocked Keep services."""
+        self.api_client = self.mock_keep_services(count=2)
+        self.keep_client = arvados.KeepClient(api_client=self.api_client)
+        self.data = b'xyzzy'
+        self.locator = '1271ed5ef305aadabc605b1609e24c52'
+
+    def test_multiple_default_storage_classes_req_header(self):
+        """All classes flagged Default in the config are requested."""
+        api_mock = self.api_client_mock()
+        api_mock.config.return_value = {
+            'StorageClasses': {
+                'foo': { 'Default': True },
+                'bar': { 'Default': True },
+                'baz': { 'Default': False }
+            }
+        }
+        api_client = self.mock_keep_services(api_mock=api_mock, count=2)
+        keep_client = arvados.KeepClient(api_client=api_client)
+        resp_hdr = {
+            'x-keep-storage-classes-confirmed': 'foo=1, bar=1',
+            'x-keep-replicas-stored': 1
+        }
+        with tutil.mock_keep_responses(self.locator, 200, **resp_hdr) as mock:
+            keep_client.put(self.data, copies=1)
+            req_hdr = mock.responses[0]
+            self.assertIn(
+                'X-Keep-Storage-Classes: bar, foo', req_hdr.getopt(pycurl.HTTPHEADER))
+
+    def test_storage_classes_req_header(self):
+        """The request header lists the requested classes, or 'default'."""
+        self.assertEqual(
+            self.api_client.config()['StorageClasses'],
+            {'default': {'Default': True}})
+        cases = [
+            # requested, expected
+            [['foo'], 'X-Keep-Storage-Classes: foo'],
+            [['bar', 'foo'], 'X-Keep-Storage-Classes: bar, foo'],
+            [[], 'X-Keep-Storage-Classes: default'],
+            [None, 'X-Keep-Storage-Classes: default'],
+        ]
+        for req_classes, expected_header in cases:
+            headers = {'x-keep-replicas-stored': 1}
+            # None and [] both mean "no explicit classes": the stubbed
+            # response then confirms the 'default' class instead.
+            if not req_classes:
+                confirmed_hdr = 'default=1'
+            else:
+                confirmed_hdr = ', '.join(["{}=1".format(cls) for cls in req_classes])
+            headers.update({'x-keep-storage-classes-confirmed': confirmed_hdr})
+            with tutil.mock_keep_responses(self.locator, 200, **headers) as mock:
+                self.keep_client.put(self.data, copies=1, classes=req_classes)
+                req_hdr = mock.responses[0]
+                self.assertIn(expected_header, req_hdr.getopt(pycurl.HTTPHEADER))
+
+    def test_partial_storage_classes_put(self):
+        """A retry only asks for the classes that are still unconfirmed."""
+        headers = {
+            'x-keep-replicas-stored': 1,
+            'x-keep-storage-classes-confirmed': 'foo=1'}
+        with tutil.mock_keep_responses(self.locator, 200, 503, **headers) as mock:
+            with self.assertRaises(arvados.errors.KeepWriteError):
+                self.keep_client.put(self.data, copies=1, classes=['foo', 'bar'])
+            # 1st request, both classes pending
+            req1_headers = mock.responses[0].getopt(pycurl.HTTPHEADER)
+            self.assertIn('X-Keep-Storage-Classes: bar, foo', req1_headers)
+            # 2nd try, 'foo' class already satisfied
+            req2_headers = mock.responses[1].getopt(pycurl.HTTPHEADER)
+            self.assertIn('X-Keep-Storage-Classes: bar', req2_headers)
+
+    def test_successful_storage_classes_put_requests(self):
+        """put() succeeds with the expected number of requests per case."""
+        cases = [
+            # wanted_copies, wanted_classes, confirmed_copies, confirmed_classes, expected_requests
+            [ 1, ['foo'], 1, 'foo=1', 1],
+            [ 1, ['foo'], 2, 'foo=2', 1],
+            [ 2, ['foo'], 2, 'foo=2', 1],
+            [ 2, ['foo'], 1, 'foo=1', 2],
+            [ 1, ['foo', 'bar'], 1, 'foo=1, bar=1', 1],
+            [ 1, ['foo', 'bar'], 2, 'foo=2, bar=2', 1],
+            [ 2, ['foo', 'bar'], 2, 'foo=2, bar=2', 1],
+            [ 2, ['foo', 'bar'], 1, 'foo=1, bar=1', 2],
+            [ 1, ['foo', 'bar'], 1, None, 1],
+            [ 1, ['foo'], 1, None, 1],
+            [ 2, ['foo'], 2, None, 1],
+            [ 2, ['foo'], 1, None, 2],
+        ]
+        for w_copies, w_classes, c_copies, c_classes, e_reqs in cases:
+            headers = {'x-keep-replicas-stored': c_copies}
+            # A None entry means the response carries no confirmation header.
+            if c_classes is not None:
+                headers.update({'x-keep-storage-classes-confirmed': c_classes})
+            with tutil.mock_keep_responses(self.locator, 200, 200, **headers) as mock:
+                case_desc = 'wanted_copies={}, wanted_classes="{}", confirmed_copies={}, confirmed_classes="{}", expected_requests={}'.format(w_copies, ', '.join(w_classes), c_copies, c_classes, e_reqs)
+                self.assertEqual(self.locator,
+                    self.keep_client.put(self.data, copies=w_copies, classes=w_classes),
+                    case_desc)
+                self.assertEqual(e_reqs, mock.call_count, case_desc)
+
+    def test_failed_storage_classes_put_requests(self):
+        """put() raises KeepWriteError when the wanted classes/copies are not met."""
+        cases = [
+            # wanted_copies, wanted_classes, confirmed_copies, confirmed_classes, return_code
+            [ 1, ['foo'], 1, 'bar=1', 200],
+            [ 1, ['foo'], 1, None, 503],
+            [ 2, ['foo'], 1, 'bar=1, foo=0', 200],
+            [ 3, ['foo'], 1, 'bar=1, foo=1', 200],
+            [ 3, ['foo', 'bar'], 1, 'bar=2, foo=1', 200],
+        ]
+        for w_copies, w_classes, c_copies, c_classes, return_code in cases:
+            headers = {'x-keep-replicas-stored': c_copies}
+            # A None entry means the response carries no confirmation header.
+            if c_classes is not None:
+                headers.update({'x-keep-storage-classes-confirmed': c_classes})
+            with tutil.mock_keep_responses(self.locator, return_code, return_code, **headers):
+                case_desc = 'wanted_copies={}, wanted_classes="{}", confirmed_copies={}, confirmed_classes="{}"'.format(w_copies, ', '.join(w_classes), c_copies, c_classes)
+                with self.assertRaises(arvados.errors.KeepWriteError, msg=case_desc):
+                    self.keep_client.put(self.data, copies=w_copies, classes=w_classes)
+
+@tutil.skip_sleep
+class KeepXRequestIdTestCase(unittest.TestCase, tutil.ApiClientMock):
+    """Verify the X-Request-Id header KeepClient sends on put, get and head."""
+
+    def setUp(self):
+        """Create a client with two mocked services and a known request ID."""
+        self.api_client = self.mock_keep_services(count=2)
+        self.keep_client = arvados.KeepClient(api_client=self.api_client)
+        self.data = b'xyzzy'
+        self.locator = '1271ed5ef305aadabc605b1609e24c52'
+        self.test_id = arvados.util.new_request_id()
+        self.assertRegex(self.test_id, r'^req-[a-z0-9]{20}$')
+        # The mocked API client would otherwise answer request_id with
+        # an auto-created MagicMock attribute, so reset it to None
+        # explicitly.
+        self.api_client.request_id = None
+
+    def test_default_to_api_client_request_id(self):
+        """Without an explicit ID, the API client's request_id is sent."""
+        self.api_client.request_id = self.test_id
+        with tutil.mock_keep_responses(self.locator, 200, 200) as put_stub:
+            self.keep_client.put(self.data)
+        self.assertEqual(2, len(put_stub.responses))
+        for sent in put_stub.responses:
+            self.assertProvidedRequestId(sent)
+
+        with tutil.mock_keep_responses(self.data, 200) as get_stub:
+            self.keep_client.get(self.locator)
+        self.assertProvidedRequestId(get_stub.responses[0])
+
+        with tutil.mock_keep_responses(b'', 200) as head_stub:
+            self.keep_client.head(self.locator)
+        self.assertProvidedRequestId(head_stub.responses[0])
+
+    def test_explicit_request_id(self):
+        """A request_id keyword argument is sent on every request."""
+        with tutil.mock_keep_responses(self.locator, 200, 200) as put_stub:
+            self.keep_client.put(self.data, request_id=self.test_id)
+        self.assertEqual(2, len(put_stub.responses))
+        for sent in put_stub.responses:
+            self.assertProvidedRequestId(sent)
+
+        with tutil.mock_keep_responses(self.data, 200) as get_stub:
+            self.keep_client.get(self.locator, request_id=self.test_id)
+        self.assertProvidedRequestId(get_stub.responses[0])
+
+        with tutil.mock_keep_responses(b'', 200) as head_stub:
+            self.keep_client.head(self.locator, request_id=self.test_id)
+        self.assertProvidedRequestId(head_stub.responses[0])
+
+    def test_automatic_request_id(self):
+        """With no ID available anywhere, a fresh one is generated."""
+        with tutil.mock_keep_responses(self.locator, 200, 200) as put_stub:
+            self.keep_client.put(self.data)
+        self.assertEqual(2, len(put_stub.responses))
+        for sent in put_stub.responses:
+            self.assertAutomaticRequestId(sent)
+
+        with tutil.mock_keep_responses(self.data, 200) as get_stub:
+            self.keep_client.get(self.locator)
+        self.assertAutomaticRequestId(get_stub.responses[0])
+
+        with tutil.mock_keep_responses(b'', 200) as head_stub:
+            self.keep_client.head(self.locator)
+        self.assertAutomaticRequestId(head_stub.responses[0])
+
+    def assertAutomaticRequestId(self, resp):
+        """Assert resp carries a well-formed X-Request-Id other than test_id."""
+        prefix = 'X-Request-Id: '
+        candidates = [line for line in resp.getopt(pycurl.HTTPHEADER)
+                      if line.startswith(prefix)]
+        hdr = candidates[0]
+        self.assertNotEqual(hdr, prefix + self.test_id)
+        self.assertRegex(hdr, r'^X-Request-Id: req-[a-z0-9]{20}$')
+
+    def assertProvidedRequestId(self, resp):
+        """Assert resp carries an X-Request-Id header equal to test_id."""
+        wanted = 'X-Request-Id: ' + self.test_id
+        self.assertIn(wanted, resp.getopt(pycurl.HTTPHEADER))
+
+
@tutil.skip_sleep
class KeepClientRendezvousTestCase(unittest.TestCase, tutil.ApiClientMock):
list('9d81c02e76a3bf54'),
]
self.blocks = [
- "{:064x}".format(x)
+ "{:064x}".format(x).encode()
for x in range(len(self.expected_order))]
self.hashes = [
hashlib.md5(self.blocks[x]).hexdigest()
self.assertRaises(arvados.errors.KeepRequestError):
op(i)
got_order = [
- re.search(r'//\[?keep0x([0-9a-f]+)', resp.getopt(pycurl.URL)).group(1)
+ re.search(r'//\[?keep0x([0-9a-f]+)', resp.getopt(pycurl.URL).decode()).group(1)
for resp in mock.responses]
self.assertEqual(self.expected_order[i]*2, got_order)
self.assertRaises(arvados.errors.KeepWriteError):
self.keep_client.put(self.blocks[i], num_retries=2, copies=copies)
got_order = [
- re.search(r'//\[?keep0x([0-9a-f]+)', resp.getopt(pycurl.URL)).group(1)
+ re.search(r'//\[?keep0x([0-9a-f]+)', resp.getopt(pycurl.URL).decode()).group(1)
for resp in mock.responses]
# With T threads racing to make requests, the position
# of a given server in the sequence of HTTP requests
def test_probe_waste_adding_one_server(self):
hashes = [
- hashlib.md5("{:064x}".format(x)).hexdigest() for x in range(100)]
+ hashlib.md5("{:064x}".format(x).encode()).hexdigest() for x in range(100)]
initial_services = 12
self.api_client = self.mock_keep_services(count=initial_services)
self.keep_client = arvados.KeepClient(api_client=self.api_client)
max_penalty))
def check_64_zeros_error_order(self, verb, exc_class):
- data = '0' * 64
+ data = b'0' * 64
if verb == 'get':
data = tutil.str_keep_locator(data)
# Arbitrary port number:
keep_client = arvados.KeepClient(api_client=api_client)
with mock.patch('pycurl.Curl') as curl_mock, \
self.assertRaises(exc_class) as err_check:
- curl_mock.return_value.side_effect = socket.timeout
+ curl_mock.return_value = tutil.FakeCurl.make(code=500, body=b'')
getattr(keep_client, verb)(data)
- urls = [urlparse.urlparse(url)
+ urls = [urllib.parse.urlparse(url)
for url in err_check.exception.request_errors()]
self.assertEqual([('keep0x' + c, aport) for c in '3eab2d5fc9681074'],
[(url.hostname, url.port) for url in urls])
self.check_64_zeros_error_order('put', arvados.errors.KeepWriteError)
-class KeepClientTimeout(unittest.TestCase, tutil.ApiClientMock):
+class KeepClientTimeout(keepstub.StubKeepServers, unittest.TestCase):
# BANDWIDTH_LOW_LIM must be less than len(DATA) so we can transfer
# 1s worth of data and then trigger bandwidth errors before running
# out of data.
- DATA = 'x'*2**11
+ DATA = b'x'*2**11
BANDWIDTH_LOW_LIM = 1024
TIMEOUT_TIME = 1.0
delta = round(time.time() - self.t0, 3)
self.assertGreaterEqual(delta, self.tmin)
- def setUp(self):
- sock = socket.socket()
- sock.bind(('0.0.0.0', 0))
- self.port = sock.getsockname()[1]
- sock.close()
- self.server = keepstub.Server(('0.0.0.0', self.port), keepstub.Handler)
- self.thread = threading.Thread(target=self.server.serve_forever)
- self.thread.daemon = True # Exit thread if main proc exits
- self.thread.start()
- self.api_client = self.mock_keep_services(
- count=1,
- service_host='localhost',
- service_port=self.port,
- )
-
- def tearDown(self):
- self.server.shutdown()
-
def keepClient(self, timeouts=(0.1, TIMEOUT_TIME, BANDWIDTH_LOW_LIM)):
return arvados.KeepClient(
api_client=self.api_client,
loc = kc.put(self.DATA, copies=1, num_retries=0)
self.server.setbandwidth(0.5*self.BANDWIDTH_LOW_LIM)
with self.assertTakesGreater(self.TIMEOUT_TIME):
- with self.assertRaises(arvados.errors.KeepReadError) as e:
+ with self.assertRaises(arvados.errors.KeepReadError):
kc.get(loc, num_retries=0)
with self.assertTakesGreater(self.TIMEOUT_TIME):
with self.assertRaises(arvados.errors.KeepWriteError):
kc = self.keepClient()
loc = kc.put(self.DATA, copies=1, num_retries=0)
self.server.setbandwidth(self.BANDWIDTH_LOW_LIM)
- self.server.setdelays(response=self.TIMEOUT_TIME)
+ # Note the actual delay must be 1s longer than the low speed
+ # limit interval in order for curl to detect it reliably.
+ self.server.setdelays(response=self.TIMEOUT_TIME+1)
with self.assertTakesGreater(self.TIMEOUT_TIME):
- with self.assertRaises(arvados.errors.KeepReadError) as e:
+ with self.assertRaises(arvados.errors.KeepReadError):
kc.get(loc, num_retries=0)
with self.assertTakesGreater(self.TIMEOUT_TIME):
with self.assertRaises(arvados.errors.KeepWriteError):
kc.put(self.DATA, copies=1, num_retries=0)
with self.assertTakesGreater(self.TIMEOUT_TIME):
- with self.assertRaises(arvados.errors.KeepReadError) as e:
- kc.head(loc, num_retries=0)
+ kc.head(loc, num_retries=0)
def test_low_bandwidth_with_server_mid_delay_failure(self):
kc = self.keepClient()
loc = kc.put(self.DATA, copies=1, num_retries=0)
self.server.setbandwidth(self.BANDWIDTH_LOW_LIM)
- self.server.setdelays(mid_write=self.TIMEOUT_TIME, mid_read=self.TIMEOUT_TIME)
+ # Note the actual delay must be 1s longer than the low speed
+ # limit interval in order for curl to detect it reliably.
+ self.server.setdelays(mid_write=self.TIMEOUT_TIME+1, mid_read=self.TIMEOUT_TIME+1)
with self.assertTakesGreater(self.TIMEOUT_TIME):
with self.assertRaises(arvados.errors.KeepReadError) as e:
kc.get(loc, num_retries=0)
code=200, body='foo', headers={'Content-Length': 3})
self.mock_disks_and_gateways()
locator = 'acbd18db4cc2f85cedef654fccc4a4d8+3+K@' + self.gateways[0]['uuid']
- self.assertEqual('foo', self.keepClient.get(locator))
+ self.assertEqual(b'foo', self.keepClient.get(locator))
self.assertEqual(self.gateway_roots[0]+locator,
- MockCurl.return_value.getopt(pycurl.URL))
+ MockCurl.return_value.getopt(pycurl.URL).decode())
self.assertEqual(True, self.keepClient.head(locator))
@mock.patch('pycurl.Curl')
# Gateways are tried first, in the order given.
for i, root in enumerate(self.gateway_roots):
self.assertEqual(root+locator,
- mocks[i].getopt(pycurl.URL))
+ mocks[i].getopt(pycurl.URL).decode())
# Disk services are tried next.
for i in range(gateways, gateways+disks):
- self.assertRegexpMatches(
- mocks[i].getopt(pycurl.URL),
+ self.assertRegex(
+ mocks[i].getopt(pycurl.URL).decode(),
r'keep0x')
@mock.patch('pycurl.Curl')
gateways = 4
disks = 3
mocks = [
- tutil.FakeCurl.make(code=404, body='')
+ tutil.FakeCurl.make(code=404, body=b'')
for _ in range(gateways+disks)
]
MockCurl.side_effect = tutil.queue_with(mocks)
# Gateways are tried first, in the order given.
for i, root in enumerate(self.gateway_roots):
self.assertEqual(root+locator,
- mocks[i].getopt(pycurl.URL))
+ mocks[i].getopt(pycurl.URL).decode())
# Disk services are tried next.
for i in range(gateways, gateways+disks):
- self.assertRegexpMatches(
- mocks[i].getopt(pycurl.URL),
+ self.assertRegex(
+ mocks[i].getopt(pycurl.URL).decode(),
r'keep0x')
@mock.patch('pycurl.Curl')
def test_get_with_remote_proxy_hint(self, MockCurl):
MockCurl.return_value = tutil.FakeCurl.make(
- code=200, body='foo', headers={'Content-Length': 3})
+ code=200, body=b'foo', headers={'Content-Length': 3})
self.mock_disks_and_gateways()
locator = 'acbd18db4cc2f85cedef654fccc4a4d8+3+K@xyzzy'
- self.assertEqual('foo', self.keepClient.get(locator))
+ self.assertEqual(b'foo', self.keepClient.get(locator))
self.assertEqual('https://keep.xyzzy.arvadosapi.com/'+locator,
- MockCurl.return_value.getopt(pycurl.URL))
+ MockCurl.return_value.getopt(pycurl.URL).decode())
@mock.patch('pycurl.Curl')
def test_head_with_remote_proxy_hint(self, MockCurl):
MockCurl.return_value = tutil.FakeCurl.make(
- code=200, body='foo', headers={'Content-Length': 3})
+ code=200, body=b'foo', headers={'Content-Length': 3})
self.mock_disks_and_gateways()
locator = 'acbd18db4cc2f85cedef654fccc4a4d8+3+K@xyzzy'
self.assertEqual(True, self.keepClient.head(locator))
self.assertEqual('https://keep.xyzzy.arvadosapi.com/'+locator,
- MockCurl.return_value.getopt(pycurl.URL))
+ MockCurl.return_value.getopt(pycurl.URL).decode())
class KeepClientRetryTestMixin(object):
# out appropriate methods in the client.
PROXY_ADDR = 'http://[%s]:65535/' % (tutil.TEST_HOST,)
- TEST_DATA = 'testdata'
+ TEST_DATA = b'testdata'
TEST_LOCATOR = 'ef654c40ab4f1747fc699915d4f70902+8'
def setUp(self):
def check_exception(self, error_class=None, *args, **kwargs):
if error_class is None:
error_class = self.DEFAULT_EXCEPTION
- self.assertRaises(error_class, self.run_method, *args, **kwargs)
+ with self.assertRaises(error_class) as err:
+ self.run_method(*args, **kwargs)
+ return err
def test_immediate_success(self):
with self.TEST_PATCHER(self.DEFAULT_EXPECT, 200):
def test_error_after_retries_exhausted(self):
with self.TEST_PATCHER(self.DEFAULT_EXPECT, 500, 500, 200):
- self.check_exception(num_retries=1)
+ err = self.check_exception(num_retries=1)
+ self.assertRegex(str(err.exception), r'failed to .* after 2 attempts')
def test_num_retries_instance_fallback(self):
self.client_kwargs['num_retries'] = 3
self.check_exception(copies=2, num_retries=3)
-class KeepClientAvoidClientOverreplicationTestCase(unittest.TestCase, tutil.ApiClientMock):
-
-
+class AvoidOverreplication(unittest.TestCase, tutil.ApiClientMock):
+
class FakeKeepService(object):
- def __init__(self, delay, will_succeed, replicas=1):
+ def __init__(self, delay, will_succeed=False, will_raise=None, replicas=1):
self.delay = delay
- self.success = will_succeed
+ self.will_succeed = will_succeed
+ self.will_raise = will_raise
self._result = {}
self._result['headers'] = {}
self._result['headers']['x-keep-replicas-stored'] = str(replicas)
+ self._result['headers']['x-keep-storage-classes-confirmed'] = 'default={}'.format(replicas)
self._result['body'] = 'foobar'
-
- def put(self, data_hash, data, timeout):
+
+ def put(self, data_hash, data, timeout, headers):
time.sleep(self.delay)
- return self.success
-
+ if self.will_raise is not None:
+ raise self.will_raise
+ return self.will_succeed
+
def last_result(self):
- return self._result
-
+ if self.will_succeed:
+ return self._result
+ else:
+ return {"status_code": 500, "body": "didn't succeed"}
+
def finished(self):
return False
-
-
- def test_only_write_enough_on_success(self):
- copies = 3
- pool = arvados.KeepClient.KeepWriterThreadPool(
+
+ def setUp(self):
+ self.copies = 3
+ self.pool = arvados.KeepClient.KeepWriterThreadPool(
data = 'foo',
data_hash = 'acbd18db4cc2f85cedef654fccc4a4d8+3',
- max_service_replicas = copies,
- copies = copies
+ max_service_replicas = self.copies,
+ copies = self.copies
)
+
+ def test_only_write_enough_on_success(self):
for i in range(10):
ks = self.FakeKeepService(delay=i/10.0, will_succeed=True)
- pool.add_task(ks, None)
- pool.join()
- self.assertEqual(pool.done(), copies)
+ self.pool.add_task(ks, None)
+ self.pool.join()
+ self.assertEqual(self.pool.done(), (self.copies, []))
def test_only_write_enough_on_partial_success(self):
- copies = 3
- pool = arvados.KeepClient.KeepWriterThreadPool(
- data = 'foo',
- data_hash = 'acbd18db4cc2f85cedef654fccc4a4d8+3',
- max_service_replicas = copies,
- copies = copies
- )
for i in range(5):
ks = self.FakeKeepService(delay=i/10.0, will_succeed=False)
- pool.add_task(ks, None)
+ self.pool.add_task(ks, None)
+ ks = self.FakeKeepService(delay=i/10.0, will_succeed=True)
+ self.pool.add_task(ks, None)
+ self.pool.join()
+ self.assertEqual(self.pool.done(), (self.copies, []))
+
+ def test_only_write_enough_when_some_crash(self):
+ for i in range(5):
+ ks = self.FakeKeepService(delay=i/10.0, will_raise=Exception())
+ self.pool.add_task(ks, None)
+ ks = self.FakeKeepService(delay=i/10.0, will_succeed=True)
+ self.pool.add_task(ks, None)
+ self.pool.join()
+ self.assertEqual(self.pool.done(), (self.copies, []))
+
+ def test_fail_when_too_many_crash(self):
+ for i in range(self.copies+1):
+ ks = self.FakeKeepService(delay=i/10.0, will_raise=Exception())
+ self.pool.add_task(ks, None)
+ for i in range(self.copies-1):
ks = self.FakeKeepService(delay=i/10.0, will_succeed=True)
- pool.add_task(ks, None)
- pool.join()
- self.assertEqual(pool.done(), copies)
-
+ self.pool.add_task(ks, None)
+ self.pool.join()
+ self.assertEqual(self.pool.done(), (self.copies-1, []))
+
@tutil.skip_sleep
class RetryNeedsMultipleServices(unittest.TestCase, tutil.ApiClientMock):
with self.assertRaises(arvados.errors.KeepWriteError):
self.keep_client.put('foo', num_retries=1, copies=2)
self.assertEqual(2, req_mock.call_count)
+
+class KeepClientAPIErrorTest(unittest.TestCase):
+ def test_api_fail(self):
+ class ApiMock(object):
+ def __getattr__(self, r):
+ if r == "api_token":
+ return "abc"
+ elif r == "insecure":
+ return False
+ elif r == "config":
+ return lambda: {}
+ else:
+ raise arvados.errors.KeepReadError()
+ keep_client = arvados.KeepClient(api_client=ApiMock(),
+ proxy='', local_store='')
+
+ # This test guards against a bug where, if an API (not
+ # keepstore) exception is thrown as part of a get(), the next
+ # attempt to get that same block results in a deadlock.
+ # This is why there are two get()s in a row. Unfortunately,
+ # the failure mode for this test is that the test suite
+ # deadlocks; there isn't a good way to avoid that without
+ # adding a special case that has no use except for this test.
+
+ with self.assertRaises(arvados.errors.KeepReadError):
+ keep_client.get("acbd18db4cc2f85cedef654fccc4a4d8+3")
+ with self.assertRaises(arvados.errors.KeepReadError):
+ keep_client.get("acbd18db4cc2f85cedef654fccc4a4d8+3")