10467: Merge branch 'master' into 10467-client-disconnect
[arvados.git] / sdk / python / tests / test_keep_client.py
index 2b380f066641ec25ac980b6f9183df94b728bb5d..85b5bc81f00902a2a816d606bbc2cecff06de289 100644 (file)
@@ -69,6 +69,7 @@ class KeepTestCase(run_test_server.TestCaseWithServers):
                          blob_str,
                          'wrong content from Keep.get(md5(<binarydata>))')
 
+    @unittest.skip("unreliable test - please fix and close #8752")
     def test_KeepSingleCopyRWTest(self):
         blob_str = '\xff\xfe\xfd\xfc\x00\x01\x02\x03'
         blob_locator = self.keep_client.put(blob_str, copies=1)
@@ -103,6 +104,17 @@ class KeepTestCase(run_test_server.TestCaseWithServers):
             # Must be a string type
             self.keep_client.put({})
 
+    def test_KeepHeadTest(self):
+        locator = self.keep_client.put('test_head')
+        self.assertRegexpMatches(
+            locator,
+            '^b9a772c7049325feb7130fff1f8333e9\+9',
+            'wrong md5 hash from Keep.put for "test_head": ' + locator)
+        self.assertEqual(True, self.keep_client.head(locator))
+        self.assertEqual(self.keep_client.get(locator),
+                         'test_head',
+                         'wrong content from Keep.get for "test_head"')
+
 class KeepPermissionTestCase(run_test_server.TestCaseWithServers):
     MAIN_SERVER = {}
     KEEP_SERVER = {'blob_signing_key': 'abcdefghijk0123456789',
@@ -228,8 +240,8 @@ class KeepProxyTestCase(run_test_server.TestCaseWithServers):
         super(KeepProxyTestCase, self).tearDown()
 
     def test_KeepProxyTest1(self):
-        # Will use ARVADOS_KEEP_PROXY environment variable that is set by
-        # setUpClass().
+        # Will use ARVADOS_KEEP_SERVICES environment variable that
+        # is set by setUpClass().
         keep_client = arvados.KeepClient(api_client=self.api_client,
                                          local_store='')
         baz_locator = keep_client.put('baz')
@@ -258,6 +270,22 @@ class KeepProxyTestCase(run_test_server.TestCaseWithServers):
                          'wrong content from Keep.get(md5("baz2"))')
         self.assertTrue(keep_client.using_proxy)
 
+    def test_KeepProxyTestMultipleURIs(self):
+        # Test using ARVADOS_KEEP_SERVICES env var overriding any
+        # existing proxy setting and setting multiple proxies
+        arvados.config.settings()['ARVADOS_KEEP_SERVICES'] = 'http://10.0.0.1 https://foo.example.org:1234/'
+        keep_client = arvados.KeepClient(api_client=self.api_client,
+                                         local_store='')
+        uris = [x['_service_root'] for x in keep_client._keep_services]
+        self.assertEqual(uris, ['http://10.0.0.1/',
+                                'https://foo.example.org:1234/'])
+
+    def test_KeepProxyTestInvalidURI(self):
+        arvados.config.settings()['ARVADOS_KEEP_SERVICES'] = 'bad.uri.org'
+        with self.assertRaises(arvados.errors.ArgumentError):
+            keep_client = arvados.KeepClient(api_client=self.api_client,
+                                             local_store='')
+
 
 class KeepClientServiceTestCase(unittest.TestCase, tutil.ApiClientMock):
     def get_service_roots(self, api_client):
@@ -317,6 +345,23 @@ class KeepClientServiceTestCase(unittest.TestCase, tutil.ApiClientMock):
                 mock.responses[0].getopt(pycurl.LOW_SPEED_LIMIT),
                 int(arvados.KeepClient.DEFAULT_TIMEOUT[2]))
 
+    def test_head_timeout(self):
+        api_client = self.mock_keep_services(count=1)
+        force_timeout = socket.timeout("timed out")
+        with tutil.mock_keep_responses(force_timeout, 0) as mock:
+            keep_client = arvados.KeepClient(api_client=api_client)
+            with self.assertRaises(arvados.errors.KeepReadError):
+                keep_client.head('ffffffffffffffffffffffffffffffff')
+            self.assertEqual(
+                mock.responses[0].getopt(pycurl.CONNECTTIMEOUT_MS),
+                int(arvados.KeepClient.DEFAULT_TIMEOUT[0]*1000))
+            self.assertEqual(
+                mock.responses[0].getopt(pycurl.LOW_SPEED_TIME),
+                int(arvados.KeepClient.DEFAULT_TIMEOUT[1]))
+            self.assertEqual(
+                mock.responses[0].getopt(pycurl.LOW_SPEED_LIMIT),
+                int(arvados.KeepClient.DEFAULT_TIMEOUT[2]))
+
     def test_proxy_get_timeout(self):
         api_client = self.mock_keep_services(service_type='proxy', count=1)
         force_timeout = socket.timeout("timed out")
@@ -334,6 +379,23 @@ class KeepClientServiceTestCase(unittest.TestCase, tutil.ApiClientMock):
                 mock.responses[0].getopt(pycurl.LOW_SPEED_LIMIT),
                 int(arvados.KeepClient.DEFAULT_PROXY_TIMEOUT[2]))
 
+    def test_proxy_head_timeout(self):
+        api_client = self.mock_keep_services(service_type='proxy', count=1)
+        force_timeout = socket.timeout("timed out")
+        with tutil.mock_keep_responses(force_timeout, 0) as mock:
+            keep_client = arvados.KeepClient(api_client=api_client)
+            with self.assertRaises(arvados.errors.KeepReadError):
+                keep_client.head('ffffffffffffffffffffffffffffffff')
+            self.assertEqual(
+                mock.responses[0].getopt(pycurl.CONNECTTIMEOUT_MS),
+                int(arvados.KeepClient.DEFAULT_PROXY_TIMEOUT[0]*1000))
+            self.assertEqual(
+                mock.responses[0].getopt(pycurl.LOW_SPEED_TIME),
+                int(arvados.KeepClient.DEFAULT_PROXY_TIMEOUT[1]))
+            self.assertEqual(
+                mock.responses[0].getopt(pycurl.LOW_SPEED_LIMIT),
+                int(arvados.KeepClient.DEFAULT_PROXY_TIMEOUT[2]))
+
     def test_proxy_put_timeout(self):
         api_client = self.mock_keep_services(service_type='proxy', count=1)
         force_timeout = socket.timeout("timed out")
@@ -363,6 +425,9 @@ class KeepClientServiceTestCase(unittest.TestCase, tutil.ApiClientMock):
     def test_get_error_with_no_services(self):
         self.check_no_services_error('get', arvados.errors.KeepReadError)
 
+    def test_head_error_with_no_services(self):
+        self.check_no_services_error('head', arvados.errors.KeepReadError)
+
     def test_put_error_with_no_services(self):
         self.check_no_services_error('put', arvados.errors.KeepWriteError)
 
@@ -382,6 +447,9 @@ class KeepClientServiceTestCase(unittest.TestCase, tutil.ApiClientMock):
     def test_get_error_reflects_last_retry(self):
         self.check_errors_from_last_retry('get', arvados.errors.KeepReadError)
 
+    def test_head_error_reflects_last_retry(self):
+        self.check_errors_from_last_retry('head', arvados.errors.KeepReadError)
+
     def test_put_error_reflects_last_retry(self):
         self.check_errors_from_last_retry('put', arvados.errors.KeepWriteError)
 
@@ -476,6 +544,10 @@ class KeepClientRendezvousTestCase(unittest.TestCase, tutil.ApiClientMock):
         self._test_probe_order_against_reference_set(
             lambda i: self.keep_client.get(self.hashes[i], num_retries=1))
 
+    def test_head_probe_order_against_reference_set(self):
+        self._test_probe_order_against_reference_set(
+            lambda i: self.keep_client.head(self.hashes[i], num_retries=1))
+
     def test_put_probe_order_against_reference_set(self):
         # copies=1 prevents the test from being sensitive to races
         # between writer threads.
@@ -686,6 +758,9 @@ class KeepClientTimeout(unittest.TestCase, tutil.ApiClientMock):
         with self.assertTakesGreater(self.TIMEOUT_TIME):
             with self.assertRaises(arvados.errors.KeepWriteError):
                 kc.put(self.DATA, copies=1, num_retries=0)
+        with self.assertTakesGreater(self.TIMEOUT_TIME):
+            with self.assertRaises(arvados.errors.KeepReadError) as e:
+                kc.head(loc, num_retries=0)
 
     def test_low_bandwidth_with_server_mid_delay_failure(self):
         kc = self.keepClient()
@@ -734,10 +809,10 @@ class KeepClientTimeout(unittest.TestCase, tutil.ApiClientMock):
         # Allow 10s to connect, then 1s for response. Nothing should
         # work, and everything should take at least 1s to return.
         kc = self.keepClient(timeouts=(10, 1))
-        with self.assertTakesBetween(1, 1.9):
+        with self.assertTakesBetween(1, 9):
             with self.assertRaises(arvados.errors.KeepReadError):
                 kc.get(loc, num_retries=0)
-        with self.assertTakesBetween(1, 1.9):
+        with self.assertTakesBetween(1, 9):
             with self.assertRaises(arvados.errors.KeepWriteError):
                 kc.put(self.DATA, copies=1, num_retries=0)
 
@@ -768,6 +843,7 @@ class KeepClientGatewayTestCase(unittest.TestCase, tutil.ApiClientMock):
         self.assertEqual('foo', self.keepClient.get(locator))
         self.assertEqual(self.gateway_roots[0]+locator,
                          MockCurl.return_value.getopt(pycurl.URL))
+        self.assertEqual(True, self.keepClient.head(locator))
 
     @mock.patch('pycurl.Curl')
     def test_get_with_gateway_hints_in_order(self, MockCurl):
@@ -793,6 +869,30 @@ class KeepClientGatewayTestCase(unittest.TestCase, tutil.ApiClientMock):
                 mocks[i].getopt(pycurl.URL),
                 r'keep0x')
 
+    @mock.patch('pycurl.Curl')
+    def test_head_with_gateway_hints_in_order(self, MockCurl):
+        gateways = 4
+        disks = 3
+        mocks = [
+            tutil.FakeCurl.make(code=404, body='')
+            for _ in range(gateways+disks)
+        ]
+        MockCurl.side_effect = tutil.queue_with(mocks)
+        self.mock_disks_and_gateways(gateways=gateways, disks=disks)
+        locator = '+'.join(['acbd18db4cc2f85cedef654fccc4a4d8+3'] +
+                           ['K@'+gw['uuid'] for gw in self.gateways])
+        with self.assertRaises(arvados.errors.NotFoundError):
+            self.keepClient.head(locator)
+        # Gateways are tried first, in the order given.
+        for i, root in enumerate(self.gateway_roots):
+            self.assertEqual(root+locator,
+                             mocks[i].getopt(pycurl.URL))
+        # Disk services are tried next.
+        for i in range(gateways, gateways+disks):
+            self.assertRegexpMatches(
+                mocks[i].getopt(pycurl.URL),
+                r'keep0x')
+
     @mock.patch('pycurl.Curl')
     def test_get_with_remote_proxy_hint(self, MockCurl):
         MockCurl.return_value = tutil.FakeCurl.make(
@@ -803,6 +903,16 @@ class KeepClientGatewayTestCase(unittest.TestCase, tutil.ApiClientMock):
         self.assertEqual('https://keep.xyzzy.arvadosapi.com/'+locator,
                          MockCurl.return_value.getopt(pycurl.URL))
 
+    @mock.patch('pycurl.Curl')
+    def test_head_with_remote_proxy_hint(self, MockCurl):
+        MockCurl.return_value = tutil.FakeCurl.make(
+            code=200, body='foo', headers={'Content-Length': 3})
+        self.mock_disks_and_gateways()
+        locator = 'acbd18db4cc2f85cedef654fccc4a4d8+3+K@xyzzy'
+        self.assertEqual(True, self.keepClient.head(locator))
+        self.assertEqual('https://keep.xyzzy.arvadosapi.com/'+locator,
+                         MockCurl.return_value.getopt(pycurl.URL))
+
 
 class KeepClientRetryTestMixin(object):
     # Testing with a local Keep store won't exercise the retry behavior.
@@ -852,6 +962,10 @@ class KeepClientRetryTestMixin(object):
         with self.TEST_PATCHER(self.DEFAULT_EXPECT, 500, 200):
             self.check_success(num_retries=3)
 
+    def test_exception_then_success(self):
+        with self.TEST_PATCHER(self.DEFAULT_EXPECT, Exception('mock err'), 200):
+            self.check_success(num_retries=3)
+
     def test_no_default_retry(self):
         with self.TEST_PATCHER(self.DEFAULT_EXPECT, 500, 200):
             self.check_exception()
@@ -914,6 +1028,43 @@ class KeepClientRetryGetTestCase(KeepClientRetryTestMixin, unittest.TestCase):
                 (self.DEFAULT_EXPECT, 200)):
             self.check_success(locator=self.HINTED_LOCATOR)
 
+@tutil.skip_sleep
+class KeepClientRetryHeadTestCase(KeepClientRetryTestMixin, unittest.TestCase):
+    DEFAULT_EXPECT = True
+    DEFAULT_EXCEPTION = arvados.errors.KeepReadError
+    HINTED_LOCATOR = KeepClientRetryTestMixin.TEST_LOCATOR + '+K@xyzzy'
+    TEST_PATCHER = staticmethod(tutil.mock_keep_responses)
+
+    def run_method(self, locator=KeepClientRetryTestMixin.TEST_LOCATOR,
+                   *args, **kwargs):
+        return self.new_client().head(locator, *args, **kwargs)
+
+    def test_specific_exception_when_not_found(self):
+        with tutil.mock_keep_responses(self.DEFAULT_EXPECT, 404, 200):
+            self.check_exception(arvados.errors.NotFoundError, num_retries=3)
+
+    def test_general_exception_with_mixed_errors(self):
+        # head should raise a NotFoundError if no server returns the block,
+        # and a high threshold of servers report that it's not found.
+        # This test rigs up 50/50 disagreement between two servers, and
+        # checks that it does not become a NotFoundError.
+        client = self.new_client()
+        with tutil.mock_keep_responses(self.DEFAULT_EXPECT, 404, 500):
+            with self.assertRaises(arvados.errors.KeepReadError) as exc_check:
+                client.head(self.HINTED_LOCATOR)
+            self.assertNotIsInstance(
+                exc_check.exception, arvados.errors.NotFoundError,
+                "mixed errors raised NotFoundError")
+
+    def test_hint_server_can_succeed_without_retries(self):
+        with tutil.mock_keep_responses(self.DEFAULT_EXPECT, 404, 200, 500):
+            self.check_success(locator=self.HINTED_LOCATOR)
+
+    def test_try_next_server_after_timeout(self):
+        with tutil.mock_keep_responses(
+                (socket.timeout("timed out"), 200),
+                (self.DEFAULT_EXPECT, 200)):
+            self.check_success(locator=self.HINTED_LOCATOR)
 
 @tutil.skip_sleep
 class KeepClientRetryPutTestCase(KeepClientRetryTestMixin, unittest.TestCase):
@@ -928,3 +1079,108 @@ class KeepClientRetryPutTestCase(KeepClientRetryTestMixin, unittest.TestCase):
     def test_do_not_send_multiple_copies_to_same_server(self):
         with tutil.mock_keep_responses(self.DEFAULT_EXPECT, 200):
             self.check_exception(copies=2, num_retries=3)
+
+
+class AvoidOverreplication(unittest.TestCase, tutil.ApiClientMock):
+
+    class FakeKeepService(object):
+        def __init__(self, delay, will_succeed=False, will_raise=None, replicas=1):
+            self.delay = delay
+            self.will_succeed = will_succeed
+            self.will_raise = will_raise
+            self._result = {}
+            self._result['headers'] = {}
+            self._result['headers']['x-keep-replicas-stored'] = str(replicas)
+            self._result['body'] = 'foobar'
+
+        def put(self, data_hash, data, timeout):
+            time.sleep(self.delay)
+            if self.will_raise is not None:
+                raise self.will_raise
+            return self.will_succeed
+
+        def last_result(self):
+            if self.will_succeed:
+                return self._result
+
+        def finished(self):
+            return False
+    
+    def setUp(self):
+        self.copies = 3
+        self.pool = arvados.KeepClient.KeepWriterThreadPool(
+            data = 'foo',
+            data_hash = 'acbd18db4cc2f85cedef654fccc4a4d8+3',
+            max_service_replicas = self.copies,
+            copies = self.copies
+        )
+
+    def test_only_write_enough_on_success(self):
+        for i in range(10):
+            ks = self.FakeKeepService(delay=i/10.0, will_succeed=True)
+            self.pool.add_task(ks, None)
+        self.pool.join()
+        self.assertEqual(self.pool.done(), self.copies)
+
+    def test_only_write_enough_on_partial_success(self):
+        for i in range(5):
+            ks = self.FakeKeepService(delay=i/10.0, will_succeed=False)
+            self.pool.add_task(ks, None)
+            ks = self.FakeKeepService(delay=i/10.0, will_succeed=True)
+            self.pool.add_task(ks, None)
+        self.pool.join()
+        self.assertEqual(self.pool.done(), self.copies)
+
+    def test_only_write_enough_when_some_crash(self):
+        for i in range(5):
+            ks = self.FakeKeepService(delay=i/10.0, will_raise=Exception())
+            self.pool.add_task(ks, None)
+            ks = self.FakeKeepService(delay=i/10.0, will_succeed=True)
+            self.pool.add_task(ks, None)
+        self.pool.join()
+        self.assertEqual(self.pool.done(), self.copies)
+
+    def test_fail_when_too_many_crash(self):
+        for i in range(self.copies+1):
+            ks = self.FakeKeepService(delay=i/10.0, will_raise=Exception())
+            self.pool.add_task(ks, None)
+        for i in range(self.copies-1):
+            ks = self.FakeKeepService(delay=i/10.0, will_succeed=True)
+            self.pool.add_task(ks, None)
+        self.pool.join()
+        self.assertEqual(self.pool.done(), self.copies-1)
+    
+
+@tutil.skip_sleep
+class RetryNeedsMultipleServices(unittest.TestCase, tutil.ApiClientMock):
+    # Test put()s that need two distinct servers to succeed, possibly
+    # requiring multiple passes through the retry loop.
+
+    def setUp(self):
+        self.api_client = self.mock_keep_services(count=2)
+        self.keep_client = arvados.KeepClient(api_client=self.api_client)
+
+    def test_success_after_exception(self):
+        with tutil.mock_keep_responses(
+                'acbd18db4cc2f85cedef654fccc4a4d8+3',
+                Exception('mock err'), 200, 200) as req_mock:
+            self.keep_client.put('foo', num_retries=1, copies=2)
+        self.assertEqual(3, req_mock.call_count)
+
+    def test_success_after_retryable_error(self):
+        with tutil.mock_keep_responses(
+                'acbd18db4cc2f85cedef654fccc4a4d8+3',
+                500, 200, 200) as req_mock:
+            self.keep_client.put('foo', num_retries=1, copies=2)
+        self.assertEqual(3, req_mock.call_count)
+
+    def test_fail_after_final_error(self):
+        # First retry loop gets a 200 (can't achieve replication by
+        # storing again on that server) and a 400 (can't retry that
+        # server at all), so we shouldn't try a third request.
+        with tutil.mock_keep_responses(
+                'acbd18db4cc2f85cedef654fccc4a4d8+3',
+                200, 400, 200) as req_mock:
+            with self.assertRaises(arvados.errors.KeepWriteError):
+                self.keep_client.put('foo', num_retries=1, copies=2)
+        self.assertEqual(2, req_mock.call_count)