12684: Remove custom retry logic from PySDK
author    Brett Smith <brett.smith@curii.com>
          Wed, 3 May 2023 18:19:06 +0000 (14:19 -0400)
committer Brett Smith <brett.smith@curii.com>
          Thu, 4 May 2023 21:43:07 +0000 (17:43 -0400)
This logic traces its roots back to
`5722c604c6f5dc1553674d179ec016ec12e2b090`. The goal of that commit was to
work around a bug in httplib, which we no longer use as a client
library. `31eb1bdc31e1d030844a6fdc7f4ba4286ec79d4f` made an analogous
change for httplib2.

`8a0eb69984a93852ec888cd3e02b778b0be758ed` made three major changes:

1. Proactively close sockets if they seem likely to be stale
2. Wrap the retry logic in a loop
3. Generalize catching `httplib.BadStatusLine` to `httplib.HTTPException`
   (which covers all kinds of malformed HTTP responses)

However, #1 functionally obsoletes the exception handlers added in the
earlier commits. Preemptively closing the sockets prevents httplib/httplib2
from trying to reuse stale ones. So these exception handlers, along with
their retry loops, no longer serve their original purpose.

Remove this logic in favor of using the retry logic built into
googleapiclient. That logic is easier to configure and more refined.
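For reference, a minimal sketch of the replacement pattern, assuming a
configured client (ARVADOS_API_HOST/ARVADOS_API_TOKEN in the environment);
`num_retries` is googleapiclient's own `HttpRequest.execute()` parameter,
not something introduced by this commit:

    import arvados

    # arvados.api() returns a googleapiclient resource object, so any
    # request built from it accepts num_retries in execute(); transient
    # failures are retried by googleapiclient itself.
    client = arvados.api('v1')
    user = client.users().current().execute(num_retries=3)
    print(user['uuid'])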

Arvados-DCO-1.1-Signed-off-by: Brett Smith <brett.smith@curii.com>

sdk/python/arvados/api.py
sdk/python/tests/test_api.py

diff --git a/sdk/python/arvados/api.py b/sdk/python/arvados/api.py
index 19154f3e8b368f5b0dbeb631e4290ac7f32d101f..537ad2082029333192302a8deb4bec24d5032d27 100644
@@ -37,9 +37,13 @@ from . import cache
 _logger = logging.getLogger('arvados.api')
 
 MAX_IDLE_CONNECTION_DURATION = 30
-RETRY_DELAY_INITIAL = 2
-RETRY_DELAY_BACKOFF = 2
-RETRY_COUNT = 2
+
+# These constants supported our own retry logic that we've since removed in
+# favor of using googleapiclient's num_retries. They're kept here purely for
+# API compatibility, but set to 0 to indicate no retries happen.
+RETRY_DELAY_INITIAL = 0
+RETRY_DELAY_BACKOFF = 0
+RETRY_COUNT = 0
 
 if sys.version_info >= (3,):
     httplib2.SSLHandshakeError = None
@@ -75,12 +79,7 @@ def _intercept_http_request(self, uri, method="GET", headers={}, **kwargs):
 
         headers['Authorization'] = 'OAuth2 %s' % self.arvados_api_token
 
-        retryable = method in [
-            'DELETE', 'GET', 'HEAD', 'OPTIONS', 'PUT']
-        retry_count = self._retry_count if retryable else 0
-
-        if (not retryable and
-            time.time() - self._last_request_time > self._max_keepalive_idle):
+        if (time.time() - self._last_request_time) > self._max_keepalive_idle:
             # High probability of failure due to connection atrophy. Make
             # sure this request [re]opens a new connection by closing and
             # forgetting all cached connections first.
@@ -88,32 +87,11 @@ def _intercept_http_request(self, uri, method="GET", headers={}, **kwargs):
                 conn.close()
             self.connections.clear()
 
-        delay = self._retry_delay_initial
-        for _ in range(retry_count):
-            self._last_request_time = time.time()
-            try:
-                return self.orig_http_request(uri, method, headers=headers, **kwargs)
-            except http.client.HTTPException:
-                _logger.debug("[%s] Retrying API request in %d s after HTTP error",
-                              headers['X-Request-Id'], delay, exc_info=True)
-            except ssl.SSLCertVerificationError as e:
-                raise ssl.SSLCertVerificationError(e.args[0], "Could not connect to %s\n%s\nPossible causes: remote SSL/TLS certificate expired, or was issued by an untrusted certificate authority." % (uri, e)) from None
-            except socket.error:
-                # This is the one case where httplib2 doesn't close the
-                # underlying connection first.  Close all open
-                # connections, expecting this object only has the one
-                # connection to the API server.  This is safe because
-                # httplib2 reopens connections when needed.
-                _logger.debug("[%s] Retrying API request in %d s after socket error",
-                              headers['X-Request-Id'], delay, exc_info=True)
-                for conn in self.connections.values():
-                    conn.close()
-
-            time.sleep(delay)
-            delay = delay * self._retry_delay_backoff
-
         self._last_request_time = time.time()
-        return self.orig_http_request(uri, method, headers=headers, **kwargs)
+        try:
+            return self.orig_http_request(uri, method, headers=headers, **kwargs)
+        except ssl.SSLCertVerificationError as e:
+            raise ssl.SSLCertVerificationError(e.args[0], "Could not connect to %s\n%s\nPossible causes: remote SSL/TLS certificate expired, or was issued by an untrusted certificate authority." % (uri, e)) from None
     except Exception as e:
         # Prepend "[request_id] " to the error message, which we
         # assume is the first string argument passed to the exception
@@ -131,9 +109,6 @@ def _patch_http_request(http, api_token):
     http.request = types.MethodType(_intercept_http_request, http)
     http._last_request_time = 0
     http._max_keepalive_idle = MAX_IDLE_CONNECTION_DURATION
-    http._retry_delay_initial = RETRY_DELAY_INITIAL
-    http._retry_delay_backoff = RETRY_DELAY_BACKOFF
-    http._retry_count = RETRY_COUNT
     http._request_id = util.new_request_id
     return http
 
diff --git a/sdk/python/tests/test_api.py b/sdk/python/tests/test_api.py
index 20c4f346a9363f501e6ed305eb3a48aa7612c9e1..780ff07bff523b53b73f1ea5035ad134fa7dd44e 100644
@@ -27,9 +27,6 @@ from arvados.api import (
     normalize_api_kwargs,
     api_kwargs_from_config,
     OrderedJsonModel,
-    RETRY_DELAY_INITIAL,
-    RETRY_DELAY_BACKOFF,
-    RETRY_COUNT,
 )
 from .arvados_testutil import fake_httplib2_response, queue_with
 
@@ -341,7 +338,7 @@ class ArvadosApiTest(run_test_server.TestCaseWithServers):
                 api_client(*args, insecure=True)
 
 
-class RetryREST(unittest.TestCase):
+class PreCloseSocketTestCase(unittest.TestCase):
     def setUp(self):
         self.api = arvados.api('v1')
         self.assertTrue(hasattr(self.api._http, 'orig_http_request'),
@@ -353,59 +350,6 @@ class RetryREST(unittest.TestCase):
         # All requests succeed by default. Tests override as needed.
         self.api._http.orig_http_request.return_value = self.request_success
 
-    @mock.patch('time.sleep')
-    def test_socket_error_retry_get(self, sleep):
-        self.api._http.orig_http_request.side_effect = (
-            socket.error('mock error'),
-            self.request_success,
-        )
-        self.assertEqual(self.api.users().current().execute(),
-                         self.mock_response)
-        self.assertGreater(self.api._http.orig_http_request.call_count, 1,
-                           "client got the right response without retrying")
-        self.assertEqual(sleep.call_args_list,
-                         [mock.call(RETRY_DELAY_INITIAL)])
-
-    @mock.patch('time.sleep')
-    def test_same_automatic_request_id_on_retry(self, sleep):
-        self.api._http.orig_http_request.side_effect = (
-            socket.error('mock error'),
-            self.request_success,
-        )
-        self.api.users().current().execute()
-        calls = self.api._http.orig_http_request.call_args_list
-        self.assertEqual(len(calls), 2)
-        self.assertEqual(
-            calls[0][1]['headers']['X-Request-Id'],
-            calls[1][1]['headers']['X-Request-Id'])
-        self.assertRegex(calls[0][1]['headers']['X-Request-Id'], r'^req-[a-z0-9]{20}$')
-
-    @mock.patch('time.sleep')
-    def test_provided_request_id_on_retry(self, sleep):
-        self.api.request_id='fake-request-id'
-        self.api._http.orig_http_request.side_effect = (
-            socket.error('mock error'),
-            self.request_success,
-        )
-        self.api.users().current().execute()
-        calls = self.api._http.orig_http_request.call_args_list
-        self.assertEqual(len(calls), 2)
-        for call in calls:
-            self.assertEqual(call[1]['headers']['X-Request-Id'], 'fake-request-id')
-
-    @mock.patch('time.sleep')
-    def test_socket_error_retry_delay(self, sleep):
-        self.api._http.orig_http_request.side_effect = socket.error('mock')
-        self.api._http._retry_count = 3
-        with self.assertRaises(socket.error):
-            self.api.users().current().execute()
-        self.assertEqual(self.api._http.orig_http_request.call_count, 4)
-        self.assertEqual(sleep.call_args_list, [
-            mock.call(RETRY_DELAY_INITIAL),
-            mock.call(RETRY_DELAY_INITIAL * RETRY_DELAY_BACKOFF),
-            mock.call(RETRY_DELAY_INITIAL * RETRY_DELAY_BACKOFF**2),
-        ])
-
     @mock.patch('time.time', side_effect=[i*2**20 for i in range(99)])
     def test_close_old_connections_non_retryable(self, sleep):
         self._test_connection_close(expect=1)
@@ -429,18 +373,6 @@ class RetryREST(unittest.TestCase):
         for c in mock_conns.values():
             self.assertEqual(c.close.call_count, expect)
 
-    @mock.patch('time.sleep')
-    def test_socket_error_no_retry_post(self, sleep):
-        self.api._http.orig_http_request.side_effect = (
-            socket.error('mock error'),
-            self.request_success,
-        )
-        with self.assertRaises(socket.error):
-            self.api.users().create(body={}).execute()
-        self.assertEqual(self.api._http.orig_http_request.call_count, 1,
-                         "client should try non-retryable method exactly once")
-        self.assertEqual(sleep.call_args_list, [])
-
 
 if __name__ == '__main__':
     unittest.main()