From bb39fb01d3147c6009ee35920ae0637201b11dd2 Mon Sep 17 00:00:00 2001 From: Brett Smith Date: Thu, 4 May 2023 16:21:08 -0400 Subject: [PATCH] 12684: Support num_retries in PySDK client constructors This lets users set their preferred retry strategy once, rather than in every call to execute(), which is error-prone. The default num_retries is 10 because we expect most users to care more about eventual success than responsiveness. See the added release notes for further discussion and rationale. Changes to the rest of the code are mostly about supporting this consistently. Tests that relied on the old no-default-num_retries behavior now specify that explicitly. Arvados-DCO-1.1-Signed-off-by: Brett Smith --- doc/admin/upgrading.html.textile.liquid | 11 +++ doc/sdk/python/api-client.html.textile.liquid | 15 ++- sdk/cwl/arvados_cwl/__init__.py | 29 +++++- sdk/python/arvados/api.py | 25 ++++- sdk/python/arvados/collection.py | 4 +- sdk/python/arvados/commands/_util.py | 4 +- sdk/python/arvados/commands/arv_copy.py | 10 +- .../arvados/commands/federation_migrate.py | 10 +- sdk/python/arvados/commands/get.py | 2 +- sdk/python/arvados/commands/keepdocker.py | 2 +- sdk/python/arvados/commands/ls.py | 2 +- sdk/python/arvados/commands/put.py | 2 +- sdk/python/arvados/commands/ws.py | 5 +- sdk/python/arvados/keep.py | 4 +- sdk/python/arvados/stream.py | 2 +- sdk/python/tests/test_api.py | 73 +++++++++++++++ sdk/python/tests/test_collections.py | 4 +- sdk/python/tests/test_keep_client.py | 93 ++++++++++++++----- sdk/python/tests/test_retry_job_helpers.py | 2 +- sdk/python/tests/test_stream.py | 7 -- services/fuse/arvados_fuse/command.py | 3 + services/fuse/tests/test_command_args.py | 2 +- services/fuse/tests/test_retry.py | 4 +- 23 files changed, 249 insertions(+), 66 deletions(-) diff --git a/doc/admin/upgrading.html.textile.liquid b/doc/admin/upgrading.html.textile.liquid index 5c76f534ac..6ff346983d 100644 --- a/doc/admin/upgrading.html.textile.liquid +++ b/doc/admin/upgrading.html.textile.liquid @@ -32,6 +32,17 @@ h2(#main). development main (as of 2023-04-18) "previous: Upgrading to 2.6.1":#v2_6_1 +h3. Python SDK automatically retries failed requests much more + +The Python SDK has always provided functionality to retry API requests that fail due to temporary problems like network failures, by passing @num_retries=N@ to a request's @execute()@ method. In this release, API client constructor functions like @arvados.api@ also accept a @num_retries@ argument. This value is stored on the client object and used as a floor for all API requests made with this client. This allows developers to set their preferred retry strategy once, without having to pass it to each @execute()@ call. + +The default value for @num_retries@ in API constructor functions is 10. This means that an API request that repeatedly encounters temporary problems may spend up to about 35 minutes retrying in the worst case. We believe this is an appropriate default for most users, where eventual success is a much greater concern than responsiveness. If you have client applications where this is undesirable, update them to pass a lower @num_retries@ value to the constructor function. You can even pass @num_retries=0@ to have the API client act as it did before, like this: + +{% codeblock as python %} +import arvados +arv_client = arvados.api('v1', num_retries=0, ...) +{% endcodeblock %} + h3. UseAWSS3v2Driver option removed The old "v1" S3 driver for keepstore has been removed. The new "v2" implementation, which has been the default since Arvados 2.5.0, is always used. The @Volumes.*.DriverParameters.UseAWSS3v2Driver@ configuration key is no longer recognized. If your config file uses it, remove it to avoid warning messages at startup. diff --git a/doc/sdk/python/api-client.html.textile.liquid b/doc/sdk/python/api-client.html.textile.liquid index 020c0fc62c..dabd2d37f8 100644 --- a/doc/sdk/python/api-client.html.textile.liquid +++ b/doc/sdk/python/api-client.html.textile.liquid @@ -46,7 +46,7 @@ The API client has a method that corresponds to each "type of resource supported Each resource object has a method that corresponds to each API method supported by that resource type. You call these methods with the keyword arguments and values documented in the API reference. They return an API request object. -Each API request object has an @execute()@ method. You may pass a @num_retries@ integer argument to retry the operation that many times, with exponential back-off, in case of temporary errors like network problems. If it ultimately succeeds, it returns the kind of object documented in the API reference for that method. Usually that's a dictionary with details about the object you requested. If there's a problem, it raises an exception. +Each API request object has an @execute()@ method. If it succeeds, it returns the kind of object documented in the API reference for that method. Usually that's a dictionary with details about the object you requested. If there's a problem, it raises an exception. Putting it all together, basic API requests usually look like: @@ -54,10 +54,19 @@ Putting it all together, basic API requests usually look like: arv_object = arv_client.resource_type().api_method( argument=..., other_argument=..., -).execute(num_retries=3) +).execute() {% endcodeblock %} -The following sections detail how to call "common resource methods in the API":{{site.baseurl}}/api/methods.html with more concrete examples. Additional methods may be available on specific resource types. +Later sections detail how to call "common resource methods in the API":{{site.baseurl}}/api/methods.html with more concrete examples. Additional methods may be available on specific resource types. + +h3. Retrying failed requests + +If you execute an API request and it fails because of a temporary error like a network problem, the SDK waits with randomized exponential back-off, then retries the request. You can specify the maximum number of retries by passing a @num_retries@ integer to either @arvados.api@ or the @execute()@ method; the SDK will use whichever number is greater. The default number of retries is 10, which means that an API request could take up to about 35 minutes if the temporary problem persists that long. To disable automatic retries, just pass @num_retries=0@ to @arvados.api@: + +{% codeblock as python %} +import arvados +arv_client = arvados.api('v1', num_retries=0, ...) +{% endcodeblock %} h2. get method diff --git a/sdk/cwl/arvados_cwl/__init__.py b/sdk/cwl/arvados_cwl/__init__.py index fe27b91ab2..4722433972 100644 --- a/sdk/cwl/arvados_cwl/__init__.py +++ b/sdk/cwl/arvados_cwl/__init__.py @@ -68,7 +68,10 @@ def versionstring(): def arg_parser(): # type: () -> argparse.ArgumentParser - parser = argparse.ArgumentParser(description='Arvados executor for Common Workflow Language') + parser = argparse.ArgumentParser( + description='Arvados executor for Common Workflow Language', + parents=[arv_cmd.retry_opt], + ) parser.add_argument("--basedir", help="Base directory used to resolve relative references in the input, default to directory of input object file or current directory (if inputs piped/provided on command line).") @@ -333,8 +336,14 @@ def main(args=sys.argv[1:], try: if api_client is None: api_client = arvados.safeapi.ThreadSafeApiCache( - api_params={"model": OrderedJsonModel(), "timeout": arvargs.http_timeout}, - keep_params={"num_retries": 4}, + api_params={ + 'model': OrderedJsonModel(), + 'num_retries': arvargs.retries, + 'timeout': arvargs.http_timeout, + }, + keep_params={ + 'num_retries': arvargs.retries, + }, version='v1', ) keep_client = api_client.keep @@ -342,8 +351,18 @@ def main(args=sys.argv[1:], api_client.users().current().execute() if keep_client is None: block_cache = arvados.keep.KeepBlockCache(disk_cache=True) - keep_client = arvados.keep.KeepClient(api_client=api_client, num_retries=4, block_cache=block_cache) - executor = ArvCwlExecutor(api_client, arvargs, keep_client=keep_client, num_retries=4, stdout=stdout) + keep_client = arvados.keep.KeepClient( + api_client=api_client, + block_cache=block_cache, + num_retries=arvargs.retries, + ) + executor = ArvCwlExecutor( + api_client, + arvargs, + keep_client=keep_client, + num_retries=arvargs.retries, + stdout=stdout, + ) except WorkflowException as e: logger.error(e, exc_info=(sys.exc_info()[1] if arvargs.debug else False)) return 1 diff --git a/sdk/python/arvados/api.py b/sdk/python/arvados/api.py index 537ad20820..2e33e0f2cb 100644 --- a/sdk/python/arvados/api.py +++ b/sdk/python/arvados/api.py @@ -27,6 +27,7 @@ import time import types import apiclient +import apiclient.http from apiclient import discovery as apiclient_discovery from apiclient import errors as apiclient_errors from . import config @@ -68,6 +69,19 @@ class OrderedJsonModel(apiclient.model.JsonModel): return body +_orig_retry_request = apiclient.http._retry_request +def _retry_request(http, num_retries, *args, **kwargs): + try: + num_retries = max(num_retries, http.num_retries) + except AttributeError: + # `http` client object does not have a `num_retries` attribute. + # It apparently hasn't gone through _patch_http_request, possibly + # because this isn't an Arvados API client. We need to continue on to + # avoid interfering with other Google API clients. + pass + return _orig_retry_request(http, num_retries, *args, **kwargs) +apiclient.http._retry_request = _retry_request + def _intercept_http_request(self, uri, method="GET", headers={}, **kwargs): if not headers.get('X-Request-Id'): headers['X-Request-Id'] = self._request_id() @@ -102,9 +116,10 @@ def _intercept_http_request(self, uri, method="GET", headers={}, **kwargs): raise type(e)(*e.args) raise -def _patch_http_request(http, api_token): +def _patch_http_request(http, api_token, num_retries): http.arvados_api_token = api_token http.max_request_size = 0 + http.num_retries = num_retries http.orig_http_request = http.request http.request = types.MethodType(_intercept_http_request, http) http._last_request_time = 0 @@ -157,6 +172,7 @@ def api_client( cache=True, http=None, insecure=False, + num_retries=10, request_id=None, timeout=5*60, **kwargs, @@ -194,6 +210,10 @@ def api_client( insecure: bool : If true, ignore SSL certificate validation errors. Default `False`. + num_retries: int + : The number of times to retry each API request if it encounters a + temporary failure. Default 10. + request_id: str | None : Default `X-Request-Id` header value for outgoing requests that don't already provide one. If `None` or omitted, generate a random @@ -214,13 +234,14 @@ def api_client( ) if http.timeout is None: http.timeout = timeout - http = _patch_http_request(http, token) + http = _patch_http_request(http, token, num_retries) svc = apiclient_discovery.build( 'arvados', version, cache_discovery=False, discoveryServiceUrl=discoveryServiceUrl, http=http, + num_retries=num_retries, **kwargs, ) svc.api_token = token diff --git a/sdk/python/arvados/collection.py b/sdk/python/arvados/collection.py index ebca15c54b..23b4393a94 100644 --- a/sdk/python/arvados/collection.py +++ b/sdk/python/arvados/collection.py @@ -1256,7 +1256,7 @@ class Collection(RichCollectionBase): def __init__(self, manifest_locator_or_text=None, api_client=None, keep_client=None, - num_retries=None, + num_retries=10, parent=None, apiconfig=None, block_manager=None, @@ -1324,7 +1324,7 @@ class Collection(RichCollectionBase): else: self._config = config.settings() - self.num_retries = num_retries if num_retries is not None else 0 + self.num_retries = num_retries self._manifest_locator = None self._manifest_text = None self._portable_data_hash = None diff --git a/sdk/python/arvados/commands/_util.py b/sdk/python/arvados/commands/_util.py index d10d38eb5b..17454b7d17 100644 --- a/sdk/python/arvados/commands/_util.py +++ b/sdk/python/arvados/commands/_util.py @@ -17,9 +17,9 @@ def _pos_int(s): return num retry_opt = argparse.ArgumentParser(add_help=False) -retry_opt.add_argument('--retries', type=_pos_int, default=3, help=""" +retry_opt.add_argument('--retries', type=_pos_int, default=10, help=""" Maximum number of times to retry server requests that encounter temporary -failures (e.g., server down). Default 3.""") +failures (e.g., server down). Default 10.""") def _ignore_error(error): return None diff --git a/sdk/python/arvados/commands/arv_copy.py b/sdk/python/arvados/commands/arv_copy.py index 7951842acc..63c0cbea28 100755 --- a/sdk/python/arvados/commands/arv_copy.py +++ b/sdk/python/arvados/commands/arv_copy.py @@ -129,8 +129,8 @@ def main(): args.source_arvados = args.object_uuid[:5] # Create API clients for the source and destination instances - src_arv = api_for_instance(args.source_arvados) - dst_arv = api_for_instance(args.destination_arvados) + src_arv = api_for_instance(args.source_arvados, args.retries) + dst_arv = api_for_instance(args.destination_arvados, args.retries) if not args.project_uuid: args.project_uuid = dst_arv.users().current().execute(num_retries=args.retries)["uuid"] @@ -187,7 +187,7 @@ def set_src_owner_uuid(resource, uuid, args): # Otherwise, it is presumed to be the name of a file in # $HOME/.config/arvados/instance_name.conf # -def api_for_instance(instance_name): +def api_for_instance(instance_name, num_retries): if not instance_name: # Use environment return arvados.api('v1', model=OrderedJsonModel()) @@ -214,7 +214,9 @@ def api_for_instance(instance_name): host=cfg['ARVADOS_API_HOST'], token=cfg['ARVADOS_API_TOKEN'], insecure=api_is_insecure, - model=OrderedJsonModel()) + model=OrderedJsonModel(), + num_retries=num_retries, + ) else: abort('need ARVADOS_API_HOST and ARVADOS_API_TOKEN for {}'.format(instance_name)) return client diff --git a/sdk/python/arvados/commands/federation_migrate.py b/sdk/python/arvados/commands/federation_migrate.py index 5c1bb29e76..32b3211f14 100755 --- a/sdk/python/arvados/commands/federation_migrate.py +++ b/sdk/python/arvados/commands/federation_migrate.py @@ -24,6 +24,7 @@ import os import hashlib import re from arvados._version import __version__ +from . import _util as arv_cmd EMAIL=0 USERNAME=1 @@ -43,10 +44,10 @@ def connect_clusters(args): host = r[0] token = r[1] print("Contacting %s" % (host)) - arv = arvados.api(host=host, token=token, cache=False) + arv = arvados.api(host=host, token=token, cache=False, num_retries=args.retries) clusters[arv._rootDesc["uuidPrefix"]] = arv else: - arv = arvados.api(cache=False) + arv = arvados.api(cache=False, num_retries=args.retries) rh = arv._rootDesc["remoteHosts"] tok = arv.api_client_authorizations().current().execute() token = "v2/%s/%s" % (tok["uuid"], tok["api_token"]) @@ -326,7 +327,10 @@ def migrate_user(args, migratearv, email, new_user_uuid, old_user_uuid): def main(): - parser = argparse.ArgumentParser(description='Migrate users to federated identity, see https://doc.arvados.org/admin/merge-remote-account.html') + parser = argparse.ArgumentParser( + description='Migrate users to federated identity, see https://doc.arvados.org/admin/merge-remote-account.html', + parents=[arv_cmd.retry_opt], + ) parser.add_argument( '--version', action='version', version="%s %s" % (sys.argv[0], __version__), help='Print version and exit.') diff --git a/sdk/python/arvados/commands/get.py b/sdk/python/arvados/commands/get.py index bb421def61..89b333808e 100755 --- a/sdk/python/arvados/commands/get.py +++ b/sdk/python/arvados/commands/get.py @@ -155,7 +155,7 @@ def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr): request_id = arvados.util.new_request_id() logger.info('X-Request-Id: '+request_id) - api_client = arvados.api('v1', request_id=request_id) + api_client = arvados.api('v1', request_id=request_id, num_retries=args.retries) r = re.search(r'^(.*?)(/.*)?$', args.locator) col_loc = r.group(1) diff --git a/sdk/python/arvados/commands/keepdocker.py b/sdk/python/arvados/commands/keepdocker.py index 2d5c0150c9..922256a27e 100644 --- a/sdk/python/arvados/commands/keepdocker.py +++ b/sdk/python/arvados/commands/keepdocker.py @@ -359,7 +359,7 @@ def _uuid2pdh(api, uuid): def main(arguments=None, stdout=sys.stdout, install_sig_handlers=True, api=None): args = arg_parser.parse_args(arguments) if api is None: - api = arvados.api('v1') + api = arvados.api('v1', num_retries=args.retries) if args.image is None or args.image == 'images': fmt = "{:30} {:10} {:12} {:29} {:20}\n" diff --git a/sdk/python/arvados/commands/ls.py b/sdk/python/arvados/commands/ls.py index 86e728ed49..ac038f5040 100644 --- a/sdk/python/arvados/commands/ls.py +++ b/sdk/python/arvados/commands/ls.py @@ -43,7 +43,7 @@ def main(args, stdout, stderr, api_client=None, logger=None): args = parse_args(args) if api_client is None: - api_client = arvados.api('v1') + api_client = arvados.api('v1', num_retries=args.retries) if logger is None: logger = logging.getLogger('arvados.arv-ls') diff --git a/sdk/python/arvados/commands/put.py b/sdk/python/arvados/commands/put.py index be7cd629c9..0e732eafde 100644 --- a/sdk/python/arvados/commands/put.py +++ b/sdk/python/arvados/commands/put.py @@ -1136,7 +1136,7 @@ def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr, logging.getLogger('arvados').handlers[0].setFormatter(formatter) if api_client is None: - api_client = arvados.api('v1', request_id=request_id) + api_client = arvados.api('v1', request_id=request_id, num_retries=args.retries) if install_sig_handlers: arv_cmd.install_signal_handlers() diff --git a/sdk/python/arvados/commands/ws.py b/sdk/python/arvados/commands/ws.py index 37dab55d60..04a90cf20b 100644 --- a/sdk/python/arvados/commands/ws.py +++ b/sdk/python/arvados/commands/ws.py @@ -10,12 +10,13 @@ import arvados import json from arvados.events import subscribe from arvados._version import __version__ +from . import _util as arv_cmd import signal def main(arguments=None): logger = logging.getLogger('arvados.arv-ws') - parser = argparse.ArgumentParser() + parser = argparse.ArgumentParser(parents=[arv_cmd.retry_opt]) parser.add_argument('--version', action='version', version="%s %s" % (sys.argv[0], __version__), help='Print version and exit.') @@ -56,7 +57,7 @@ def main(arguments=None): filters = new_filters known_component_jobs = pipeline_jobs - api = arvados.api('v1') + api = arvados.api('v1', num_retries=args.retries) if args.uuid: filters += [ ['object_uuid', '=', args.uuid] ] diff --git a/sdk/python/arvados/keep.py b/sdk/python/arvados/keep.py index 8658774cbb..a2c8fd2494 100644 --- a/sdk/python/arvados/keep.py +++ b/sdk/python/arvados/keep.py @@ -835,7 +835,7 @@ class KeepClient(object): def __init__(self, api_client=None, proxy=None, timeout=DEFAULT_TIMEOUT, proxy_timeout=DEFAULT_PROXY_TIMEOUT, api_token=None, local_store=None, block_cache=None, - num_retries=0, session=None): + num_retries=10, session=None): """Initialize a new KeepClient. Arguments: @@ -888,7 +888,7 @@ class KeepClient(object): :num_retries: The default number of times to retry failed requests. This will be used as the default num_retries value when get() and - put() are called. Default 0. + put() are called. Default 10. """ self.lock = threading.Lock() if proxy is None: diff --git a/sdk/python/arvados/stream.py b/sdk/python/arvados/stream.py index edfb7711b8..eadfbbec07 100644 --- a/sdk/python/arvados/stream.py +++ b/sdk/python/arvados/stream.py @@ -24,7 +24,7 @@ from ._normalize_stream import normalize_stream class StreamReader(object): def __init__(self, tokens, keep=None, debug=False, _empty=False, - num_retries=0): + num_retries=10): self._stream_name = None self._data_locators = [] self._files = collections.OrderedDict() diff --git a/sdk/python/tests/test_api.py b/sdk/python/tests/test_api.py index 780ff07bff..15fead7ada 100644 --- a/sdk/python/tests/test_api.py +++ b/sdk/python/tests/test_api.py @@ -7,6 +7,7 @@ from builtins import str from builtins import range import arvados import collections +import contextlib import httplib2 import itertools import json @@ -14,6 +15,7 @@ import mimetypes import os import socket import string +import sys import unittest import urllib.parse as urlparse @@ -338,6 +340,77 @@ class ArvadosApiTest(run_test_server.TestCaseWithServers): api_client(*args, insecure=True) +class ConstructNumRetriesTestCase(unittest.TestCase): + @staticmethod + def _fake_retry_request(http, num_retries, req_type, sleep, rand, uri, method, *args, **kwargs): + return http.request(uri, method, *args, **kwargs) + + @contextlib.contextmanager + def patch_retry(self): + # We have this dedicated context manager that goes through `sys.modules` + # instead of just using `mock.patch` because of the unfortunate + # `arvados.api` name collision. + orig_func = sys.modules['arvados.api']._orig_retry_request + expect_name = 'googleapiclient.http._retry_request' + self.assertEqual( + '{0.__module__}.{0.__name__}'.format(orig_func), expect_name, + f"test setup problem: {expect_name} not at arvados.api._orig_retry_request", + ) + retry_mock = mock.Mock(wraps=self._fake_retry_request) + sys.modules['arvados.api']._orig_retry_request = retry_mock + try: + yield retry_mock + finally: + sys.modules['arvados.api']._orig_retry_request = orig_func + + def _iter_num_retries(self, retry_mock): + for call in retry_mock.call_args_list: + try: + yield call.args[1] + except IndexError: + yield call.kwargs['num_retries'] + + def test_default_num_retries(self): + with self.patch_retry() as retry_mock: + client = arvados.api('v1') + actual = set(self._iter_num_retries(retry_mock)) + self.assertEqual(len(actual), 1) + self.assertTrue(actual.pop() > 6, "num_retries lower than expected") + + def _test_calls(self, init_arg, call_args, expected): + with self.patch_retry() as retry_mock: + client = arvados.api('v1', num_retries=init_arg) + for num_retries in call_args: + client.users().current().execute(num_retries=num_retries) + actual = self._iter_num_retries(retry_mock) + # The constructor makes two requests with its num_retries argument: + # one for the discovery document, and one for the config. + self.assertEqual(next(actual, None), init_arg) + self.assertEqual(next(actual, None), init_arg) + self.assertEqual(list(actual), expected) + + def test_discovery_num_retries(self): + for num_retries in [0, 5, 55]: + with self.subTest(f"num_retries={num_retries}"): + self._test_calls(num_retries, [], []) + + def test_num_retries_called_le_init(self): + for n in [6, 10]: + with self.subTest(f"init_arg={n}"): + call_args = [n - 4, n - 2, n] + expected = [n] * 3 + self._test_calls(n, call_args, expected) + + def test_num_retries_called_ge_init(self): + for n in [0, 10]: + with self.subTest(f"init_arg={n}"): + call_args = [n, n + 4, n + 8] + self._test_calls(n, call_args, call_args) + + def test_num_retries_called_mixed(self): + self._test_calls(5, [2, 6, 4, 8], [5, 6, 5, 8]) + + class PreCloseSocketTestCase(unittest.TestCase): def setUp(self): self.api = arvados.api('v1') diff --git a/sdk/python/tests/test_collections.py b/sdk/python/tests/test_collections.py index 8986cf2258..c79607fca9 100644 --- a/sdk/python/tests/test_collections.py +++ b/sdk/python/tests/test_collections.py @@ -538,11 +538,11 @@ class CollectionReaderTestCase(unittest.TestCase, CollectionTestMixin): self.mock_get_collection(client, status, 'foo_file') return client - def test_init_no_default_retries(self): + def test_init_default_retries(self): client = self.api_client_mock(200) reader = arvados.CollectionReader(self.DEFAULT_UUID, api_client=client) reader.manifest_text() - client.collections().get().execute.assert_called_with(num_retries=0) + client.collections().get().execute.assert_called_with(num_retries=10) def test_uuid_init_success(self): client = self.api_client_mock(200) diff --git a/sdk/python/tests/test_keep_client.py b/sdk/python/tests/test_keep_client.py index 0fe3961136..f472c0830e 100644 --- a/sdk/python/tests/test_keep_client.py +++ b/sdk/python/tests/test_keep_client.py @@ -276,7 +276,7 @@ class KeepClientServiceTestCase(unittest.TestCase, tutil.ApiClientMock, DiskCach try: # this will fail, but it ensures we get the service # discovery response - keep_client.put('baz2') + keep_client.put('baz2', num_retries=0) except: pass self.assertTrue(keep_client.using_proxy) @@ -338,7 +338,11 @@ class KeepClientServiceTestCase(unittest.TestCase, tutil.ApiClientMock, DiskCach api_client = self.mock_keep_services(count=1) force_timeout = socket.timeout("timed out") with tutil.mock_keep_responses(force_timeout, 0) as mock: - keep_client = arvados.KeepClient(api_client=api_client, block_cache=self.make_block_cache(self.disk_cache)) + keep_client = arvados.KeepClient( + api_client=api_client, + block_cache=self.make_block_cache(self.disk_cache), + num_retries=0, + ) with self.assertRaises(arvados.errors.KeepReadError): keep_client.get('ffffffffffffffffffffffffffffffff') self.assertEqual( @@ -355,7 +359,11 @@ class KeepClientServiceTestCase(unittest.TestCase, tutil.ApiClientMock, DiskCach api_client = self.mock_keep_services(count=1) force_timeout = socket.timeout("timed out") with tutil.mock_keep_responses(force_timeout, 0) as mock: - keep_client = arvados.KeepClient(api_client=api_client, block_cache=self.make_block_cache(self.disk_cache)) + keep_client = arvados.KeepClient( + api_client=api_client, + block_cache=self.make_block_cache(self.disk_cache), + num_retries=0, + ) with self.assertRaises(arvados.errors.KeepWriteError): keep_client.put(b'foo') self.assertEqual( @@ -372,7 +380,11 @@ class KeepClientServiceTestCase(unittest.TestCase, tutil.ApiClientMock, DiskCach api_client = self.mock_keep_services(count=1) force_timeout = socket.timeout("timed out") with tutil.mock_keep_responses(force_timeout, 0) as mock: - keep_client = arvados.KeepClient(api_client=api_client, block_cache=self.make_block_cache(self.disk_cache)) + keep_client = arvados.KeepClient( + api_client=api_client, + block_cache=self.make_block_cache(self.disk_cache), + num_retries=0, + ) with self.assertRaises(arvados.errors.KeepReadError): keep_client.head('ffffffffffffffffffffffffffffffff') self.assertEqual( @@ -389,7 +401,11 @@ class KeepClientServiceTestCase(unittest.TestCase, tutil.ApiClientMock, DiskCach api_client = self.mock_keep_services(service_type='proxy', count=1) force_timeout = socket.timeout("timed out") with tutil.mock_keep_responses(force_timeout, 0) as mock: - keep_client = arvados.KeepClient(api_client=api_client, block_cache=self.make_block_cache(self.disk_cache)) + keep_client = arvados.KeepClient( + api_client=api_client, + block_cache=self.make_block_cache(self.disk_cache), + num_retries=0, + ) with self.assertRaises(arvados.errors.KeepReadError): keep_client.get('ffffffffffffffffffffffffffffffff') self.assertEqual( @@ -406,7 +422,11 @@ class KeepClientServiceTestCase(unittest.TestCase, tutil.ApiClientMock, DiskCach api_client = self.mock_keep_services(service_type='proxy', count=1) force_timeout = socket.timeout("timed out") with tutil.mock_keep_responses(force_timeout, 0) as mock: - keep_client = arvados.KeepClient(api_client=api_client, block_cache=self.make_block_cache(self.disk_cache)) + keep_client = arvados.KeepClient( + api_client=api_client, + block_cache=self.make_block_cache(self.disk_cache), + num_retries=0, + ) with self.assertRaises(arvados.errors.KeepReadError): keep_client.head('ffffffffffffffffffffffffffffffff') self.assertEqual( @@ -424,7 +444,10 @@ class KeepClientServiceTestCase(unittest.TestCase, tutil.ApiClientMock, DiskCach api_client = self.mock_keep_services(service_type='proxy', count=1) force_timeout = socket.timeout("timed out") with tutil.mock_keep_responses(force_timeout, 0) as mock: - keep_client = arvados.KeepClient(api_client=api_client) + keep_client = arvados.KeepClient( + api_client=api_client, + num_retries=0, + ) with self.assertRaises(arvados.errors.KeepWriteError): keep_client.put('foo') self.assertEqual( @@ -441,7 +464,11 @@ class KeepClientServiceTestCase(unittest.TestCase, tutil.ApiClientMock, DiskCach api_client = mock.MagicMock(name='api_client') api_client.keep_services().accessible().execute.side_effect = ( arvados.errors.ApiError) - keep_client = arvados.KeepClient(api_client=api_client, block_cache=self.make_block_cache(self.disk_cache)) + keep_client = arvados.KeepClient( + api_client=api_client, + block_cache=self.make_block_cache(self.disk_cache), + num_retries=0, + ) with self.assertRaises(exc_class) as err_check: getattr(keep_client, verb)('d41d8cd98f00b204e9800998ecf8427e+0') self.assertEqual(0, len(err_check.exception.request_errors())) @@ -461,7 +488,11 @@ class KeepClientServiceTestCase(unittest.TestCase, tutil.ApiClientMock, DiskCach "retry error reporting test", 500, 500, 500, 500, 500, 500, 502, 502) with req_mock, tutil.skip_sleep, \ self.assertRaises(exc_class) as err_check: - keep_client = arvados.KeepClient(api_client=api_client, block_cache=self.make_block_cache(self.disk_cache)) + keep_client = arvados.KeepClient( + api_client=api_client, + block_cache=self.make_block_cache(self.disk_cache), + num_retries=0, + ) getattr(keep_client, verb)('d41d8cd98f00b204e9800998ecf8427e+0', num_retries=3) self.assertEqual([502, 502], [ @@ -484,7 +515,11 @@ class KeepClientServiceTestCase(unittest.TestCase, tutil.ApiClientMock, DiskCach api_client = self.mock_keep_services(count=3) with tutil.mock_keep_responses(data_loc, 200, 500, 500) as req_mock, \ self.assertRaises(arvados.errors.KeepWriteError) as exc_check: - keep_client = arvados.KeepClient(api_client=api_client, block_cache=self.make_block_cache(self.disk_cache)) + keep_client = arvados.KeepClient( + api_client=api_client, + block_cache=self.make_block_cache(self.disk_cache), + num_retries=0, + ) keep_client.put(data) self.assertEqual(2, len(exc_check.exception.request_errors())) @@ -494,8 +529,12 @@ class KeepClientServiceTestCase(unittest.TestCase, tutil.ApiClientMock, DiskCach api_client = self.mock_keep_services(service_type='proxy', read_only=True, count=1) with tutil.mock_keep_responses(data_loc, 200, 500, 500) as req_mock, \ self.assertRaises(arvados.errors.KeepWriteError) as exc_check: - keep_client = arvados.KeepClient(api_client=api_client, block_cache=self.make_block_cache(self.disk_cache)) - keep_client.put(data) + keep_client = arvados.KeepClient( + api_client=api_client, + block_cache=self.make_block_cache(self.disk_cache), + num_retries=0, + ) + keep_client.put(data) self.assertEqual(True, ("no Keep services available" in str(exc_check.exception))) self.assertEqual(0, len(exc_check.exception.request_errors())) @@ -503,7 +542,11 @@ class KeepClientServiceTestCase(unittest.TestCase, tutil.ApiClientMock, DiskCach body = b'oddball service get' api_client = self.mock_keep_services(service_type='fancynewblobstore') with tutil.mock_keep_responses(body, 200): - keep_client = arvados.KeepClient(api_client=api_client, block_cache=self.make_block_cache(self.disk_cache)) + keep_client = arvados.KeepClient( + api_client=api_client, + block_cache=self.make_block_cache(self.disk_cache), + num_retries=0, + ) actual = keep_client.get(tutil.str_keep_locator(body)) self.assertEqual(body, actual) @@ -512,7 +555,11 @@ class KeepClientServiceTestCase(unittest.TestCase, tutil.ApiClientMock, DiskCach pdh = tutil.str_keep_locator(body) api_client = self.mock_keep_services(service_type='fancynewblobstore') with tutil.mock_keep_responses(pdh, 200): - keep_client = arvados.KeepClient(api_client=api_client, block_cache=self.make_block_cache(self.disk_cache)) + keep_client = arvados.KeepClient( + api_client=api_client, + block_cache=self.make_block_cache(self.disk_cache), + num_retries=0, + ) actual = keep_client.put(body, copies=1) self.assertEqual(pdh, actual) @@ -524,7 +571,11 @@ class KeepClientServiceTestCase(unittest.TestCase, tutil.ApiClientMock, DiskCach headers = {'x-keep-replicas-stored': 3} with tutil.mock_keep_responses(pdh, 200, 418, 418, 418, **headers) as req_mock: - keep_client = arvados.KeepClient(api_client=api_client, block_cache=self.make_block_cache(self.disk_cache)) + keep_client = arvados.KeepClient( + api_client=api_client, + block_cache=self.make_block_cache(self.disk_cache), + num_retries=0, + ) actual = keep_client.put(body, copies=2) self.assertEqual(pdh, actual) self.assertEqual(1, req_mock.call_count) @@ -638,7 +689,7 @@ class KeepStorageClassesTestCase(unittest.TestCase, tutil.ApiClientMock, DiskCac 'x-keep-storage-classes-confirmed': 'foo=1'} with tutil.mock_keep_responses(self.locator, 200, 503, **headers) as mock: with self.assertRaises(arvados.errors.KeepWriteError): - self.keep_client.put(self.data, copies=1, classes=['foo', 'bar']) + self.keep_client.put(self.data, copies=1, classes=['foo', 'bar'], num_retries=0) # 1st request, both classes pending req1_headers = mock.responses[0].getopt(pycurl.HTTPHEADER) self.assertIn('X-Keep-Storage-Classes: bar, foo', req1_headers) @@ -689,7 +740,7 @@ class KeepStorageClassesTestCase(unittest.TestCase, tutil.ApiClientMock, DiskCac with tutil.mock_keep_responses(self.locator, return_code, return_code, **headers): case_desc = 'wanted_copies={}, wanted_classes="{}", confirmed_copies={}, confirmed_classes="{}"'.format(w_copies, ', '.join(w_classes), c_copies, c_classes) with self.assertRaises(arvados.errors.KeepWriteError, msg=case_desc): - self.keep_client.put(self.data, copies=w_copies, classes=w_classes) + self.keep_client.put(self.data, copies=w_copies, classes=w_classes, num_retries=0) @tutil.skip_sleep @parameterized.parameterized_class([{"disk_cache": True}, {"disk_cache": False}]) @@ -1250,10 +1301,6 @@ class KeepClientRetryTestMixin(object): with self.TEST_PATCHER(self.DEFAULT_EXPECT, Exception('mock err'), 200): self.check_success(num_retries=3) - def test_no_default_retry(self): - with self.TEST_PATCHER(self.DEFAULT_EXPECT, 500, 200): - self.check_exception() - def test_no_retry_after_permanent_error(self): with self.TEST_PATCHER(self.DEFAULT_EXPECT, 403, 200): self.check_exception(num_retries=3) @@ -1293,7 +1340,7 @@ class KeepClientRetryGetTestCase(KeepClientRetryTestMixin, unittest.TestCase, Di # and a high threshold of servers report that it's not found. # This test rigs up 50/50 disagreement between two servers, and # checks that it does not become a NotFoundError. - client = self.new_client() + client = self.new_client(num_retries=0) with tutil.mock_keep_responses(self.DEFAULT_EXPECT, 404, 500): with self.assertRaises(arvados.errors.KeepReadError) as exc_check: client.get(self.HINTED_LOCATOR) @@ -1341,7 +1388,7 @@ class KeepClientRetryHeadTestCase(KeepClientRetryTestMixin, unittest.TestCase, D # and a high threshold of servers report that it's not found. # This test rigs up 50/50 disagreement between two servers, and # checks that it does not become a NotFoundError. - client = self.new_client() + client = self.new_client(num_retries=0) with tutil.mock_keep_responses(self.DEFAULT_EXPECT, 404, 500): with self.assertRaises(arvados.errors.KeepReadError) as exc_check: client.head(self.HINTED_LOCATOR) diff --git a/sdk/python/tests/test_retry_job_helpers.py b/sdk/python/tests/test_retry_job_helpers.py index 76c62cb0ce..9389b25c88 100644 --- a/sdk/python/tests/test_retry_job_helpers.py +++ b/sdk/python/tests/test_retry_job_helpers.py @@ -28,7 +28,7 @@ class ApiClientRetryTestMixin(object): def setUp(self): # Patch arvados.api() to return our mock API, so we can mock # its http requests. - self.api_client = arvados.api('v1', cache=False) + self.api_client = arvados.api('v1', cache=False, num_retries=0) self.api_patch = mock.patch('arvados.api', return_value=self.api_client) self.api_patch.start() diff --git a/sdk/python/tests/test_stream.py b/sdk/python/tests/test_stream.py index dc84a037f8..12a3340eab 100644 --- a/sdk/python/tests/test_stream.py +++ b/sdk/python/tests/test_stream.py @@ -223,13 +223,6 @@ class StreamRetryTestMixin(object): reader = self.reader_for('bar_file') self.assertEqual(b'bar', self.read_for_test(reader, 3)) - @tutil.skip_sleep - def test_read_no_default_retry(self): - with tutil.mock_keep_responses('', 500): - reader = self.reader_for('user_agreement') - with self.assertRaises(arvados.errors.KeepReadError): - self.read_for_test(reader, 10) - @tutil.skip_sleep def test_read_with_instance_retries(self): with tutil.mock_keep_responses('foo', 500, 200): diff --git a/services/fuse/arvados_fuse/command.py b/services/fuse/arvados_fuse/command.py index e275825a61..95b9a9773b 100644 --- a/services/fuse/arvados_fuse/command.py +++ b/services/fuse/arvados_fuse/command.py @@ -230,6 +230,9 @@ class Mount(object): try: self.api = arvados.safeapi.ThreadSafeApiCache( apiconfig=arvados.config.settings(), + api_params={ + 'num_retries': self.args.retries, + }, # default value of file_cache is 0, this tells KeepBlockCache to # choose a default based on whether disk_cache is enabled or not. keep_params={ diff --git a/services/fuse/tests/test_command_args.py b/services/fuse/tests/test_command_args.py index ed59029628..600bb0fe22 100644 --- a/services/fuse/tests/test_command_args.py +++ b/services/fuse/tests/test_command_args.py @@ -292,7 +292,7 @@ class MountErrorTest(unittest.TestCase): def test_bogus_host(self): arvados.config._settings["ARVADOS_API_HOST"] = "100::" - with self.assertRaises(SystemExit) as ex: + with self.assertRaises(SystemExit) as ex, mock.patch('time.sleep'): args = arvados_fuse.command.ArgumentParser().parse_args([self.mntdir]) arvados_fuse.command.Mount(args, logger=self.logger).run() self.assertEqual(1, ex.exception.code) diff --git a/services/fuse/tests/test_retry.py b/services/fuse/tests/test_retry.py index b69707af4f..44ab5cce91 100644 --- a/services/fuse/tests/test_retry.py +++ b/services/fuse/tests/test_retry.py @@ -38,8 +38,8 @@ class KeepClientRetry(unittest.TestCase): pass self.assertEqual(num_retries, kc.call_args[1].get('num_retries')) - def test_default_retry_3(self): - self._test_retry(3, []) + def test_default_retry_10(self): + self._test_retry(10, []) def test_retry_2(self): self._test_retry(2, ['--retries=2']) -- 2.30.2