X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/26096146e35e14aa334fd31753138935d36caf58..763e5bd313592a1c1f161b80bc07c94a49f8fb91:/sdk/python/arvados/api.py diff --git a/sdk/python/arvados/api.py b/sdk/python/arvados/api.py index 85d4197588..2ddce8e4c0 100644 --- a/sdk/python/arvados/api.py +++ b/sdk/python/arvados/api.py @@ -1,6 +1,13 @@ # Copyright (C) The Arvados Authors. All rights reserved. # # SPDX-License-Identifier: Apache-2.0 +"""Arvados API client + +The code in this module builds Arvados API client objects you can use to submit +Arvados API requests. This includes extending the underlying HTTP client with +niceties such as caching, X-Request-Id header for tracking, and more. The main +client constructors are `api` and `api_from_config`. +""" from __future__ import absolute_import from future import standard_library @@ -20,19 +27,29 @@ import time import types import apiclient +import apiclient.http from apiclient import discovery as apiclient_discovery from apiclient import errors as apiclient_errors from . import config from . import errors +from . import retry from . import util from . import cache _logger = logging.getLogger('arvados.api') MAX_IDLE_CONNECTION_DURATION = 30 -RETRY_DELAY_INITIAL = 2 -RETRY_DELAY_BACKOFF = 2 -RETRY_COUNT = 2 + +# These constants supported our own retry logic that we've since removed in +# favor of using googleapiclient's num_retries. They're kept here purely for +# API compatibility, but set to 0 to indicate no retries happen. +RETRY_DELAY_INITIAL = 0 +RETRY_DELAY_BACKOFF = 0 +RETRY_COUNT = 0 + +# An unused HTTP 5xx status code to request a retry internally. +# See _intercept_http_request. This should not be user-visible. +_RETRY_4XX_STATUS = 545 if sys.version_info >= (3,): httplib2.SSLHandshakeError = None @@ -41,7 +58,7 @@ class OrderedJsonModel(apiclient.model.JsonModel): """Model class for JSON that preserves the contents' order. API clients that care about preserving the order of fields in API - server responses can use this model to do so, like this:: + server responses can use this model to do so, like this: from arvados.api import OrderedJsonModel client = arvados.api('v1', ..., model=OrderedJsonModel()) @@ -57,6 +74,24 @@ class OrderedJsonModel(apiclient.model.JsonModel): return body +_orig_retry_request = apiclient.http._retry_request +def _retry_request(http, num_retries, *args, **kwargs): + try: + num_retries = max(num_retries, http.num_retries) + except AttributeError: + # `http` client object does not have a `num_retries` attribute. + # It apparently hasn't gone through _patch_http_request, possibly + # because this isn't an Arvados API client. Pass through to + # avoid interfering with other Google API clients. + return _orig_retry_request(http, num_retries, *args, **kwargs) + response, body = _orig_retry_request(http, num_retries, *args, **kwargs) + # If _intercept_http_request ran out of retries for a 4xx response, + # restore the original status code. + if response.status == _RETRY_4XX_STATUS: + response.status = int(response['status']) + return (response, body) +apiclient.http._retry_request = _retry_request + def _intercept_http_request(self, uri, method="GET", headers={}, **kwargs): if not headers.get('X-Request-Id'): headers['X-Request-Id'] = self._request_id() @@ -68,12 +103,7 @@ def _intercept_http_request(self, uri, method="GET", headers={}, **kwargs): headers['Authorization'] = 'OAuth2 %s' % self.arvados_api_token - retryable = method in [ - 'DELETE', 'GET', 'HEAD', 'OPTIONS', 'PUT'] - retry_count = self._retry_count if retryable else 0 - - if (not retryable and - time.time() - self._last_request_time > self._max_keepalive_idle): + if (time.time() - self._last_request_time) > self._max_keepalive_idle: # High probability of failure due to connection atrophy. Make # sure this request [re]opens a new connection by closing and # forgetting all cached connections first. @@ -81,32 +111,17 @@ def _intercept_http_request(self, uri, method="GET", headers={}, **kwargs): conn.close() self.connections.clear() - delay = self._retry_delay_initial - for _ in range(retry_count): - self._last_request_time = time.time() - try: - return self.orig_http_request(uri, method, headers=headers, **kwargs) - except http.client.HTTPException: - _logger.debug("[%s] Retrying API request in %d s after HTTP error", - headers['X-Request-Id'], delay, exc_info=True) - except ssl.SSLCertVerificationError as e: - raise ssl.SSLCertVerificationError(e.args[0], "Could not connect to %s\n%s\nPossible causes: remote SSL/TLS certificate expired, or was issued by an untrusted certificate authority." % (uri, e)) from None - except socket.error: - # This is the one case where httplib2 doesn't close the - # underlying connection first. Close all open - # connections, expecting this object only has the one - # connection to the API server. This is safe because - # httplib2 reopens connections when needed. - _logger.debug("[%s] Retrying API request in %d s after socket error", - headers['X-Request-Id'], delay, exc_info=True) - for conn in self.connections.values(): - conn.close() - - time.sleep(delay) - delay = delay * self._retry_delay_backoff - self._last_request_time = time.time() - return self.orig_http_request(uri, method, headers=headers, **kwargs) + try: + response, body = self.orig_http_request(uri, method, headers=headers, **kwargs) + except ssl.SSLCertVerificationError as e: + raise ssl.SSLCertVerificationError(e.args[0], "Could not connect to %s\n%s\nPossible causes: remote SSL/TLS certificate expired, or was issued by an untrusted certificate authority." % (uri, e)) from None + # googleapiclient only retries 403, 429, and 5xx status codes. + # If we got another 4xx status that we want to retry, convert it into + # 5xx so googleapiclient handles it the way we want. + if response.status in retry._HTTP_CAN_RETRY and response.status < 500: + response.status = _RETRY_4XX_STATUS + return (response, body) except Exception as e: # Prepend "[request_id] " to the error message, which we # assume is the first string argument passed to the exception @@ -117,16 +132,14 @@ def _intercept_http_request(self, uri, method="GET", headers={}, **kwargs): raise type(e)(*e.args) raise -def _patch_http_request(http, api_token): +def _patch_http_request(http, api_token, num_retries): http.arvados_api_token = api_token http.max_request_size = 0 + http.num_retries = num_retries http.orig_http_request = http.request http.request = types.MethodType(_intercept_http_request, http) http._last_request_time = 0 http._max_keepalive_idle = MAX_IDLE_CONNECTION_DURATION - http._retry_delay_initial = RETRY_DELAY_INITIAL - http._retry_delay_backoff = RETRY_DELAY_BACKOFF - http._retry_count = RETRY_COUNT http._request_id = util.new_request_id return http @@ -167,130 +180,280 @@ def http_cache(data_type): return None return cache.SafeHTTPCache(path, max_age=60*60*24*2) -def api(version=None, cache=True, host=None, token=None, insecure=False, - request_id=None, timeout=5*60, **kwargs): - """Return an apiclient Resources object for an Arvados instance. - - :version: - A string naming the version of the Arvados API to use (for - example, 'v1'). - - :cache: - Use a cache (~/.cache/arvados/discovery) for the discovery - document. - - :host: - The Arvados API server host (and optional :port) to connect to. - - :token: - The authentication token to send with each API call. - - :insecure: - If True, ignore SSL certificate validation errors. - - :timeout: - A timeout value for http requests. - - :request_id: - Default X-Request-Id header value for outgoing requests that - don't already provide one. If None or omitted, generate a random +def api_client( + version, + discoveryServiceUrl, + token, + *, + cache=True, + http=None, + insecure=False, + num_retries=10, + request_id=None, + timeout=5*60, + **kwargs, +): + """Build an Arvados API client + + This function returns a `googleapiclient.discovery.Resource` object + constructed from the given arguments. This is a relatively low-level + interface that requires all the necessary inputs as arguments. Most + users will prefer to use `api` which can accept more flexible inputs. + + Arguments: + + version: str + : A string naming the version of the Arvados API to use. + + discoveryServiceUrl: str + : The URL used to discover APIs passed directly to + `googleapiclient.discovery.build`. + + token: str + : The authentication token to send with each API call. + + Keyword-only arguments: + + cache: bool + : If true, loads the API discovery document from, or saves it to, a cache + on disk (located at `~/.cache/arvados/discovery`). + + http: httplib2.Http | None + : The HTTP client object the API client object will use to make requests. + If not provided, this function will build its own to use. Either way, the + object will be patched as part of the build process. + + insecure: bool + : If true, ignore SSL certificate validation errors. Default `False`. + + num_retries: int + : The number of times to retry each API request if it encounters a + temporary failure. Default 10. + + request_id: str | None + : Default `X-Request-Id` header value for outgoing requests that + don't already provide one. If `None` or omitted, generate a random ID. When retrying failed requests, the same ID is used on all attempts. - Additional keyword arguments will be passed directly to - `apiclient_discovery.build` if a new Resource object is created. - If the `discoveryServiceUrl` or `http` keyword arguments are - missing, this function will set default values for them, based on - the current Arvados configuration settings. + timeout: int + : A timeout value for HTTP requests in seconds. Default 300 (5 minutes). + Additional keyword arguments will be passed directly to + `googleapiclient.discovery.build`. """ - - if not version: - version = 'v1' - _logger.info("Using default API version. " + - "Call arvados.api('%s') instead." % - version) - if 'discoveryServiceUrl' in kwargs: - if host: - raise ValueError("both discoveryServiceUrl and host provided") - # Here we can't use a token from environment, config file, - # etc. Those probably have nothing to do with the host - # provided by the caller. - if not token: - raise ValueError("discoveryServiceUrl provided, but token missing") - elif host and token: - pass - elif not host and not token: - return api_from_config( - version=version, cache=cache, timeout=timeout, - request_id=request_id, **kwargs) - else: - # Caller provided one but not the other - if not host: - raise ValueError("token argument provided, but host missing.") - else: - raise ValueError("host argument provided, but token missing.") - - if host: - # Caller wants us to build the discoveryServiceUrl - kwargs['discoveryServiceUrl'] = ( - 'https://%s/discovery/v1/apis/{api}/{apiVersion}/rest' % (host,)) - - if 'http' not in kwargs: - http_kwargs = {'ca_certs': util.ca_certs_path()} - if cache: - http_kwargs['cache'] = http_cache('discovery') - if insecure: - http_kwargs['disable_ssl_certificate_validation'] = True - kwargs['http'] = httplib2.Http(**http_kwargs) - - if kwargs['http'].timeout is None: - kwargs['http'].timeout = timeout - - kwargs['http'] = _patch_http_request(kwargs['http'], token) - - svc = apiclient_discovery.build('arvados', version, cache_discovery=False, **kwargs) + if http is None: + http = httplib2.Http( + ca_certs=util.ca_certs_path(), + cache=http_cache('discovery') if cache else None, + disable_ssl_certificate_validation=bool(insecure), + ) + if http.timeout is None: + http.timeout = timeout + http = _patch_http_request(http, token, num_retries) + + svc = apiclient_discovery.build( + 'arvados', version, + cache_discovery=False, + discoveryServiceUrl=discoveryServiceUrl, + http=http, + num_retries=num_retries, + **kwargs, + ) svc.api_token = token svc.insecure = insecure svc.request_id = request_id svc.config = lambda: util.get_config_once(svc) svc.vocabulary = lambda: util.get_vocabulary_once(svc) svc.close_connections = types.MethodType(_close_connections, svc) - kwargs['http'].max_request_size = svc._rootDesc.get('maxRequestSize', 0) - kwargs['http'].cache = None - kwargs['http']._request_id = lambda: svc.request_id or util.new_request_id() + http.max_request_size = svc._rootDesc.get('maxRequestSize', 0) + http.cache = None + http._request_id = lambda: svc.request_id or util.new_request_id() return svc -def api_from_config(version=None, apiconfig=None, **kwargs): - """Return an apiclient Resources object enabling access to an Arvados server - instance. +def normalize_api_kwargs( + version=None, + discoveryServiceUrl=None, + host=None, + token=None, + **kwargs, +): + """Validate kwargs from `api` and build kwargs for `api_client` + + This method takes high-level keyword arguments passed to the `api` + constructor and normalizes them into a new dictionary that can be passed + as keyword arguments to `api_client`. It raises `ValueError` if required + arguments are missing or conflict. + + Arguments: + + version: str | None + : A string naming the version of the Arvados API to use. If not specified, + the code will log a warning and fall back to 'v1'. - :version: - A string naming the version of the Arvados REST API to use (for - example, 'v1'). + discoveryServiceUrl: str | None + : The URL used to discover APIs passed directly to + `googleapiclient.discovery.build`. It is an error to pass both + `discoveryServiceUrl` and `host`. - :apiconfig: - If provided, this should be a dict-like object (must support the get() - method) with entries for ARVADOS_API_HOST, ARVADOS_API_TOKEN, and - optionally ARVADOS_API_HOST_INSECURE. If not provided, use - arvados.config (which gets these parameters from the environment by - default.) + host: str | None + : The hostname and optional port number of the Arvados API server. Used to + build `discoveryServiceUrl`. It is an error to pass both + `discoveryServiceUrl` and `host`. - Other keyword arguments such as `cache` will be passed along `api()` + token: str + : The authentication token to send with each API call. + Additional keyword arguments will be included in the return value. + """ + if discoveryServiceUrl and host: + raise ValueError("both discoveryServiceUrl and host provided") + elif discoveryServiceUrl: + url_src = "discoveryServiceUrl" + elif host: + url_src = "host argument" + discoveryServiceUrl = 'https://%s/discovery/v1/apis/{api}/{apiVersion}/rest' % (host,) + elif token: + # This specific error message gets priority for backwards compatibility. + raise ValueError("token argument provided, but host missing.") + else: + raise ValueError("neither discoveryServiceUrl nor host provided") + if not token: + raise ValueError("%s provided, but token missing" % (url_src,)) + if not version: + version = 'v1' + _logger.info( + "Using default API version. Call arvados.api(%r) instead.", + version, + ) + return { + 'discoveryServiceUrl': discoveryServiceUrl, + 'token': token, + 'version': version, + **kwargs, + } + +def api_kwargs_from_config(version=None, apiconfig=None, **kwargs): + """Build `api_client` keyword arguments from configuration + + This function accepts a mapping with Arvados configuration settings like + `ARVADOS_API_HOST` and converts them into a mapping of keyword arguments + that can be passed to `api_client`. If `ARVADOS_API_HOST` or + `ARVADOS_API_TOKEN` are not configured, it raises `ValueError`. + + Arguments: + + version: str | None + : A string naming the version of the Arvados API to use. If not specified, + the code will log a warning and fall back to 'v1'. + + apiconfig: Mapping[str, str] | None + : A mapping with entries for `ARVADOS_API_HOST`, `ARVADOS_API_TOKEN`, and + optionally `ARVADOS_API_HOST_INSECURE`. If not provided, calls + `arvados.config.settings` to get these parameters from user configuration. + + Additional keyword arguments will be included in the return value. """ - # Load from user configuration or environment if apiconfig is None: apiconfig = config.settings() + missing = " and ".join( + key + for key in ['ARVADOS_API_HOST', 'ARVADOS_API_TOKEN'] + if key not in apiconfig + ) + if missing: + raise ValueError( + "%s not set.\nPlease set in %s or export environment variable." % + (missing, config.default_config_file), + ) + return normalize_api_kwargs( + version, + None, + apiconfig['ARVADOS_API_HOST'], + apiconfig['ARVADOS_API_TOKEN'], + insecure=config.flag_is_true('ARVADOS_API_HOST_INSECURE', apiconfig), + **kwargs, + ) + +def api(version=None, cache=True, host=None, token=None, insecure=False, + request_id=None, timeout=5*60, *, + discoveryServiceUrl=None, **kwargs): + """Dynamically build an Arvados API client + + This function provides a high-level "do what I mean" interface to build an + Arvados API client object. You can call it with no arguments to build a + client from user configuration; pass `host` and `token` arguments just + like you would write in user configuration; or pass additional arguments + for lower-level control over the client. + + This function returns a `arvados.safeapi.ThreadSafeApiCache`, an + API-compatible wrapper around `googleapiclient.discovery.Resource`. If + you're handling concurrency yourself and/or your application is very + performance-sensitive, consider calling `api_client` directly. + + Arguments: + + version: str | None + : A string naming the version of the Arvados API to use. If not specified, + the code will log a warning and fall back to 'v1'. + + host: str | None + : The hostname and optional port number of the Arvados API server. + + token: str | None + : The authentication token to send with each API call. + + discoveryServiceUrl: str | None + : The URL used to discover APIs passed directly to + `googleapiclient.discovery.build`. - errors = [] - for x in ['ARVADOS_API_HOST', 'ARVADOS_API_TOKEN']: - if x not in apiconfig: - errors.append(x) - if errors: - raise ValueError(" and ".join(errors)+" not set.\nPlease set in %s or export environment variable." % config.default_config_file) - host = apiconfig.get('ARVADOS_API_HOST') - token = apiconfig.get('ARVADOS_API_TOKEN') - insecure = config.flag_is_true('ARVADOS_API_HOST_INSECURE', apiconfig) - - return api(version=version, host=host, token=token, insecure=insecure, **kwargs) + If `host`, `token`, and `discoveryServiceUrl` are all omitted, `host` and + `token` will be loaded from the user's configuration. Otherwise, you must + pass `token` and one of `host` or `discoveryServiceUrl`. It is an error to + pass both `host` and `discoveryServiceUrl`. + + Other arguments are passed directly to `api_client`. See that function's + docstring for more information about their meaning. + """ + kwargs.update( + cache=cache, + insecure=insecure, + request_id=request_id, + timeout=timeout, + ) + if discoveryServiceUrl or host or token: + kwargs.update(normalize_api_kwargs(version, discoveryServiceUrl, host, token)) + else: + kwargs.update(api_kwargs_from_config(version)) + version = kwargs.pop('version') + # We do the import here to avoid a circular import at the top level. + from .safeapi import ThreadSafeApiCache + return ThreadSafeApiCache({}, {}, kwargs, version) + +def api_from_config(version=None, apiconfig=None, **kwargs): + """Build an Arvados API client from a configuration mapping + + This function builds an Arvados API client from a mapping with user + configuration. It accepts that mapping as an argument, so you can use a + configuration that's different from what the user has set up. + + This function returns a `arvados.safeapi.ThreadSafeApiCache`, an + API-compatible wrapper around `googleapiclient.discovery.Resource`. If + you're handling concurrency yourself and/or your application is very + performance-sensitive, consider calling `api_client` directly. + + Arguments: + + version: str | None + : A string naming the version of the Arvados API to use. If not specified, + the code will log a warning and fall back to 'v1'. + + apiconfig: Mapping[str, str] | None + : A mapping with entries for `ARVADOS_API_HOST`, `ARVADOS_API_TOKEN`, and + optionally `ARVADOS_API_HOST_INSECURE`. If not provided, calls + `arvados.config.settings` to get these parameters from user configuration. + + Other arguments are passed directly to `api_client`. See that function's + docstring for more information about their meaning. + """ + return api(**api_kwargs_from_config(version, apiconfig, **kwargs))