# Copyright (C) The Arvados Authors. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0
+"""Arvados API client
+
+The code in this module builds Arvados API client objects you can use to submit
+Arvados API requests. This includes extending the underlying HTTP client with
+niceties such as caching, X-Request-Id header for tracking, and more. The main
+client constructors are `api` and `api_from_config`.
+"""
from __future__ import absolute_import
from future import standard_library
import os
import re
import socket
+import ssl
import sys
+import threading
import time
import types
import apiclient
+import apiclient.http
from apiclient import discovery as apiclient_discovery
from apiclient import errors as apiclient_errors
from . import config
from . import errors
+from . import retry
from . import util
from . import cache
+from .logging import GoogleHTTPClientFilter, log_handler
_logger = logging.getLogger('arvados.api')

# Acquired (without blocking) by `api_client` and never released, so that
# only the first client built in this process can win it.
_googleapiclient_log_lock = threading.Lock()

# Seconds of inactivity after which `_intercept_http_request` closes and
# forgets all cached connections before sending the next request.
MAX_IDLE_CONNECTION_DURATION = 30

# These constants supported our own retry logic that we've since removed in
# favor of using googleapiclient's num_retries. They're kept here purely for
# API compatibility, but set to 0 to indicate no retries happen.
RETRY_DELAY_INITIAL = RETRY_DELAY_BACKOFF = RETRY_COUNT = 0

# An unused HTTP 5xx status code to request a retry internally.
# See _intercept_http_request. This should not be user-visible.
_RETRY_4XX_STATUS = 545

# NOTE(review): presumably a Python 3 compatibility shim for code that still
# refers to httplib2.SSLHandshakeError -- confirm before removing.
if sys.version_info.major >= 3:
    httplib2.SSLHandshakeError = None
"""Model class for JSON that preserves the contents' order.
API clients that care about preserving the order of fields in API
- server responses can use this model to do so, like this::
+ server responses can use this model to do so, like this:
from arvados.api import OrderedJsonModel
client = arvados.api('v1', ..., model=OrderedJsonModel())
return body
-def _intercept_http_request(self, uri, method="GET", headers={}, **kwargs):
- if (self.max_request_size and
- kwargs.get('body') and
- self.max_request_size < len(kwargs['body'])):
- raise apiclient_errors.MediaUploadSizeError("Request size %i bytes exceeds published limit of %i bytes" % (len(kwargs['body']), self.max_request_size))
-
- if config.get("ARVADOS_EXTERNAL_CLIENT", "") == "true":
- headers['X-External-Client'] = '1'
# Keep a reference to googleapiclient's implementation so the wrapper below
# can delegate to it after being installed in its place.
_orig_retry_request = apiclient.http._retry_request

def _retry_request(http, num_retries, *args, **kwargs):
    """Wrap googleapiclient's retry loop for Arvados HTTP clients

    This function replaces `apiclient.http._retry_request`. For an HTTP
    client object that carries a `num_retries` attribute (set by
    `_patch_http_request`), it runs the original retry loop with the larger
    of the caller's `num_retries` and the client's own value, then undoes
    the internal `_RETRY_4XX_STATUS` marker on the final response. Any other
    HTTP client object is passed straight through to the original
    implementation with its arguments unchanged.

    Arguments:

    http: object
    : The HTTP client object the request will be sent with.

    num_retries: int
    : The number of retries requested by the caller.

    Additional positional and keyword arguments are passed directly to the
    original `apiclient.http._retry_request`.
    """
    try:
        effective_retries = max(num_retries, http.num_retries)
    except AttributeError:
        # `http` has no `num_retries` attribute, so it apparently never went
        # through _patch_http_request -- most likely it does not belong to
        # an Arvados API client. Stay out of the way of other Google API
        # clients and delegate untouched.
        return _orig_retry_request(http, num_retries, *args, **kwargs)

    response, body = _orig_retry_request(
        http, effective_retries, *args, **kwargs)
    if response.status == _RETRY_4XX_STATUS:
        # _intercept_http_request disguised a retryable 4xx response and the
        # retries ran out: report the status the server actually sent.
        response.status = int(response['status'])
    return (response, body)

# Install the wrapper in place of googleapiclient's implementation.
apiclient.http._retry_request = _retry_request
- headers['Authorization'] = 'OAuth2 %s' % self.arvados_api_token
def _intercept_http_request(self, uri, method="GET", headers=None, **kwargs):
    """Send one HTTP request with Arvados headers and retry support

    `_patch_http_request` binds this function as the `request` method of an
    HTTP client object, so `self` is that client. It makes sure the request
    carries an `X-Request-Id` header, enforces `self.max_request_size`, adds
    the `Authorization` header, drops cached connections that have been idle
    for more than `self._max_keepalive_idle` seconds, and then delegates to
    `self.orig_http_request`.

    Arguments:

    uri: str
    : The request URI, passed through to `self.orig_http_request`.

    method: str
    : The HTTP method, passed through to `self.orig_http_request`.
      Default `'GET'`.

    headers: dict | None
    : The request headers. This mapping is updated in place with the
      `X-Request-Id` and `Authorization` headers, so a caller that reuses
      the same mapping sends the same request ID again. If `None` or
      omitted, a new mapping is created for this call only.

    Additional keyword arguments are passed directly to
    `self.orig_http_request`. If `body` is among them, its length is checked
    against `self.max_request_size`.

    Returns the `(response, body)` tuple from `self.orig_http_request`. If
    the response status is below 500 and listed in `retry._HTTP_CAN_RETRY`,
    `response.status` is replaced with `_RETRY_4XX_STATUS`.

    Raises `apiclient.errors.MediaUploadSizeError` if the body is larger
    than a nonzero `self.max_request_size`, and
    `ssl.SSLCertVerificationError` with an explanatory message if
    certificate verification fails. Any exception raised while handling the
    request that has a string argument is re-raised as a new exception of
    the same type with `[request ID] ` prepended to the first such argument.
    """
    # This function writes to `headers`, so it must never fall back on a
    # default mapping shared between calls: that would leak the first
    # request's X-Request-Id into every later call that omits `headers`.
    if headers is None:
        headers = {}
    if not headers.get('X-Request-Id'):
        headers['X-Request-Id'] = self._request_id()
    try:
        if (self.max_request_size and
            kwargs.get('body') and
            self.max_request_size < len(kwargs['body'])):
            raise apiclient_errors.MediaUploadSizeError("Request size %i bytes exceeds published limit of %i bytes" % (len(kwargs['body']), self.max_request_size))

        headers['Authorization'] = 'OAuth2 %s' % self.arvados_api_token

        if (time.time() - self._last_request_time) > self._max_keepalive_idle:
            # High probability of failure due to connection atrophy. Make
            # sure this request [re]opens a new connection by closing and
            # forgetting all cached connections first.
            for conn in self.connections.values():
                conn.close()
            self.connections.clear()

        self._last_request_time = time.time()
        try:
            response, body = self.orig_http_request(uri, method, headers=headers, **kwargs)
        except ssl.SSLCertVerificationError as e:
            raise ssl.SSLCertVerificationError(e.args[0], "Could not connect to %s\n%s\nPossible causes: remote SSL/TLS certificate expired, or was issued by an untrusted certificate authority." % (uri, e)) from None
        # googleapiclient only retries 403, 429, and 5xx status codes.
        # If we got another 4xx status that we want to retry, convert it into
        # 5xx so googleapiclient handles it the way we want.
        if response.status in retry._HTTP_CAN_RETRY and response.status < 500:
            response.status = _RETRY_4XX_STATUS
        return (response, body)
    except Exception as e:
        # Prepend "[request_id] " to the error message, which we
        # assume is the first string argument passed to the exception
        # constructor.
        for index, arg in enumerate(e.args):
            if isinstance(arg, str):
                tagged = "[{}] {}".format(headers['X-Request-Id'], arg)
                e.args = e.args[:index] + (tagged,) + e.args[index+1:]
                raise type(e)(*e.args)
        # No string argument to annotate: re-raise the exception unchanged.
        raise
+
def _patch_http_request(http, api_token, num_retries):
    """Attach Arvados request handling to an HTTP client object

    This function saves the client's own `request` method as
    `orig_http_request`, replaces `request` with `_intercept_http_request`
    bound to the client, and sets the attributes that function reads.

    Arguments:

    http: object
    : The HTTP client object to patch. It is modified in place.

    api_token: str
    : The token `_intercept_http_request` sends in the `Authorization`
      header.

    num_retries: int
    : Stored as `http.num_retries`, which the module's `_retry_request`
      wrapper reads.

    Returns the same `http` object.
    """
    # Save the unpatched method before replacing it: the replacement
    # delegates to it.
    http.orig_http_request = http.request
    http.request = types.MethodType(_intercept_http_request, http)
    client_state = {
        'arvados_api_token': api_token,
        # 0 is falsy, so no request size limit is enforced.
        'max_request_size': 0,
        'num_retries': num_retries,
        # Starting from 0 makes the first request look long-idle.
        '_last_request_time': 0,
        '_max_keepalive_idle': MAX_IDLE_CONNECTION_DURATION,
        '_request_id': util.new_request_id,
    }
    for attr_name, attr_value in client_state.items():
        setattr(http, attr_name, attr_value)
    return http
def _close_connections(self):
    """Close every connection cached by this client's HTTP object

    `api_client` binds this function as the `close_connections` method of
    the client it builds. The connections are closed but stay in the
    `self._http.connections` mapping.
    """
    cached_connections = self._http.connections
    for connection in cached_connections.values():
        connection.close()
+
# Monkey patch discovery._cast() so objects and arrays get serialized
# with json.dumps() instead of str().
_cast_orig = apiclient_discovery._cast
return None
return cache.SafeHTTPCache(path, max_age=60*60*24*2)
-def api(version=None, cache=True, host=None, token=None, insecure=False,
- request_id=None, timeout=10, **kwargs):
- """Return an apiclient Resources object for an Arvados instance.
def api_client(
        version,
        discoveryServiceUrl,
        token,
        *,
        cache=True,
        http=None,
        insecure=False,
        num_retries=10,
        request_id=None,
        timeout=5*60,
        **kwargs,
):
    """Build an Arvados API client

    This function returns a `googleapiclient.discovery.Resource` object
    constructed from the given arguments. This is a relatively low-level
    interface that requires all the necessary inputs as arguments. Most
    users will prefer to use `api` which can accept more flexible inputs.

    Arguments:

    version: str
    : A string naming the version of the Arvados API to use.

    discoveryServiceUrl: str
    : The URL used to discover APIs passed directly to
      `googleapiclient.discovery.build`.

    token: str
    : The authentication token to send with each API call.

    Keyword-only arguments:

    cache: bool
    : If true, loads the API discovery document from, or saves it to, a cache
      on disk (located at `~/.cache/arvados/discovery`).

    http: httplib2.Http | None
    : The HTTP client object the API client object will use to make requests.
      If not provided, this function will build its own to use. Either way, the
      object will be patched as part of the build process.

    insecure: bool
    : If true, ignore SSL certificate validation errors. Default `False`.

    num_retries: int
    : The number of times to retry each API request if it encounters a
      temporary failure. Default 10.

    request_id: str | None
    : Default `X-Request-Id` header value for outgoing requests that
      don't already provide one. If `None` or omitted, generate a random
      ID. When retrying failed requests, the same ID is used on all
      attempts.

    timeout: int
    : A timeout value for HTTP requests in seconds. Default 300 (5 minutes).
      It is only applied when the HTTP client object has no timeout set.

    Additional keyword arguments will be passed directly to
    `googleapiclient.discovery.build`.
    """
    if http is None:
        http = httplib2.Http(
            ca_certs=util.ca_certs_path(),
            cache=http_cache('discovery') if cache else None,
            disable_ssl_certificate_validation=bool(insecure),
        )
    if http.timeout is None:
        http.timeout = timeout
    http = _patch_http_request(http, token, num_retries)

    # The first time a client is instantiated, temporarily route
    # googleapiclient.http retry logs if they're not already. These are
    # important because temporary problems fetching the discovery document
    # can cause clients to appear to hang early. This can be removed after
    # we have a more general story for handling googleapiclient logs (#20521).
    client_logger = logging.getLogger('googleapiclient.http')
    # "first time a client is instantiated" = thread that acquires this lock
    # It is never released.
    is_first_client = _googleapiclient_log_lock.acquire(blocking=False)
    # Leave logging alone unless this is the first client AND nothing is
    # already handling this logger's records. hasHandlers() also looks at
    # the handlers of ancestor loggers.
    client_logger_preconfigured = (
        not is_first_client
        or client_logger.hasHandlers()
    )
    if not client_logger_preconfigured:
        client_level = client_logger.level
        client_filter = GoogleHTTPClientFilter()
        client_logger.addFilter(client_filter)
        client_logger.addHandler(log_handler)
        if logging.NOTSET < client_level < client_filter.retry_levelno:
            client_logger.setLevel(client_level)
        else:
            client_logger.setLevel(client_filter.retry_levelno)
    try:
        svc = apiclient_discovery.build(
            'arvados', version,
            cache_discovery=False,
            discoveryServiceUrl=discoveryServiceUrl,
            http=http,
            num_retries=num_retries,
            **kwargs,
        )
    finally:
        # Undo the temporary routing even when the build fails, so a failed
        # attempt does not leave our handler and filter on the logger.
        if not client_logger_preconfigured:
            client_logger.removeHandler(log_handler)
            client_logger.removeFilter(client_filter)
            client_logger.setLevel(client_level)
    svc.api_token = token
    svc.insecure = insecure
    svc.request_id = request_id
    svc.config = lambda: util.get_config_once(svc)
    svc.vocabulary = lambda: util.get_vocabulary_once(svc)
    svc.close_connections = types.MethodType(_close_connections, svc)
    # Now that the discovery document is loaded, enforce the request size
    # limit it publishes (0 means no limit is enforced).
    http.max_request_size = svc._rootDesc.get('maxRequestSize', 0)
    # The HTTP cache is only used for fetching the discovery document.
    http.cache = None
    # Prefer the client's request_id if one is set at the time of the
    # request; otherwise generate a new ID.
    http._request_id = lambda: svc.request_id or util.new_request_id()
    return svc
- :host:
- The Arvados API server host (and optional :port) to connect to.
def normalize_api_kwargs(
        version=None,
        discoveryServiceUrl=None,
        host=None,
        token=None,
        **kwargs,
):
    """Validate kwargs from `api` and build kwargs for `api_client`

    This function takes high-level keyword arguments passed to the `api`
    constructor and normalizes them into a new dictionary that can be passed
    as keyword arguments to `api_client`. It raises `ValueError` if required
    arguments are missing or conflict.

    Arguments:

    version: str | None
    : A string naming the version of the Arvados API to use. If not specified,
      the code will log a message at info level and fall back to 'v1'.

    discoveryServiceUrl: str | None
    : The URL used to discover APIs passed directly to
      `googleapiclient.discovery.build`. It is an error to pass both
      `discoveryServiceUrl` and `host`.

    host: str | None
    : The hostname and optional port number of the Arvados API server. Used to
      build `discoveryServiceUrl`. It is an error to pass both
      `discoveryServiceUrl` and `host`.

    token: str
    : The authentication token to send with each API call.

    Additional keyword arguments will be included in the return value.
    """
    if discoveryServiceUrl and host:
        raise ValueError("both discoveryServiceUrl and host provided")
    if not (discoveryServiceUrl or host):
        if token:
            # This specific error message gets priority for backwards
            # compatibility.
            raise ValueError("token argument provided, but host missing.")
        raise ValueError("neither discoveryServiceUrl nor host provided")

    # Exactly one of the two URL sources is set from here on.
    if host:
        url_source = "host argument"
        discoveryServiceUrl = 'https://%s/discovery/v1/apis/{api}/{apiVersion}/rest' % (host,)
    else:
        url_source = "discoveryServiceUrl"
    if not token:
        raise ValueError("%s provided, but token missing" % (url_source,))

    if not version:
        version = 'v1'
        _logger.info(
            "Using default API version. Call arvados.api(%r) instead.",
            version,
        )
    normalized = {
        'discoveryServiceUrl': discoveryServiceUrl,
        'token': token,
        'version': version,
    }
    normalized.update(kwargs)
    return normalized
+
def api_kwargs_from_config(version=None, apiconfig=None, **kwargs):
    """Build `api_client` keyword arguments from configuration

    This function accepts a mapping with Arvados configuration settings like
    `ARVADOS_API_HOST` and converts them into a mapping of keyword arguments
    that can be passed to `api_client`. If `ARVADOS_API_HOST` or
    `ARVADOS_API_TOKEN` are not configured, it raises `ValueError`.

    Arguments:

    version: str | None
    : A string naming the version of the Arvados API to use. It is passed to
      `normalize_api_kwargs`, which falls back to 'v1' if it is not specified.

    apiconfig: Mapping[str, str] | None
    : A mapping with entries for `ARVADOS_API_HOST`, `ARVADOS_API_TOKEN`, and
      optionally `ARVADOS_API_HOST_INSECURE`. If not provided, calls
      `arvados.config.settings` to get these parameters from user configuration.

    Additional keyword arguments will be included in the return value.
    """
    settings = config.settings() if apiconfig is None else apiconfig
    required_keys = ('ARVADOS_API_HOST', 'ARVADOS_API_TOKEN')
    unset_keys = [name for name in required_keys if name not in settings]
    if unset_keys:
        raise ValueError(
            "%s not set.\nPlease set in %s or export environment variable." %
            (" and ".join(unset_keys), config.default_config_file),
        )
    # The second positional argument is discoveryServiceUrl: configuration
    # only ever names a host, so the URL is left for normalize_api_kwargs to
    # build.
    return normalize_api_kwargs(
        version,
        None,
        settings['ARVADOS_API_HOST'],
        settings['ARVADOS_API_TOKEN'],
        insecure=config.flag_is_true('ARVADOS_API_HOST_INSECURE', settings),
        **kwargs,
    )
- if host:
- # Caller wants us to build the discoveryServiceUrl
- kwargs['discoveryServiceUrl'] = (
- 'https://%s/discovery/v1/apis/{api}/{apiVersion}/rest' % (host,))
def api(version=None, cache=True, host=None, token=None, insecure=False,
        request_id=None, timeout=5*60, *,
        discoveryServiceUrl=None, **kwargs):
    """Dynamically build an Arvados API client

    This function provides a high-level "do what I mean" interface to build an
    Arvados API client object. You can call it with no arguments to build a
    client from user configuration; pass `host` and `token` arguments just
    like you would write in user configuration; or pass additional arguments
    for lower-level control over the client.

    This function returns an `arvados.safeapi.ThreadSafeApiCache`, an
    API-compatible wrapper around `googleapiclient.discovery.Resource`. If
    you're handling concurrency yourself and/or your application is very
    performance-sensitive, consider calling `api_client` directly.

    Arguments:

    version: str | None
    : A string naming the version of the Arvados API to use. If not specified,
      the code will log a message at info level and fall back to 'v1'.

    host: str | None
    : The hostname and optional port number of the Arvados API server.

    token: str | None
    : The authentication token to send with each API call.

    discoveryServiceUrl: str | None
    : The URL used to discover APIs passed directly to
      `googleapiclient.discovery.build`.

    If `host`, `token`, and `discoveryServiceUrl` are all omitted, `host` and
    `token` will be loaded from the user's configuration. Otherwise, you must
    pass `token` and one of `host` or `discoveryServiceUrl`. It is an error to
    pass both `host` and `discoveryServiceUrl`.

    Other arguments are passed directly to `api_client`. See that function's
    docstring for more information about their meaning.
    """
    client_kwargs = dict(
        kwargs,
        cache=cache,
        insecure=insecure,
        request_id=request_id,
        timeout=timeout,
    )
    if discoveryServiceUrl or host or token:
        connection_kwargs = normalize_api_kwargs(
            version, discoveryServiceUrl, host, token)
    else:
        # Settings loaded from user configuration include `insecure`, which
        # replaces the value of the `insecure` argument above.
        connection_kwargs = api_kwargs_from_config(version)
    client_kwargs.update(connection_kwargs)
    # ThreadSafeApiCache takes the version separately from the other
    # api_client arguments.
    api_version = client_kwargs.pop('version')
    # We do the import here to avoid a circular import at the top level.
    from .safeapi import ThreadSafeApiCache
    return ThreadSafeApiCache({}, {}, client_kwargs, api_version)
+
def api_from_config(version=None, apiconfig=None, **kwargs):
    """Build an Arvados API client from a configuration mapping

    This function builds an Arvados API client from a mapping with user
    configuration. It accepts that mapping as an argument, so you can use a
    configuration that's different from what the user has set up.

    This function returns an `arvados.safeapi.ThreadSafeApiCache`, an
    API-compatible wrapper around `googleapiclient.discovery.Resource`. If
    you're handling concurrency yourself and/or your application is very
    performance-sensitive, consider calling `api_client` directly.

    Arguments:

    version: str | None
    : A string naming the version of the Arvados API to use. If not specified,
      the code will log a message at info level and fall back to 'v1'.

    apiconfig: Mapping[str, str] | None
    : A mapping with entries for `ARVADOS_API_HOST`, `ARVADOS_API_TOKEN`, and
      optionally `ARVADOS_API_HOST_INSECURE`. If not provided, calls
      `arvados.config.settings` to get these parameters from user configuration.

    Other arguments are passed directly to `api_client`. See that function's
    docstring for more information about their meaning.
    """
    # Translate the configuration into explicit connection arguments first,
    # then let `api` validate them and build the client.
    client_kwargs = api_kwargs_from_config(version, apiconfig, **kwargs)
    return api(**client_kwargs)