X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/38fcd08dda022d0167840fbb65222fe99b75fcf5..82c424076577660d96173213a0d2db5f7c1450d7:/sdk/python/arvados/api.py diff --git a/sdk/python/arvados/api.py b/sdk/python/arvados/api.py index 1af50b3110..e0d1c50f03 100644 --- a/sdk/python/arvados/api.py +++ b/sdk/python/arvados/api.py @@ -1,21 +1,31 @@ +# Copyright (C) The Arvados Authors. All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +from __future__ import absolute_import +from future import standard_library +standard_library.install_aliases() +from builtins import range import collections -import httplib +import http.client import httplib2 import json import logging import os import re import socket +import ssl +import sys import time import types import apiclient from apiclient import discovery as apiclient_discovery from apiclient import errors as apiclient_errors -import config -import errors -import util -import cache +from . import config +from . import errors +from . import util +from . import cache _logger = logging.getLogger('arvados.api') @@ -24,6 +34,9 @@ RETRY_DELAY_INITIAL = 2 RETRY_DELAY_BACKOFF = 2 RETRY_COUNT = 2 +if sys.version_info >= (3,): + httplib2.SSLHandshakeError = None + class OrderedJsonModel(apiclient.model.JsonModel): """Model class for JSON that preserves the contents' order. @@ -44,56 +57,68 @@ class OrderedJsonModel(apiclient.model.JsonModel): return body -def _intercept_http_request(self, uri, **kwargs): - if (self.max_request_size and - kwargs.get('body') and - self.max_request_size < len(kwargs['body'])): - raise apiclient_errors.MediaUploadSizeError("Request size %i bytes exceeds published limit of %i bytes" % (len(kwargs['body']), self.max_request_size)) - - if 'headers' not in kwargs: - kwargs['headers'] = {} - - if config.get("ARVADOS_EXTERNAL_CLIENT", "") == "true": - kwargs['headers']['X-External-Client'] = '1' - - kwargs['headers']['Authorization'] = 'OAuth2 %s' % self.arvados_api_token - - retryable = kwargs.get('method', 'GET') in [ - 'DELETE', 'GET', 'HEAD', 'OPTIONS', 'PUT'] - retry_count = self._retry_count if retryable else 0 - - if (not retryable and - time.time() - self._last_request_time > self._max_keepalive_idle): - # High probability of failure due to connection atrophy. Make - # sure this request [re]opens a new connection by closing and - # forgetting all cached connections first. - for conn in self.connections.itervalues(): - conn.close() - self.connections.clear() - - delay = self._retry_delay_initial - for _ in range(retry_count): - self._last_request_time = time.time() - try: - return self.orig_http_request(uri, **kwargs) - except httplib.HTTPException: - _logger.debug("Retrying API request in %d s after HTTP error", - delay, exc_info=True) - except socket.error: - # This is the one case where httplib2 doesn't close the - # underlying connection first. Close all open - # connections, expecting this object only has the one - # connection to the API server. This is safe because - # httplib2 reopens connections when needed. - _logger.debug("Retrying API request in %d s after socket error", - delay, exc_info=True) - for conn in self.connections.itervalues(): +def _intercept_http_request(self, uri, method="GET", headers={}, **kwargs): + if not headers.get('X-Request-Id'): + headers['X-Request-Id'] = self._request_id() + try: + if (self.max_request_size and + kwargs.get('body') and + self.max_request_size < len(kwargs['body'])): + raise apiclient_errors.MediaUploadSizeError("Request size %i bytes exceeds published limit of %i bytes" % (len(kwargs['body']), self.max_request_size)) + + if config.get("ARVADOS_EXTERNAL_CLIENT", "") == "true": + headers['X-External-Client'] = '1' + + headers['Authorization'] = 'OAuth2 %s' % self.arvados_api_token + + retryable = method in [ + 'DELETE', 'GET', 'HEAD', 'OPTIONS', 'PUT'] + retry_count = self._retry_count if retryable else 0 + + if (not retryable and + time.time() - self._last_request_time > self._max_keepalive_idle): + # High probability of failure due to connection atrophy. Make + # sure this request [re]opens a new connection by closing and + # forgetting all cached connections first. + for conn in self.connections.values(): conn.close() - time.sleep(delay) - delay = delay * self._retry_delay_backoff + self.connections.clear() + + delay = self._retry_delay_initial + for _ in range(retry_count): + self._last_request_time = time.time() + try: + return self.orig_http_request(uri, method, headers=headers, **kwargs) + except http.client.HTTPException: + _logger.debug("[%s] Retrying API request in %d s after HTTP error", + headers['X-Request-Id'], delay, exc_info=True) + except ssl.SSLCertVerificationError as e: + raise ssl.SSLCertVerificationError(e.args[0], "Could not connect to %s\n%s\nPossible causes: remote SSL/TLS certificate expired, or was issued by an untrusted certificate authority." % (uri, e)) from None + except socket.error: + # This is the one case where httplib2 doesn't close the + # underlying connection first. Close all open + # connections, expecting this object only has the one + # connection to the API server. This is safe because + # httplib2 reopens connections when needed. + _logger.debug("[%s] Retrying API request in %d s after socket error", + headers['X-Request-Id'], delay, exc_info=True) + for conn in self.connections.values(): + conn.close() + + time.sleep(delay) + delay = delay * self._retry_delay_backoff - self._last_request_time = time.time() - return self.orig_http_request(uri, **kwargs) + self._last_request_time = time.time() + return self.orig_http_request(uri, method, headers=headers, **kwargs) + except Exception as e: + # Prepend "[request_id] " to the error message, which we + # assume is the first string argument passed to the exception + # constructor. + for i in range(len(e.args or ())): + if type(e.args[i]) == type(""): + e.args = e.args[:i] + ("[{}] {}".format(headers['X-Request-Id'], e.args[i]),) + e.args[i+1:] + raise type(e)(*e.args) + raise def _patch_http_request(http, api_token): http.arvados_api_token = api_token @@ -105,6 +130,7 @@ def _patch_http_request(http, api_token): http._retry_delay_initial = RETRY_DELAY_INITIAL http._retry_delay_backoff = RETRY_DELAY_BACKOFF http._retry_count = RETRY_COUNT + http._request_id = util.new_request_id return http # Monkey patch discovery._cast() so objects and arrays get serialized @@ -113,6 +139,7 @@ _cast_orig = apiclient_discovery._cast def _cast_objects_too(value, schema_type): global _cast_orig if (type(value) != type('') and + type(value) != type(b'') and (schema_type == 'object' or schema_type == 'array')): return json.dumps(value) else: @@ -139,7 +166,8 @@ def http_cache(data_type): return None return cache.SafeHTTPCache(path, max_age=60*60*24*2) -def api(version=None, cache=True, host=None, token=None, insecure=False, **kwargs): +def api(version=None, cache=True, host=None, token=None, insecure=False, + request_id=None, timeout=5*60, **kwargs): """Return an apiclient Resources object for an Arvados instance. :version: @@ -159,6 +187,15 @@ def api(version=None, cache=True, host=None, token=None, insecure=False, **kwarg :insecure: If True, ignore SSL certificate validation errors. + :timeout: + A timeout value for http requests. + + :request_id: + Default X-Request-Id header value for outgoing requests that + don't already provide one. If None or omitted, generate a random + ID. When retrying failed requests, the same ID is used on all + attempts. + Additional keyword arguments will be passed directly to `apiclient_discovery.build` if a new Resource object is created. If the `discoveryServiceUrl` or `http` keyword arguments are @@ -183,7 +220,9 @@ def api(version=None, cache=True, host=None, token=None, insecure=False, **kwarg elif host and token: pass elif not host and not token: - return api_from_config(version=version, cache=cache, **kwargs) + return api_from_config( + version=version, cache=cache, timeout=timeout, + request_id=request_id, **kwargs) else: # Caller provided one but not the other if not host: @@ -204,13 +243,20 @@ def api(version=None, cache=True, host=None, token=None, insecure=False, **kwarg http_kwargs['disable_ssl_certificate_validation'] = True kwargs['http'] = httplib2.Http(**http_kwargs) + if kwargs['http'].timeout is None: + kwargs['http'].timeout = timeout + kwargs['http'] = _patch_http_request(kwargs['http'], token) svc = apiclient_discovery.build('arvados', version, cache_discovery=False, **kwargs) svc.api_token = token svc.insecure = insecure + svc.request_id = request_id + svc.config = lambda: util.get_config_once(svc) + svc.vocabulary = lambda: util.get_vocabulary_once(svc) kwargs['http'].max_request_size = svc._rootDesc.get('maxRequestSize', 0) kwargs['http'].cache = None + kwargs['http']._request_id = lambda: svc.request_id or util.new_request_id() return svc def api_from_config(version=None, apiconfig=None, **kwargs): @@ -235,9 +281,12 @@ def api_from_config(version=None, apiconfig=None, **kwargs): if apiconfig is None: apiconfig = config.settings() + errors = [] for x in ['ARVADOS_API_HOST', 'ARVADOS_API_TOKEN']: if x not in apiconfig: - raise ValueError("%s is not set. Aborting." % x) + errors.append(x) + if errors: + raise ValueError(" and ".join(errors)+" not set.\nPlease set in %s or export environment variable." % config.default_config_file) host = apiconfig.get('ARVADOS_API_HOST') token = apiconfig.get('ARVADOS_API_TOKEN') insecure = config.flag_is_true('ARVADOS_API_HOST_INSECURE', apiconfig)