1 # Copyright (C) The Arvados Authors. All rights reserved.
3 # SPDX-License-Identifier: Apache-2.0
6 The code in this module builds Arvados API client objects you can use to submit
7 Arvados API requests. This includes extending the underlying HTTP client with
8 niceties such as caching, X-Request-Id header for tracking, and more. The main
9 client constructors are `api` and `api_from_config`.
12 from __future__ import absolute_import
13 from future import standard_library
14 standard_library.install_aliases()
15 from builtins import range
32 from apiclient import discovery as apiclient_discovery
33 from apiclient import errors as apiclient_errors
39 from .logging import GoogleHTTPClientFilter, log_handler
41 _logger = logging.getLogger('arvados.api')
42 _googleapiclient_log_lock = threading.Lock()
44 MAX_IDLE_CONNECTION_DURATION = 30
46 # These constants supported our own retry logic that we've since removed in
47 # favor of using googleapiclient's num_retries. They're kept here purely for
48 # API compatibility, but set to 0 to indicate no retries happen.
49 RETRY_DELAY_INITIAL = 0
50 RETRY_DELAY_BACKOFF = 0
53 # An unused HTTP 5xx status code to request a retry internally.
54 # See _intercept_http_request. This should not be user-visible.
55 _RETRY_4XX_STATUS = 545
57 if sys.version_info >= (3,):
58 httplib2.SSLHandshakeError = None
60 class OrderedJsonModel(apiclient.model.JsonModel):
61 """Model class for JSON that preserves the contents' order.
63 API clients that care about preserving the order of fields in API
64 server responses can use this model to do so, like this:
66 from arvados.api import OrderedJsonModel
67 client = arvados.api('v1', ..., model=OrderedJsonModel())
70 def deserialize(self, content):
71 # This is a very slightly modified version of the parent class'
72 # implementation. Copyright (c) 2010 Google.
73 content = content.decode('utf-8')
74 body = json.loads(content, object_pairs_hook=collections.OrderedDict)
75 if self._data_wrapper and isinstance(body, dict) and 'data' in body:
80 _orig_retry_request = apiclient.http._retry_request
def _retry_request(http, num_retries, *args, **kwargs):
    """Wrap googleapiclient's `_retry_request` for Arvados-patched clients.

    When `http` carries a `num_retries` attribute, the larger of that value
    and the caller's `num_retries` is used for the request. When it does not,
    the call is forwarded to the original implementation unchanged.

    Returns the `(response, body)` tuple produced by the original
    `_retry_request`.
    """
    try:
        client_retries = http.num_retries
    except AttributeError:
        # No `num_retries` attribute on `http`: it apparently never went
        # through _patch_http_request, possibly because it does not belong
        # to an Arvados API client. Hand the call over untouched so other
        # Google API clients are not affected.
        return _orig_retry_request(http, num_retries, *args, **kwargs)
    retry_limit = max(num_retries, client_retries)
    response, body = _orig_retry_request(http, retry_limit, *args, **kwargs)
    # _intercept_http_request may have masked a 4xx status to get it retried.
    # If the retries ran out, put the real status code back.
    if response.status == _RETRY_4XX_STATUS:
        response.status = int(response['status'])
    return (response, body)
96 apiclient.http._retry_request = _retry_request
def _intercept_http_request(self, uri, method="GET", headers=None, **kwargs):
    """Send one HTTP request with Arvados headers, size checks and retry hints.

    This function is bound to an HTTP client object as its `request` method
    by `_patch_http_request`; `self` is that client.

    Arguments:

    uri
    : The request URI, passed through to the client's original `request`.

    method
    : The HTTP method. Default `"GET"`.

    headers: dict | None
    : Request headers. The mapping is updated in place: it receives an
      `X-Request-Id` entry if it has none, and an `Authorization` entry.
      Passing the same mapping again therefore reuses the same request ID.
      If `None` or omitted, a fresh mapping is created for this call, so
      calls without `headers` never share a request ID.

    Additional keyword arguments are passed to the original `request`.

    Returns the `(response, body)` tuple from the original `request`. A 4xx
    status listed in `retry._HTTP_CAN_RETRY` is replaced with
    `_RETRY_4XX_STATUS` so googleapiclient retries it.

    Raises `apiclient_errors.MediaUploadSizeError` when the request body is
    larger than the client's `max_request_size`. Any exception raised along
    the way is re-raised with "[request_id] " prepended to its first string
    argument.
    """
    if headers is None:
        # A `{}` default would be shared by every call and would keep the
        # first X-Request-Id written into it.
        headers = {}
    if not headers.get('X-Request-Id'):
        headers['X-Request-Id'] = self._request_id()
    try:
        if (self.max_request_size and
            kwargs.get('body') and
            self.max_request_size < len(kwargs['body'])):
            raise apiclient_errors.MediaUploadSizeError("Request size %i bytes exceeds published limit of %i bytes" % (len(kwargs['body']), self.max_request_size))

        headers['Authorization'] = 'OAuth2 %s' % self.arvados_api_token

        if (time.time() - self._last_request_time) > self._max_keepalive_idle:
            # High probability of failure due to connection atrophy. Make
            # sure this request [re]opens a new connection by closing and
            # forgetting all cached connections first.
            for conn in self.connections.values():
                conn.close()
            self.connections.clear()

        self._last_request_time = time.time()
        try:
            response, body = self.orig_http_request(uri, method, headers=headers, **kwargs)
        except ssl.SSLCertVerificationError as e:
            raise ssl.SSLCertVerificationError(e.args[0], "Could not connect to %s\n%s\nPossible causes: remote SSL/TLS certificate expired, or was issued by an untrusted certificate authority." % (uri, e)) from None
        # googleapiclient only retries 403, 429, and 5xx status codes.
        # If we got another 4xx status that we want to retry, convert it into
        # 5xx so googleapiclient handles it the way we want.
        if response.status in retry._HTTP_CAN_RETRY and response.status < 500:
            response.status = _RETRY_4XX_STATUS
        return (response, body)
    except Exception as e:
        # Prepend "[request_id] " to the error message, which we
        # assume is the first string argument passed to the exception
        # constructor.
        for i, arg in enumerate(e.args or ()):
            if isinstance(arg, str):
                e.args = e.args[:i] + ("[{}] {}".format(headers['X-Request-Id'], arg),) + e.args[i+1:]
                raise type(e)(*e.args)
        # No string argument to annotate: propagate the exception as it is.
        raise
def _patch_http_request(http, api_token, num_retries):
    """Turn an HTTP client object into an Arvados-aware one, in place.

    The client's `request` method is saved as `orig_http_request` and
    replaced with `_intercept_http_request` bound to the client. The state
    that function reads (token, retry count, request size limit, keepalive
    bookkeeping and request ID generator) is attached as attributes.

    Returns the same `http` object.
    """
    # Keep a handle on the real method before shadowing it.
    http.orig_http_request = http.request
    http.request = types.MethodType(_intercept_http_request, http)

    http.arvados_api_token = api_token
    http.num_retries = num_retries
    # Zero disables the request size check in _intercept_http_request.
    http.max_request_size = 0

    http._max_keepalive_idle = MAX_IDLE_CONNECTION_DURATION
    http._last_request_time = 0
    http._request_id = util.new_request_id
    return http
def _close_connections(self):
    """Close every connection cached by this client's HTTP object.

    The connections stay in the HTTP object's `connections` mapping; they
    are only closed, not forgotten.
    """
    cached = self._http.connections
    for connection in cached.values():
        connection.close()
153 # Monkey patch discovery._cast() so objects and arrays get serialized
154 # with json.dumps() instead of str().
155 _cast_orig = apiclient_discovery._cast
def _cast_objects_too(value, schema_type):
    """Serialize object and array parameters as JSON.

    A value destined for an 'object' or 'array' schema type is returned as
    `json.dumps(value)`, unless it is already exactly a `str` or `bytes`.
    Everything else is delegated to the original `discovery._cast`.
    """
    wants_json = schema_type in ('object', 'array')
    already_text = type(value) in (str, bytes)
    if wants_json and not already_text:
        return json.dumps(value)
    return _cast_orig(value, schema_type)
164 apiclient_discovery._cast = _cast_objects_too
166 # Convert apiclient's HttpErrors into our own API error subclass for better
168 # Reassigning apiclient_errors.HttpError is not sufficient because most of the
169 # apiclient submodules import the class into their own namespace.
def _new_http_error(cls, *args, **kwargs):
    """Replacement `__new__` for `apiclient_errors.HttpError`.

    The requested `cls` is only used to locate the next `__new__` after
    `HttpError` in the method resolution order. The object is always
    allocated as `errors.ApiError`, whatever class the caller asked for.
    """
    return super(apiclient_errors.HttpError, cls).__new__(
        errors.ApiError, *args, **kwargs)
# Installed on the class itself, so every module that imported HttpError
# into its own namespace picks up the replacement too.
apiclient_errors.HttpError.__new__ = staticmethod(_new_http_error)
def http_cache(data_type):
    """Return an on-disk HTTP cache for `data_type`, or `None`.

    The cache directory is `$HOME/.cache/arvados/<data_type>`. `None` is
    returned when `HOME` is unset or empty, or when the directory cannot be
    created.
    """
    home = os.environ.get('HOME')
    if not home:
        return None
    cache_dir = ''.join((home, '/.cache/arvados/', data_type))
    try:
        util.mkdir_dash_p(cache_dir)
    except OSError:
        return None
    # Cached entries are kept for at most two days.
    return cache.SafeHTTPCache(cache_dir, max_age=60*60*24*2)
199 """Build an Arvados API client
201 This function returns a `googleapiclient.discovery.Resource` object
202 constructed from the given arguments. This is a relatively low-level
203 interface that requires all the necessary inputs as arguments. Most
204 users will prefer to use `api` which can accept more flexible inputs.
209 : A string naming the version of the Arvados API to use.
211 discoveryServiceUrl: str
212 : The URL used to discover APIs passed directly to
213 `googleapiclient.discovery.build`.
216 : The authentication token to send with each API call.
218 Keyword-only arguments:
221 : If true, loads the API discovery document from, or saves it to, a cache
222 on disk (located at `~/.cache/arvados/discovery`).
224 http: httplib2.Http | None
225 : The HTTP client object the API client object will use to make requests.
226 If not provided, this function will build its own to use. Either way, the
227 object will be patched as part of the build process.
230 : If true, ignore SSL certificate validation errors. Default `False`.
233 : The number of times to retry each API request if it encounters a
234 temporary failure. Default 10.
236 request_id: str | None
237 : Default `X-Request-Id` header value for outgoing requests that
238 don't already provide one. If `None` or omitted, generate a random
239 ID. When retrying failed requests, the same ID is used on all
243 : A timeout value for HTTP requests in seconds. Default 300 (5 minutes).
245 Additional keyword arguments will be passed directly to
246 `googleapiclient.discovery.build`.
249 http = httplib2.Http(
250 ca_certs=util.ca_certs_path(),
251 cache=http_cache('discovery') if cache else None,
252 disable_ssl_certificate_validation=bool(insecure),
254 if http.timeout is None:
255 http.timeout = timeout
256 http = _patch_http_request(http, token, num_retries)
258 # The first time a client is instantiated, temporarily route
259 # googleapiclient.http retry logs if they're not already. These are
260 # important because temporary problems fetching the discovery document
261 # can cause clients to appear to hang early. This can be removed after
262 # we have a more general story for handling googleapiclient logs (#20521).
263 client_logger = logging.getLogger('googleapiclient.http')
264 # "first time a client is instantiated" = thread that acquires this lock
265 # It is never released.
266 # googleapiclient sets up its own NullHandler so we detect if logging is
267 # configured by looking for a real handler anywhere in the hierarchy.
268 client_logger_unconfigured = _googleapiclient_log_lock.acquire(blocking=False) and all(
269 isinstance(handler, logging.NullHandler)
270 for logger_name in ['', 'googleapiclient', 'googleapiclient.http']
271 for handler in logging.getLogger(logger_name).handlers
273 if client_logger_unconfigured:
274 client_level = client_logger.level
275 client_filter = GoogleHTTPClientFilter()
276 client_logger.addFilter(client_filter)
277 client_logger.addHandler(log_handler)
278 if logging.NOTSET < client_level < client_filter.retry_levelno:
279 client_logger.setLevel(client_level)
281 client_logger.setLevel(client_filter.retry_levelno)
283 svc = apiclient_discovery.build(
285 cache_discovery=False,
286 discoveryServiceUrl=discoveryServiceUrl,
288 num_retries=num_retries,
292 if client_logger_unconfigured:
293 client_logger.removeHandler(log_handler)
294 client_logger.removeFilter(client_filter)
295 client_logger.setLevel(client_level)
296 svc.api_token = token
297 svc.insecure = insecure
298 svc.request_id = request_id
299 svc.config = lambda: util.get_config_once(svc)
300 svc.vocabulary = lambda: util.get_vocabulary_once(svc)
301 svc.close_connections = types.MethodType(_close_connections, svc)
302 http.max_request_size = svc._rootDesc.get('maxRequestSize', 0)
304 http._request_id = lambda: svc.request_id or util.new_request_id()
307 def normalize_api_kwargs(
309 discoveryServiceUrl=None,
314 """Validate kwargs from `api` and build kwargs for `api_client`
316 This method takes high-level keyword arguments passed to the `api`
317 constructor and normalizes them into a new dictionary that can be passed
318 as keyword arguments to `api_client`. It raises `ValueError` if required
319 arguments are missing or conflict.
324 : A string naming the version of the Arvados API to use. If not specified,
325 the code will log a warning and fall back to 'v1'.
327 discoveryServiceUrl: str | None
328 : The URL used to discover APIs passed directly to
329 `googleapiclient.discovery.build`. It is an error to pass both
330 `discoveryServiceUrl` and `host`.
333 : The hostname and optional port number of the Arvados API server. Used to
334 build `discoveryServiceUrl`. It is an error to pass both
335 `discoveryServiceUrl` and `host`.
338 : The authentication token to send with each API call.
340 Additional keyword arguments will be included in the return value.
342 if discoveryServiceUrl and host:
343 raise ValueError("both discoveryServiceUrl and host provided")
344 elif discoveryServiceUrl:
345 url_src = "discoveryServiceUrl"
347 url_src = "host argument"
348 discoveryServiceUrl = 'https://%s/discovery/v1/apis/{api}/{apiVersion}/rest' % (host,)
350 # This specific error message gets priority for backwards compatibility.
351 raise ValueError("token argument provided, but host missing.")
353 raise ValueError("neither discoveryServiceUrl nor host provided")
355 raise ValueError("%s provided, but token missing" % (url_src,))
359 "Using default API version. Call arvados.api(%r) instead.",
363 'discoveryServiceUrl': discoveryServiceUrl,
def api_kwargs_from_config(version=None, apiconfig=None, **kwargs):
    """Build `api_client` keyword arguments from configuration

    This function reads Arvados settings such as `ARVADOS_API_HOST` from a
    mapping and passes them through `normalize_api_kwargs` to produce keyword
    arguments for `api_client`. It raises `ValueError` if `ARVADOS_API_HOST`
    or `ARVADOS_API_TOKEN` is missing from the mapping.

    Arguments:

    version: str | None
    : A string naming the version of the Arvados API to use. Passed to
      `normalize_api_kwargs` unchanged.

    apiconfig: Mapping[str, str] | None
    : A mapping with entries for `ARVADOS_API_HOST`, `ARVADOS_API_TOKEN`, and
      optionally `ARVADOS_API_HOST_INSECURE`. If not provided, calls
      `arvados.config.settings` to get these parameters from user
      configuration.

    Additional keyword arguments will be included in the return value.
    """
    if apiconfig is None:
        apiconfig = config.settings()
    unset_keys = [
        name
        for name in ('ARVADOS_API_HOST', 'ARVADOS_API_TOKEN')
        if name not in apiconfig
    ]
    if unset_keys:
        raise ValueError(
            "%s not set.\nPlease set in %s or export environment variable." %
            (" and ".join(unset_keys), config.default_config_file),
        )
    api_host = apiconfig['ARVADOS_API_HOST']
    api_token = apiconfig['ARVADOS_API_TOKEN']
    skip_tls_checks = config.flag_is_true('ARVADOS_API_HOST_INSECURE', apiconfig)
    # The second positional argument is discoveryServiceUrl. It is left
    # unset so the URL gets built from the configured host.
    return normalize_api_kwargs(
        version,
        None,
        api_host,
        api_token,
        insecure=skip_tls_checks,
        **kwargs,
    )
def api(version=None, cache=True, host=None, token=None, insecure=False,
        request_id=None, timeout=5*60, *,
        discoveryServiceUrl=None, **kwargs):
    """Dynamically build an Arvados API client

    This is the high-level "do what I mean" constructor. Call it with no
    arguments to build a client from user configuration, pass `host` and
    `token` just as you would write them in user configuration, or pass
    further arguments for lower-level control over the client.

    This function returns a `arvados.safeapi.ThreadSafeApiCache`, an
    API-compatible wrapper around `googleapiclient.discovery.Resource`. If
    you're handling concurrency yourself and/or your application is very
    performance-sensitive, consider calling `api_client` directly.

    Arguments:

    version: str | None
    : A string naming the version of the Arvados API to use.

    host: str | None
    : The hostname and optional port number of the Arvados API server.

    token: str | None
    : The authentication token to send with each API call.

    discoveryServiceUrl: str | None
    : The URL used to discover APIs passed directly to
      `googleapiclient.discovery.build`.

    If `host`, `token`, and `discoveryServiceUrl` are all omitted, `host` and
    `token` will be loaded from the user's configuration. Otherwise, you must
    pass `token` and one of `host` or `discoveryServiceUrl`. It is an error to
    pass both `host` and `discoveryServiceUrl`.

    Other arguments are passed directly to `api_client`. See that function's
    docstring for more information about their meaning.
    """
    kwargs.update(
        cache=cache,
        insecure=insecure,
        request_id=request_id,
        timeout=timeout,
    )
    explicit_connection = discoveryServiceUrl or host or token
    if explicit_connection:
        connection_kwargs = normalize_api_kwargs(version, discoveryServiceUrl, host, token)
    else:
        connection_kwargs = api_kwargs_from_config(version)
    kwargs.update(connection_kwargs)
    # The normalized version is passed separately from the other settings.
    version = kwargs.pop('version')
    # Imported here because a top-level import would be circular.
    from .safeapi import ThreadSafeApiCache
    return ThreadSafeApiCache({}, {}, kwargs, version)
def api_from_config(version=None, apiconfig=None, **kwargs):
    """Build an Arvados API client from a configuration mapping

    The configuration is taken from the `apiconfig` argument, so you can
    build a client from settings other than the ones the user has set up.

    This function returns a `arvados.safeapi.ThreadSafeApiCache`, an
    API-compatible wrapper around `googleapiclient.discovery.Resource`. If
    you're handling concurrency yourself and/or your application is very
    performance-sensitive, consider calling `api_client` directly.

    Arguments:

    version: str | None
    : A string naming the version of the Arvados API to use.

    apiconfig: Mapping[str, str] | None
    : A mapping with entries for `ARVADOS_API_HOST`, `ARVADOS_API_TOKEN`, and
      optionally `ARVADOS_API_HOST_INSECURE`. If not provided, calls
      `arvados.config.settings` to get these parameters from user
      configuration.

    Other arguments are passed directly to `api_client`. See that function's
    docstring for more information about their meaning.
    """
    client_kwargs = api_kwargs_from_config(version, apiconfig, **kwargs)
    return api(**client_kwargs)