1 # Copyright (C) The Arvados Authors. All rights reserved.
3 # SPDX-License-Identifier: Apache-2.0
6 The code in this module builds Arvados API client objects you can use to submit
7 Arvados API requests. This includes extending the underlying HTTP client with
8 niceties such as caching, X-Request-Id header for tracking, and more. The main
9 client constructors are `api` and `api_from_config`.
12 from __future__ import absolute_import
13 from future import standard_library
14 standard_library.install_aliases()
15 from builtins import range
33 from apiclient import discovery as apiclient_discovery
34 from apiclient import errors as apiclient_errors
40 from .logging import GoogleHTTPClientFilter, log_handler
_logger = logging.getLogger('arvados.api')
# Acquired (and never released) by the first thread that builds an API client;
# used to decide whether googleapiclient retry logs need temporary routing.
_googleapiclient_log_lock = threading.Lock()

MAX_IDLE_CONNECTION_DURATION = 30
"""
Number of seconds that API client HTTP connections should be allowed to idle
in keepalive state before they are forced closed. Client code can adjust this
constant, and it will be used for all Arvados API clients constructed after
that point.
"""

# An unused HTTP 5xx status code to request a retry internally.
# See _intercept_http_request. This should not be user-visible.
_RETRY_4XX_STATUS = 545

# NOTE(review): presumably this defines the attribute so that code referring
# to httplib2.SSLHandshakeError keeps working on Python 3 --- confirm.
if sys.version_info >= (3,):
    httplib2.SSLHandshakeError = None
_orig_retry_request = apiclient.http._retry_request
def _retry_request(http, num_retries, *args, **kwargs):
    """Replacement for `apiclient.http._retry_request`

    For HTTP clients that carry a `num_retries` attribute, this retries at
    least that many times, then undoes the internal `_RETRY_4XX_STATUS`
    marker on the final response. Any other client is passed straight
    through to the original googleapiclient implementation.

    Arguments:

    * http: object --- The HTTP client object making the request.

    * num_retries: int --- The retry count requested by the caller. The
      larger of this and `http.num_retries` is used.

    Additional arguments are passed unchanged to the original
    `apiclient.http._retry_request`.
    """
    try:
        num_retries = max(num_retries, http.num_retries)
    except AttributeError:
        # `http` client object does not have a `num_retries` attribute.
        # It apparently hasn't gone through _patch_http_request, possibly
        # because this isn't an Arvados API client. Pass through to
        # avoid interfering with other Google API clients.
        return _orig_retry_request(http, num_retries, *args, **kwargs)
    response, body = _orig_retry_request(http, num_retries, *args, **kwargs)
    # If _intercept_http_request ran out of retries for a 4xx response,
    # restore the original status code.
    if response.status == _RETRY_4XX_STATUS:
        response.status = int(response['status'])
    return (response, body)
apiclient.http._retry_request = _retry_request
def _intercept_http_request(self, uri, method="GET", headers=None, **kwargs):
    """Send one HTTP request with Arvados-specific handling

    This function is bound to an HTTP client object as its `request` method
    by `_patch_http_request`. It adds the `X-Request-Id` and `Authorization`
    headers, enforces the published request size limit, drops idle keepalive
    connections, and marks retryable 4xx responses so googleapiclient
    retries them.

    Arguments:

    * uri: str --- The URI to request.

    * method: str --- The HTTP method. Default `"GET"`.

    * headers: dict | None --- Request headers. A dictionary passed by the
      caller is updated in place, so a caller that reuses it across retries
      sends the same `X-Request-Id` on every attempt. If `None` or omitted,
      a new dictionary is used for this call.

    Additional keyword arguments are passed to the original `request` method.

    Returns a `(response, body)` tuple. Any exception raised along the way
    is re-raised with `"[request_id] "` prepended to its first string
    argument.
    """
    if headers is None:
        # A `{}` default would be one dictionary shared by every call, and
        # this function writes to it: the first call's X-Request-Id would
        # then be reused by every later call that omits headers.
        headers = {}
    if not headers.get('X-Request-Id'):
        headers['X-Request-Id'] = self._request_id()
    try:
        if (self.max_request_size and
            kwargs.get('body') and
            self.max_request_size < len(kwargs['body'])):
            raise apiclient_errors.MediaUploadSizeError("Request size %i bytes exceeds published limit of %i bytes" % (len(kwargs['body']), self.max_request_size))

        headers['Authorization'] = 'OAuth2 %s' % self.arvados_api_token

        if (time.time() - self._last_request_time) > self._max_keepalive_idle:
            # High probability of failure due to connection atrophy. Make
            # sure this request [re]opens a new connection by closing and
            # forgetting all cached connections first.
            for conn in self.connections.values():
                conn.close()
            self.connections.clear()

        self._last_request_time = time.time()
        try:
            response, body = self.orig_http_request(uri, method, headers=headers, **kwargs)
        except ssl.SSLCertVerificationError as e:
            raise ssl.SSLCertVerificationError(e.args[0], "Could not connect to %s\n%s\nPossible causes: remote SSL/TLS certificate expired, or was issued by an untrusted certificate authority." % (uri, e)) from None
        # googleapiclient only retries 403, 429, and 5xx status codes.
        # If we got another 4xx status that we want to retry, convert it into
        # 5xx so googleapiclient handles it the way we want.
        if response.status in retry._HTTP_CAN_RETRY and response.status < 500:
            response.status = _RETRY_4XX_STATUS
        return (response, body)
    except Exception as e:
        # Prepend "[request_id] " to the error message, which we
        # assume is the first string argument passed to the exception
        # constructor.
        for i, arg in enumerate(e.args or ()):
            if isinstance(arg, str):
                e.args = e.args[:i] + ("[{}] {}".format(headers['X-Request-Id'], arg),) + e.args[i+1:]
                raise type(e)(*e.args)
        # No string argument to decorate: re-raise the exception untouched.
        raise
def _patch_http_request(http, api_token, num_retries):
    """Install Arvados request handling on an HTTP client object

    The client's `request` method is saved as `orig_http_request` and
    replaced with `_intercept_http_request` bound to the client. The other
    attributes set here are the state that `_intercept_http_request` reads.

    Arguments:

    * http: object --- The HTTP client object to patch. It is modified in
      place.

    * api_token: str --- The token sent in the `Authorization` header of
      each request.

    * num_retries: int --- The retry count stored on the client as
      `num_retries`.

    Returns the same `http` object.
    """
    http.arvados_api_token = api_token
    # 0 means "no limit"; the size check is skipped while this is falsy.
    http.max_request_size = 0
    http.num_retries = num_retries
    # Keep the original method before overwriting it below.
    http.orig_http_request = http.request
    http.request = types.MethodType(_intercept_http_request, http)
    # 0 makes the first request look idle, so it starts from fresh connections.
    http._last_request_time = 0
    http._max_keepalive_idle = MAX_IDLE_CONNECTION_DURATION
    http._request_id = util.new_request_id
    return http
def _close_connections(self):
    """Close every connection held by this client's HTTP object

    This function is bound to API client objects as their
    `close_connections` method. The connections are closed but stay in the
    HTTP object's `connections` mapping.
    """
    for conn in self._http.connections.values():
        conn.close()
# Monkey patch discovery._cast() so objects and arrays get serialized
# with json.dumps() instead of str().
_cast_orig = apiclient_discovery._cast
def _cast_objects_too(value, schema_type):
    """Serialize object and array parameters as JSON

    Arguments:

    * value: Any --- The parameter value to convert.

    * schema_type: str --- The parameter's type name from the API schema.

    When `schema_type` is `'object'` or `'array'` and `value` is not
    already text or bytes, returns `json.dumps(value)`. Every other case is
    handled by the original `apiclient.discovery._cast`.
    """
    # isinstance (rather than an exact type comparison) also treats str and
    # bytes subclasses as already serialized.
    if schema_type in ('object', 'array') and not isinstance(value, (str, bytes)):
        return json.dumps(value)
    return _cast_orig(value, schema_type)
apiclient_discovery._cast = _cast_objects_too
# Make apiclient build our own API error subclass whenever it constructs an
# HttpError.
# Rebinding the name apiclient_errors.HttpError would not be enough: most
# apiclient submodules have already imported the class into their own
# namespace, so the class itself is patched instead.
def _new_http_error(cls, *args, **kwargs):
    """Allocate an `errors.ApiError` in place of the requested class

    Installed as `apiclient_errors.HttpError.__new__`. The `cls` argument is
    only used to look up the next `__new__` after `HttpError`; the object is
    always allocated as `errors.ApiError`.
    """
    allocate = super(apiclient_errors.HttpError, cls).__new__
    return allocate(errors.ApiError, *args, **kwargs)
apiclient_errors.HttpError.__new__ = staticmethod(_new_http_error)
def http_cache(data_type):
    """Set up an HTTP file cache

    This function constructs and returns an `arvados.cache.SafeHTTPCache`
    backed by the filesystem under `~/.cache/arvados/`, or `None` if the
    directory cannot be set up. The return value can be passed to
    `httplib2.Http` as the `cache` argument.

    Arguments:

    * data_type: str --- The name of the subdirectory under `~/.cache/arvados`
      where data is cached.
    """
    try:
        homedir = pathlib.Path.home()
    except RuntimeError:
        # The home directory could not be determined.
        return None
    path = pathlib.Path(homedir, '.cache', 'arvados', data_type)
    try:
        path.mkdir(parents=True, exist_ok=True)
    except OSError:
        # Not writable, a path component is not a directory, etc.
        return None
    # Cached entries are kept for at most two days.
    return cache.SafeHTTPCache(str(path), max_age=60*60*24*2)
def api_client(
        version,
        discoveryServiceUrl,
        token,
        *,
        cache=True,
        http=None,
        insecure=False,
        num_retries=10,
        request_id=None,
        timeout=5*60,
        **kwargs,
):
    """Build an Arvados API client

    This function returns a `googleapiclient.discovery.Resource` object
    constructed from the given arguments. This is a relatively low-level
    interface that requires all the necessary inputs as arguments. Most
    users will prefer to use `api` which can accept more flexible inputs.

    Arguments:

    * version: str --- A string naming the version of the Arvados API to use.

    * discoveryServiceUrl: str --- The URL used to discover APIs passed
      directly to `googleapiclient.discovery.build`.

    * token: str --- The authentication token to send with each API call.

    Keyword-only arguments:

    * cache: bool --- If true, loads the API discovery document from, or
      saves it to, a cache on disk (located at
      `~/.cache/arvados/discovery`).

    * http: httplib2.Http | None --- The HTTP client object the API client
      object will use to make requests.  If not provided, this function will
      build its own to use. Either way, the object will be patched as part
      of the build process.

    * insecure: bool --- If true, ignore SSL certificate validation
      errors. Default `False`.

    * num_retries: int --- The number of times to retry each API request if
      it encounters a temporary failure. Default 10.

    * request_id: str | None --- Default `X-Request-Id` header value for
      outgoing requests that don't already provide one. If `None` or
      omitted, generate a random ID. When retrying failed requests, the same
      ID is used on all attempts.

    * timeout: int --- A timeout value for HTTP requests in seconds. Default
      300 (5 minutes).

    Additional keyword arguments will be passed directly to
    `googleapiclient.discovery.build`.
    """
    if http is None:
        http = httplib2.Http(
            ca_certs=util.ca_certs_path(),
            cache=http_cache('discovery') if cache else None,
            disable_ssl_certificate_validation=bool(insecure),
        )
    if http.timeout is None:
        http.timeout = timeout
    http = _patch_http_request(http, token, num_retries)

    # The first time a client is instantiated, temporarily route
    # googleapiclient.http retry logs if they're not already. These are
    # important because temporary problems fetching the discovery document
    # can cause clients to appear to hang early. This can be removed after
    # we have a more general story for handling googleapiclient logs (#20521).
    client_logger = logging.getLogger('googleapiclient.http')
    # "first time a client is instantiated" = thread that acquires this lock
    # It is never released.
    # googleapiclient sets up its own NullHandler so we detect if logging is
    # configured by looking for a real handler anywhere in the hierarchy.
    client_logger_unconfigured = _googleapiclient_log_lock.acquire(blocking=False) and all(
        isinstance(handler, logging.NullHandler)
        for logger_name in ['', 'googleapiclient', 'googleapiclient.http']
        for handler in logging.getLogger(logger_name).handlers
    )
    if client_logger_unconfigured:
        client_level = client_logger.level
        client_filter = GoogleHTTPClientFilter()
        client_logger.addFilter(client_filter)
        client_logger.addHandler(log_handler)
        # Keep a level the user already set if it is verbose enough to let
        # retry messages through; otherwise lower it just far enough.
        if logging.NOTSET < client_level < client_filter.retry_levelno:
            client_logger.setLevel(client_level)
        else:
            client_logger.setLevel(client_filter.retry_levelno)
    try:
        svc = apiclient_discovery.build(
            'arvados', version,
            cache_discovery=False,
            discoveryServiceUrl=discoveryServiceUrl,
            http=http,
            num_retries=num_retries,
            **kwargs,
        )
    finally:
        # Undo the temporary log routing whether or not the build succeeded.
        if client_logger_unconfigured:
            client_logger.removeHandler(log_handler)
            client_logger.removeFilter(client_filter)
            client_logger.setLevel(client_level)
    svc.api_token = token
    svc.insecure = insecure
    svc.request_id = request_id
    svc.config = lambda: util.get_config_once(svc)
    svc.vocabulary = lambda: util.get_vocabulary_once(svc)
    svc.close_connections = types.MethodType(_close_connections, svc)
    http.max_request_size = svc._rootDesc.get('maxRequestSize', 0)
    # The cache was only wanted for the discovery document fetched above.
    http.cache = None
    # Read svc.request_id at request time so later changes to it take effect.
    http._request_id = lambda: svc.request_id or util.new_request_id()
    return svc
def normalize_api_kwargs(
        version=None,
        discoveryServiceUrl=None,
        host=None,
        token=None,
        **kwargs,
):
    """Validate kwargs from `api` and build kwargs for `api_client`

    This method takes high-level keyword arguments passed to the `api`
    constructor and normalizes them into a new dictionary that can be passed
    as keyword arguments to `api_client`. It raises `ValueError` if required
    arguments are missing or conflict.

    Arguments:

    * version: str | None --- A string naming the version of the Arvados API
      to use. If not specified, the code will log a warning and fall back to
      'v1'.

    * discoveryServiceUrl: str | None --- The URL used to discover APIs
      passed directly to `googleapiclient.discovery.build`. It is an error
      to pass both `discoveryServiceUrl` and `host`.

    * host: str | None --- The hostname and optional port number of the
      Arvados API server. Used to build `discoveryServiceUrl`. It is an
      error to pass both `discoveryServiceUrl` and `host`.

    * token: str --- The authentication token to send with each API call.

    Additional keyword arguments will be included in the return value.
    """
    if discoveryServiceUrl and host:
        raise ValueError("both discoveryServiceUrl and host provided")
    elif discoveryServiceUrl:
        url_src = "discoveryServiceUrl"
    elif host:
        url_src = "host argument"
        discoveryServiceUrl = 'https://%s/discovery/v1/apis/{api}/{apiVersion}/rest' % (host,)
    elif token:
        # This specific error message gets priority for backwards compatibility.
        raise ValueError("token argument provided, but host missing.")
    else:
        raise ValueError("neither discoveryServiceUrl nor host provided")
    if not token:
        raise ValueError("%s provided, but token missing" % (url_src,))
    if not version:
        version = 'v1'
        _logger.info(
            "Using default API version. Call arvados.api(%r) instead.",
            version,
        )
    return {
        'discoveryServiceUrl': discoveryServiceUrl,
        'token': token,
        'version': version,
        **kwargs,
    }
def api_kwargs_from_config(version=None, apiconfig=None, **kwargs):
    """Build `api_client` keyword arguments from configuration

    This function accepts a mapping with Arvados configuration settings like
    `ARVADOS_API_HOST` and converts them into a mapping of keyword arguments
    that can be passed to `api_client`. If `ARVADOS_API_HOST` or
    `ARVADOS_API_TOKEN` are not configured, it raises `ValueError`.

    Arguments:

    * version: str | None --- A string naming the version of the Arvados API
      to use. If not specified, the code will log a warning and fall back to
      'v1'.

    * apiconfig: Mapping[str, str] | None --- A mapping with entries for
      `ARVADOS_API_HOST`, `ARVADOS_API_TOKEN`, and optionally
      `ARVADOS_API_HOST_INSECURE`. If not provided, calls
      `arvados.config.settings` to get these parameters from user
      configuration.

    Additional keyword arguments will be included in the return value.
    """
    if apiconfig is None:
        apiconfig = config.settings()
    # Name every absent setting in a single error message.
    missing = " and ".join(
        key
        for key in ['ARVADOS_API_HOST', 'ARVADOS_API_TOKEN']
        if key not in apiconfig
    )
    if missing:
        raise ValueError(
            "%s not set.\nPlease set in %s or export environment variable." %
            (missing, config.default_config_file),
        )
    return normalize_api_kwargs(
        version,
        None,
        apiconfig['ARVADOS_API_HOST'],
        apiconfig['ARVADOS_API_TOKEN'],
        insecure=config.flag_is_true('ARVADOS_API_HOST_INSECURE', apiconfig),
        **kwargs,
    )
def api(version=None, cache=True, host=None, token=None, insecure=False,
        request_id=None, timeout=5*60, *,
        discoveryServiceUrl=None, **kwargs):
    """Dynamically build an Arvados API client

    This function provides a high-level "do what I mean" interface to build an
    Arvados API client object. You can call it with no arguments to build a
    client from user configuration; pass `host` and `token` arguments just
    like you would write in user configuration; or pass additional arguments
    for lower-level control over the client.

    This function returns a `arvados.safeapi.ThreadSafeApiCache`, an
    API-compatible wrapper around `googleapiclient.discovery.Resource`. If
    you're handling concurrency yourself and/or your application is very
    performance-sensitive, consider calling `api_client` directly.

    Arguments:

    * version: str | None --- A string naming the version of the Arvados API
      to use. If not specified, the code will log a warning and fall back to
      'v1'.

    * host: str | None --- The hostname and optional port number of the
      Arvados API server.

    * token: str | None --- The authentication token to send with each API
      call.

    * discoveryServiceUrl: str | None --- The URL used to discover APIs
      passed directly to `googleapiclient.discovery.build`.

    If `host`, `token`, and `discoveryServiceUrl` are all omitted, `host` and
    `token` will be loaded from the user's configuration. Otherwise, you must
    pass `token` and one of `host` or `discoveryServiceUrl`. It is an error to
    pass both `host` and `discoveryServiceUrl`.

    Other arguments are passed directly to `api_client`. See that function's
    docstring for more information about their meaning.
    """
    kwargs.update(
        cache=cache,
        insecure=insecure,
        request_id=request_id,
        timeout=timeout,
    )
    if discoveryServiceUrl or host or token:
        kwargs.update(normalize_api_kwargs(version, discoveryServiceUrl, host, token))
    else:
        kwargs.update(api_kwargs_from_config(version))
    # The version is passed separately from the other client arguments.
    version = kwargs.pop('version')
    # We do the import here to avoid a circular import at the top level.
    from .safeapi import ThreadSafeApiCache
    return ThreadSafeApiCache({}, {}, kwargs, version)
def api_from_config(version=None, apiconfig=None, **kwargs):
    """Construct an Arvados API client from explicit configuration settings

    Use this function when the client should be built from a configuration
    mapping you supply, rather than from whatever the user has set up.

    The return value is a `arvados.safeapi.ThreadSafeApiCache`, which is an
    API-compatible wrapper around `googleapiclient.discovery.Resource`.
    Applications that manage their own concurrency, or that are very
    performance-sensitive, may prefer to call `api_client` directly.

    Arguments:

    * version: str | None --- The name of the Arvados API version to use.
      When omitted, a warning is logged and a default version is used.

    * apiconfig: Mapping[str, str] | None --- Configuration settings: must
      include `ARVADOS_API_HOST` and `ARVADOS_API_TOKEN`, and may include
      `ARVADOS_API_HOST_INSECURE`. When omitted, the settings come from
      `arvados.config.settings`.

    All other arguments are handed to `api_client` unchanged; refer to its
    docstring for what they mean.
    """
    client_kwargs = api_kwargs_from_config(version, apiconfig, **kwargs)
    return api(**client_kwargs)
class OrderedJsonModel(apiclient.model.JsonModel):
    """JSON model retained for backwards compatibility

    .. WARNING:: Deprecated
       Python dictionaries keep their insertion order on their own now, so
       this model no longer adds anything. Code that passes it to API
       constructors can simply stop doing so.

    Before Python 3.6, a client that needed the fields of API server
    responses in their original order could ask for that with this model.
    Typical usage looked like:

        from arvados.api import OrderedJsonModel
        client = arvados.api('v1', ..., model=OrderedJsonModel())
    """
    @util._deprecated(preferred="the default model and rely on Python's built-in dictionary ordering")
    def __init__(self, data_wrapper=False):
        # Nothing of our own to set up; defer entirely to JsonModel.
        super().__init__(data_wrapper)
RETRY_DELAY_INITIAL = 0
"""
.. WARNING:: Deprecated
   This constant was used by retry code in previous versions of the Arvados SDK.
   Changing the value has no effect anymore.
   Prefer passing `num_retries` to an API client constructor instead.
   Refer to the constructor docstrings for details.
"""

RETRY_DELAY_BACKOFF = 0
"""
.. WARNING:: Deprecated
   This constant was used by retry code in previous versions of the Arvados SDK.
   Changing the value has no effect anymore.
   Prefer passing `num_retries` to an API client constructor instead.
   Refer to the constructor docstrings for details.
"""
520 .. WARNING:: Deprecated
521 This constant was used by retry code in previous versions of the Arvados SDK.
522 Changing the value has no effect anymore.
523 Prefer passing `num_retries` to an API client constructor instead.
524 Refer to the constructor docstrings for details.