1 # Copyright (C) The Arvados Authors. All rights reserved.
3 # SPDX-License-Identifier: Apache-2.0
6 The code in this module builds Arvados API client objects you can use to submit
7 Arvados API requests. This includes extending the underlying HTTP client with
8 niceties such as caching, X-Request-Id header for tracking, and more. The main
9 client constructors are `api` and `api_from_config`.
36 from apiclient import discovery as apiclient_discovery
37 from apiclient import errors as apiclient_errors
43 from .logging import GoogleHTTPClientFilter, log_handler
45 _logger = logging.getLogger('arvados.api')
46 _googleapiclient_log_lock = threading.Lock()
48 MAX_IDLE_CONNECTION_DURATION = 30
50 Number of seconds that API client HTTP connections should be allowed to idle
51 in keepalive state before they are forced closed. Client code can adjust this
52 constant, and it will be used for all Arvados API clients constructed after
56 # An unused HTTP 5xx status code to request a retry internally.
57 # See _intercept_http_request. This should not be user-visible.
58 _RETRY_4XX_STATUS = 545
# NOTE(review): presumably a compatibility shim so that code referring to
# httplib2.SSLHandshakeError still finds the attribute on Python 3 -- confirm.
if sys.version_info >= (3,):
    setattr(httplib2, 'SSLHandshakeError', None)
63 _orig_retry_request = apiclient.http._retry_request
def _retry_request(http, num_retries, *args, **kwargs):
    """Wrap googleapiclient's `_retry_request` for Arvados HTTP clients.

    The retry count passed on is the larger of `num_retries` and the
    client's own `num_retries` attribute. A client object without that
    attribute is handed to the original implementation unchanged.

    After the original call returns, a response still carrying the internal
    `_RETRY_4XX_STATUS` marker has its `status` restored from
    `response['status']`.

    Returns the `(response, body)` tuple from the original implementation.
    """
    try:
        num_retries = max(num_retries, http.num_retries)
    except AttributeError:
        # `http` client object does not have a `num_retries` attribute.
        # It apparently hasn't gone through _patch_http_request, possibly
        # because this isn't an Arvados API client. Pass through to
        # avoid interfering with other Google API clients.
        return _orig_retry_request(http, num_retries, *args, **kwargs)
    response, body = _orig_retry_request(http, num_retries, *args, **kwargs)
    # If _intercept_http_request ran out of retries for a 4xx response,
    # restore the original status code.
    if response.status == _RETRY_4XX_STATUS:
        response.status = int(response['status'])
    return (response, body)
79 apiclient.http._retry_request = _retry_request
81 def _intercept_http_request(self, uri, method="GET", headers={}, **kwargs):
82 if not headers.get('X-Request-Id'):
83 headers['X-Request-Id'] = self._request_id()
85 if (self.max_request_size and
86 kwargs.get('body') and
87 self.max_request_size < len(kwargs['body'])):
88 raise apiclient_errors.MediaUploadSizeError("Request size %i bytes exceeds published limit of %i bytes" % (len(kwargs['body']), self.max_request_size))
90 headers['Authorization'] = 'OAuth2 %s' % self.arvados_api_token
92 if (time.time() - self._last_request_time) > self._max_keepalive_idle:
93 # High probability of failure due to connection atrophy. Make
94 # sure this request [re]opens a new connection by closing and
95 # forgetting all cached connections first.
96 for conn in self.connections.values():
98 self.connections.clear()
100 self._last_request_time = time.time()
102 response, body = self.orig_http_request(uri, method, headers=headers, **kwargs)
103 except ssl.SSLCertVerificationError as e:
104 raise ssl.SSLCertVerificationError(e.args[0], "Could not connect to %s\n%s\nPossible causes: remote SSL/TLS certificate expired, or was issued by an untrusted certificate authority." % (uri, e)) from None
105 # googleapiclient only retries 403, 429, and 5xx status codes.
106 # If we got another 4xx status that we want to retry, convert it into
107 # 5xx so googleapiclient handles it the way we want.
108 if response.status in retry._HTTP_CAN_RETRY and response.status < 500:
109 response.status = _RETRY_4XX_STATUS
110 return (response, body)
111 except Exception as e:
112 # Prepend "[request_id] " to the error message, which we
113 # assume is the first string argument passed to the exception
115 for i in range(len(e.args or ())):
116 if type(e.args[i]) == type(""):
117 e.args = e.args[:i] + ("[{}] {}".format(headers['X-Request-Id'], e.args[i]),) + e.args[i+1:]
118 raise type(e)(*e.args)
def _patch_http_request(http, api_token, num_retries):
    """Install Arvados request handling on an HTTP client object.

    Arguments:

    * http --- The HTTP client object to patch. It is modified in place.

    * api_token --- The token `_intercept_http_request` sends in the
      `Authorization` header.

    * num_retries --- Stored on the client as `num_retries`, where
      `_retry_request` reads it.

    Returns the same `http` object, so callers can rebind the result.
    """
    http.arvados_api_token = api_token
    # Zero disables the request size check in _intercept_http_request.
    http.max_request_size = 0
    http.num_retries = num_retries
    # Keep the original bound method so the interceptor can delegate to it.
    http.orig_http_request = http.request
    http.request = types.MethodType(_intercept_http_request, http)
    # Starting at zero makes the first request exceed the idle limit, so any
    # cached connections are dropped before it is sent.
    http._last_request_time = 0
    http._max_keepalive_idle = MAX_IDLE_CONNECTION_DURATION
    http._request_id = util.new_request_id
    return http
132 def _close_connections(self):
133 for conn in self._http.connections.values():
136 # Monkey patch discovery._cast() so objects and arrays get serialized
137 # with json.dumps() instead of str().
138 _cast_orig = apiclient_discovery._cast
139 def _cast_objects_too(value, schema_type):
141 if (type(value) != type('') and
142 type(value) != type(b'') and
143 (schema_type == 'object' or schema_type == 'array')):
144 return json.dumps(value)
146 return _cast_orig(value, schema_type)
147 apiclient_discovery._cast = _cast_objects_too
# Convert apiclient's HttpErrors into our own API error subclass for better
# error reporting.
# Reassigning apiclient_errors.HttpError is not sufficient because most of the
# apiclient submodules import the class into their own namespace.
def _new_http_error(cls, *args, **kwargs):
    """Allocate an `errors.ApiError` wherever an `HttpError` is requested.

    Installed as `apiclient_errors.HttpError.__new__`, so constructing an
    `HttpError` yields an instance of `errors.ApiError` instead.
    """
    # Skip HttpError in the MRO so this replacement does not call itself.
    parent = super(apiclient_errors.HttpError, cls)
    return parent.__new__(errors.ApiError, *args, **kwargs)
156 apiclient_errors.HttpError.__new__ = staticmethod(_new_http_error)
def http_cache(data_type: str) -> cache.SafeHTTPCache:
    """Set up an HTTP file cache

    This function constructs and returns an `arvados.cache.SafeHTTPCache`
    backed by the filesystem under `~/.cache/arvados/`, or `None` if the
    directory cannot be set up. The return value can be passed to
    `httplib2.Http` as the `cache` argument.

    Arguments:

    * data_type: str --- The name of the subdirectory under `~/.cache/arvados`
      where data is cached.
    """
    try:
        homedir = pathlib.Path.home()
    except RuntimeError:
        # The home directory cannot be resolved, so there is nowhere to cache.
        return None
    path = pathlib.Path(homedir, '.cache', 'arvados', data_type)
    try:
        path.mkdir(parents=True, exist_ok=True)
    except OSError:
        # The cache directory cannot be created; run without a cache.
        return None
    # max_age is two days, expressed in seconds.
    return cache.SafeHTTPCache(str(path), max_age=60*60*24*2)
184 discoveryServiceUrl: str,
188 http: Optional[httplib2.Http]=None,
189 insecure: bool=False,
191 request_id: Optional[str]=None,
194 ) -> apiclient_discovery.Resource:
195 """Build an Arvados API client
197 This function returns a `googleapiclient.discovery.Resource` object
198 constructed from the given arguments. This is a relatively low-level
199 interface that requires all the necessary inputs as arguments. Most
200 users will prefer to use `api` which can accept more flexible inputs.
204 * version: str --- A string naming the version of the Arvados API to use.
206 * discoveryServiceUrl: str --- The URL used to discover APIs passed
207 directly to `googleapiclient.discovery.build`.
209 * token: str --- The authentication token to send with each API call.
211 Keyword-only arguments:
213 * cache: bool --- If true, loads the API discovery document from, or
214 saves it to, a cache on disk (located at
215 `~/.cache/arvados/discovery`).
217 * http: httplib2.Http | None --- The HTTP client object the API client
218 object will use to make requests. If not provided, this function will
219 build its own to use. Either way, the object will be patched as part
220 of the build process.
222 * insecure: bool --- If true, ignore SSL certificate validation
223 errors. Default `False`.
225 * num_retries: int --- The number of times to retry each API request if
226 it encounters a temporary failure. Default 10.
228 * request_id: str | None --- Default `X-Request-Id` header value for
229 outgoing requests that don't already provide one. If `None` or
230 omitted, generate a random ID. When retrying failed requests, the same
231 ID is used on all attempts.
233 * timeout: int --- A timeout value for HTTP requests in seconds. Default
236 Additional keyword arguments will be passed directly to
237 `googleapiclient.discovery.build`.
240 http = httplib2.Http(
241 ca_certs=util.ca_certs_path(),
242 cache=http_cache('discovery') if cache else None,
243 disable_ssl_certificate_validation=bool(insecure),
245 if http.timeout is None:
246 http.timeout = timeout
247 http = _patch_http_request(http, token, num_retries)
249 # The first time a client is instantiated, temporarily route
250 # googleapiclient.http retry logs if they're not already. These are
251 # important because temporary problems fetching the discovery document
252 # can cause clients to appear to hang early. This can be removed after
253 # we have a more general story for handling googleapiclient logs (#20521).
254 client_logger = logging.getLogger('googleapiclient.http')
255 # "first time a client is instantiated" = thread that acquires this lock
256 # It is never released.
257 # googleapiclient sets up its own NullHandler so we detect if logging is
258 # configured by looking for a real handler anywhere in the hierarchy.
259 client_logger_unconfigured = _googleapiclient_log_lock.acquire(blocking=False) and all(
260 isinstance(handler, logging.NullHandler)
261 for logger_name in ['', 'googleapiclient', 'googleapiclient.http']
262 for handler in logging.getLogger(logger_name).handlers
264 if client_logger_unconfigured:
265 client_level = client_logger.level
266 client_filter = GoogleHTTPClientFilter()
267 client_logger.addFilter(client_filter)
268 client_logger.addHandler(log_handler)
269 if logging.NOTSET < client_level < client_filter.retry_levelno:
270 client_logger.setLevel(client_level)
272 client_logger.setLevel(client_filter.retry_levelno)
274 svc = apiclient_discovery.build(
276 cache_discovery=False,
277 discoveryServiceUrl=discoveryServiceUrl,
279 num_retries=num_retries,
283 if client_logger_unconfigured:
284 client_logger.removeHandler(log_handler)
285 client_logger.removeFilter(client_filter)
286 client_logger.setLevel(client_level)
287 svc.api_token = token
288 svc.insecure = insecure
289 svc.request_id = request_id
290 svc.config = lambda: util.get_config_once(svc)
291 svc.vocabulary = lambda: util.get_vocabulary_once(svc)
292 svc.close_connections = types.MethodType(_close_connections, svc)
293 http.max_request_size = svc._rootDesc.get('maxRequestSize', 0)
295 http._request_id = lambda: svc.request_id or util.new_request_id()
def normalize_api_kwargs(
        version: Optional[str]=None,
        discoveryServiceUrl: Optional[str]=None,
        host: Optional[str]=None,
        token: Optional[str]=None,
        **kwargs
) -> dict:
    """Validate kwargs from `api` and build kwargs for `api_client`

    This method takes high-level keyword arguments passed to the `api`
    constructor and normalizes them into a new dictionary that can be passed
    as keyword arguments to `api_client`. It raises `ValueError` if required
    arguments are missing or conflict.

    Arguments:

    * version: str | None --- A string naming the version of the Arvados API
      to use. If not specified, the code will log a message and fall back to
      a default version.

    * discoveryServiceUrl: str | None --- The URL used to discover APIs
      passed directly to `googleapiclient.discovery.build`. It is an error
      to pass both `discoveryServiceUrl` and `host`.

    * host: str | None --- The hostname and optional port number of the
      Arvados API server. Used to build `discoveryServiceUrl`. It is an
      error to pass both `discoveryServiceUrl` and `host`.

    * token: str --- The authentication token to send with each API call.

    Additional keyword arguments will be included in the return value.
    """
    if discoveryServiceUrl and host:
        raise ValueError("both discoveryServiceUrl and host provided")
    elif discoveryServiceUrl:
        url_src = "discoveryServiceUrl"
    elif host:
        url_src = "host argument"
        discoveryServiceUrl = 'https://%s/discovery/v1/apis/{api}/{apiVersion}/rest' % (host,)
    elif token:
        # This specific error message gets priority for backwards compatibility.
        raise ValueError("token argument provided, but host missing.")
    else:
        raise ValueError("neither discoveryServiceUrl nor host provided")
    if not token:
        raise ValueError("%s provided, but token missing" % (url_src,))
    if not version:
        # NOTE(review): the default version string and the log level are
        # reconstructed here -- confirm both against the upstream module.
        version = 'v1'
        _logger.info(
            "Using default API version. Call arvados.api(%r) instead.",
            version,
        )
    return {
        'discoveryServiceUrl': discoveryServiceUrl,
        'token': token,
        'version': version,
        **kwargs,
    }
def api_kwargs_from_config(
        version: Optional[str]=None,
        apiconfig: Optional[Mapping[str, str]]=None,
        **kwargs
) -> dict:
    """Build `api_client` keyword arguments from configuration

    This function accepts a mapping with Arvados configuration settings like
    `ARVADOS_API_HOST` and converts them into a mapping of keyword arguments
    that can be passed to `api_client`. If `ARVADOS_API_HOST` or
    `ARVADOS_API_TOKEN` are not configured, it raises `ValueError`.

    Arguments:

    * version: str | None --- A string naming the version of the Arvados API
      to use. It is passed through to `normalize_api_kwargs`, which handles
      the case where it is not specified.

    * apiconfig: Mapping[str, str] | None --- A mapping with entries for
      `ARVADOS_API_HOST`, `ARVADOS_API_TOKEN`, and optionally
      `ARVADOS_API_HOST_INSECURE`. If not provided, calls
      `arvados.config.settings` to get these parameters from user
      configuration.

    Additional keyword arguments will be included in the return value.
    """
    if apiconfig is None:
        apiconfig = config.settings()
    # Name every missing required setting in a single error message.
    missing = " and ".join(
        key
        for key in ['ARVADOS_API_HOST', 'ARVADOS_API_TOKEN']
        if key not in apiconfig
    )
    if missing:
        raise ValueError(
            "%s not set.\nPlease set in %s or export environment variable." %
            (missing, config.default_config_file),
        )
    return normalize_api_kwargs(
        version,
        # Configuration supplies a host, never a discoveryServiceUrl.
        None,
        apiconfig['ARVADOS_API_HOST'],
        apiconfig['ARVADOS_API_TOKEN'],
        insecure=config.flag_is_true('ARVADOS_API_HOST_INSECURE', apiconfig),
        **kwargs,
    )
405 version: Optional[str]=None,
407 host: Optional[str]=None,
408 token: Optional[str]=None,
409 insecure: bool=False,
410 request_id: Optional[str]=None,
413 discoveryServiceUrl: Optional[str]=None,
415 ) -> 'arvados.safeapi.ThreadSafeApiCache':
416 """Dynamically build an Arvados API client
418 This function provides a high-level "do what I mean" interface to build an
419 Arvados API client object. You can call it with no arguments to build a
420 client from user configuration; pass `host` and `token` arguments just
421 like you would write in user configuration; or pass additional arguments
422 for lower-level control over the client.
424 This function returns a `arvados.safeapi.ThreadSafeApiCache`, an
425 API-compatible wrapper around `googleapiclient.discovery.Resource`. If
426 you're handling concurrency yourself and/or your application is very
427 performance-sensitive, consider calling `api_client` directly.
431 * version: str | None --- A string naming the version of the Arvados API
432 to use. If not specified, the code will log a warning and fall back to
435 * host: str | None --- The hostname and optional port number of the
438 * token: str | None --- The authentication token to send with each API
441 * discoveryServiceUrl: str | None --- The URL used to discover APIs
442 passed directly to `googleapiclient.discovery.build`.
444 If `host`, `token`, and `discoveryServiceUrl` are all omitted, `host` and
445 `token` will be loaded from the user's configuration. Otherwise, you must
446 pass `token` and one of `host` or `discoveryServiceUrl`. It is an error to
447 pass both `host` and `discoveryServiceUrl`.
449 Other arguments are passed directly to `api_client`. See that function's
450 docstring for more information about their meaning.
455 request_id=request_id,
458 if discoveryServiceUrl or host or token:
459 kwargs.update(normalize_api_kwargs(version, discoveryServiceUrl, host, token))
461 kwargs.update(api_kwargs_from_config(version))
462 version = kwargs.pop('version')
463 # We do the import here to avoid a circular import at the top level.
464 from .safeapi import ThreadSafeApiCache
465 return ThreadSafeApiCache({}, {}, kwargs, version)
468 version: Optional[str]=None,
469 apiconfig: Optional[Mapping[str, str]]=None,
471 ) -> 'arvados.safeapi.ThreadSafeApiCache':
472 """Build an Arvados API client from a configuration mapping
474 This function builds an Arvados API client from a mapping with user
475 configuration. It accepts that mapping as an argument, so you can use a
476 configuration that's different from what the user has set up.
478 This function returns a `arvados.safeapi.ThreadSafeApiCache`, an
479 API-compatible wrapper around `googleapiclient.discovery.Resource`. If
480 you're handling concurrency yourself and/or your application is very
481 performance-sensitive, consider calling `api_client` directly.
485 * version: str | None --- A string naming the version of the Arvados API
486 to use. If not specified, the code will log a warning and fall back to
489 * apiconfig: Mapping[str, str] | None --- A mapping with entries for
490 `ARVADOS_API_HOST`, `ARVADOS_API_TOKEN`, and optionally
491 `ARVADOS_API_HOST_INSECURE`. If not provided, calls
492 `arvados.config.settings` to get these parameters from user
495 Other arguments are passed directly to `api_client`. See that function's
496 docstring for more information about their meaning.
498 return api(**api_kwargs_from_config(version, apiconfig, **kwargs))
class OrderedJsonModel(apiclient.model.JsonModel):
    """Model class for JSON that preserves the contents' order

    .. WARNING:: Deprecated
       This model is redundant now that Python dictionaries preserve insertion
       ordering. Code that passes this model to API constructors can remove it.

    In Python versions before 3.6, API clients that cared about preserving the
    order of fields in API server responses could use this model to do so.
    Typical usage looked like:

        from arvados.api import OrderedJsonModel
        client = arvados.api('v1', ..., model=OrderedJsonModel())
    """
    @util._deprecated(preferred="the default model and rely on Python's built-in dictionary ordering")
    def __init__(self, data_wrapper=False):
        # Adds no behavior of its own; it only forwards to JsonModel.
        return super().__init__(data_wrapper)
RETRY_DELAY_INITIAL = 0
"""
.. WARNING:: Deprecated
   This constant was used by retry code in previous versions of the Arvados SDK.
   Changing the value has no effect anymore.
   Prefer passing `num_retries` to an API client constructor instead.
   Refer to the constructor docstrings for details.
"""

RETRY_DELAY_BACKOFF = 0
"""
.. WARNING:: Deprecated
   This constant was used by retry code in previous versions of the Arvados SDK.
   Changing the value has no effect anymore.
   Prefer passing `num_retries` to an API client constructor instead.
   Refer to the constructor docstrings for details.
"""
539 .. WARNING:: Deprecated
540 This constant was used by retry code in previous versions of the Arvados SDK.
541 Changing the value has no effect anymore.
542 Prefer passing `num_retries` to an API client constructor instead.
543 Refer to the constructor docstrings for details.