1 # Copyright (C) The Arvados Authors. All rights reserved.
3 # SPDX-License-Identifier: Apache-2.0
4 """Arvados REST API client
6 This module provides classes and functions to construct an Arvados REST API
7 client. Most users will want to use one of these constructor functions, in order of preference:
10 * `arvados.api.api` provides a high-level interface to construct a client from
11 either arguments or user configuration. You can call this module just like
12 a function as a shortcut for calling `arvados.api.api`.
14 * `arvados.api.api_from_config` constructs a client from user configuration in a mapping.
17 * `arvados.api.api_client` provides a lower-level interface to construct a
18 simpler client object that is not threadsafe.
20 Other classes and functions in this module support creating and customizing
21 the client for specialized use-cases.
23 The methods on an Arvados REST API client are generated dynamically at
24 runtime. The `arvados.api_resources` module documents those methods and
25 return values for the current version of Arvados. It does not
26 implement anything so you don't need to import it, but it's a helpful
27 reference to understand how to use the Arvados REST API client.
57 from apiclient import discovery as apiclient_discovery
58 from apiclient import errors as apiclient_errors
64 from ._internal import basedirs
65 from .logging import GoogleHTTPClientFilter, log_handler
_logger = logging.getLogger('arvados.api')
# Acquired without blocking, and never released, by the first `api_client`
# call, so the temporary googleapiclient log routing is set up at most once.
_googleapiclient_log_lock = threading.Lock()

MAX_IDLE_CONNECTION_DURATION = 30
"""
Number of seconds that API client HTTP connections should be allowed to idle
in keepalive state before they are forced closed. Client code can adjust this
constant, and it will be used for all Arvados API clients constructed after
that point.
"""

# An unused HTTP 5xx status code to request a retry internally.
# See _intercept_http_request. This should not be user-visible.
_RETRY_4XX_STATUS = 545

if sys.version_info >= (3,):
    httplib2.SSLHandshakeError = None
_orig_retry_request = apiclient.http._retry_request
def _retry_request(http, num_retries, *args, **kwargs):
    """Wrap googleapiclient's `_retry_request` for Arvados HTTP clients

    For an `http` object that carries a `num_retries` attribute (set by
    `_patch_http_request`), this retries at least that many times, then
    restores the real status code if the response still carries the internal
    `_RETRY_4XX_STATUS` marker. Any other `http` object is passed straight
    through to the original implementation.

    Returns the `(response, body)` tuple from the original function.
    """
    try:
        num_retries = max(num_retries, http.num_retries)
    except AttributeError:
        # `http` client object does not have a `num_retries` attribute.
        # It apparently hasn't gone through _patch_http_request, possibly
        # because this isn't an Arvados API client. Pass through to
        # avoid interfering with other Google API clients.
        return _orig_retry_request(http, num_retries, *args, **kwargs)
    response, body = _orig_retry_request(http, num_retries, *args, **kwargs)
    # If _intercept_http_request ran out of retries for a 4xx response,
    # restore the original status code.
    if response.status == _RETRY_4XX_STATUS:
        response.status = int(response['status'])
    return (response, body)
apiclient.http._retry_request = _retry_request
def _intercept_http_request(self, uri, method="GET", headers=None, **kwargs):
    """Replacement for `httplib2.Http.request` on Arvados API clients

    `self` is the HTTP client object prepared by `_patch_http_request`. This
    wrapper:

    * adds an `X-Request-Id` header if the caller did not supply one, and
      always sets the `Authorization` header from `self.arvados_api_token`;
    * rejects request bodies larger than `self.max_request_size` (when that
      limit is nonzero) with `MediaUploadSizeError`;
    * closes and forgets all cached connections when the client has been
      idle longer than `self._max_keepalive_idle` seconds;
    * rewrites retryable 4xx statuses to `_RETRY_4XX_STATUS` so that
      googleapiclient retries them;
    * prefixes the first string argument of any raised exception with
      `[request_id] `.

    A `headers` mapping supplied by the caller is updated in place, so a
    caller that reuses it across retries sends the same request ID each time.

    Returns the `(response, body)` tuple from the original request method.
    """
    # A fresh dict per call: a shared default would leak the request ID and
    # token from one header-less call into the next.
    if headers is None:
        headers = {}
    if not headers.get('X-Request-Id'):
        headers['X-Request-Id'] = self._request_id()
    try:
        request_body = kwargs.get('body')
        if (self.max_request_size and
            request_body and
            self.max_request_size < len(request_body)):
            raise apiclient_errors.MediaUploadSizeError("Request size %i bytes exceeds published limit of %i bytes" % (len(request_body), self.max_request_size))

        headers['Authorization'] = 'Bearer %s' % self.arvados_api_token

        if (time.time() - self._last_request_time) > self._max_keepalive_idle:
            # High probability of failure due to connection atrophy. Make
            # sure this request [re]opens a new connection by closing and
            # forgetting all cached connections first.
            for conn in self.connections.values():
                conn.close()
            self.connections.clear()

        self._last_request_time = time.time()
        try:
            response, body = self.orig_http_request(uri, method, headers=headers, **kwargs)
        except ssl.CertificateError as e:
            raise ssl.CertificateError(e.args[0], "Could not connect to %s\n%s\nPossible causes: remote SSL/TLS certificate expired, or was issued by an untrusted certificate authority." % (uri, e)) from None
        # googleapiclient only retries 403, 429, and 5xx status codes.
        # If we got another 4xx status that we want to retry, convert it into
        # 5xx so googleapiclient handles it the way we want.
        if response.status < 500 and response.status in retry._HTTP_CAN_RETRY:
            response.status = _RETRY_4XX_STATUS
        return (response, body)
    except Exception as e:
        # Prepend "[request_id] " to the error message, which we
        # assume is the first string argument passed to the exception
        # constructor.
        for i, arg in enumerate(e.args):
            if isinstance(arg, str):
                e.args = e.args[:i] + ("[{}] {}".format(headers['X-Request-Id'], arg),) + e.args[i+1:]
                raise type(e)(*e.args)
        # No string argument to annotate: re-raise unchanged.
        raise
def _patch_http_request(http, api_token, num_retries):
    """Prepare an HTTP client object for use by an Arvados API client

    Stores the token, retry count, and keepalive bookkeeping on `http`, saves
    its original `request` method as `orig_http_request`, and rebinds
    `request` to `_intercept_http_request`.

    Returns the same `http` object, modified in place.
    """
    http.arvados_api_token = api_token
    # 0 means "no limit known yet"; `_intercept_http_request` skips the size
    # check while this is falsy.
    http.max_request_size = 0
    http.num_retries = num_retries
    http.orig_http_request = http.request
    http.request = types.MethodType(_intercept_http_request, http)
    # 0 makes the first request look idle, so it starts from a clean
    # connection pool.
    http._last_request_time = 0
    http._max_keepalive_idle = MAX_IDLE_CONNECTION_DURATION
    http._request_id = util.new_request_id
    return http
def _close_connections(self):
    """Close every connection cached by this client's HTTP object

    Bound onto API client objects as their `close_connections` method.
    """
    for conn in self._http.connections.values():
        conn.close()
158 # Monkey patch discovery._cast() so objects and arrays get serialized
159 # with json.dumps() instead of str().
160 _cast_orig = apiclient_discovery._cast
def _cast_objects_too(value, schema_type):
    """Serialize object and array parameters as JSON

    Values for `object` or `array` schema types that are not already `str`
    or `bytes` are encoded with `json.dumps`. Everything else is delegated to
    the original `apiclient.discovery._cast`.
    """
    if schema_type in ('object', 'array') and not isinstance(value, (str, bytes)):
        return json.dumps(value)
    return _cast_orig(value, schema_type)
169 apiclient_discovery._cast = _cast_objects_too
# Convert apiclient's HttpErrors into our own API error subclass for better
# error reporting.
# Reassigning apiclient_errors.HttpError is not sufficient because most of the
# apiclient submodules import the class into their own namespace.
def _new_http_error(cls, *args, **kwargs):
    """Allocate an `errors.ApiError` whenever an `HttpError` is requested

    Installed as `HttpError.__new__`. The requested class `cls` is used only
    to find the next `__new__` after `HttpError` in its MRO; the object
    created is always an `errors.ApiError`.
    """
    allocate = super(apiclient_errors.HttpError, cls).__new__
    return allocate(errors.ApiError, *args, **kwargs)
apiclient_errors.HttpError.__new__ = staticmethod(_new_http_error)
class ThreadSafeHTTPCache:
    """Thread-safe replacement for `httplib2.FileCache`

    `arvados.api.http_cache` is the preferred way to construct this object.
    Refer to that function's docstring for details.

    Each URL is stored in one file under the cache directory, named with the
    MD5 hex digest of the URL plus a `.tmp` suffix. New content is written to
    a separate temporary file and then renamed over the entry.

    Arguments:

    * path: str | None --- The directory where entries are stored.

    * max_age: float | None --- If given, entries last modified more than
      this many seconds ago are deleted when the object is constructed.
      Failures during that cleanup are ignored.
    """

    def __init__(self, path=None, max_age=None):
        self._dir = path
        if max_age is not None:
            try:
                self._clean(threshold=time.time() - max_age)
            except Exception:
                # Cleaning is best-effort; an unusable directory surfaces
                # later as cache misses rather than a constructor failure.
                pass

    def _clean(self, threshold=0):
        """Delete cache entries whose mtime is older than `threshold`"""
        for ent in os.listdir(self._dir):
            fnm = os.path.join(self._dir, ent)
            if os.path.isdir(fnm) or not fnm.endswith('.tmp'):
                continue
            try:
                if os.lstat(fnm).st_mtime < threshold:
                    os.unlink(fnm)
            except FileNotFoundError:
                # Another thread or process removed it first.
                continue

    def __str__(self):
        return self._dir

    def _filename(self, url):
        """Return the path of the cache file for `url`"""
        return os.path.join(self._dir, hashlib.md5(url.encode('utf-8')).hexdigest()+'.tmp')

    def get(self, url):
        """Return the cached bytes for `url`, or `None` if unavailable"""
        try:
            with open(self._filename(url), 'rb') as f:
                return f.read()
        except OSError:
            return None

    def set(self, url, content):
        """Store `content` (bytes) as the cache entry for `url`

        Returns `None`. If a temporary file cannot be created, nothing is
        stored and no exception is raised.
        """
        try:
            fd, tempname = tempfile.mkstemp(dir=self._dir)
        except Exception:
            return None
        try:
            try:
                f = os.fdopen(fd, 'wb')
            except BaseException:
                # fdopen did not take ownership of the descriptor.
                os.close(fd)
                raise
            with f:
                f.write(content)
            os.rename(tempname, self._filename(url))
            tempname = None
        finally:
            # Still set only when the write or rename failed.
            if tempname:
                os.unlink(tempname)

    def delete(self, url):
        """Remove the cache entry for `url`, if there is one"""
        try:
            os.unlink(self._filename(url))
        except FileNotFoundError:
            pass
class ThreadSafeAPIClient(object):
    """Thread-safe wrapper for an Arvados API client

    This class takes all the arguments necessary to build a lower-level
    Arvados API client `googleapiclient.discovery.Resource`, then
    transparently builds and wraps a unique object per thread. This works
    around the fact that the client's underlying HTTP client object is not
    threadsafe.

    Arguments:

    * apiconfig: Mapping[str, str] | None --- A mapping with entries for
      `ARVADOS_API_HOST`, `ARVADOS_API_TOKEN`, and optionally
      `ARVADOS_API_HOST_INSECURE`. If not provided, uses
      `arvados.config.settings` to get these parameters from user
      configuration. You can pass an empty mapping to build the client
      solely from `api_params`.

    * keep_params: Mapping[str, Any] --- Keyword arguments used to construct
      an associated `arvados.keep.KeepClient`.

    * api_params: Mapping[str, Any] --- Keyword arguments used to construct
      each thread's API client. These have the same meaning as in the
      `arvados.api.api` function.

    * version: str | None --- A string naming the version of the Arvados API
      to use. If not specified, the code will log a warning and fall back to
      a default version.
    """
    def __init__(
            self,
            apiconfig: Optional[Mapping[str, str]]=None,
            keep_params: Optional[Mapping[str, Any]]={},
            api_params: Optional[Mapping[str, Any]]={},
            version: Optional[str]=None,
    ) -> None:
        # The annotations allow None, so treat it as "no extra arguments"
        # instead of failing on `**None`.
        keep_params = keep_params or {}
        api_params = api_params or {}
        if apiconfig or apiconfig is None:
            self._api_kwargs = api_kwargs_from_config(version, apiconfig, **api_params)
        else:
            # An explicitly empty mapping: build only from api_params.
            self._api_kwargs = normalize_api_kwargs(version, **api_params)
        self.api_token = self._api_kwargs['token']
        self.request_id = self._api_kwargs.get('request_id')
        self.local = threading.local()
        self.keep = keep.KeepClient(api_client=self, **keep_params)

    def localapi(self) -> 'googleapiclient.discovery.Resource':
        """Return the calling thread's API client, building it on first use"""
        try:
            client = self.local.api
        except AttributeError:
            client = api_client(**self._api_kwargs)
            # Read request_id from the wrapper at call time, so changing it
            # on this object affects every thread's client.
            client._http._request_id = lambda: self.request_id or util.new_request_id()
            self.local.api = client
        return client

    def __getattr__(self, name: str) -> Any:
        # `localapi` itself reads these two attributes. If they are not set
        # (for example on an instance whose __init__ did not finish),
        # proxying them would recurse without bound.
        if name in ('local', '_api_kwargs'):
            raise AttributeError(name)
        # Proxy nonexistent attributes to the thread-local API client.
        return getattr(self.localapi(), name)
def http_cache(data_type: str) -> Optional[ThreadSafeHTTPCache]:
    """Set up an HTTP file cache

    This function constructs and returns an `arvados.api.ThreadSafeHTTPCache`
    backed by the filesystem under a cache directory from the environment, or
    `None` if the directory cannot be set up. The return value can be passed to
    `httplib2.Http` as the `cache` argument.

    Arguments:

    * data_type: str --- The name of the subdirectory
      where data is cached.
    """
    try:
        path = basedirs.BaseDirectories('CACHE').storage_path(data_type)
    except (OSError, RuntimeError):
        return None
    else:
        # Entries older than two days are purged when the cache is opened.
        return ThreadSafeHTTPCache(str(path), max_age=60*60*24*2)
def api_client(
        version: str,
        discoveryServiceUrl: str,
        token: str,
        *,
        cache: bool=True,
        http: Optional[httplib2.Http]=None,
        insecure: bool=False,
        num_retries: int=10,
        request_id: Optional[str]=None,
        timeout: int=5*60,
        **kwargs: Any,
) -> apiclient_discovery.Resource:
    """Build an Arvados API client

    This function returns a `googleapiclient.discovery.Resource` object
    constructed from the given arguments. This is a relatively low-level
    interface that requires all the necessary inputs as arguments. Most
    users will prefer to use `api` which can accept more flexible inputs.

    Arguments:

    * version: str --- A string naming the version of the Arvados API to use.

    * discoveryServiceUrl: str --- The URL used to discover APIs passed
      directly to `googleapiclient.discovery.build`.

    * token: str --- The authentication token to send with each API call.

    Keyword-only arguments:

    * cache: bool --- If true, loads the API discovery document from, or
      saves it to, a cache on disk.

    * http: httplib2.Http | None --- The HTTP client object the API client
      object will use to make requests. If not provided, this function will
      build its own to use. Either way, the object will be patched as part
      of the build process.

    * insecure: bool --- If true, ignore SSL certificate validation
      errors. Default `False`.

    * num_retries: int --- The number of times to retry each API request if
      it encounters a temporary failure. Default 10.

    * request_id: str | None --- Default `X-Request-Id` header value for
      outgoing requests that don't already provide one. If `None` or
      omitted, generate a random ID. When retrying failed requests, the same
      ID is used on all attempts.

    * timeout: int --- A timeout value for HTTP requests in seconds.

    Additional keyword arguments will be passed directly to
    `googleapiclient.discovery.build`.
    """
    # NOTE(review): the `def` header and the `cache`/`timeout` defaults are
    # not visible in the reviewed excerpt; confirm `cache=True` and
    # `timeout=5*60` against the full file.
    if http is None:
        http = httplib2.Http(
            ca_certs=util.ca_certs_path(),
            cache=http_cache('discovery') if cache else None,
            disable_ssl_certificate_validation=bool(insecure),
        )
    if http.timeout is None:
        http.timeout = timeout
    http = _patch_http_request(http, token, num_retries)

    # The first time a client is instantiated, temporarily route
    # googleapiclient.http retry logs if they're not already. These are
    # important because temporary problems fetching the discovery document
    # can cause clients to appear to hang early. This can be removed after
    # we have a more general story for handling googleapiclient logs (#20521).
    client_logger = logging.getLogger('googleapiclient.http')
    # "first time a client is instantiated" = thread that acquires this lock
    # It is never released.
    # googleapiclient sets up its own NullHandler so we detect if logging is
    # configured by looking for a real handler anywhere in the hierarchy.
    client_logger_unconfigured = _googleapiclient_log_lock.acquire(blocking=False) and all(
        isinstance(handler, logging.NullHandler)
        for logger_name in ['', 'googleapiclient', 'googleapiclient.http']
        for handler in logging.getLogger(logger_name).handlers
    )
    if client_logger_unconfigured:
        client_level = client_logger.level
        client_filter = GoogleHTTPClientFilter()
        client_logger.addFilter(client_filter)
        client_logger.addHandler(log_handler)
        # Keep a more verbose level the application already chose; otherwise
        # lower the threshold just enough to let retry messages through.
        if logging.NOTSET < client_level < client_filter.retry_levelno:
            client_logger.setLevel(client_level)
        else:
            client_logger.setLevel(client_filter.retry_levelno)
    try:
        svc = apiclient_discovery.build(
            'arvados', version,
            cache_discovery=False,
            discoveryServiceUrl=discoveryServiceUrl,
            http=http,
            num_retries=num_retries,
            **kwargs,
        )
    finally:
        # Undo the temporary log routing whether or not discovery succeeded.
        if client_logger_unconfigured:
            client_logger.removeHandler(log_handler)
            client_logger.removeFilter(client_filter)
            client_logger.setLevel(client_level)
    svc.api_token = token
    svc.insecure = insecure
    svc.request_id = request_id
    svc.config = lambda: util.get_config_once(svc)
    svc.vocabulary = lambda: util.get_vocabulary_once(svc)
    svc.close_connections = types.MethodType(_close_connections, svc)
    http.max_request_size = svc._rootDesc.get('maxRequestSize', 0)
    # NOTE(review): this line is not visible in the reviewed excerpt; it
    # stops using the discovery cache for ordinary API requests. Confirm.
    http.cache = None
    http._request_id = lambda: svc.request_id or util.new_request_id()
    return svc
def normalize_api_kwargs(
        version: Optional[str]=None,
        discoveryServiceUrl: Optional[str]=None,
        host: Optional[str]=None,
        token: Optional[str]=None,
        **kwargs: Any,
) -> Dict[str, Any]:
    """Validate kwargs from `api` and build kwargs for `api_client`

    This method takes high-level keyword arguments passed to the `api`
    constructor and normalizes them into a new dictionary that can be passed
    as keyword arguments to `api_client`. It raises `ValueError` if required
    arguments are missing or conflict.

    Arguments:

    * version: str | None --- A string naming the version of the Arvados API
      to use. If not specified, the code will log a message and fall back to
      a default version.

    * discoveryServiceUrl: str | None --- The URL used to discover APIs
      passed directly to `googleapiclient.discovery.build`. It is an error
      to pass both `discoveryServiceUrl` and `host`.

    * host: str | None --- The hostname and optional port number of the
      Arvados API server. Used to build `discoveryServiceUrl`. It is an
      error to pass both `discoveryServiceUrl` and `host`.

    * token: str --- The authentication token to send with each API call.

    Additional keyword arguments will be included in the return value.
    """
    if discoveryServiceUrl and host:
        raise ValueError("both discoveryServiceUrl and host provided")
    elif discoveryServiceUrl:
        url_src = "discoveryServiceUrl"
    elif host:
        url_src = "host argument"
        discoveryServiceUrl = 'https://%s/discovery/v1/apis/{api}/{apiVersion}/rest' % (host,)
    elif token:
        # This specific error message gets priority for backwards compatibility.
        raise ValueError("token argument provided, but host missing.")
    else:
        raise ValueError("neither discoveryServiceUrl nor host provided")
    if not token:
        raise ValueError("%s provided, but token missing" % (url_src,))
    if not version:
        # NOTE(review): the fallback value is not visible in the reviewed
        # excerpt; confirm that 'v1' is the intended default.
        version = 'v1'
        _logger.info(
            "Using default API version. Call arvados.api(%r) instead.",
            version,
        )
    return {
        'discoveryServiceUrl': discoveryServiceUrl,
        'token': token,
        'version': version,
        **kwargs,
    }
def api_kwargs_from_config(
        version: Optional[str]=None,
        apiconfig: Optional[Mapping[str, str]]=None,
        **kwargs: Any
) -> Dict[str, Any]:
    """Build `api_client` keyword arguments from configuration

    This function accepts a mapping with Arvados configuration settings like
    `ARVADOS_API_HOST` and converts them into a mapping of keyword arguments
    that can be passed to `api_client`. If `ARVADOS_API_HOST` or
    `ARVADOS_API_TOKEN` are not configured, it raises `ValueError`.

    Arguments:

    * version: str | None --- A string naming the version of the Arvados API
      to use. If not specified, the code will log a message and fall back to
      a default version.

    * apiconfig: Mapping[str, str] | None --- A mapping with entries for
      `ARVADOS_API_HOST`, `ARVADOS_API_TOKEN`, and optionally
      `ARVADOS_API_HOST_INSECURE`. If not provided, calls
      `arvados.config.settings` to get these parameters from user
      configuration.

    Additional keyword arguments will be included in the return value.
    """
    if apiconfig is None:
        apiconfig = config.settings()
    # Name every missing required setting in a single error message.
    missing = " and ".join(
        key
        for key in ['ARVADOS_API_HOST', 'ARVADOS_API_TOKEN']
        if key not in apiconfig
    )
    if missing:
        raise ValueError(
            "%s not set.\nPlease set in %s or export environment variable." %
            (missing, config.default_config_file),
        )
    return normalize_api_kwargs(
        version,
        None,  # discoveryServiceUrl is derived from the configured host
        apiconfig['ARVADOS_API_HOST'],
        apiconfig['ARVADOS_API_TOKEN'],
        insecure=config.flag_is_true('ARVADOS_API_HOST_INSECURE', apiconfig),
        **kwargs,
    )
def api(
        version: Optional[str]=None,
        cache: bool=True,
        host: Optional[str]=None,
        token: Optional[str]=None,
        insecure: bool=False,
        request_id: Optional[str]=None,
        timeout: int=5*60,
        *,
        discoveryServiceUrl: Optional[str]=None,
        **kwargs: Any,
) -> ThreadSafeAPIClient:
    """Dynamically build an Arvados API client

    This function provides a high-level "do what I mean" interface to build an
    Arvados API client object. You can call it with no arguments to build a
    client from user configuration; pass `host` and `token` arguments just
    like you would write in user configuration; or pass additional arguments
    for lower-level control over the client.

    This function returns an `arvados.api.ThreadSafeAPIClient`, an
    API-compatible wrapper around `googleapiclient.discovery.Resource`. If
    you're handling concurrency yourself and/or your application is very
    performance-sensitive, consider calling `api_client` directly.

    Arguments:

    * version: str | None --- A string naming the version of the Arvados API
      to use. If not specified, the code will log a message and fall back to
      a default version.

    * host: str | None --- The hostname and optional port number of the
      Arvados API server.

    * token: str | None --- The authentication token to send with each API
      call.

    * discoveryServiceUrl: str | None --- The URL used to discover APIs
      passed directly to `googleapiclient.discovery.build`.

    If `host`, `token`, and `discoveryServiceUrl` are all omitted, `host` and
    `token` will be loaded from the user's configuration. Otherwise, you must
    pass `token` and one of `host` or `discoveryServiceUrl`. It is an error to
    pass both `host` and `discoveryServiceUrl`.

    Other arguments are passed directly to `api_client`. See that function's
    docstring for more information about their meaning.
    """
    # NOTE(review): the `def` header and the `cache`/`timeout` parameter
    # lines are not visible in the reviewed excerpt; confirm their position
    # and defaults (`cache=True`, `timeout=5*60`) against the full file.
    kwargs.update(
        cache=cache,
        insecure=insecure,
        request_id=request_id,
        timeout=timeout,
    )
    if discoveryServiceUrl or host or token:
        kwargs.update(normalize_api_kwargs(version, discoveryServiceUrl, host, token))
    else:
        kwargs.update(api_kwargs_from_config(version))
    version = kwargs.pop('version')
    # The empty apiconfig makes the wrapper build solely from `kwargs`,
    # which are fully resolved at this point.
    return ThreadSafeAPIClient({}, {}, kwargs, version)
def api_from_config(
        version: Optional[str]=None,
        apiconfig: Optional[Mapping[str, str]]=None,
        **kwargs: Any
) -> ThreadSafeAPIClient:
    """Build an Arvados API client from a configuration mapping

    This function builds an Arvados API client from a mapping with user
    configuration. It accepts that mapping as an argument, so you can use a
    configuration that's different from what the user has set up.

    This function returns an `arvados.api.ThreadSafeAPIClient`, an
    API-compatible wrapper around `googleapiclient.discovery.Resource`. If
    you're handling concurrency yourself and/or your application is very
    performance-sensitive, consider calling `api_client` directly.

    Arguments:

    * version: str | None --- A string naming the version of the Arvados API
      to use. If not specified, the code will log a message and fall back to
      a default version.

    * apiconfig: Mapping[str, str] | None --- A mapping with entries for
      `ARVADOS_API_HOST`, `ARVADOS_API_TOKEN`, and optionally
      `ARVADOS_API_HOST_INSECURE`. If not provided, calls
      `arvados.config.settings` to get these parameters from user
      configuration.

    Other arguments are passed directly to `api_client`. See that function's
    docstring for more information about their meaning.
    """
    return api(**api_kwargs_from_config(version, apiconfig, **kwargs))