1 # Copyright (C) The Arvados Authors. All rights reserved.
3 # SPDX-License-Identifier: Apache-2.0
6 The code in this module builds Arvados API client objects you can use to submit
7 Arvados API requests. This includes extending the underlying HTTP client with
8 niceties such as caching, X-Request-Id header for tracking, and more. The main
9 client constructors are `api` and `api_from_config`.
12 from __future__ import absolute_import
13 from future import standard_library
14 standard_library.install_aliases()
15 from builtins import range
33 from apiclient import discovery as apiclient_discovery
34 from apiclient import errors as apiclient_errors
40 from .logging import GoogleHTTPClientFilter, log_handler
# Logger for messages emitted by this module.
_logger = logging.getLogger('arvados.api')
# Acquired (non-blocking) while building a client to decide whether this is
# the first client instantiated; the code that acquires it never releases it.
_googleapiclient_log_lock = threading.Lock()

# Default idle threshold, in seconds, copied to each patched HTTP client as
# `_max_keepalive_idle`. When more time than this has passed since the last
# request, cached connections are closed before the next request is sent.
MAX_IDLE_CONNECTION_DURATION = 30

# These constants supported our own retry logic that we've since removed in
# favor of using googleapiclient's num_retries. They're kept here purely for
# API compatibility, but set to 0 to indicate no retries happen.
RETRY_DELAY_INITIAL = 0
RETRY_DELAY_BACKOFF = 0

# An unused HTTP 5xx status code to request a retry internally.
# See _intercept_http_request. This should not be user-visible.
_RETRY_4XX_STATUS = 545

# NOTE(review): presumably this defines a name that httplib2 lacks on
# Python 3 so that references to it elsewhere don't fail - confirm.
if sys.version_info >= (3,):
    httplib2.SSLHandshakeError = None
class OrderedJsonModel(apiclient.model.JsonModel):
    """JSON model whose deserialized objects keep their key order.

    Pass an instance as the `model` argument when building a client if you
    need fields to stay in the order the API server sent them:

        from arvados.api import OrderedJsonModel
        client = arvados.api('v1', ..., model=OrderedJsonModel())
    """

    def deserialize(self, content):
        """Decode UTF-8 `content` and parse it into ordered mappings.

        JSON objects are loaded as `collections.OrderedDict`. When this model
        has `_data_wrapper` set and the parsed top-level object has a `data`
        key, the value under that key is returned instead of the whole body.
        """
        # This is a very slightly modified version of the parent class'
        # implementation.  Copyright (c) 2010 Google.
        text = content.decode('utf-8')
        parsed = json.loads(text, object_pairs_hook=collections.OrderedDict)
        wrapped = (
            self._data_wrapper
            and isinstance(parsed, dict)
            and 'data' in parsed
        )
        return parsed['data'] if wrapped else parsed
_orig_retry_request = apiclient.http._retry_request
def _retry_request(http, num_retries, *args, **kwargs):
    """Wrap googleapiclient's `_retry_request` for patched Arvados clients.

    If `http` carries a `num_retries` attribute (set by
    `_patch_http_request`), the larger of that value and the requested
    `num_retries` is used, and a response still marked with the internal
    `_RETRY_4XX_STATUS` gets its status restored from `response['status']`.
    Any other `http` object is passed through untouched.

    Returns the `(response, body)` tuple from the wrapped function.
    """
    try:
        client_retries = http.num_retries
    except AttributeError:
        # `http` client object does not have a `num_retries` attribute.
        # It apparently hasn't gone through _patch_http_request, possibly
        # because this isn't an Arvados API client. Pass through to
        # avoid interfering with other Google API clients.
        return _orig_retry_request(http, num_retries, *args, **kwargs)
    effective_retries = max(num_retries, client_retries)
    response, body = _orig_retry_request(http, effective_retries, *args, **kwargs)
    # If _intercept_http_request ran out of retries for a 4xx response,
    # restore the original status code.
    if response.status == _RETRY_4XX_STATUS:
        response.status = int(response['status'])
    return (response, body)
apiclient.http._retry_request = _retry_request
def _intercept_http_request(self, uri, method="GET", headers=None, **kwargs):
    """Send a request through the original HTTP client with Arvados extras.

    This function is bound as the `request` method of an HTTP client by
    `_patch_http_request`, so `self` is that client.

    Arguments:

    uri: str
    : The URI to request.

    method: str
    : The HTTP method. Default `"GET"`.

    headers: dict | None
    : Request headers. A dict passed by the caller is updated in place with
      `X-Request-Id` (only if it has no truthy value for it) and
      `Authorization`. If omitted, a fresh dict is created for this call.

    Additional keyword arguments are passed to the original request method.
    A `body` keyword argument, if present, is checked against
    `self.max_request_size` (a value of 0 disables the check).

    Returns the `(response, body)` tuple from the original request method.
    A 4xx status listed in `retry._HTTP_CAN_RETRY` is replaced with
    `_RETRY_4XX_STATUS` so googleapiclient retries it.

    Raises whatever the original request raises. When the exception has a
    string argument, it is re-raised as a new exception of the same type
    with "[request_id] " prepended to the first such argument.
    """
    # Never use a shared default dict here: this function mutates `headers`,
    # so a shared default would leak one call's request ID into the next.
    if headers is None:
        headers = {}
    if not headers.get('X-Request-Id'):
        headers['X-Request-Id'] = self._request_id()
    try:
        if (self.max_request_size and
            kwargs.get('body') and
            self.max_request_size < len(kwargs['body'])):
            raise apiclient_errors.MediaUploadSizeError("Request size %i bytes exceeds published limit of %i bytes" % (len(kwargs['body']), self.max_request_size))

        headers['Authorization'] = 'OAuth2 %s' % self.arvados_api_token

        if (time.time() - self._last_request_time) > self._max_keepalive_idle:
            # High probability of failure due to connection atrophy. Make
            # sure this request [re]opens a new connection by closing and
            # forgetting all cached connections first.
            for conn in self.connections.values():
                conn.close()
            self.connections.clear()

        self._last_request_time = time.time()
        try:
            response, body = self.orig_http_request(uri, method, headers=headers, **kwargs)
        except ssl.SSLCertVerificationError as e:
            raise ssl.SSLCertVerificationError(e.args[0], "Could not connect to %s\n%s\nPossible causes: remote SSL/TLS certificate expired, or was issued by an untrusted certificate authority." % (uri, e)) from None
        # googleapiclient only retries 403, 429, and 5xx status codes.
        # If we got another 4xx status that we want to retry, convert it into
        # 5xx so googleapiclient handles it the way we want.
        if response.status in retry._HTTP_CAN_RETRY and response.status < 500:
            response.status = _RETRY_4XX_STATUS
        return (response, body)
    except Exception as e:
        # Prepend "[request_id] " to the error message, which we
        # assume is the first string argument passed to the exception
        # constructor.
        for index, arg in enumerate(e.args or ()):
            if isinstance(arg, str):
                e.args = e.args[:index] + ("[{}] {}".format(headers['X-Request-Id'], arg),) + e.args[index+1:]
                # Raise a new exception of the same type built from the
                # updated arguments.
                raise type(e)(*e.args)
        # No string argument to annotate: propagate the exception unchanged.
        raise
def _patch_http_request(http, api_token, num_retries):
    """Attach Arvados request handling to an HTTP client object.

    Stores the API token, retry count, and connection bookkeeping on `http`,
    keeps the client's own `request` method as `orig_http_request`, and
    rebinds `request` to `_intercept_http_request`. `max_request_size`
    starts at 0, which disables the request size check.

    Returns the same `http` object that was passed in.
    """
    # Capture the unpatched method before anything on `http` is replaced.
    unpatched_request = http.request
    patched_attrs = {
        'arvados_api_token': api_token,
        'max_request_size': 0,
        'num_retries': num_retries,
        'orig_http_request': unpatched_request,
        'request': types.MethodType(_intercept_http_request, http),
        '_last_request_time': 0,
        '_max_keepalive_idle': MAX_IDLE_CONNECTION_DURATION,
        '_request_id': util.new_request_id,
    }
    for attr_name, attr_value in patched_attrs.items():
        setattr(http, attr_name, attr_value)
    return http
def _close_connections(self):
    """Close every connection cached on this client's `_http` object.

    The connections are closed but left in the `connections` mapping.
    """
    for _, connection in self._http.connections.items():
        connection.close()
# Monkey patch discovery._cast() so objects and arrays get serialized
# with json.dumps() instead of str().
_cast_orig = apiclient_discovery._cast
def _cast_objects_too(value, schema_type):
    """Serialize object and array parameters as JSON.

    A `value` whose exact type is neither text nor bytes, for a
    `schema_type` of 'object' or 'array', is returned as `json.dumps(value)`.
    Everything else is handed to the original `_cast`.
    """
    already_serialized = type(value) in (type(''), type(b''))
    if schema_type in ('object', 'array') and not already_serialized:
        return json.dumps(value)
    return _cast_orig(value, schema_type)
apiclient_discovery._cast = _cast_objects_too
# Convert apiclient's HttpErrors into our own API error subclass for better
# error reporting.
# Reassigning apiclient_errors.HttpError is not sufficient because most of the
# apiclient submodules import the class into their own namespace.
def _new_http_error(cls, *args, **kwargs):
    """Replacement `__new__` for `apiclient_errors.HttpError`.

    Ignores the requested `cls` and always allocates an `errors.ApiError`,
    using the `__new__` found after `HttpError` in the class hierarchy.
    """
    return super(apiclient_errors.HttpError, cls).__new__(
        errors.ApiError, *args, **kwargs)
# Installed as a staticmethod so `cls` is passed explicitly, as for any
# `__new__`.
apiclient_errors.HttpError.__new__ = staticmethod(_new_http_error)
def http_cache(data_type):
    """Return an on-disk HTTP cache for `data_type`, or `None` if unavailable.

    The cache lives in `~/.cache/arvados/<data_type>`; the directory is
    created if it does not exist. Caching is an optimization, so failure to
    locate the home directory or to create the cache directory is not an
    error: the function returns `None` and the caller runs without a cache.

    Arguments:

    data_type: str
    : Name of the subdirectory under `~/.cache/arvados` to use.
    """
    try:
        homedir = pathlib.Path.home()
    except RuntimeError:
        # pathlib raises RuntimeError when the home directory can't be
        # resolved.
        return None
    path = pathlib.Path(homedir, '.cache', 'arvados', data_type)
    try:
        path.mkdir(parents=True, exist_ok=True)
    except OSError:
        # Read-only or otherwise unusable location: go without a cache.
        return None
    # max_age works out to two days if the unit is seconds - presumably it
    # is; confirm against SafeHTTPCache.
    return cache.SafeHTTPCache(str(path), max_age=60*60*24*2)
201 """Build an Arvados API client
203 This function returns a `googleapiclient.discovery.Resource` object
204 constructed from the given arguments. This is a relatively low-level
205 interface that requires all the necessary inputs as arguments. Most
206 users will prefer to use `api` which can accept more flexible inputs.
211 : A string naming the version of the Arvados API to use.
213 discoveryServiceUrl: str
214 : The URL used to discover APIs passed directly to
215 `googleapiclient.discovery.build`.
218 : The authentication token to send with each API call.
220 Keyword-only arguments:
223 : If true, loads the API discovery document from, or saves it to, a cache
224 on disk (located at `~/.cache/arvados/discovery`).
226 http: httplib2.Http | None
227 : The HTTP client object the API client object will use to make requests.
228 If not provided, this function will build its own to use. Either way, the
229 object will be patched as part of the build process.
232 : If true, ignore SSL certificate validation errors. Default `False`.
235 : The number of times to retry each API request if it encounters a
236 temporary failure. Default 10.
238 request_id: str | None
239 : Default `X-Request-Id` header value for outgoing requests that
240 don't already provide one. If `None` or omitted, generate a random
241 ID. When retrying failed requests, the same ID is used on all
245 : A timeout value for HTTP requests in seconds. Default 300 (5 minutes).
247 Additional keyword arguments will be passed directly to
248 `googleapiclient.discovery.build`.
251 http = httplib2.Http(
252 ca_certs=util.ca_certs_path(),
253 cache=http_cache('discovery') if cache else None,
254 disable_ssl_certificate_validation=bool(insecure),
256 if http.timeout is None:
257 http.timeout = timeout
258 http = _patch_http_request(http, token, num_retries)
260 # The first time a client is instantiated, temporarily route
261 # googleapiclient.http retry logs if they're not already. These are
262 # important because temporary problems fetching the discovery document
263 # can cause clients to appear to hang early. This can be removed after
264 # we have a more general story for handling googleapiclient logs (#20521).
265 client_logger = logging.getLogger('googleapiclient.http')
266 # "first time a client is instantiated" = thread that acquires this lock
267 # It is never released.
268 # googleapiclient sets up its own NullHandler so we detect if logging is
269 # configured by looking for a real handler anywhere in the hierarchy.
270 client_logger_unconfigured = _googleapiclient_log_lock.acquire(blocking=False) and all(
271 isinstance(handler, logging.NullHandler)
272 for logger_name in ['', 'googleapiclient', 'googleapiclient.http']
273 for handler in logging.getLogger(logger_name).handlers
275 if client_logger_unconfigured:
276 client_level = client_logger.level
277 client_filter = GoogleHTTPClientFilter()
278 client_logger.addFilter(client_filter)
279 client_logger.addHandler(log_handler)
280 if logging.NOTSET < client_level < client_filter.retry_levelno:
281 client_logger.setLevel(client_level)
283 client_logger.setLevel(client_filter.retry_levelno)
285 svc = apiclient_discovery.build(
287 cache_discovery=False,
288 discoveryServiceUrl=discoveryServiceUrl,
290 num_retries=num_retries,
294 if client_logger_unconfigured:
295 client_logger.removeHandler(log_handler)
296 client_logger.removeFilter(client_filter)
297 client_logger.setLevel(client_level)
298 svc.api_token = token
299 svc.insecure = insecure
300 svc.request_id = request_id
301 svc.config = lambda: util.get_config_once(svc)
302 svc.vocabulary = lambda: util.get_vocabulary_once(svc)
303 svc.close_connections = types.MethodType(_close_connections, svc)
304 http.max_request_size = svc._rootDesc.get('maxRequestSize', 0)
306 http._request_id = lambda: svc.request_id or util.new_request_id()
309 def normalize_api_kwargs(
311 discoveryServiceUrl=None,
316 """Validate kwargs from `api` and build kwargs for `api_client`
318 This method takes high-level keyword arguments passed to the `api`
319 constructor and normalizes them into a new dictionary that can be passed
320 as keyword arguments to `api_client`. It raises `ValueError` if required
321 arguments are missing or conflict.
326 : A string naming the version of the Arvados API to use. If not specified,
327 the code will log a warning and fall back to 'v1'.
329 discoveryServiceUrl: str | None
330 : The URL used to discover APIs passed directly to
331 `googleapiclient.discovery.build`. It is an error to pass both
332 `discoveryServiceUrl` and `host`.
335 : The hostname and optional port number of the Arvados API server. Used to
336 build `discoveryServiceUrl`. It is an error to pass both
337 `discoveryServiceUrl` and `host`.
340 : The authentication token to send with each API call.
342 Additional keyword arguments will be included in the return value.
344 if discoveryServiceUrl and host:
345 raise ValueError("both discoveryServiceUrl and host provided")
346 elif discoveryServiceUrl:
347 url_src = "discoveryServiceUrl"
349 url_src = "host argument"
350 discoveryServiceUrl = 'https://%s/discovery/v1/apis/{api}/{apiVersion}/rest' % (host,)
352 # This specific error message gets priority for backwards compatibility.
353 raise ValueError("token argument provided, but host missing.")
355 raise ValueError("neither discoveryServiceUrl nor host provided")
357 raise ValueError("%s provided, but token missing" % (url_src,))
361 "Using default API version. Call arvados.api(%r) instead.",
365 'discoveryServiceUrl': discoveryServiceUrl,
def api_kwargs_from_config(version=None, apiconfig=None, **kwargs):
    """Build `api_client` keyword arguments from configuration

    This function reads Arvados configuration settings such as
    `ARVADOS_API_HOST` from a mapping and turns them into a mapping of
    keyword arguments suitable for `api_client`. It raises `ValueError` if
    `ARVADOS_API_HOST` or `ARVADOS_API_TOKEN` is not configured.

    Arguments:

    version: str | None
    : A string naming the version of the Arvados API to use. If not specified,
      the code will log a warning and fall back to 'v1'.

    apiconfig: Mapping[str, str] | None
    : A mapping with entries for `ARVADOS_API_HOST`, `ARVADOS_API_TOKEN`, and
      optionally `ARVADOS_API_HOST_INSECURE`. If not provided, calls
      `arvados.config.settings` to get these parameters from user configuration.

    Additional keyword arguments will be included in the return value.
    """
    settings = config.settings() if apiconfig is None else apiconfig
    required_keys = ('ARVADOS_API_HOST', 'ARVADOS_API_TOKEN')
    absent = [name for name in required_keys if name not in settings]
    if absent:
        raise ValueError(
            "%s not set.\nPlease set in %s or export environment variable." %
            (" and ".join(absent), config.default_config_file),
        )
    insecure = config.flag_is_true('ARVADOS_API_HOST_INSECURE', settings)
    return normalize_api_kwargs(
        version,
        None,
        settings['ARVADOS_API_HOST'],
        settings['ARVADOS_API_TOKEN'],
        insecure=insecure,
        **kwargs,
    )
def api(version=None, cache=True, host=None, token=None, insecure=False,
        request_id=None, timeout=5*60, *,
        discoveryServiceUrl=None, **kwargs):
    """Dynamically build an Arvados API client

    This is the high-level "do what I mean" constructor for an Arvados API
    client object. Call it with no arguments to build a client from user
    configuration; pass `host` and `token` just as you would write them in
    user configuration; or pass additional arguments for lower-level control
    over the client.

    This function returns an `arvados.safeapi.ThreadSafeApiCache`, an
    API-compatible wrapper around `googleapiclient.discovery.Resource`. If
    you're handling concurrency yourself and/or your application is very
    performance-sensitive, consider calling `api_client` directly.

    Arguments:

    version: str | None
    : A string naming the version of the Arvados API to use. If not specified,
      the code will log a warning and fall back to 'v1'.

    host: str | None
    : The hostname and optional port number of the Arvados API server.

    token: str | None
    : The authentication token to send with each API call.

    discoveryServiceUrl: str | None
    : The URL used to discover APIs passed directly to
      `googleapiclient.discovery.build`.

    If `host`, `token`, and `discoveryServiceUrl` are all omitted, `host` and
    `token` will be loaded from the user's configuration. Otherwise, you must
    pass `token` and one of `host` or `discoveryServiceUrl`. It is an error to
    pass both `host` and `discoveryServiceUrl`.

    Other arguments are passed directly to `api_client`. See that function's
    docstring for more information about their meaning.
    """
    kwargs = dict(
        kwargs,
        cache=cache,
        insecure=insecure,
        request_id=request_id,
        timeout=timeout,
    )
    explicit_connection = discoveryServiceUrl or host or token
    if explicit_connection:
        connection_kwargs = normalize_api_kwargs(
            version, discoveryServiceUrl, host, token)
    else:
        connection_kwargs = api_kwargs_from_config(version)
    kwargs.update(connection_kwargs)
    version = kwargs.pop('version')
    # We do the import here to avoid a circular import at the top level.
    from .safeapi import ThreadSafeApiCache
    return ThreadSafeApiCache({}, {}, kwargs, version)
def api_from_config(version=None, apiconfig=None, **kwargs):
    """Build an Arvados API client from a configuration mapping

    This function takes user configuration as a mapping argument and builds
    an Arvados API client from it, so you can use a configuration that's
    different from what the user has set up.

    This function returns an `arvados.safeapi.ThreadSafeApiCache`, an
    API-compatible wrapper around `googleapiclient.discovery.Resource`. If
    you're handling concurrency yourself and/or your application is very
    performance-sensitive, consider calling `api_client` directly.

    Arguments:

    version: str | None
    : A string naming the version of the Arvados API to use. If not specified,
      the code will log a warning and fall back to 'v1'.

    apiconfig: Mapping[str, str] | None
    : A mapping with entries for `ARVADOS_API_HOST`, `ARVADOS_API_TOKEN`, and
      optionally `ARVADOS_API_HOST_INSECURE`. If not provided, calls
      `arvados.config.settings` to get these parameters from user configuration.

    Other arguments are passed directly to `api_client`. See that function's
    docstring for more information about their meaning.
    """
    client_kwargs = api_kwargs_from_config(version, apiconfig, **kwargs)
    return api(**client_kwargs)