1 # Copyright (C) The Arvados Authors. All rights reserved.
3 # SPDX-License-Identifier: Apache-2.0
6 The code in this module builds Arvados API client objects you can use to submit
7 Arvados API requests. This includes extending the underlying HTTP client with
8 niceties such as caching, X-Request-Id header for tracking, and more. The main
9 client constructors are `api` and `api_from_config`.
12 from __future__ import absolute_import
13 from future import standard_library
14 standard_library.install_aliases()
15 from builtins import range
30 from apiclient import discovery as apiclient_discovery
31 from apiclient import errors as apiclient_errors
37 _logger = logging.getLogger('arvados.api')
39 MAX_IDLE_CONNECTION_DURATION = 30
40 RETRY_DELAY_INITIAL = 2
41 RETRY_DELAY_BACKOFF = 2
44 if sys.version_info >= (3,):
45 httplib2.SSLHandshakeError = None
class OrderedJsonModel(apiclient.model.JsonModel):
    """Model class for JSON that preserves the contents' order.

    API clients that care about preserving the order of fields in API
    server responses can use this model to do so, like this:

        from arvados.api import OrderedJsonModel
        client = arvados.api('v1', ..., model=OrderedJsonModel())
    """

    def deserialize(self, content):
        """Parse a JSON response body, keeping object keys in document order.

        Arguments:

        content: bytes | str
        : The response body to parse. Bytes are decoded as UTF-8 first;
          text is parsed as it is.

        Returns the parsed document, with every JSON object loaded as a
        `collections.OrderedDict`. If this model has `_data_wrapper` set and
        the document is an object with a `data` key, returns the value
        stored under that key instead.
        """
        # This is a very slightly modified version of the parent class'
        # implementation. Copyright (c) 2010 Google.
        if isinstance(content, (bytes, bytearray)):
            # Only binary bodies need decoding. Calling decode() on text
            # would raise AttributeError.
            content = content.decode('utf-8')
        body = json.loads(content, object_pairs_hook=collections.OrderedDict)
        if self._data_wrapper and isinstance(body, dict) and 'data' in body:
            body = body['data']
        return body
67 def _intercept_http_request(self, uri, method="GET", headers={}, **kwargs):
68 if not headers.get('X-Request-Id'):
69 headers['X-Request-Id'] = self._request_id()
71 if (self.max_request_size and
72 kwargs.get('body') and
73 self.max_request_size < len(kwargs['body'])):
74 raise apiclient_errors.MediaUploadSizeError("Request size %i bytes exceeds published limit of %i bytes" % (len(kwargs['body']), self.max_request_size))
76 headers['Authorization'] = 'OAuth2 %s' % self.arvados_api_token
78 retryable = method in [
79 'DELETE', 'GET', 'HEAD', 'OPTIONS', 'PUT']
80 retry_count = self._retry_count if retryable else 0
83 time.time() - self._last_request_time > self._max_keepalive_idle):
84 # High probability of failure due to connection atrophy. Make
85 # sure this request [re]opens a new connection by closing and
86 # forgetting all cached connections first.
87 for conn in self.connections.values():
89 self.connections.clear()
91 delay = self._retry_delay_initial
92 for _ in range(retry_count):
93 self._last_request_time = time.time()
95 return self.orig_http_request(uri, method, headers=headers, **kwargs)
96 except http.client.HTTPException:
97 _logger.debug("[%s] Retrying API request in %d s after HTTP error",
98 headers['X-Request-Id'], delay, exc_info=True)
99 except ssl.SSLCertVerificationError as e:
100 raise ssl.SSLCertVerificationError(e.args[0], "Could not connect to %s\n%s\nPossible causes: remote SSL/TLS certificate expired, or was issued by an untrusted certificate authority." % (uri, e)) from None
102 # This is the one case where httplib2 doesn't close the
103 # underlying connection first. Close all open
104 # connections, expecting this object only has the one
105 # connection to the API server. This is safe because
106 # httplib2 reopens connections when needed.
107 _logger.debug("[%s] Retrying API request in %d s after socket error",
108 headers['X-Request-Id'], delay, exc_info=True)
109 for conn in self.connections.values():
113 delay = delay * self._retry_delay_backoff
115 self._last_request_time = time.time()
116 return self.orig_http_request(uri, method, headers=headers, **kwargs)
117 except Exception as e:
118 # Prepend "[request_id] " to the error message, which we
119 # assume is the first string argument passed to the exception
121 for i in range(len(e.args or ())):
122 if type(e.args[i]) == type(""):
123 e.args = e.args[:i] + ("[{}] {}".format(headers['X-Request-Id'], e.args[i]),) + e.args[i+1:]
124 raise type(e)(*e.args)
def _patch_http_request(http, api_token):
    """Install Arvados request handling on an HTTP client object.

    The object's current `request` method is saved as `orig_http_request`
    and replaced with `_intercept_http_request` bound to the object. The
    attributes that function reads (API token, request size limit, retry
    and keepalive settings, request ID generator) are set to their initial
    values.

    Arguments:

    http
    : The HTTP client object to patch. It is modified in place.

    api_token
    : The token stored on the object as `arvados_api_token`.

    Returns the same `http` object.
    """
    # Save the original method before replacing it, so the interceptor can
    # still reach it.
    http.orig_http_request = http.request
    http.request = types.MethodType(_intercept_http_request, http)
    initial_state = (
        ('arvados_api_token', api_token),
        ('max_request_size', 0),
        ('_last_request_time', 0),
        ('_max_keepalive_idle', MAX_IDLE_CONNECTION_DURATION),
        ('_retry_delay_initial', RETRY_DELAY_INITIAL),
        ('_retry_delay_backoff', RETRY_DELAY_BACKOFF),
        ('_retry_count', RETRY_COUNT),
        ('_request_id', util.new_request_id),
    )
    for attr_name, attr_value in initial_state:
        setattr(http, attr_name, attr_value)
    return http
140 def _close_connections(self):
141 for conn in self._http.connections.values():
# Replace discovery._cast() with a wrapper, so that values for 'object' and
# 'array' parameters are serialized with json.dumps() rather than str().
_cast_orig = apiclient_discovery._cast
def _cast_objects_too(value, schema_type):
    """Cast `value` for a request parameter, JSON-encoding objects and arrays.

    When `schema_type` is `'object'` or `'array'` and `value` is not already
    a `str` or `bytes` instance, returns `json.dumps(value)`. Every other
    case is delegated to the original `apiclient_discovery._cast`.
    """
    already_serialized = type(value) in (str, bytes)
    if schema_type in ('object', 'array') and not already_serialized:
        return json.dumps(value)
    return _cast_orig(value, schema_type)
apiclient_discovery._cast = _cast_objects_too
# Make apiclient build our own API error subclass whenever it creates an
# HttpError. This is done by patching HttpError.__new__ because
# reassigning apiclient_errors.HttpError is not sufficient: most of the
# apiclient submodules import the class into their own namespace.
def _new_http_error(cls, *args, **kwargs):
    """Allocate an `errors.ApiError` in place of the requested class.

    Installed as `apiclient_errors.HttpError.__new__`. The `cls` argument is
    only used to look up the parent `__new__`; the instance is always
    created as `errors.ApiError`.
    """
    parent_new = super(apiclient_errors.HttpError, cls).__new__
    return parent_new(errors.ApiError, *args, **kwargs)
apiclient_errors.HttpError.__new__ = staticmethod(_new_http_error)
def http_cache(data_type):
    """Return an on-disk HTTP cache for `data_type`, or `None`.

    The cache lives in `$HOME/.cache/arvados/<data_type>` and its entries
    expire after two days.

    Arguments:

    data_type: str
    : The name of the subdirectory to use under `~/.cache/arvados`.

    Returns a `cache.SafeHTTPCache`, or `None` when the `HOME` environment
    variable is unset or empty, or when the cache directory cannot be
    created.
    """
    home = os.environ.get('HOME')
    if not home:
        return None
    cache_dir = home + '/.cache/arvados/' + data_type
    try:
        util.mkdir_dash_p(cache_dir)
    except OSError:
        # Caching is optional: carry on without it if the directory is
        # unavailable.
        return None
    two_days = 60 * 60 * 24 * 2
    return cache.SafeHTTPCache(cache_dir, max_age=two_days)
189 """Build an Arvados API client
191 This function returns a `googleapiclient.discovery.Resource` object
192 constructed from the given arguments. This is a relatively low-level
193 interface that requires all the necessary inputs as arguments. Most
194 users will prefer to use `api` which can accept more flexible inputs.
199 : A string naming the version of the Arvados API to use.
201 discoveryServiceUrl: str
202 : The URL used to discover APIs passed directly to
203 `googleapiclient.discovery.build`.
206 : The authentication token to send with each API call.
208 Keyword-only arguments:
211 : If true, loads the API discovery document from, or saves it to, a cache
212 on disk (located at `~/.cache/arvados/discovery`).
214 http: httplib2.Http | None
215 : The HTTP client object the API client object will use to make requests.
216 If not provided, this function will build its own to use. Either way, the
217 object will be patched as part of the build process.
220 : If true, ignore SSL certificate validation errors. Default `False`.
222 request_id: str | None
223 : Default `X-Request-Id` header value for outgoing requests that
224 don't already provide one. If `None` or omitted, generate a random
225 ID. When retrying failed requests, the same ID is used on all
229 : A timeout value for HTTP requests in seconds. Default 300 (5 minutes).
231 Additional keyword arguments will be passed directly to
232 `googleapiclient.discovery.build`.
235 http = httplib2.Http(
236 ca_certs=util.ca_certs_path(),
237 cache=http_cache('discovery') if cache else None,
238 disable_ssl_certificate_validation=bool(insecure),
240 if http.timeout is None:
241 http.timeout = timeout
242 http = _patch_http_request(http, token)
244 svc = apiclient_discovery.build(
246 cache_discovery=False,
247 discoveryServiceUrl=discoveryServiceUrl,
251 svc.api_token = token
252 svc.insecure = insecure
253 svc.request_id = request_id
254 svc.config = lambda: util.get_config_once(svc)
255 svc.vocabulary = lambda: util.get_vocabulary_once(svc)
256 svc.close_connections = types.MethodType(_close_connections, svc)
257 http.max_request_size = svc._rootDesc.get('maxRequestSize', 0)
259 http._request_id = lambda: svc.request_id or util.new_request_id()
def normalize_api_kwargs(
        version=None,
        discoveryServiceUrl=None,
        host=None,
        token=None,
        **kwargs,
):
    """Validate kwargs from `api` and build kwargs for `api_client`

    This function checks the high-level keyword arguments accepted by the
    `api` constructor and returns a new dictionary of keyword arguments
    suitable for `api_client`. It raises `ValueError` if required arguments
    are missing or conflict.

    Arguments:

    version: str | None
    : A string naming the version of the Arvados API to use. If not
      specified, the code will log a warning and fall back to 'v1'.

    discoveryServiceUrl: str | None
    : The URL used to discover APIs passed directly to
      `googleapiclient.discovery.build`. It is an error to pass both
      `discoveryServiceUrl` and `host`.

    host: str | None
    : The hostname and optional port number of the Arvados API server. Used
      to build `discoveryServiceUrl`. It is an error to pass both
      `discoveryServiceUrl` and `host`.

    token: str
    : The authentication token to send with each API call.

    Additional keyword arguments will be included in the return value.
    """
    if discoveryServiceUrl and host:
        raise ValueError("both discoveryServiceUrl and host provided")
    if not (discoveryServiceUrl or host):
        if token:
            # This specific error message gets priority for backwards compatibility.
            raise ValueError("token argument provided, but host missing.")
        raise ValueError("neither discoveryServiceUrl nor host provided")

    # Exactly one of the two address arguments is set from here on.
    if discoveryServiceUrl:
        url_source = "discoveryServiceUrl"
    else:
        url_source = "host argument"
        discoveryServiceUrl = 'https://%s/discovery/v1/apis/{api}/{apiVersion}/rest' % (host,)

    if not token:
        raise ValueError("%s provided, but token missing" % (url_source,))

    if not version:
        version = 'v1'
        _logger.info(
            "Using default API version. Call arvados.api(%r) instead.",
            version,
        )

    normalized = {
        'discoveryServiceUrl': discoveryServiceUrl,
        'token': token,
        'version': version,
    }
    normalized.update(kwargs)
    return normalized
def api_kwargs_from_config(version=None, apiconfig=None, **kwargs):
    """Build `api_client` keyword arguments from configuration

    This function reads Arvados configuration settings such as
    `ARVADOS_API_HOST` from a mapping and turns them into a mapping of
    keyword arguments that can be passed to `api_client`. If
    `ARVADOS_API_HOST` or `ARVADOS_API_TOKEN` are not configured, it raises
    `ValueError`.

    Arguments:

    version: str | None
    : A string naming the version of the Arvados API to use. If not
      specified, the code will log a warning and fall back to 'v1'.

    apiconfig: Mapping[str, str] | None
    : A mapping with entries for `ARVADOS_API_HOST`, `ARVADOS_API_TOKEN`,
      and optionally `ARVADOS_API_HOST_INSECURE`. If not provided, calls
      `arvados.config.settings` to get these parameters from user
      configuration.

    Additional keyword arguments will be included in the return value.
    """
    settings = config.settings() if apiconfig is None else apiconfig
    required_keys = ['ARVADOS_API_HOST', 'ARVADOS_API_TOKEN']
    unset_keys = [name for name in required_keys if name not in settings]
    if unset_keys:
        raise ValueError(
            "%s not set.\nPlease set in %s or export environment variable." %
            (" and ".join(unset_keys), config.default_config_file),
        )
    insecure = config.flag_is_true('ARVADOS_API_HOST_INSECURE', settings)
    return normalize_api_kwargs(
        version,
        None,  # No discoveryServiceUrl: it is built from the host.
        settings['ARVADOS_API_HOST'],
        settings['ARVADOS_API_TOKEN'],
        insecure=insecure,
        **kwargs,
    )
def api(version=None, cache=True, host=None, token=None, insecure=False,
        request_id=None, timeout=5*60, *,
        discoveryServiceUrl=None, **kwargs):
    """Dynamically build an Arvados API client

    This is the high-level "do what I mean" way to build an Arvados API
    client object. Call it with no arguments to build a client from user
    configuration; pass `host` and `token` just as you would write them in
    user configuration; or pass more arguments for lower-level control over
    the client.

    This function returns a `arvados.safeapi.ThreadSafeApiCache`, an
    API-compatible wrapper around `googleapiclient.discovery.Resource`. If
    you're handling concurrency yourself and/or your application is very
    performance-sensitive, consider calling `api_client` directly.

    Arguments:

    version: str | None
    : A string naming the version of the Arvados API to use. If not
      specified, the code will log a warning and fall back to 'v1'.

    host: str | None
    : The hostname and optional port number of the Arvados API server.

    token: str | None
    : The authentication token to send with each API call.

    discoveryServiceUrl: str | None
    : The URL used to discover APIs passed directly to
      `googleapiclient.discovery.build`.

    If `host`, `token`, and `discoveryServiceUrl` are all omitted, `host`
    and `token` will be loaded from the user's configuration. Otherwise, you
    must pass `token` and one of `host` or `discoveryServiceUrl`. It is an
    error to pass both `host` and `discoveryServiceUrl`.

    Other arguments are passed directly to `api_client`. See that function's
    docstring for more information about their meaning.
    """
    kwargs['cache'] = cache
    kwargs['insecure'] = insecure
    kwargs['request_id'] = request_id
    kwargs['timeout'] = timeout

    # Any explicit connection argument means the caller is not relying on
    # user configuration, so validate what was passed.
    if discoveryServiceUrl or host or token:
        connection_kwargs = normalize_api_kwargs(version, discoveryServiceUrl, host, token)
    else:
        connection_kwargs = api_kwargs_from_config(version)
    kwargs.update(connection_kwargs)

    # The version is passed separately from the other client arguments.
    api_version = kwargs.pop('version')
    # We do the import here to avoid a circular import at the top level.
    from .safeapi import ThreadSafeApiCache
    return ThreadSafeApiCache({}, {}, kwargs, api_version)
def api_from_config(version=None, apiconfig=None, **kwargs):
    """Build an Arvados API client from a configuration mapping

    This function builds an Arvados API client from a mapping with user
    configuration. The mapping is an argument, so you can use a
    configuration that's different from what the user has set up.

    This function returns a `arvados.safeapi.ThreadSafeApiCache`, an
    API-compatible wrapper around `googleapiclient.discovery.Resource`. If
    you're handling concurrency yourself and/or your application is very
    performance-sensitive, consider calling `api_client` directly.

    Arguments:

    version: str | None
    : A string naming the version of the Arvados API to use. If not
      specified, the code will log a warning and fall back to 'v1'.

    apiconfig: Mapping[str, str] | None
    : A mapping with entries for `ARVADOS_API_HOST`, `ARVADOS_API_TOKEN`,
      and optionally `ARVADOS_API_HOST_INSECURE`. If not provided, calls
      `arvados.config.settings` to get these parameters from user
      configuration.

    Other arguments are passed directly to `api_client`. See that function's
    docstring for more information about their meaning.
    """
    client_kwargs = api_kwargs_from_config(version, apiconfig, **kwargs)
    return api(**client_kwargs)