1 # Copyright (C) The Arvados Authors. All rights reserved.
3 # SPDX-License-Identifier: Apache-2.0
6 The code in this module builds Arvados API client objects you can use to submit
7 Arvados API requests. This includes extending the underlying HTTP client with
8 niceties such as caching, X-Request-Id header for tracking, and more. The main
9 client constructors are `api` and `api_from_config`.
12 from __future__ import absolute_import
13 from future import standard_library
14 standard_library.install_aliases()
15 from builtins import range
31 from apiclient import discovery as apiclient_discovery
32 from apiclient import errors as apiclient_errors
38 _logger = logging.getLogger('arvados.api')
40 MAX_IDLE_CONNECTION_DURATION = 30
42 # These constants supported our own retry logic that we've since removed in
43 # favor of using googleapiclient's num_retries. They're kept here purely for
44 # API compatibility, but set to 0 to indicate no retries happen.
45 RETRY_DELAY_INITIAL = 0
46 RETRY_DELAY_BACKOFF = 0
49 if sys.version_info >= (3,):
50 httplib2.SSLHandshakeError = None
class OrderedJsonModel(apiclient.model.JsonModel):
    """JSON model that keeps response fields in the order they were sent.

    Pass an instance as the `model` argument when building a client if the
    order of fields in API server responses matters to you:

        from arvados.api import OrderedJsonModel
        client = arvados.api('v1', ..., model=OrderedJsonModel())
    """

    def deserialize(self, content):
        """Decode `content` as UTF-8 JSON, loading objects as `OrderedDict`."""
        # Adapted, with very small changes, from the parent class'
        # implementation. Copyright (c) 2010 Google.
        text = content.decode('utf-8')
        parsed = json.loads(text, object_pairs_hook=collections.OrderedDict)
        # NOTE(review): unwrapping the 'data' member mirrors what the parent
        # JsonModel does when `_data_wrapper` is set; confirm against upstream.
        has_wrapper = (
            self._data_wrapper
            and isinstance(parsed, dict)
            and 'data' in parsed
        )
        if has_wrapper:
            return parsed['data']
        return parsed
72 _orig_retry_request = apiclient.http._retry_request
def _retry_request(http, num_retries, *args, **kwargs):
    """Call googleapiclient's retry helper with a per-client retry minimum.

    When `http` has a `num_retries` attribute, the larger of that value and
    the requested `num_retries` is used. Every other argument is forwarded
    unchanged to the original `apiclient.http._retry_request`.
    """
    # An `http` object without a `num_retries` attribute apparently hasn't
    # gone through _patch_http_request, possibly because it isn't an Arvados
    # API client. Fall back to the caller's own value so we don't interfere
    # with other Google API clients.
    client_minimum = getattr(http, 'num_retries', num_retries)
    effective_retries = max(num_retries, client_minimum)
    return _orig_retry_request(http, effective_retries, *args, **kwargs)
83 apiclient.http._retry_request = _retry_request
def _intercept_http_request(self, uri, method="GET", headers=None, **kwargs):
    """Send a request through the wrapped HTTP client with Arvados extras.

    `_patch_http_request` binds this function as the `request` method of an
    HTTP client object. Before delegating to the client's original `request`
    method (saved as `self.orig_http_request`) it:

    * sets an `X-Request-Id` header from `self._request_id()` unless the
      caller already supplied a non-empty one;
    * refuses a `body` longer than `self.max_request_size` when that limit
      is nonzero;
    * sets the `Authorization` header from `self.arvados_api_token`;
    * closes and forgets all cached connections when more than
      `self._max_keepalive_idle` seconds have passed since the last request.

    Arguments:

    uri, method, **kwargs
    : Passed through to the original `request` method.

    headers: dict | None
    : Request headers. A dict supplied by the caller is updated in place, so
      a caller that reuses the same dict (for example when retrying) sends
      the same `X-Request-Id` again. If omitted or `None`, a fresh dict is
      created for this call only.

    Returns whatever the original `request` method returns.

    Raises `apiclient_errors.MediaUploadSizeError` for an oversized body and
    `ssl.SSLCertVerificationError` (with an explanatory message) for
    certificate failures. Any exception leaving this function has
    "[request_id] " prepended to its first string argument, if it has one.
    """
    # A mutable default would be shared by every call that omits `headers`,
    # leaking one call's X-Request-Id and Authorization into the next.
    if headers is None:
        headers = {}
    if not headers.get('X-Request-Id'):
        headers['X-Request-Id'] = self._request_id()
    try:
        if (self.max_request_size and
            kwargs.get('body') and
            self.max_request_size < len(kwargs['body'])):
            raise apiclient_errors.MediaUploadSizeError("Request size %i bytes exceeds published limit of %i bytes" % (len(kwargs['body']), self.max_request_size))

        headers['Authorization'] = 'OAuth2 %s' % self.arvados_api_token

        if (time.time() - self._last_request_time) > self._max_keepalive_idle:
            # High probability of failure due to connection atrophy. Make
            # sure this request [re]opens a new connection by closing and
            # forgetting all cached connections first.
            for conn in self.connections.values():
                conn.close()
            self.connections.clear()

        self._last_request_time = time.time()
        try:
            return self.orig_http_request(uri, method, headers=headers, **kwargs)
        except ssl.SSLCertVerificationError as e:
            raise ssl.SSLCertVerificationError(e.args[0], "Could not connect to %s\n%s\nPossible causes: remote SSL/TLS certificate expired, or was issued by an untrusted certificate authority." % (uri, e)) from None
    except Exception as e:
        # Prepend "[request_id] " to the error message, which we
        # assume is the first string argument passed to the exception
        # constructor.
        for i, arg in enumerate(e.args or ()):
            if isinstance(arg, str):
                e.args = e.args[:i] + ("[{}] {}".format(headers['X-Request-Id'], arg),) + e.args[i+1:]
                raise type(e)(*e.args)
        # No string argument to annotate: re-raise the exception untouched.
        raise
def _patch_http_request(http, api_token, num_retries):
    """Attach Arvados request handling and state to an HTTP client object.

    The client's current `request` method is saved as `orig_http_request`
    and replaced with `_intercept_http_request` bound to the client. The
    attributes that function reads are initialized here as well.

    Arguments:

    http
    : The HTTP client object to patch. It is modified in place.

    api_token
    : Stored as `http.arvados_api_token`.

    num_retries
    : Stored as `http.num_retries`.

    Returns the same `http` object.
    """
    # Save the original method before overwriting it.
    http.orig_http_request = http.request
    http.request = types.MethodType(_intercept_http_request, http)
    initial_state = {
        'arvados_api_token': api_token,
        'max_request_size': 0,
        'num_retries': num_retries,
        '_last_request_time': 0,
        '_max_keepalive_idle': MAX_IDLE_CONNECTION_DURATION,
        '_request_id': util.new_request_id,
    }
    for attr_name, attr_value in initial_state.items():
        setattr(http, attr_name, attr_value)
    return http
def _close_connections(self):
    """Close every connection cached by this client's HTTP object.

    The connections are read from `self._http.connections`; the mapping
    itself is left in place.
    """
    http_client = self._http
    for connection in http_client.connections.values():
        connection.close()
134 # Monkey patch discovery._cast() so objects and arrays get serialized
135 # with json.dumps() instead of str().
136 _cast_orig = apiclient_discovery._cast
def _cast_objects_too(value, schema_type):
    """Serialize object and array parameters as JSON rather than with str().

    A value is passed to `json.dumps` when `schema_type` is 'object' or
    'array' and the value's exact type is neither `str` nor `bytes`.
    Everything else is handed to the original `discovery._cast`.
    """
    wants_json = schema_type == 'object' or schema_type == 'array'
    is_plain_text = type(value) in (str, bytes)
    if wants_json and not is_plain_text:
        return json.dumps(value)
    return _cast_orig(value, schema_type)
145 apiclient_discovery._cast = _cast_objects_too
# Convert apiclient's HttpErrors into our own API error subclass for better
# error reporting.
# Reassigning apiclient_errors.HttpError is not sufficient because most of the
# apiclient submodules import the class into their own namespace.
def _new_http_error(cls, *args, **kwargs):
    """Allocate an `errors.ApiError` whenever an `HttpError` is requested.

    This is meant to be installed as `apiclient_errors.HttpError.__new__`.
    Whatever `cls` is requested, allocation is delegated to the class after
    `HttpError` in the lookup order, with `errors.ApiError` as the type to
    create.
    """
    next_in_mro = super(apiclient_errors.HttpError, cls)
    return next_in_mro.__new__(errors.ApiError, *args, **kwargs)
154 apiclient_errors.HttpError.__new__ = staticmethod(_new_http_error)
def http_cache(data_type):
    """Return an on-disk HTTP cache for `data_type`, or `None`.

    The cache directory is `$HOME/.cache/arvados/<data_type>`. `None` is
    returned when `HOME` is unset or empty, or when the directory cannot be
    created.
    """
    home = os.environ.get('HOME')
    if not home:
        return None
    cache_dir = home + '/.cache/arvados/' + data_type
    try:
        util.mkdir_dash_p(cache_dir)
    except OSError:
        # NOTE(review): assumes directory-creation failures surface as
        # OSError; confirm against util.mkdir_dash_p.
        return None
    # max_age is 60*60*24*2, i.e. two days if the unit is seconds.
    two_days = 60*60*24*2
    return cache.SafeHTTPCache(cache_dir, max_age=two_days)
180 """Build an Arvados API client
182 This function returns a `googleapiclient.discovery.Resource` object
183 constructed from the given arguments. This is a relatively low-level
184 interface that requires all the necessary inputs as arguments. Most
185 users will prefer to use `api` which can accept more flexible inputs.
190 : A string naming the version of the Arvados API to use.
192 discoveryServiceUrl: str
193 : The URL used to discover APIs passed directly to
194 `googleapiclient.discovery.build`.
197 : The authentication token to send with each API call.
199 Keyword-only arguments:
202 : If true, loads the API discovery document from, or saves it to, a cache
203 on disk (located at `~/.cache/arvados/discovery`).
205 http: httplib2.Http | None
206 : The HTTP client object the API client object will use to make requests.
207 If not provided, this function will build its own to use. Either way, the
208 object will be patched as part of the build process.
211 : If true, ignore SSL certificate validation errors. Default `False`.
214 : The number of times to retry each API request if it encounters a
215 temporary failure. Default 10.
217 request_id: str | None
218 : Default `X-Request-Id` header value for outgoing requests that
219 don't already provide one. If `None` or omitted, generate a random
220 ID. When retrying failed requests, the same ID is used on all
224 : A timeout value for HTTP requests in seconds. Default 300 (5 minutes).
226 Additional keyword arguments will be passed directly to
227 `googleapiclient.discovery.build`.
230 http = httplib2.Http(
231 ca_certs=util.ca_certs_path(),
232 cache=http_cache('discovery') if cache else None,
233 disable_ssl_certificate_validation=bool(insecure),
235 if http.timeout is None:
236 http.timeout = timeout
237 http = _patch_http_request(http, token, num_retries)
239 svc = apiclient_discovery.build(
241 cache_discovery=False,
242 discoveryServiceUrl=discoveryServiceUrl,
244 num_retries=num_retries,
247 svc.api_token = token
248 svc.insecure = insecure
249 svc.request_id = request_id
250 svc.config = lambda: util.get_config_once(svc)
251 svc.vocabulary = lambda: util.get_vocabulary_once(svc)
252 svc.close_connections = types.MethodType(_close_connections, svc)
253 http.max_request_size = svc._rootDesc.get('maxRequestSize', 0)
255 http._request_id = lambda: svc.request_id or util.new_request_id()
258 def normalize_api_kwargs(
260 discoveryServiceUrl=None,
265 """Validate kwargs from `api` and build kwargs for `api_client`
267 This method takes high-level keyword arguments passed to the `api`
268 constructor and normalizes them into a new dictionary that can be passed
269 as keyword arguments to `api_client`. It raises `ValueError` if required
270 arguments are missing or conflict.
275 : A string naming the version of the Arvados API to use. If not specified,
276 the code will log a warning and fall back to 'v1'.
278 discoveryServiceUrl: str | None
279 : The URL used to discover APIs passed directly to
280 `googleapiclient.discovery.build`. It is an error to pass both
281 `discoveryServiceUrl` and `host`.
284 : The hostname and optional port number of the Arvados API server. Used to
285 build `discoveryServiceUrl`. It is an error to pass both
286 `discoveryServiceUrl` and `host`.
289 : The authentication token to send with each API call.
291 Additional keyword arguments will be included in the return value.
293 if discoveryServiceUrl and host:
294 raise ValueError("both discoveryServiceUrl and host provided")
295 elif discoveryServiceUrl:
296 url_src = "discoveryServiceUrl"
298 url_src = "host argument"
299 discoveryServiceUrl = 'https://%s/discovery/v1/apis/{api}/{apiVersion}/rest' % (host,)
301 # This specific error message gets priority for backwards compatibility.
302 raise ValueError("token argument provided, but host missing.")
304 raise ValueError("neither discoveryServiceUrl nor host provided")
306 raise ValueError("%s provided, but token missing" % (url_src,))
310 "Using default API version. Call arvados.api(%r) instead.",
314 'discoveryServiceUrl': discoveryServiceUrl,
def api_kwargs_from_config(version=None, apiconfig=None, **kwargs):
    """Turn Arvados configuration settings into `api_client` arguments

    Given a mapping of configuration settings such as `ARVADOS_API_HOST`,
    this function produces the keyword arguments to pass to `api_client` by
    way of `normalize_api_kwargs`. It raises `ValueError` when
    `ARVADOS_API_HOST` or `ARVADOS_API_TOKEN` is absent from the mapping.

    Arguments:

    version: str | None
    : A string naming the version of the Arvados API to use. Passed through
      to `normalize_api_kwargs`.

    apiconfig: Mapping[str, str] | None
    : A mapping with entries for `ARVADOS_API_HOST`, `ARVADOS_API_TOKEN`, and
      optionally `ARVADOS_API_HOST_INSECURE`. If not provided, the result of
      `arvados.config.settings` is used instead.

    Additional keyword arguments are forwarded to `normalize_api_kwargs`.
    """
    settings = config.settings() if apiconfig is None else apiconfig
    required_keys = ('ARVADOS_API_HOST', 'ARVADOS_API_TOKEN')
    unset_keys = [name for name in required_keys if name not in settings]
    if unset_keys:
        raise ValueError(
            "%s not set.\nPlease set in %s or export environment variable." %
            (" and ".join(unset_keys), config.default_config_file),
        )
    # Configuration supplies a host, never a discovery URL, so that
    # positional slot is left empty.
    return normalize_api_kwargs(
        version,
        None,
        settings['ARVADOS_API_HOST'],
        settings['ARVADOS_API_TOKEN'],
        insecure=config.flag_is_true('ARVADOS_API_HOST_INSECURE', settings),
        **kwargs,
    )
def api(version=None, cache=True, host=None, token=None, insecure=False,
        request_id=None, timeout=5*60, *,
        discoveryServiceUrl=None, **kwargs):
    """Build an Arvados API client from whatever inputs are available

    This is the high-level "do what I mean" constructor. Call it with no
    arguments to build a client from user configuration, pass `host` and
    `token` just as you would write them in user configuration, or pass
    further arguments for lower-level control over the client.

    The return value is an `arvados.safeapi.ThreadSafeApiCache`, an
    API-compatible wrapper around `googleapiclient.discovery.Resource`. If
    you're handling concurrency yourself and/or your application is very
    performance-sensitive, consider calling `api_client` directly.

    Arguments:

    version: str | None
    : A string naming the version of the Arvados API to use.

    host: str | None
    : The hostname and optional port number of the Arvados API server.

    token: str | None
    : The authentication token to send with each API call.

    discoveryServiceUrl: str | None
    : The URL used to discover APIs passed directly to
      `googleapiclient.discovery.build`.

    If `host`, `token`, and `discoveryServiceUrl` are all omitted, the
    connection settings come from `api_kwargs_from_config`. Otherwise they
    are validated by `normalize_api_kwargs`.

    Other arguments are passed directly to `api_client`. See that function's
    docstring for more information about their meaning.
    """
    kwargs['cache'] = cache
    kwargs['insecure'] = insecure
    kwargs['request_id'] = request_id
    kwargs['timeout'] = timeout
    if discoveryServiceUrl or host or token:
        connection_kwargs = normalize_api_kwargs(
            version, discoveryServiceUrl, host, token)
    else:
        connection_kwargs = api_kwargs_from_config(version)
    kwargs.update(connection_kwargs)
    api_version = kwargs.pop('version')
    # We do the import here to avoid a circular import at the top level.
    from .safeapi import ThreadSafeApiCache
    return ThreadSafeApiCache({}, {}, kwargs, api_version)
def api_from_config(version=None, apiconfig=None, **kwargs):
    """Build an Arvados API client from an explicit configuration mapping

    Use this when the client should be built from settings other than the
    ones the user has configured: the mapping is passed in as an argument.

    The return value is whatever `api` returns: an
    `arvados.safeapi.ThreadSafeApiCache`, an API-compatible wrapper around
    `googleapiclient.discovery.Resource`. If you're handling concurrency
    yourself and/or your application is very performance-sensitive, consider
    calling `api_client` directly.

    Arguments:

    version: str | None
    : A string naming the version of the Arvados API to use.

    apiconfig: Mapping[str, str] | None
    : A mapping with entries for `ARVADOS_API_HOST`, `ARVADOS_API_TOKEN`, and
      optionally `ARVADOS_API_HOST_INSECURE`. If not provided, calls
      `arvados.config.settings` to get these parameters from user
      configuration.

    Other arguments are passed on through `api_kwargs_from_config` to `api`.
    """
    client_kwargs = api_kwargs_from_config(version, apiconfig, **kwargs)
    return api(**client_kwargs)