1 # Copyright (C) The Arvados Authors. All rights reserved.
3 # SPDX-License-Identifier: Apache-2.0
"""Build Arvados API client objects.

The code in this module builds Arvados API client objects you can use to submit
Arvados API requests. This includes extending the underlying HTTP client with
niceties such as caching, X-Request-Id header for tracking, and more. The main
client constructors are `api` and `api_from_config`.
"""
12 from __future__ import absolute_import
13 from future import standard_library
14 standard_library.install_aliases()
15 from builtins import range
31 from apiclient import discovery as apiclient_discovery
32 from apiclient import errors as apiclient_errors
# Logger used by the functions in this module.
_logger = logging.getLogger('arvados.api')

# Idle time, in seconds, after which _intercept_http_request closes and
# forgets cached connections before sending the next request.
MAX_IDLE_CONNECTION_DURATION = 30

# These constants supported our own retry logic that we've since removed in
# favor of using googleapiclient's num_retries. They're kept here purely for
# API compatibility, but set to 0 to indicate no retries happen.
RETRY_DELAY_INITIAL = 0
RETRY_DELAY_BACKOFF = 0

# An unused HTTP 5xx status code to request a retry internally.
# See _intercept_http_request. This should not be user-visible.
_RETRY_4XX_STATUS = 545

# On Python 3, force httplib2.SSLHandshakeError to None.
# NOTE(review): the reason is not visible in this file - confirm before
# changing or removing this assignment.
if sys.version_info >= (3,):
    httplib2.SSLHandshakeError = None
class OrderedJsonModel(apiclient.model.JsonModel):
    """Model class for JSON that preserves the contents' order.

    API clients that care about preserving the order of fields in API
    server responses can use this model to do so, like this:

        from arvados.api import OrderedJsonModel
        client = arvados.api('v1', ..., model=OrderedJsonModel())
    """

    def deserialize(self, content):
        """Parse a JSON response body into order-preserving mappings.

        Arguments:

        content: bytes | str
        : The JSON text of the response. Bytes are decoded as UTF-8 first;
          text is parsed as-is.

        Returns the parsed value, with every JSON object loaded as a
        `collections.OrderedDict`. When the model's data wrapper is enabled
        and the top-level value is an object with a `'data'` key, returns the
        value stored under that key instead.
        """
        # This is a very slightly modified version of the parent class'
        # implementation.  Copyright (c) 2010 Google.
        if isinstance(content, (bytes, bytearray)):
            # Only byte strings need decoding; calling .decode() on text
            # would raise AttributeError.
            content = content.decode('utf-8')
        body = json.loads(content, object_pairs_hook=collections.OrderedDict)
        if self._data_wrapper and isinstance(body, dict) and 'data' in body:
            body = body['data']
        return body
# Keep a reference to googleapiclient's implementation so the wrapper below
# can delegate to it.
_orig_retry_request = apiclient.http._retry_request
def _retry_request(http, num_retries, *args, **kwargs):
    """Wrap googleapiclient's `_retry_request` for Arvados HTTP clients.

    When `http` has a `num_retries` attribute, the request is retried up to
    the larger of that value and the `num_retries` argument. If the final
    response still carries the internal `_RETRY_4XX_STATUS` marker, its
    status is restored from the response's `'status'` entry before returning.

    All arguments are passed through to the original `_retry_request`.
    Returns the `(response, body)` tuple it produced.
    """
    try:
        num_retries = max(num_retries, http.num_retries)
    except AttributeError:
        # `http` client object does not have a `num_retries` attribute.
        # It apparently hasn't gone through _patch_http_request, possibly
        # because this isn't an Arvados API client. Pass through to
        # avoid interfering with other Google API clients.
        return _orig_retry_request(http, num_retries, *args, **kwargs)
    response, body = _orig_retry_request(http, num_retries, *args, **kwargs)
    # If _intercept_http_request ran out of retries for a 4xx response,
    # restore the original status code.
    if response.status == _RETRY_4XX_STATUS:
        response.status = int(response['status'])
    return (response, body)
apiclient.http._retry_request = _retry_request
def _intercept_http_request(self, uri, method="GET", headers=None, **kwargs):
    """Send a request through the wrapped HTTP client with Arvados additions.

    `_patch_http_request` installs this function as the `request` method of
    an HTTP client object. It adds `X-Request-Id` and `Authorization`
    headers, enforces the client's maximum request size, drops cached
    connections that have been idle too long, and marks retryable 4xx
    responses with `_RETRY_4XX_STATUS` so googleapiclient retries them.

    Arguments:

    uri
    : The URI to request.

    method
    : The HTTP method. Default `"GET"`.

    headers: dict | None
    : Request headers. A dictionary passed here is updated in place. If
      `None` or omitted, a new dictionary is created for this request.

    Additional keyword arguments are passed to the original `request` method.

    Returns the `(response, body)` tuple from the original `request` method.
    Raises `apiclient_errors.MediaUploadSizeError` when the request body is
    larger than the client's nonzero `max_request_size`. Any exception raised
    while handling the request has `"[request_id] "` prepended to its first
    string argument.
    """
    if headers is None:
        # A shared default dictionary would carry the first request's
        # X-Request-Id and Authorization headers into every later call.
        headers = {}
    if not headers.get('X-Request-Id'):
        headers['X-Request-Id'] = self._request_id()
    try:
        if (self.max_request_size and
            kwargs.get('body') and
            self.max_request_size < len(kwargs['body'])):
            raise apiclient_errors.MediaUploadSizeError("Request size %i bytes exceeds published limit of %i bytes" % (len(kwargs['body']), self.max_request_size))

        headers['Authorization'] = 'OAuth2 %s' % self.arvados_api_token

        if (time.time() - self._last_request_time) > self._max_keepalive_idle:
            # High probability of failure due to connection atrophy. Make
            # sure this request [re]opens a new connection by closing and
            # forgetting all cached connections first.
            for conn in self.connections.values():
                conn.close()
            self.connections.clear()

        self._last_request_time = time.time()
        try:
            response, body = self.orig_http_request(uri, method, headers=headers, **kwargs)
        except ssl.SSLCertVerificationError as e:
            raise ssl.SSLCertVerificationError(e.args[0], "Could not connect to %s\n%s\nPossible causes: remote SSL/TLS certificate expired, or was issued by an untrusted certificate authority." % (uri, e)) from None
        # googleapiclient only retries 403, 429, and 5xx status codes.
        # If we got another 4xx status that we want to retry, convert it into
        # 5xx so googleapiclient handles it the way we want.
        if response.status in retry._HTTP_CAN_RETRY and response.status < 500:
            response.status = _RETRY_4XX_STATUS
        return (response, body)
    except Exception as e:
        # Prepend "[request_id] " to the error message, which we
        # assume is the first string argument passed to the exception
        # constructor.
        for i, arg in enumerate(e.args or ()):
            if isinstance(arg, str):
                e.args = e.args[:i] + ("[{}] {}".format(headers['X-Request-Id'], arg),) + e.args[i+1:]
                raise type(e)(*e.args)
        # No string argument to annotate: re-raise the exception unchanged.
        raise
def _patch_http_request(http, api_token, num_retries):
    """Turn an HTTP client object into an Arvados-aware one, in place.

    Stores the API token and retry count on `http`, saves its original
    `request` method as `orig_http_request`, and replaces `request` with
    `_intercept_http_request` bound to the object. Also initializes the
    attributes `_intercept_http_request` reads: the request size limit
    (0, which disables the check), the idle-connection bookkeeping, and the
    request ID generator.

    Returns the same `http` object so the call can be used in an assignment.
    """
    http.arvados_api_token = api_token
    http.max_request_size = 0
    http.num_retries = num_retries
    http.orig_http_request = http.request
    http.request = types.MethodType(_intercept_http_request, http)
    # Zero makes the first request look "long idle", so it starts by
    # clearing any cached connections.
    http._last_request_time = 0
    http._max_keepalive_idle = MAX_IDLE_CONNECTION_DURATION
    http._request_id = util.new_request_id
    return http
def _close_connections(self):
    """Close every connection cached by this object's HTTP client.

    Calls `close()` on each value of `self._http.connections`. The
    connection table itself is left in place.
    """
    for conn in self._http.connections.values():
        conn.close()
# Monkey patch discovery._cast() so objects and arrays get serialized
# with json.dumps() instead of str().
_cast_orig = apiclient_discovery._cast
def _cast_objects_too(value, schema_type):
    """Serialize object and array parameters as JSON.

    Values whose type is exactly `str` or `bytes`, and values for any other
    schema type, are handed to the original `discovery._cast()`.
    """
    wants_json = schema_type in ('object', 'array')
    already_text = type(value) in (str, bytes)
    if wants_json and not already_text:
        return json.dumps(value)
    return _cast_orig(value, schema_type)
apiclient_discovery._cast = _cast_objects_too
# Make every apiclient HttpError get constructed as our own API error
# subclass instead.
# Reassigning apiclient_errors.HttpError is not sufficient because most of the
# apiclient submodules import the class into their own namespace.
def _new_http_error(cls, *args, **kwargs):
    # Whatever class was requested, allocate the instance as errors.ApiError,
    # using the __new__ that follows HttpError in the MRO of cls.
    return super(apiclient_errors.HttpError, cls).__new__(
        errors.ApiError, *args, **kwargs)
# __new__ is looked up as a static method, so wrap the function explicitly.
apiclient_errors.HttpError.__new__ = staticmethod(_new_http_error)
def http_cache(data_type):
    """Return an on-disk HTTP cache for `data_type`, or `None`.

    The cache directory is `$HOME/.cache/arvados/<data_type>`; it is created
    if necessary. Returns `None` when `HOME` is unset or empty, or when the
    directory cannot be created, so callers fall back to no caching.

    Arguments:

    data_type: str
    : The name of the subdirectory to use under `~/.cache/arvados/`.
    """
    homedir = os.environ.get('HOME')
    if not homedir:
        return None
    path = homedir + '/.cache/arvados/' + data_type
    try:
        util.mkdir_dash_p(path)
    except OSError:
        # NOTE(review): the exception type was restored from context;
        # confirm it matches what util.mkdir_dash_p raises.
        return None
    return cache.SafeHTTPCache(path, max_age=60*60*24*2)
196 """Build an Arvados API client
198 This function returns a `googleapiclient.discovery.Resource` object
199 constructed from the given arguments. This is a relatively low-level
200 interface that requires all the necessary inputs as arguments. Most
201 users will prefer to use `api` which can accept more flexible inputs.
206 : A string naming the version of the Arvados API to use.
208 discoveryServiceUrl: str
209 : The URL used to discover APIs passed directly to
210 `googleapiclient.discovery.build`.
213 : The authentication token to send with each API call.
215 Keyword-only arguments:
218 : If true, loads the API discovery document from, or saves it to, a cache
219 on disk (located at `~/.cache/arvados/discovery`).
221 http: httplib2.Http | None
222 : The HTTP client object the API client object will use to make requests.
223 If not provided, this function will build its own to use. Either way, the
224 object will be patched as part of the build process.
227 : If true, ignore SSL certificate validation errors. Default `False`.
230 : The number of times to retry each API request if it encounters a
231 temporary failure. Default 10.
233 request_id: str | None
234 : Default `X-Request-Id` header value for outgoing requests that
235 don't already provide one. If `None` or omitted, generate a random
236 ID. When retrying failed requests, the same ID is used on all
240 : A timeout value for HTTP requests in seconds. Default 300 (5 minutes).
242 Additional keyword arguments will be passed directly to
243 `googleapiclient.discovery.build`.
246 http = httplib2.Http(
247 ca_certs=util.ca_certs_path(),
248 cache=http_cache('discovery') if cache else None,
249 disable_ssl_certificate_validation=bool(insecure),
251 if http.timeout is None:
252 http.timeout = timeout
253 http = _patch_http_request(http, token, num_retries)
255 svc = apiclient_discovery.build(
257 cache_discovery=False,
258 discoveryServiceUrl=discoveryServiceUrl,
260 num_retries=num_retries,
263 svc.api_token = token
264 svc.insecure = insecure
265 svc.request_id = request_id
266 svc.config = lambda: util.get_config_once(svc)
267 svc.vocabulary = lambda: util.get_vocabulary_once(svc)
268 svc.close_connections = types.MethodType(_close_connections, svc)
269 http.max_request_size = svc._rootDesc.get('maxRequestSize', 0)
271 http._request_id = lambda: svc.request_id or util.new_request_id()
def normalize_api_kwargs(
        version=None,
        discoveryServiceUrl=None,
        host=None,
        token=None,
        **kwargs,
):
    """Validate kwargs from `api` and build kwargs for `api_client`

    This method takes high-level keyword arguments passed to the `api`
    constructor and normalizes them into a new dictionary that can be passed
    as keyword arguments to `api_client`. It raises `ValueError` if required
    arguments are missing or conflict.

    Arguments:

    version: str | None
    : A string naming the version of the Arvados API to use. If not specified,
      the code will log a warning and fall back to 'v1'.

    discoveryServiceUrl: str | None
    : The URL used to discover APIs passed directly to
      `googleapiclient.discovery.build`. It is an error to pass both
      `discoveryServiceUrl` and `host`.

    host: str | None
    : The hostname and optional port number of the Arvados API server. Used to
      build `discoveryServiceUrl`. It is an error to pass both
      `discoveryServiceUrl` and `host`.

    token: str
    : The authentication token to send with each API call.

    Additional keyword arguments will be included in the return value.
    """
    if discoveryServiceUrl and host:
        raise ValueError("both discoveryServiceUrl and host provided")
    elif discoveryServiceUrl:
        url_src = "discoveryServiceUrl"
    elif host:
        url_src = "host argument"
        discoveryServiceUrl = 'https://%s/discovery/v1/apis/{api}/{apiVersion}/rest' % (host,)
    elif token:
        # This specific error message gets priority for backwards compatibility.
        raise ValueError("token argument provided, but host missing.")
    else:
        raise ValueError("neither discoveryServiceUrl nor host provided")
    if not token:
        raise ValueError("%s provided, but token missing" % (url_src,))
    if not version:
        version = 'v1'
        # NOTE(review): the logging method on this call was not visible in the
        # listing this block was restored from; `info` is assumed. Confirm
        # against the intended log level (the docstring says "warning").
        _logger.info(
            "Using default API version. Call arvados.api(%r) instead.",
            version,
        )
    return {
        'discoveryServiceUrl': discoveryServiceUrl,
        'token': token,
        'version': version,
        **kwargs,
    }
def api_kwargs_from_config(version=None, apiconfig=None, **kwargs):
    """Build `api_client` keyword arguments from configuration

    This function accepts a mapping with Arvados configuration settings like
    `ARVADOS_API_HOST` and converts them into a mapping of keyword arguments
    that can be passed to `api_client`. If `ARVADOS_API_HOST` or
    `ARVADOS_API_TOKEN` are not configured, it raises `ValueError`.

    Arguments:

    version: str | None
    : A string naming the version of the Arvados API to use. If not specified,
      the code will log a warning and fall back to 'v1'.

    apiconfig: Mapping[str, str] | None
    : A mapping with entries for `ARVADOS_API_HOST`, `ARVADOS_API_TOKEN`, and
      optionally `ARVADOS_API_HOST_INSECURE`. If not provided, calls
      `arvados.config.settings` to get these parameters from user configuration.

    Additional keyword arguments will be included in the return value.
    """
    if apiconfig is None:
        apiconfig = config.settings()
    # Name every required setting that is absent so the error message lists
    # them all at once.
    missing = " and ".join(
        key
        for key in ['ARVADOS_API_HOST', 'ARVADOS_API_TOKEN']
        if key not in apiconfig
    )
    if missing:
        raise ValueError(
            "%s not set.\nPlease set in %s or export environment variable." %
            (missing, config.default_config_file),
        )
    return normalize_api_kwargs(
        version,
        # Configuration supplies a host, never a discovery URL.
        None,
        apiconfig['ARVADOS_API_HOST'],
        apiconfig['ARVADOS_API_TOKEN'],
        insecure=config.flag_is_true('ARVADOS_API_HOST_INSECURE', apiconfig),
        **kwargs,
    )
def api(version=None, cache=True, host=None, token=None, insecure=False,
        request_id=None, timeout=5*60, *,
        discoveryServiceUrl=None, **kwargs):
    """Dynamically build an Arvados API client

    This function provides a high-level "do what I mean" interface to build an
    Arvados API client object. You can call it with no arguments to build a
    client from user configuration; pass `host` and `token` arguments just
    like you would write in user configuration; or pass additional arguments
    for lower-level control over the client.

    This function returns a `arvados.safeapi.ThreadSafeApiCache`, an
    API-compatible wrapper around `googleapiclient.discovery.Resource`. If
    you're handling concurrency yourself and/or your application is very
    performance-sensitive, consider calling `api_client` directly.

    Arguments:

    version: str | None
    : A string naming the version of the Arvados API to use. If not specified,
      the code will log a warning and fall back to 'v1'.

    host: str | None
    : The hostname and optional port number of the Arvados API server.

    token: str | None
    : The authentication token to send with each API call.

    discoveryServiceUrl: str | None
    : The URL used to discover APIs passed directly to
      `googleapiclient.discovery.build`.

    If `host`, `token`, and `discoveryServiceUrl` are all omitted, `host` and
    `token` will be loaded from the user's configuration. Otherwise, you must
    pass `token` and one of `host` or `discoveryServiceUrl`. It is an error to
    pass both `host` and `discoveryServiceUrl`.

    Other arguments are passed directly to `api_client`. See that function's
    docstring for more information about their meaning.
    """
    # Fold the named pass-through options into kwargs so they reach
    # `api_client` together with any extra keyword arguments.
    kwargs.update(
        cache=cache,
        insecure=insecure,
        request_id=request_id,
        timeout=timeout,
    )
    if discoveryServiceUrl or host or token:
        kwargs.update(normalize_api_kwargs(version, discoveryServiceUrl, host, token))
    else:
        kwargs.update(api_kwargs_from_config(version))
    version = kwargs.pop('version')
    # We do the import here to avoid a circular import at the top level.
    from .safeapi import ThreadSafeApiCache
    return ThreadSafeApiCache({}, {}, kwargs, version)
def api_from_config(version=None, apiconfig=None, **kwargs):
    """Build an Arvados API client from a configuration mapping

    This function builds an Arvados API client from a mapping with user
    configuration. It accepts that mapping as an argument, so you can use a
    configuration that's different from what the user has set up.

    This function returns a `arvados.safeapi.ThreadSafeApiCache`, an
    API-compatible wrapper around `googleapiclient.discovery.Resource`. If
    you're handling concurrency yourself and/or your application is very
    performance-sensitive, consider calling `api_client` directly.

    Arguments:

    version: str | None
    : A string naming the version of the Arvados API to use. If not specified,
      the code will log a warning and fall back to 'v1'.

    apiconfig: Mapping[str, str] | None
    : A mapping with entries for `ARVADOS_API_HOST`, `ARVADOS_API_TOKEN`, and
      optionally `ARVADOS_API_HOST_INSECURE`. If not provided, calls
      `arvados.config.settings` to get these parameters from user configuration.

    Other arguments are passed directly to `api_client`. See that function's
    docstring for more information about their meaning.
    """
    client_kwargs = api_kwargs_from_config(version, apiconfig, **kwargs)
    return api(**client_kwargs)