13 from apiclient import discovery as apiclient_discovery
14 from apiclient import errors as apiclient_errors
# Module-level logger; messages go to the 'arvados.api' logging channel.
20 _logger = logging.getLogger('arvados.api')
# Idle threshold copied to http._max_keepalive_idle by _patch_http_request().
# The interceptor compares it against a time.time() difference, so the unit
# is seconds.
22 MAX_IDLE_CONNECTION_DURATION = 30
# First retry delay used by the interceptor (its log messages print it as
# "%d s").
23 RETRY_DELAY_INITIAL = 2
# Factor the delay is multiplied by after each failed attempt.
24 RETRY_DELAY_BACKOFF = 2
# JsonModel subclass whose deserialize() parses response bodies with
# object_pairs_hook=collections.OrderedDict, so JSON objects keep the key
# order found in the response text.
# NOTE(review): the end of the class docstring and the tail of deserialize()
# (including its return statement) are not visible here -- confirm against
# the full file before changing this class.
27 class OrderedJsonModel(apiclient.model.JsonModel):
28 """Model class for JSON that preserves the contents' order.
30 API clients that care about preserving the order of fields in API
31 server responses can use this model to do so, like this::
33 from arvados.api import OrderedJsonModel
34 client = arvados.api('v1', ..., model=OrderedJsonModel())
37 def deserialize(self, content):
38 # This is a very slightly modified version of the parent class'
39 # implementation. Copyright (c) 2010 Google.
# `content` is decoded as UTF-8 first, so it is expected to be a byte string.
40 content = content.decode('utf-8')
# OrderedDict as the pairs hook is what preserves field order.
41 body = json.loads(content, object_pairs_hook=collections.OrderedDict)
# Special-cases a dict body carrying a 'data' key when self._data_wrapper is
# set; presumably it unwraps body['data'] like the parent class -- the branch
# body is not visible, TODO confirm.
42 if self._data_wrapper and isinstance(body, dict) and 'data' in body:
# Replacement for an HTTP client object's request() method; it is bound onto
# the object by _patch_http_request(), so `self` is that client object.
#
# What the visible code does, in order:
#   * refuses request bodies longer than self.max_request_size (when that
#     limit is non-zero) by raising apiclient_errors.MediaUploadSizeError;
#   * adds the X-External-Client and Authorization headers;
#   * drops cached connections after a long idle period;
#   * makes up to self._retry_count attempts for retryable methods, then one
#     final attempt.
#
# Returns whatever self.orig_http_request(uri, method, **kwargs) returns.
#
# NOTE(review): several statements of this function (the `try:`, the second
# `except` clause, the sleep and the connection close calls) are not visible
# here; the comments below only describe what can be seen.
47 def _intercept_http_request(self, uri, method="GET", **kwargs):
# Size check is skipped when max_request_size is 0/None or there is no body.
48 if (self.max_request_size and
49 kwargs.get('body') and
50 self.max_request_size < len(kwargs['body'])):
51 raise apiclient_errors.MediaUploadSizeError("Request size %i bytes exceeds published limit of %i bytes" % (len(kwargs['body']), self.max_request_size))
# Make sure there is a headers dict to add to.
53 if 'headers' not in kwargs:
54 kwargs['headers'] = {}
# Only the exact string "true" enables the external-client header.
56 if config.get("ARVADOS_EXTERNAL_CLIENT", "") == "true":
57 kwargs['headers']['X-External-Client'] = '1'
# The token stored by _patch_http_request() is sent on every request.
59 kwargs['headers']['Authorization'] = 'OAuth2 %s' % self.arvados_api_token
# POST (and any other method not listed) gets retry_count 0, so it is only
# attempted once, by the final call at the bottom.
61 retryable = method in [
62 'DELETE', 'GET', 'HEAD', 'OPTIONS', 'PUT']
63 retry_count = self._retry_count if retryable else 0
66 time.time() - self._last_request_time > self._max_keepalive_idle):
67 # High probability of failure due to connection atrophy. Make
68 # sure this request [re]opens a new connection by closing and
69 # forgetting all cached connections first.
70 for conn in self.connections.itervalues():
72 self.connections.clear()
# The delay starts at the configured initial value and is multiplied by the
# backoff factor after each failed attempt (see the end of the loop).
74 delay = self._retry_delay_initial
75 for _ in range(retry_count):
76 self._last_request_time = time.time()
78 return self.orig_http_request(uri, method, **kwargs)
79 except httplib.HTTPException:
80 _logger.debug("Retrying API request in %d s after HTTP error",
83 # This is the one case where httplib2 doesn't close the
84 # underlying connection first. Close all open
85 # connections, expecting this object only has the one
86 # connection to the API server. This is safe because
87 # httplib2 reopens connections when needed.
88 _logger.debug("Retrying API request in %d s after socket error",
90 for conn in self.connections.itervalues():
93 delay = delay * self._retry_delay_backoff
# Final attempt after the retry loop; it appears to sit outside the
# try/except, so an exception here would propagate to the caller -- TODO
# confirm against the full file.
95 self._last_request_time = time.time()
96 return self.orig_http_request(uri, method, **kwargs)
def _patch_http_request(http, api_token):
    """Instrument an HTTP client object for Arvados API requests.

    The object is modified in place: its current `request` callable is
    saved as `http.orig_http_request` and `http.request` is rebound to
    `_intercept_http_request`, which reads the other attributes set here.

    Arguments:

    http
      The HTTP client object to patch (`api()` passes an
      `httplib2.Http` instance when the caller does not supply one).

    api_token
      Stored as `http.arvados_api_token`; the interceptor sends it in
      the Authorization header of every request.

    Returns the same `http` object, so the result can be assigned
    directly, as `api()` does.
    """
    http.arvados_api_token = api_token
    # 0 disables the request size check until the caller learns the real
    # limit (api() fills it in from the discovery document).
    http.max_request_size = 0
    # Keep the original request callable before replacing it; the
    # interceptor delegates to it.
    http.orig_http_request = http.request
    http.request = types.MethodType(_intercept_http_request, http)
    # 0 means "no request made yet", so the idle check is skipped at first.
    http._last_request_time = 0
    http._max_keepalive_idle = MAX_IDLE_CONNECTION_DURATION
    http._retry_delay_initial = RETRY_DELAY_INITIAL
    http._retry_delay_backoff = RETRY_DELAY_BACKOFF
    # RETRY_COUNT is expected to be a module-level constant alongside the
    # RETRY_DELAY_* values.
    http._retry_count = RETRY_COUNT
    # Callers assign this function's result back to their http reference.
    return http
# Keep a reference to apiclient's original _cast() so that the wrapper
# defined below can delegate to it for every value it does not handle itself.
110 # Monkey patch discovery._cast() so objects and arrays get serialized
111 # with json.dumps() instead of str().
112 _cast_orig = apiclient_discovery._cast
def _cast_objects_too(value, schema_type):
    """Serialize a parameter value according to its schema type.

    Values destined for an 'object' or 'array' parameter are encoded with
    json.dumps(), unless they are already strings (assumed to be
    pre-encoded by the caller).  Everything else is handed to apiclient's
    original _cast().

    Arguments:

    value
      The parameter value to serialize.

    schema_type
      The type name given for the parameter by the API schema.

    Returns the serialized value.
    """
    # isinstance() rather than an exact type comparison, so that str
    # subclasses are also treated as already-encoded strings.
    if schema_type in ('object', 'array') and not isinstance(value, str):
        return json.dumps(value)
    return _cast_orig(value, schema_type)
# Install the wrapper: from here on apiclient_discovery._cast refers to
# _cast_objects_too, which falls back to the saved original.
120 apiclient_discovery._cast = _cast_objects_too
# apiclient raises its own HttpError; callers of this module should get our
# own API error subclass instead.  Rebinding apiclient_errors.HttpError to a
# different class would not be enough, because most apiclient submodules
# import the class into their own namespace.  Replacing the class's __new__
# affects every one of those references.
def _new_http_error(cls, *args, **kwargs):
    """Allocate an errors.ApiError whenever an HttpError is constructed.

    The requested class `cls` is used only to locate the next __new__ after
    apiclient_errors.HttpError; the instance created is always an
    errors.ApiError.
    """
    parent = super(apiclient_errors.HttpError, cls)
    return parent.__new__(errors.ApiError, *args, **kwargs)
apiclient_errors.HttpError.__new__ = staticmethod(_new_http_error)
# Build an HTTP cache object stored under the user's home directory.
#
# data_type is appended to "$HOME/.cache/arvados/" to form the cache
# directory (api() passes 'discovery').  The directory is created with
# util.mkdir_dash_p() and wrapped in cache.SafeHTTPCache.
#
# NOTE(review): the body of the `if` below and whatever surrounds the
# mkdir_dash_p() call are not visible here; presumably the function returns
# without a cache when HOME is unset or the directory cannot be created --
# TODO confirm against the full file.
131 def http_cache(data_type):
132 homedir = os.environ.get('HOME')
# Covers both an unset HOME (None) and an empty string.
133 if not homedir or len(homedir) == 0:
135 path = homedir + '/.cache/arvados/' + data_type
137 util.mkdir_dash_p(path)
# 60*60*24*2 = 172800; presumably seconds, i.e. two days -- the unit is
# defined by SafeHTTPCache, verify there.
140 return cache.SafeHTTPCache(path, max_age=60*60*24*2)
# Build an API client object with apiclient_discovery.build().
#
# Visible behaviour, in order:
#   * when kwargs contains 'discoveryServiceUrl', ValueError is raised for a
#     conflicting host or a missing token;
#   * when neither host nor token is given, the call is delegated to
#     api_from_config();
#   * when only one of host/token is given, ValueError is raised;
#   * otherwise the discovery URL is derived from host;
#   * an httplib2.Http object is created if the caller passed no 'http'
#     kwarg, and the http object is patched with _patch_http_request();
#   * the service is built, and the token, the insecure flag and the
#     server's maxRequestSize are recorded.
#
# NOTE(review): a number of lines (several `if` conditions, the end of the
# docstring and the final return) are not visible here; the inline notes
# below describe only the statements that can be seen.
142 def api(version=None, cache=True, host=None, token=None, insecure=False, **kwargs):
143 """Return an apiclient Resources object for an Arvados instance.
146 A string naming the version of the Arvados API to use (for
150 Use a cache (~/.cache/arvados/discovery) for the discovery
154 The Arvados API server host (and optional :port) to connect to.
157 The authentication token to send with each API call.
160 If True, ignore SSL certificate validation errors.
162 Additional keyword arguments will be passed directly to
163 `apiclient_discovery.build` if a new Resource object is created.
164 If the `discoveryServiceUrl` or `http` keyword arguments are
165 missing, this function will set default values for them, based on
166 the current Arvados configuration settings.
172 _logger.info("Using default API version. " +
173 "Call arvados.api('%s') instead." %
175 if 'discoveryServiceUrl' in kwargs:
177 raise ValueError("both discoveryServiceUrl and host provided")
178 # Here we can't use a token from environment, config file,
179 # etc. Those probably have nothing to do with the host
180 # provided by the caller.
182 raise ValueError("discoveryServiceUrl provided, but token missing")
185 elif not host and not token:
186 return api_from_config(version=version, cache=cache, **kwargs)
188 # Caller provided one but not the other
190 raise ValueError("token argument provided, but host missing.")
192 raise ValueError("host argument provided, but token missing.")
195 # Caller wants us to build the discoveryServiceUrl
196 kwargs['discoveryServiceUrl'] = (
197 'https://%s/discovery/v1/apis/{api}/{apiVersion}/rest' % (host,))
# Only build an Http object when the caller did not supply one.
199 if 'http' not in kwargs:
200 http_kwargs = {'ca_certs': util.ca_certs_path()}
# Presumably guarded by `if cache:` -- the condition is not visible here.
202 http_kwargs['cache'] = http_cache('discovery')
# Presumably guarded by `if insecure:` -- the condition is not visible here.
204 http_kwargs['disable_ssl_certificate_validation'] = True
205 kwargs['http'] = httplib2.Http(**http_kwargs)
# Caller-supplied http objects are patched too; the result of
# _patch_http_request() replaces kwargs['http'].
207 kwargs['http'] = _patch_http_request(kwargs['http'], token)
# cache_discovery=False is passed to apiclient's build(); any caching of the
# discovery document comes from the http object's own cache set above.
209 svc = apiclient_discovery.build('arvados', version, cache_discovery=False, **kwargs)
210 svc.api_token = token
211 svc.insecure = insecure
# Give the request interceptor the limit taken from the discovery document
# (0 when the document has no maxRequestSize entry).
212 kwargs['http'].max_request_size = svc._rootDesc.get('maxRequestSize', 0)
# The http object's cache is cleared once the service has been built.
213 kwargs['http'].cache = None
def api_from_config(version=None, apiconfig=None, **kwargs):
    """Return an apiclient Resources object enabling access to an Arvados server
    instance, using connection settings taken from a configuration mapping.

    Arguments:

    version
      A string naming the version of the Arvados REST API to use; it is
      passed through to `api()` unchanged.

    apiconfig
      If provided, this should be a dict-like object (must support the get()
      method) with entries for ARVADOS_API_HOST, ARVADOS_API_TOKEN, and
      optionally ARVADOS_API_HOST_INSECURE.  If not provided, use
      `config.settings()`.

    Other keyword arguments such as `cache` will be passed along to `api()`.

    Raises ValueError if ARVADOS_API_HOST or ARVADOS_API_TOKEN is missing
    or empty.
    """
    # Load from user configuration or environment
    if apiconfig is None:
        apiconfig = config.settings()

    host = apiconfig.get('ARVADOS_API_HOST')
    token = apiconfig.get('ARVADOS_API_TOKEN')
    # Reject empty values as well as missing keys.  api() treats "no host
    # and no token" as a request to call api_from_config() again with the
    # default configuration, which would silently ignore the apiconfig given
    # here (and never terminate if the defaults are empty too).
    for name, value in (('ARVADOS_API_HOST', host),
                        ('ARVADOS_API_TOKEN', token)):
        if not value:
            raise ValueError("%s is not set. Aborting." % name)
    insecure = config.flag_is_true('ARVADOS_API_HOST_INSECURE', apiconfig)

    return api(version=version, host=host, token=token, insecure=insecure, **kwargs)