1 from __future__ import absolute_import
14 from apiclient import discovery as apiclient_discovery
15 from apiclient import errors as apiclient_errors
# Logger used for this module's diagnostic messages (the retry notices
# in _intercept_http_request and the notice in api()).
21 _logger = logging.getLogger('arvados.api')
# Idle threshold in seconds: copied onto patched HTTP objects as
# _max_keepalive_idle and compared against time.time() differences.
23 MAX_IDLE_CONNECTION_DURATION = 30
# Starting value of the retry delay (logged as "%d s") for a request
# that is being retried.
24 RETRY_DELAY_INITIAL = 2
# Factor the retry delay is multiplied by (delay = delay * backoff).
25 RETRY_DELAY_BACKOFF = 2
# JsonModel subclass whose deserialize() parses response bodies with
# object_pairs_hook=collections.OrderedDict, so JSON objects keep the
# key order of the response text.
# NOTE(review): the end of the class docstring and the tail of
# deserialize() are not visible in this view; the comments below only
# describe the statements that are shown.
28 class OrderedJsonModel(apiclient.model.JsonModel):
29 """Model class for JSON that preserves the contents' order.
31 API clients that care about preserving the order of fields in API
32 server responses can use this model to do so, like this::
34 from arvados.api import OrderedJsonModel
35 client = arvados.api('v1', ..., model=OrderedJsonModel())
38 def deserialize(self, content):
39 # This is a very slightly modified version of the parent class'
40 # implementation. Copyright (c) 2010 Google.
# The raw response body is decoded as UTF-8 before it is parsed.
41 content = content.decode('utf-8')
# object_pairs_hook makes json build every JSON object as an
# OrderedDict instead of a plain dict.
42 body = json.loads(content, object_pairs_hook=collections.OrderedDict)
# Tests for a top-level 'data' key when self._data_wrapper is set.
# NOTE(review): the branch body is not shown here -- presumably it
# unwraps body['data'] like the parent class; confirm.
43 if self._data_wrapper and isinstance(body, dict) and 'data' in body:
# Replacement for an HTTP object's request() method; it is bound to the
# object by _patch_http_request, so `self` is that HTTP object.
#
# Visible behavior:
#   * raises apiclient_errors.MediaUploadSizeError when max_request_size
#     is non-zero and the request body is longer than it;
#   * ensures kwargs['headers'] exists, adds X-External-Client when the
#     ARVADOS_EXTERNAL_CLIENT setting is "true", and always sets the
#     Authorization header from self.arvados_api_token;
#   * loops up to self._retry_count times for the methods listed below,
#     then makes one final call to the original request method.
#
# NOTE(review): several interior lines (the start of the idle-connection
# condition, the `try:`, the socket-error handler header, the loop
# bodies that close connections, and any sleep between attempts) are
# not visible in this view; confirm against the full file before
# relying on the details.
48 def _intercept_http_request(self, uri, **kwargs):
49 if (self.max_request_size and
50 kwargs.get('body') and
51 self.max_request_size < len(kwargs['body'])):
52 raise apiclient_errors.MediaUploadSizeError("Request size %i bytes exceeds published limit of %i bytes" % (len(kwargs['body']), self.max_request_size))
54 if 'headers' not in kwargs:
55 kwargs['headers'] = {}
57 if config.get("ARVADOS_EXTERNAL_CLIENT", "") == "true":
58 kwargs['headers']['X-External-Client'] = '1'
60 kwargs['headers']['Authorization'] = 'OAuth2 %s' % self.arvados_api_token
# Only the methods in this list get a non-zero retry count; a missing
# 'method' keyword is treated as 'GET'.
62 retryable = kwargs.get('method', 'GET') in [
63 'DELETE', 'GET', 'HEAD', 'OPTIONS', 'PUT']
64 retry_count = self._retry_count if retryable else 0
# Second half of a condition comparing the time since the last request
# with self._max_keepalive_idle.
# NOTE(review): the first half of the condition is not visible here.
67 time.time() - self._last_request_time > self._max_keepalive_idle):
68 # High probability of failure due to connection atrophy. Make
69 # sure this request [re]opens a new connection by closing and
70 # forgetting all cached connections first.
71 for conn in self.connections.itervalues():
73 self.connections.clear()
# Retry loop: the delay starts at _retry_delay_initial and is multiplied
# by _retry_delay_backoff further down.
75 delay = self._retry_delay_initial
76 for _ in range(retry_count):
77 self._last_request_time = time.time()
79 return self.orig_http_request(uri, **kwargs)
80 except httplib.HTTPException:
81 _logger.debug("Retrying API request in %d s after HTTP error",
84 # This is the one case where httplib2 doesn't close the
85 # underlying connection first. Close all open
86 # connections, expecting this object only has the one
87 # connection to the API server. This is safe because
88 # httplib2 reopens connections when needed.
89 _logger.debug("Retrying API request in %d s after socket error",
91 for conn in self.connections.itervalues():
94 delay = delay * self._retry_delay_backoff
# Final attempt: no handler is visible around this call, so presumably
# any exception it raises propagates to the caller -- confirm.
96 self._last_request_time = time.time()
97 return self.orig_http_request(uri, **kwargs)
def _patch_http_request(http, api_token):
    """Install Arvados request handling on an HTTP client object.

    The object is modified in place: the API token and the
    retry/keepalive settings are stored as attributes, the original
    ``request`` callable is kept as ``orig_http_request``, and
    ``request`` is rebound to ``_intercept_http_request``.

    Arguments:

    http
      The HTTP client object to patch (``api()`` passes the value of
      its ``http`` keyword argument).

    api_token
      The token ``_intercept_http_request`` sends in the
      ``Authorization`` header of every request.

    Returns the same ``http`` object, so callers can write
    ``http = _patch_http_request(http, token)``.
    """
    http.arvados_api_token = api_token
    # 0 means "no limit"; api() overwrites this once the discovery
    # document has been fetched.
    http.max_request_size = 0
    # Save the original bound method *before* replacing it; the
    # interceptor delegates to it.
    http.orig_http_request = http.request
    http.request = types.MethodType(_intercept_http_request, http)
    http._last_request_time = 0
    http._max_keepalive_idle = MAX_IDLE_CONNECTION_DURATION
    http._retry_delay_initial = RETRY_DELAY_INITIAL
    http._retry_delay_backoff = RETRY_DELAY_BACKOFF
    http._retry_count = RETRY_COUNT
    # api() assigns this call's result back to kwargs['http'] and then
    # sets attributes on it, so the patched object must be returned.
    return http
111 # Monkey patch discovery._cast() so objects and arrays get serialized
112 # with json.dumps() instead of str().
113 _cast_orig = apiclient_discovery._cast
114 def _cast_objects_too(value, schema_type):
116 if (type(value) != type('') and
117 (schema_type == 'object' or schema_type == 'array')):
118 return json.dumps(value)
120 return _cast_orig(value, schema_type)
121 apiclient_discovery._cast = _cast_objects_too
123 # Convert apiclient's HttpErrors into our own API error subclass for better
125 # Reassigning apiclient_errors.HttpError is not sufficient because most of the
126 # apiclient submodules import the class into their own namespace.
def _new_http_error(cls, *args, **kwargs):
    """Allocate an ``errors.ApiError`` whenever an ``HttpError`` is built.

    Installed as ``apiclient_errors.HttpError.__new__``.  Whatever class
    is being instantiated (``cls``), the allocation is performed for
    ``errors.ApiError`` using the ``__new__`` found after ``HttpError``
    in ``cls``'s method resolution order, with the constructor
    arguments passed through unchanged.
    """
    allocate = super(apiclient_errors.HttpError, cls).__new__
    return allocate(errors.ApiError, *args, **kwargs)
130 apiclient_errors.HttpError.__new__ = staticmethod(_new_http_error)
# Build the HTTP cache object for `data_type`, stored under
# $HOME/.cache/arvados/<data_type> (api() calls this with 'discovery').
# Returns a cache.SafeHTTPCache for that directory whose max_age is
# 60*60*24*2 (two days' worth of seconds).
# NOTE(review): the body of the HOME guard and any error handling around
# the directory creation are not visible in this view -- presumably the
# function gives up on caching when HOME is unset/empty; confirm.
132 def http_cache(data_type):
133 homedir = os.environ.get('HOME')
134 if not homedir or len(homedir) == 0:
136 path = homedir + '/.cache/arvados/' + data_type
# Make sure the cache directory exists before handing it to the cache.
138 util.mkdir_dash_p(path)
141 return cache.SafeHTTPCache(path, max_age=60*60*24*2)
# Build an apiclient service object for an Arvados API server.
#
# Visible behavior:
#   * with neither host nor token (and no discoveryServiceUrl), defers
#     to api_from_config();
#   * raises ValueError for inconsistent combinations of host, token and
#     discoveryServiceUrl (see the messages below);
#   * otherwise derives discoveryServiceUrl from host, creates an
#     httplib2.Http when the caller supplied no `http`, patches it with
#     _patch_http_request, and calls apiclient_discovery.build().
#
# NOTE(review): large parts of the docstring and several control-flow
# lines (including the final return) are not visible in this view; the
# added comments describe only the statements that are shown.
143 def api(version=None, cache=True, host=None, token=None, insecure=False, **kwargs):
144 """Return an apiclient Resources object for an Arvados instance.
147 A string naming the version of the Arvados API to use (for
151 Use a cache (~/.cache/arvados/discovery) for the discovery
155 The Arvados API server host (and optional :port) to connect to.
158 The authentication token to send with each API call.
161 If True, ignore SSL certificate validation errors.
163 Additional keyword arguments will be passed directly to
164 `apiclient_discovery.build` if a new Resource object is created.
165 If the `discoveryServiceUrl` or `http` keyword arguments are
166 missing, this function will set default values for them, based on
167 the current Arvados configuration settings.
173 _logger.info("Using default API version. " +
174 "Call arvados.api('%s') instead." %
176 if 'discoveryServiceUrl' in kwargs:
178 raise ValueError("both discoveryServiceUrl and host provided")
179 # Here we can't use a token from environment, config file,
180 # etc. Those probably have nothing to do with the host
181 # provided by the caller.
183 raise ValueError("discoveryServiceUrl provided, but token missing")
# Nothing was specified explicitly: take host and token from the
# configuration via api_from_config().
186 elif not host and not token:
187 return api_from_config(version=version, cache=cache, **kwargs)
189 # Caller provided one but not the other
191 raise ValueError("token argument provided, but host missing.")
193 raise ValueError("host argument provided, but token missing.")
196 # Caller wants us to build the discoveryServiceUrl
197 kwargs['discoveryServiceUrl'] = (
198 'https://%s/discovery/v1/apis/{api}/{apiVersion}/rest' % (host,))
# Create a default HTTP client only when the caller did not pass one.
200 if 'http' not in kwargs:
201 http_kwargs = {'ca_certs': util.ca_certs_path()}
# NOTE(review): the conditions guarding the next two assignments are not
# visible -- presumably the `cache` and `insecure` arguments; confirm.
203 http_kwargs['cache'] = http_cache('discovery')
205 http_kwargs['disable_ssl_certificate_validation'] = True
206 kwargs['http'] = httplib2.Http(**http_kwargs)
# Rebind the client's request method so every call carries the token
# and goes through _intercept_http_request.
208 kwargs['http'] = _patch_http_request(kwargs['http'], token)
210 svc = apiclient_discovery.build('arvados', version, cache_discovery=False, **kwargs)
211 svc.api_token = token
212 svc.insecure = insecure
# Copy the server's maxRequestSize (0 when absent) onto the HTTP
# object, where _intercept_http_request checks request bodies against it.
213 kwargs['http'].max_request_size = svc._rootDesc.get('maxRequestSize', 0)
# The cache is detached once the service object has been built --
# presumably so only the discovery document is cached; confirm.
214 kwargs['http'].cache = None
def api_from_config(version=None, apiconfig=None, **kwargs):
    """Return an apiclient Resources object enabling access to an Arvados server
    instance, using host and token settings taken from configuration.

    Arguments:

    version
      A string naming the version of the Arvados REST API to use;
      passed through to `api()`.

    apiconfig
      If provided, this should be a dict-like object (must support the
      get() method) with entries for ARVADOS_API_HOST,
      ARVADOS_API_TOKEN, and optionally ARVADOS_API_HOST_INSECURE.  If
      not provided, use `config.settings()`.

    Other keyword arguments such as `cache` will be passed along to
    `api()`.

    Raises ValueError if ARVADOS_API_HOST or ARVADOS_API_TOKEN is
    missing or empty.
    """
    # Load from user configuration or environment
    if apiconfig is None:
        apiconfig = config.settings()

    # Use get() -- the only method apiconfig is documented to support --
    # and treat an empty value like a missing one.  api() falls back to
    # api_from_config() when neither host nor token is truthy, so
    # letting empty settings through would recurse instead of reporting
    # the real problem.
    for required in ('ARVADOS_API_HOST', 'ARVADOS_API_TOKEN'):
        if not apiconfig.get(required):
            raise ValueError("%s is not set. Aborting." % required)

    host = apiconfig.get('ARVADOS_API_HOST')
    token = apiconfig.get('ARVADOS_API_TOKEN')
    insecure = config.flag_is_true('ARVADOS_API_HOST_INSECURE', apiconfig)

    return api(version=version, host=host, token=token,
               insecure=insecure, **kwargs)