1 # Copyright (C) The Arvados Authors. All rights reserved.
3 # SPDX-License-Identifier: Apache-2.0
6 This module provides functions and constants that are useful across a variety
7 of Arvados resource types, or extend the Arvados API client (see `arvados.api`).
HEX_RE = re.compile(r'^[0-9a-fA-F]+$')
"""Regular expression to match a hexadecimal string (case-insensitive)"""
CR_UNCOMMITTED = 'Uncommitted'
"""Constant `state` value for uncommitted container requests"""
CR_COMMITTED = 'Committed'
"""Constant `state` value for committed container requests"""
CR_FINAL = 'Final'
"""Constant `state` value for finalized container requests"""

keep_locator_pattern = re.compile(r'[0-9a-f]{32}\+[0-9]+(\+\S+)*')
"""Regular expression to match any Keep block locator"""
signed_locator_pattern = re.compile(r'[0-9a-f]{32}\+[0-9]+(\+\S+)*\+A\S+(\+\S+)*')
"""Regular expression to match any Keep block locator with an access token hint"""
portable_data_hash_pattern = re.compile(r'[0-9a-f]{32}\+[0-9]+')
"""Regular expression to match any collection portable data hash"""
manifest_pattern = re.compile(r'((\S+)( +[a-f0-9]{32}(\+[0-9]+)(\+\S+)*)+( +[0-9]+:[0-9]+:\S+)+$)+', flags=re.MULTILINE)
"""Regular expression to match an Arvados collection manifest text"""
keep_file_locator_pattern = re.compile(r'([0-9a-f]{32}\+[0-9]+)/(.*)')
"""Regular expression to match a file path from a collection identified by portable data hash"""
keepuri_pattern = re.compile(r'keep:([0-9a-f]{32}\+[0-9]+)/(.*)')
"""Regular expression to match a `keep:` URI with a collection identified by portable data hash"""

# None of the patterns below are anchored at the end, so use them with
# `match`/`fullmatch` as appropriate for the caller's strictness.
uuid_pattern = re.compile(r'[a-z0-9]{5}-[a-z0-9]{5}-[a-z0-9]{15}')
"""Regular expression to match any Arvados object UUID"""
collection_uuid_pattern = re.compile(r'[a-z0-9]{5}-4zz18-[a-z0-9]{15}')
"""Regular expression to match any Arvados collection UUID"""
container_uuid_pattern = re.compile(r'[a-z0-9]{5}-dz642-[a-z0-9]{15}')
"""Regular expression to match any Arvados container UUID"""
group_uuid_pattern = re.compile(r'[a-z0-9]{5}-j7d0g-[a-z0-9]{15}')
"""Regular expression to match any Arvados group UUID"""
link_uuid_pattern = re.compile(r'[a-z0-9]{5}-o0j2j-[a-z0-9]{15}')
"""Regular expression to match any Arvados link UUID"""
user_uuid_pattern = re.compile(r'[a-z0-9]{5}-tpzed-[a-z0-9]{15}')
"""Regular expression to match any Arvados user UUID"""
def is_hex(s: str, *length_args: int) -> bool:
    """Indicate whether a string is a hexadecimal number

    This method returns true if all characters in the string are hexadecimal
    digits. It is case-insensitive.

    You can also pass optional length arguments to check that the string has
    the expected number of digits. If you pass one integer, the string must
    have that length exactly, otherwise the method returns False. If you
    pass two integers, the string's length must fall within that minimum and
    maximum (inclusive), otherwise the method returns False.

    Arguments:

    * s: str --- The string to check

    * length_args: int --- Optional length limit(s) for the string to check

    Raises `arvados.errors.ArgumentError` if more than two length arguments
    are given.
    """
    num_length_args = len(length_args)
    if num_length_args > 2:
        raise arvados.errors.ArgumentError(
            "is_hex accepts up to 3 arguments ({} given)".format(1 + num_length_args))
    elif num_length_args == 2:
        good_len = (length_args[0] <= len(s) <= length_args[1])
    elif num_length_args == 1:
        good_len = (len(s) == length_args[0])
    else:
        # No length constraint requested: any non-empty hex string passes.
        good_len = True
    # fullmatch, not match: `$` in HEX_RE also matches just before a
    # trailing newline, which would let "abc\n" pass as hexadecimal.
    return bool(good_len and HEX_RE.fullmatch(s))
def keyset_list_all(
        fn: Callable[..., 'arvados.api_resources.ArvadosAPIRequest'],
        order_key: str="created_at",
        num_retries: int=0,
        ascending: bool=True,
        key_fields: Container[str]=('uuid',),
        **kwargs: Any,
) -> Iterator[Dict[str, Any]]:
    """Iterate all Arvados resources from an API list call

    This method takes a method that represents an Arvados API list call, and
    iterates the objects returned by the API server. It can make multiple API
    calls to retrieve and iterate all objects available from the API server.

    Arguments:

    * fn: Callable[..., arvados.api_resources.ArvadosAPIRequest] --- A
      function that wraps an Arvados API method that returns a list of
      objects. If you have an Arvados API client named `arv`, examples
      include `arv.collections().list` and `arv.groups().contents`. Note
      that you should pass the function *without* calling it.

    * order_key: str --- The name of the primary object field that objects
      should be sorted by. This name is used to build an `order` argument
      for `fn`. Default `'created_at'`.

    * num_retries: int --- This argument is passed through to
      `arvados.api_resources.ArvadosAPIRequest.execute` for each API call. See
      that method's docstring for details. Default 0 (meaning API calls will
      use the `num_retries` value set when the Arvados API client was
      constructed).

    * ascending: bool --- Used to build an `order` argument for `fn`. If True,
      all fields will be sorted in `'asc'` (ascending) order. Otherwise, all
      fields will be sorted in `'desc'` (descending) order. Default True.

    * key_fields: Container[str] --- One or two fields that constitute
      a unique key for returned items. Normally this should be the
      default value `('uuid',)`, unless `fn` returns
      computed_permissions records, in which case it should be
      `('user_uuid', 'target_uuid')`. If two fields are given, one of
      them must be equal to `order_key`.

    Additional keyword arguments will be passed directly to `fn` for each API
    call. Note that this function sets `count`, `limit`, and `order` as part of
    its work, overriding any values passed for them, and prepends its own
    paging conditions to `filters`.

    Raises `arvados.errors.ArgumentError` if `key_fields` contains more than
    one field other than `order_key`.
    """
    tiebreak_keys = set(key_fields) - {order_key}
    if not tiebreak_keys:
        tiebreak_key = 'uuid'
    elif len(tiebreak_keys) == 1:
        tiebreak_key = tiebreak_keys.pop()
    else:
        raise arvados.errors.ArgumentError(
            "key_fields can have at most one entry that is not order_key")

    pagesize = 1000
    kwargs["limit"] = pagesize
    kwargs["count"] = 'none'
    asc = "asc" if ascending else "desc"
    kwargs["order"] = [f"{order_key} {asc}", f"{tiebreak_key} {asc}"]
    # Copy into a list so a None or tuple `filters` argument can still be
    # concatenated with the paging filters below.
    other_filters = list(kwargs.get("filters") or [])

    if 'select' in kwargs:
        # Paging needs the key and order fields in every record, so make
        # sure they are selected even if the caller did not ask for them.
        kwargs['select'] = list({*kwargs['select'], *key_fields, order_key})

    nextpage = []
    key_getter = operator.itemgetter(*key_fields)
    seen_prevpage = set()
    seen_thispage = set()
    lastitem = None
    prev_page_all_same_order_key = False

    while True:
        kwargs["filters"] = nextpage + other_filters
        items = fn(**kwargs).execute(num_retries=num_retries)

        if not items["items"]:
            if prev_page_all_same_order_key:
                # The tiebreak-key paging ran out of rows sharing the last
                # order key; resume with rows strictly beyond that key.
                nextpage = [[order_key, ">" if ascending else "<", lastitem[order_key]]]
                prev_page_all_same_order_key = False
                continue
            return

        seen_prevpage = seen_thispage
        seen_thispage = set()

        for i in items["items"]:
            # In cases where there's more than one record with the
            # same order key, the result could include records we
            # already saw in the last page.  Skip them.
            seen_key = key_getter(i)
            if seen_key in seen_prevpage:
                continue
            seen_thispage.add(seen_key)
            yield i

        firstitem = items["items"][0]
        lastitem = items["items"][-1]

        if firstitem[order_key] == lastitem[order_key]:
            # Got a page where every item has the same order key.
            # Switch to using tiebreak key for paging.
            nextpage = [
                [order_key, "=", lastitem[order_key]],
                [tiebreak_key, ">" if ascending else "<", lastitem[tiebreak_key]],
            ]
            prev_page_all_same_order_key = True
        else:
            # Start from the last order key seen, but skip the last
            # known uuid to avoid retrieving the same row twice.  If
            # there are multiple rows with the same order key it is
            # still likely we'll end up retrieving duplicate rows.
            # That's handled by tracking the "seen" rows for each page
            # so they can be skipped if they show up on the next page.
            nextpage = [[order_key, ">=" if ascending else "<=", lastitem[order_key]]]
            if tiebreak_key == "uuid":
                nextpage += [[tiebreak_key, "!=", lastitem[tiebreak_key]]]
            prev_page_all_same_order_key = False
def iter_computed_permissions(
        fn: Callable[..., 'arvados.api_resources.ArvadosAPIRequest'],
        order_key: str='user_uuid',
        num_retries: int=0,
        ascending: bool=True,
        key_fields: Container[str]=('user_uuid', 'target_uuid'),
        **kwargs: Any,
) -> Iterator[Dict[str, Any]]:
    """Iterate all `computed_permission` resources

    This method is the same as `keyset_list_all`, except that its
    default arguments are suitable for the computed_permissions API.

    Arguments:

    * fn: Callable[..., arvados.api_resources.ArvadosAPIRequest] ---
      see `keyset_list_all`. Typically this is an instance of
      `arvados.api_resources.ComputedPermissions.list`. Given an
      Arvados API client named `arv`, typical usage is
      `iter_computed_permissions(arv.computed_permissions().list)`.

    * order_key: str --- see `keyset_list_all`. Default
      `'user_uuid'`.

    * num_retries: int --- see `keyset_list_all`.

    * ascending: bool --- see `keyset_list_all`.

    * key_fields: Container[str] --- see `keyset_list_all`. Default
      `('user_uuid', 'target_uuid')`.

    Additional keyword arguments are passed through to `keyset_list_all`.
    """
    return keyset_list_all(
        fn=fn,
        order_key=order_key,
        num_retries=num_retries,
        ascending=ascending,
        key_fields=key_fields,
        **kwargs)
def ca_certs_path(fallback: T=httplib2.CA_CERTS) -> Union[str, T]:
    """Return the path of the best available source of CA certificates

    This function checks various known paths that provide trusted CA
    certificates, and returns the first one that exists. It checks:

    * the path in the `SSL_CERT_FILE` environment variable (used by OpenSSL)
    * `/etc/arvados/ca-certificates.crt`, respected by all Arvados software
    * `/etc/ssl/certs/ca-certificates.crt`, the default store on Debian-based
      distributions
    * `/etc/pki/tls/certs/ca-bundle.crt`, the default store on Red Hat-based
      distributions

    If none of these paths exist, this function returns the value of `fallback`.

    Arguments:

    * fallback: T --- The value to return if none of the known paths exist.
      The default value is the certificate store of Mozilla's trusted CAs
      included with the Python [certifi][] package.

    [certifi]: https://pypi.org/project/certifi/
    """
    candidates = (
        # SSL_CERT_FILE and SSL_CERT_DIR are openssl overrides - note
        # that httplib2 itself also supports HTTPLIB2_CA_CERTS.
        os.environ.get('SSL_CERT_FILE'),
        # Arvados specific:
        '/etc/arvados/ca-certificates.crt',
        # Debian:
        '/etc/ssl/certs/ca-certificates.crt',
        # Red Hat:
        '/etc/pki/tls/certs/ca-bundle.crt',
    )
    # The loop variable deliberately does not reuse the function's own name.
    for candidate in candidates:
        # An unset SSL_CERT_FILE yields None, which is skipped here.
        if candidate and os.path.exists(candidate):
            return candidate
    return fallback
def new_request_id() -> str:
    """Return a random request ID

    This function generates and returns a random string suitable for use as a
    `X-Request-Id` header value in the Arvados API. The result is the prefix
    `req-` followed by 20 characters drawn from `0-9` and `a-z`.
    """
    # 2**104 > 36**20 > 2**103
    n = random.getrandbits(104)
    digits = []
    for _ in range(20):
        # Peel off base-36 digits, least significant first.
        n, c = divmod(n, 36)
        if c < 10:
            digits.append(chr(c + ord('0')))
        else:
            digits.append(chr(c + ord('a') - 10))
    return "req-" + "".join(digits)
def get_config_once(svc: 'arvados.api_resources.ArvadosAPIClient') -> Dict[str, Any]:
    """Return an Arvados cluster's configuration, with caching

    This function gets and returns the Arvados configuration from the API
    server. It caches the result on the client object and reuses it on any
    future calls. If the API server does not advertise a `configs` resource,
    it returns an empty dictionary without making a request.

    Arguments:

    * svc: arvados.api_resources.ArvadosAPIClient --- The Arvados API client
      object to use to retrieve and cache the Arvados cluster configuration.
    """
    # `or {}` guards against a discovery document with no `resources` entry.
    resources = svc._rootDesc.get('resources') or {}
    if not resources.get('configs', False):
        # Old API server version, no config export endpoint
        return {}
    if not hasattr(svc, '_cached_config'):
        svc._cached_config = svc.configs().get().execute()
    return svc._cached_config
def get_vocabulary_once(svc: 'arvados.api_resources.ArvadosAPIClient') -> Dict[str, Any]:
    """Return an Arvados cluster's vocabulary, with caching

    This function gets and returns the Arvados vocabulary from the API
    server. It caches the result on the client object and reuses it on any
    future calls. If the API server does not advertise a `vocabularies`
    resource, it returns an empty dictionary without making a request.

    .. HINT:: Low-level method
       This is a relatively low-level wrapper around the Arvados API. Most
       users will prefer to use `arvados.vocabulary.load_vocabulary`.

    Arguments:

    * svc: arvados.api_resources.ArvadosAPIClient --- The Arvados API client
      object to use to retrieve and cache the Arvados cluster vocabulary.
    """
    # `or {}` guards against a discovery document with no `resources` entry.
    resources = svc._rootDesc.get('resources') or {}
    if not resources.get('vocabularies', False):
        # Old API server version, no vocabulary export endpoint
        return {}
    if not hasattr(svc, '_cached_vocabulary'):
        svc._cached_vocabulary = svc.vocabularies().get().execute()
    return svc._cached_vocabulary
def trim_name(collectionname: str) -> str:
    """Limit the length of a name to fit within Arvados API limits

    This function ensures that a string is short enough to use as an object
    name in the Arvados API, leaving room for text that may be added by the
    `ensure_unique_name` argument. If the source name is short enough, it is
    returned unchanged. Otherwise, this function returns a string with excess
    characters removed from the middle of the source string and replaced with
    an ellipsis.

    Arguments:

    * collectionname: str --- The desired source name
    """
    max_name_len = 254 - 28
    excess = len(collectionname) - max_name_len
    if excess <= 0:
        # Already within the limit: nothing to cut.
        return collectionname

    # Keep the first half of the budget, drop `excess` characters from the
    # middle, and mark the cut with a single ellipsis character.
    head_len = max_name_len // 2
    head = collectionname[:head_len]
    tail = collectionname[head_len + excess:]
    return head + "…" + tail
383 def iter_storage_classes(
384 config: Dict[str, Any],
385 check: Callable[[Dict[str, Any]], bool]=operator.methodcaller('get', 'Default'),
386 fallback: str="default",
388 """Read storage classes from the API client config
390 This function iterates storage class names for classes in `config` that
391 pass `check`. If no matches are found but `fallback` is given, it is
395 for key, value in config.get("StorageClasses", {}).items():
399 if fallback and not any_found: