# Copyright (C) The Arvados Authors. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0
"""Arvados utilities

This module provides functions and constants that are useful across a variety
of Arvados resource types, or extend the Arvados API client (see `arvados.api`).
"""
# NOTE: `$` also matches just before a single trailing newline, so callers
# that need a strict whole-string check should use `HEX_RE.fullmatch`.
HEX_RE = re.compile(r'^[0-9a-fA-F]+$')
"""Regular expression to match a hexadecimal string (case-insensitive)"""
CR_UNCOMMITTED = 'Uncommitted'
"""Constant `state` value for uncommitted container requests"""
CR_COMMITTED = 'Committed'
"""Constant `state` value for committed container requests"""
# NOTE(review): the assignment for this constant was missing from the reviewed
# text; the value follows the pattern of the two constants above --- confirm.
CR_FINAL = 'Final'
"""Constant `state` value for finalized container requests"""

# The patterns below are not anchored at the end (and most are not anchored at
# the start either), so use `.match()`/`.fullmatch()`/`.search()` deliberately.
keep_locator_pattern = re.compile(r'[0-9a-f]{32}\+[0-9]+(\+\S+)*')
"""Regular expression to match any Keep block locator"""
signed_locator_pattern = re.compile(r'[0-9a-f]{32}\+[0-9]+(\+\S+)*\+A\S+(\+\S+)*')
"""Regular expression to match any Keep block locator with an access token hint"""
portable_data_hash_pattern = re.compile(r'[0-9a-f]{32}\+[0-9]+')
"""Regular expression to match any collection portable data hash"""
manifest_pattern = re.compile(r'((\S+)( +[a-f0-9]{32}(\+[0-9]+)(\+\S+)*)+( +[0-9]+:[0-9]+:\S+)+$)+', flags=re.MULTILINE)
"""Regular expression to match an Arvados collection manifest text"""
keep_file_locator_pattern = re.compile(r'([0-9a-f]{32}\+[0-9]+)/(.*)')
"""Regular expression to match a file path from a collection identified by portable data hash"""
keepuri_pattern = re.compile(r'keep:([0-9a-f]{32}\+[0-9]+)/(.*)')
"""Regular expression to match a `keep:` URI with a collection identified by portable data hash"""

uuid_pattern = re.compile(r'[a-z0-9]{5}-[a-z0-9]{5}-[a-z0-9]{15}')
"""Regular expression to match any Arvados object UUID"""
collection_uuid_pattern = re.compile(r'[a-z0-9]{5}-4zz18-[a-z0-9]{15}')
"""Regular expression to match any Arvados collection UUID"""
container_uuid_pattern = re.compile(r'[a-z0-9]{5}-dz642-[a-z0-9]{15}')
"""Regular expression to match any Arvados container UUID"""
group_uuid_pattern = re.compile(r'[a-z0-9]{5}-j7d0g-[a-z0-9]{15}')
"""Regular expression to match any Arvados group UUID"""
link_uuid_pattern = re.compile(r'[a-z0-9]{5}-o0j2j-[a-z0-9]{15}')
"""Regular expression to match any Arvados link UUID"""
user_uuid_pattern = re.compile(r'[a-z0-9]{5}-tpzed-[a-z0-9]{15}')
"""Regular expression to match any Arvados user UUID"""
def is_hex(s: str, *length_args: int) -> bool:
    """Indicate whether a string is a hexadecimal number

    This method returns true if all characters in the string are hexadecimal
    digits. It is case-insensitive.

    You can also pass optional length arguments to check that the string has
    the expected number of digits. If you pass one integer, the string must
    have that length exactly, otherwise the method returns False. If you
    pass two integers, the string's length must fall within that minimum and
    maximum (inclusive), otherwise the method returns False.

    Passing more than two length arguments raises
    `arvados.errors.ArgumentError`.

    Arguments:

    * s: str --- The string to check

    * length_args: int --- Optional length limit(s) for the string to check
    """
    num_length_args = len(length_args)
    if num_length_args > 2:
        raise arvados.errors.ArgumentError(
            "is_hex accepts up to 3 arguments ({} given)".format(1 + num_length_args))
    elif num_length_args == 2:
        good_len = (length_args[0] <= len(s) <= length_args[1])
    elif num_length_args == 1:
        good_len = (len(s) == length_args[0])
    else:
        # No length constraint requested: any length is acceptable.
        good_len = True
    # Use fullmatch rather than match: the `$` in HEX_RE also matches just
    # before a trailing newline, so match() would accept "abc\n".
    return bool(good_len and HEX_RE.fullmatch(s))
def keyset_list_all(
        fn: Callable[..., 'arvados.api_resources.ArvadosAPIRequest'],
        order_key: str="created_at",
        num_retries: int=0,
        ascending: bool=True,
        key_fields: Container[str]=('uuid',),
        **kwargs: Any,
) -> Iterator[Dict[str, Any]]:
    """Iterate all Arvados resources from an API list call

    This method takes a method that represents an Arvados API list call, and
    iterates the objects returned by the API server. It can make multiple API
    calls to retrieve and iterate all objects available from the API server.

    Arguments:

    * fn: Callable[..., arvados.api_resources.ArvadosAPIRequest] --- A
      function that wraps an Arvados API method that returns a list of
      objects. If you have an Arvados API client named `arv`, examples
      include `arv.collections().list` and `arv.groups().contents`. Note
      that you should pass the function *without* calling it.

    * order_key: str --- The name of the primary object field that objects
      should be sorted by. This name is used to build an `order` argument
      for `fn`. Default `'created_at'`.

    * num_retries: int --- This argument is passed through to
      `arvados.api_resources.ArvadosAPIRequest.execute` for each API call. See
      that method's docstring for details. Default 0 (meaning API calls will
      use the `num_retries` value set when the Arvados API client was
      constructed).

    * ascending: bool --- Used to build an `order` argument for `fn`. If True,
      all fields will be sorted in `'asc'` (ascending) order. Otherwise, all
      fields will be sorted in `'desc'` (descending) order.

    * key_fields: Container[str] --- One or two fields that constitute
      a unique key for returned items. Normally this should be the
      default value `('uuid',)`, unless `fn` returns
      computed_permissions records, in which case it should be
      `('user_uuid', 'target_uuid')`. If two fields are given, one of
      them must be equal to `order_key`.

    Additional keyword arguments will be passed directly to `fn` for each API
    call. Note that this function sets `count`, `limit`, and `order` as part of
    its work, overwriting any values you pass for them. Any `filters` you pass
    are kept and combined with the paging filters this function generates. If
    you pass `select`, the key fields, `order_key`, and the tiebreak field are
    added to it because paging needs them.

    Raises `arvados.errors.ArgumentError` if `key_fields` has more than one
    entry that is not `order_key`.
    """
    tiebreak_keys = set(key_fields) - {order_key}
    if not tiebreak_keys:
        tiebreak_key = 'uuid'
    elif len(tiebreak_keys) == 1:
        tiebreak_key = tiebreak_keys.pop()
    else:
        raise arvados.errors.ArgumentError(
            "key_fields can have at most one entry that is not order_key")

    # NOTE(review): the page size assignment was not visible in the reviewed
    # text; 1000 is assumed here --- confirm against the original.
    pagesize = 1000
    kwargs["limit"] = pagesize
    kwargs["count"] = 'none'
    asc = "asc" if ascending else "desc"
    kwargs["order"] = [f"{order_key} {asc}", f"{tiebreak_key} {asc}"]
    # Copy into a list so `filters=None` or a tuple of filters can still be
    # concatenated with the paging filters below.
    other_filters = list(kwargs.get("filters") or [])

    if 'select' in kwargs:
        # Paging reads order_key, tiebreak_key and key_fields from every
        # returned item, so make sure the server includes all of them.
        kwargs['select'] = list({*kwargs['select'], *key_fields, order_key, tiebreak_key})

    # Comparison operators for "strictly after" and "at or after" the last
    # item seen, in the requested sort direction.
    after_op = ">" if ascending else "<"
    from_op = ">=" if ascending else "<="

    nextpage = []
    key_getter = operator.itemgetter(*key_fields)
    seen_prevpage = set()
    seen_thispage = set()
    lastitem = None
    prev_page_all_same_order_key = False

    while True:
        kwargs["filters"] = nextpage + other_filters
        page = fn(**kwargs).execute(num_retries=num_retries)["items"]

        if not page:
            if not prev_page_all_same_order_key:
                return
            # The tiebreak paging for one order key value is exhausted;
            # move on to items strictly after that order key value.
            nextpage = [[order_key, after_op, lastitem[order_key]]]
            prev_page_all_same_order_key = False
            continue

        seen_prevpage = seen_thispage
        seen_thispage = set()

        for item in page:
            # In cases where there's more than one record with the
            # same order key, the result could include records we
            # already saw in the last page.  Skip them.
            seen_key = key_getter(item)
            if seen_key in seen_prevpage:
                continue
            seen_thispage.add(seen_key)
            yield item

        firstitem = page[0]
        lastitem = page[-1]

        if firstitem[order_key] == lastitem[order_key]:
            # Got a page where every item has the same order key.
            # Switch to using tiebreak key for paging.
            nextpage = [
                [order_key, "=", lastitem[order_key]],
                [tiebreak_key, after_op, lastitem[tiebreak_key]],
            ]
            prev_page_all_same_order_key = True
        else:
            # Start from the last order key seen, but skip the last
            # known uuid to avoid retrieving the same row twice.  If
            # there are multiple rows with the same order key it is
            # still likely we'll end up retrieving duplicate rows.
            # That's handled by tracking the "seen" rows for each page
            # so they can be skipped if they show up on the next page.
            nextpage = [[order_key, from_op, lastitem[order_key]]]
            if tiebreak_key == "uuid":
                nextpage += [[tiebreak_key, "!=", lastitem[tiebreak_key]]]
            prev_page_all_same_order_key = False
def iter_computed_permissions(
        fn: Callable[..., 'arvados.api_resources.ArvadosAPIRequest'],
        order_key: str='user_uuid',
        num_retries: int=0,
        ascending: bool=True,
        key_fields: Container[str]=('user_uuid', 'target_uuid'),
        **kwargs: Any,
) -> Iterator[Dict[str, Any]]:
    """Iterate all `computed_permission` resources

    This method is the same as `keyset_list_all`, except that its
    default arguments are suitable for the computed_permissions API.

    Arguments:

    * fn: Callable[..., arvados.api_resources.ArvadosAPIRequest] ---
      see `keyset_list_all`. Typically this is an instance of
      `arvados.api_resources.ComputedPermissions.list`. Given an
      Arvados API client named `arv`, typical usage is
      `iter_computed_permissions(arv.computed_permissions().list)`.

    * order_key: str --- see `keyset_list_all`. Default
      `'user_uuid'`.

    * num_retries: int --- see `keyset_list_all`.

    * ascending: bool --- see `keyset_list_all`.

    * key_fields: Container[str] --- see `keyset_list_all`. Default
      `('user_uuid', 'target_uuid')`.

    Additional keyword arguments are forwarded to `keyset_list_all`.
    """
    return keyset_list_all(
        fn,
        order_key=order_key,
        num_retries=num_retries,
        ascending=ascending,
        key_fields=key_fields,
        **kwargs,
    )
def ca_certs_path(fallback: T=httplib2.CA_CERTS) -> Union[str, T]:
    """Return the path of the best available source of CA certificates

    This function checks various known paths that provide trusted CA
    certificates, and returns the first one that exists. It checks:

    * the path in the `SSL_CERT_FILE` environment variable (used by OpenSSL)
    * `/etc/arvados/ca-certificates.crt`, respected by all Arvados software
    * `/etc/ssl/certs/ca-certificates.crt`, the default store on Debian-based
      distributions
    * `/etc/pki/tls/certs/ca-bundle.crt`, the default store on Red Hat-based
      distributions

    If none of these paths exist, this function returns the value of `fallback`.

    Arguments:

    * fallback: T --- The value to return if none of the known paths exist.
      The default value is `httplib2.CA_CERTS`, documented here as the
      certificate store of Mozilla's trusted CAs included with the Python
      [certifi][] package.

    [certifi]: https://pypi.org/project/certifi/
    """
    candidates = [
        # SSL_CERT_FILE and SSL_CERT_DIR are openssl overrides - note
        # that httplib2 itself also supports HTTPLIB2_CA_CERTS.
        os.environ.get('SSL_CERT_FILE'),
        # Arvados specific:
        '/etc/arvados/ca-certificates.crt',
        # Debian:
        '/etc/ssl/certs/ca-certificates.crt',
        # Red Hat:
        '/etc/pki/tls/certs/ca-bundle.crt',
    ]
    # The loop variable is deliberately not named `ca_certs_path`, so it
    # does not shadow this function's own name.
    for path in candidates:
        # Skip an unset/empty SSL_CERT_FILE before touching the filesystem.
        if path and os.path.exists(path):
            return path
    return fallback
def new_request_id() -> str:
    """Return a random request ID

    This function generates and returns a random string suitable for use as a
    `X-Request-Id` header value in the Arvados API.

    The result is a fixed prefix followed by 20 random base-36 digits
    (`0-9`, `a-z`).
    """
    digits = "0123456789abcdefghijklmnopqrstuvwxyz"
    # 2**104 > 36**20 > 2**103
    n = random.getrandbits(104)
    suffix = []
    for _ in range(20):
        # Peel off the least significant base-36 digit first.
        n, c = divmod(n, 36)
        suffix.append(digits[c])
    # NOTE(review): the "req-" prefix was not visible in the reviewed text;
    # it is assumed here --- confirm against the original.
    return "req-" + "".join(suffix)
def get_config_once(svc: 'arvados.api_resources.ArvadosAPIClient') -> Dict[str, Any]:
    """Return an Arvados cluster's configuration, with caching

    This function gets and returns the Arvados configuration from the API
    server. It caches the result on the client object and reuses it on any
    future calls.

    If the API server's discovery document has no `configs` resource, this
    function returns an empty dict and caches nothing.

    Arguments:

    * svc: arvados.api_resources.ArvadosAPIClient --- The Arvados API client
      object to use to retrieve and cache the Arvados cluster configuration.
    """
    # Default to an empty mapping so a discovery document without a
    # 'resources' key is treated like one without the endpoint.
    if not svc._rootDesc.get('resources', {}).get('configs', False):
        # Old API server version, no config export endpoint
        return {}
    if not hasattr(svc, '_cached_config'):
        svc._cached_config = svc.configs().get().execute()
    return svc._cached_config
def get_vocabulary_once(svc: 'arvados.api_resources.ArvadosAPIClient') -> Dict[str, Any]:
    """Return an Arvados cluster's vocabulary, with caching

    This function gets and returns the Arvados vocabulary from the API
    server. It caches the result on the client object and reuses it on any
    future calls.

    If the API server's discovery document has no `vocabularies` resource,
    this function returns an empty dict and caches nothing.

    .. HINT:: Low-level method
       This is a relatively low-level wrapper around the Arvados API. Most
       users will prefer to use `arvados.vocabulary.load_vocabulary`.

    Arguments:

    * svc: arvados.api_resources.ArvadosAPIClient --- The Arvados API client
      object to use to retrieve and cache the Arvados cluster vocabulary.
    """
    # Default to an empty mapping so a discovery document without a
    # 'resources' key is treated like one without the endpoint.
    if not svc._rootDesc.get('resources', {}).get('vocabularies', False):
        # Old API server version, no vocabulary export endpoint
        return {}
    if not hasattr(svc, '_cached_vocabulary'):
        svc._cached_vocabulary = svc.vocabularies().get().execute()
    return svc._cached_vocabulary
def trim_name(collectionname: str) -> str:
    """Limit the length of a name to fit within Arvados API limits

    This function ensures that a string is short enough to use as an object
    name in the Arvados API, leaving room for text that may be added by the
    `ensure_unique_name` argument. If the source name is short enough, it is
    returned unchanged. Otherwise, this function returns a string with excess
    characters removed from the middle of the source string and replaced with
    an ellipsis. The returned string is never longer than 226 (254 - 28)
    characters, ellipsis included.

    Arguments:

    * collectionname: str --- The desired source name
    """
    max_name_len = 254 - 28

    if len(collectionname) > max_name_len:
        over = len(collectionname) - max_name_len
        split = max_name_len // 2
        # Drop one character more than the overflow so the inserted ellipsis
        # is counted against max_name_len too.
        collectionname = collectionname[:split] + "…" + collectionname[split + over + 1:]

    return collectionname