import random


def keyset_list_all(fn, order_key="created_at", num_retries=0, ascending=True, **kwargs):
    """Yield every item returned by a list-style API call, using keyset paging.

    Rather than paging with an ever-growing offset, each request filters on
    the order key (and uuid) of the last item seen on the previous page.

    Arguments:

    * fn -- callable invoked as ``fn(**kwargs)``; the returned object must
      provide ``execute(num_retries=...)`` returning a dict with an
      ``"items"`` list of dicts.
    * order_key -- name of the field used to order and page through the
      results.
    * num_retries -- passed through to ``execute()``.
    * ascending -- sort direction applied to ``order_key``.
    * kwargs -- additional arguments passed to ``fn``.  ``limit``,
      ``count`` and ``order`` are always overwritten.  ``filters`` are
      combined with the paging filters on every request.  If ``select`` is
      given, ``"uuid"`` and ``order_key`` are added to a copy of it because
      the paging logic reads both fields from each item.
    """
    kwargs["limit"] = 1000
    kwargs["count"] = 'none'
    kwargs["order"] = ["%s %s" % (order_key, "asc" if ascending else "desc"), "uuid asc"]
    # Copy so the caller's filter list is never aliased; tolerate filters=None.
    other_filters = list(kwargs.get("filters") or [])

    if kwargs.get("select") is not None:
        # Work on a copy: the caller's list must not be modified.
        select = list(kwargs["select"])
        for required in ("uuid", order_key):
            if required not in select:
                select.append(required)
        kwargs["select"] = select

    nextpage = []
    seen_thispage = set()
    lastitem = None
    prev_page_all_same_order_key = False

    while True:
        kwargs["filters"] = nextpage + other_filters
        page = fn(**kwargs).execute(num_retries=num_retries)["items"]

        if not page:
            if not prev_page_all_same_order_key:
                return
            # The uuid-based paging within a single order key value is
            # exhausted; move on to rows strictly beyond that value.
            nextpage = [[order_key, ">" if ascending else "<", lastitem[order_key]]]
            prev_page_all_same_order_key = False
            continue

        seen_prevpage = seen_thispage
        seen_thispage = set()

        for item in page:
            # In cases where there's more than one record with the
            # same order key, the result could include records we
            # already saw in the last page.  Skip them.
            if item["uuid"] in seen_prevpage:
                continue
            seen_thispage.add(item["uuid"])
            yield item

        firstitem = page[0]
        lastitem = page[-1]

        if firstitem[order_key] == lastitem[order_key]:
            # Got a page where every item has the same order key.
            # Switch to using uuid for paging.
            nextpage = [[order_key, "=", lastitem[order_key]],
                        ["uuid", ">", lastitem["uuid"]]]
            prev_page_all_same_order_key = True
        else:
            # Start from the last order key seen, but skip the last
            # known uuid to avoid retrieving the same row twice.  If
            # there are multiple rows with the same order key it is
            # still likely we'll end up retrieving duplicate rows.
            # That's handled by tracking the "seen" rows for each page
            # so they can be skipped if they show up on the next page.
            nextpage = [[order_key, ">=" if ascending else "<=", lastitem[order_key]],
                        ["uuid", "!=", lastitem["uuid"]]]
            prev_page_all_same_order_key = False


def new_request_id():
    """Return a random request id: ``"req-"`` plus 20 base-36 characters."""
    alphabet = "0123456789abcdefghijklmnopqrstuvwxyz"
    # 2**104 > 36**20 > 2**103
    n = random.getrandbits(104)
    digits = []
    for _ in range(20):
        # Least significant base-36 digit is emitted first.
        n, c = divmod(n, 36)
        digits.append(alphabet[c])
    return "req-" + "".join(digits)


def _api_has_resource(svc, name):
    """Return True if the discovery document of `svc` lists resource `name`."""
    # A discovery document without a 'resources' entry is treated the same
    # as one that lacks the requested resource.
    resources = svc._rootDesc.get('resources') or {}
    return bool(resources.get(name, False))


def get_config_once(svc):
    """Return the exported config for `svc`, fetching it at most once.

    The result is stored on the object as ``_cached_config``.  Returns an
    empty dict (without caching) when the discovery document has no
    ``configs`` resource.
    """
    if not _api_has_resource(svc, 'configs'):
        # Old API server version, no config export endpoint
        return {}
    if not hasattr(svc, '_cached_config'):
        svc._cached_config = svc.configs().get().execute()
    return svc._cached_config


def get_vocabulary_once(svc):
    """Return the exported vocabulary for `svc`, fetching it at most once.

    The result is stored on the object as ``_cached_vocabulary``.  Returns
    an empty dict (without caching) when the discovery document has no
    ``vocabularies`` resource.
    """
    if not _api_has_resource(svc, 'vocabularies'):
        # Old API server version, no vocabulary export endpoint
        return {}
    if not hasattr(svc, '_cached_vocabulary'):
        svc._cached_vocabulary = svc.vocabularies().get().execute()
    return svc._cached_vocabulary