# Reconstructed from a whitespace-collapsed patch:
#   X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/3cc81d1201ec9abfe818207c450df62232f5bce9..35895ee91c820680bb7df9696ab2e92525ead2ac:/sdk/python/arvados/util.py
#   diff --git a/sdk/python/arvados/util.py b/sdk/python/arvados/util.py
#   index 6c9822e9f0..a4b7e64a05 100644
# The patch adds the three functions below.  Its unchanged context lines
# (the end of `list_all`, the start of `ca_certs_path(fallback=
# httplib2.CA_CERTS)`, and the end of `get_config_once`) are only partially
# visible in the patch and are therefore not reproduced here.


def keyset_list_all(fn, order_key="created_at", num_retries=0, ascending=True, **kwargs):
    """Iterate over every record of an API list call using keyset paging.

    Instead of paging by offset, each request filters on the sort key of
    the last record already returned, with "uuid" as the tie breaker.

    Arguments:

    fn -- callable accepting the list keyword arguments and returning an
      object whose ``execute(num_retries=...)`` returns a dict with an
      "items" list of records (dicts).
    order_key -- record field to sort and page on.
    num_retries -- passed through to ``execute()``.
    ascending -- sort direction; False walks the records in descending
      order.
    **kwargs -- extra arguments forwarded to ``fn``.  "limit", "count" and
      "order" are always overwritten.  "filters" is combined with the
      paging filters.  "select", when given, is extended (in a copy) with
      "uuid" and ``order_key`` because paging needs both fields.

    This is a generator: it yields one record dict at a time.
    """
    pagesize = 1000
    kwargs["limit"] = pagesize
    kwargs["count"] = 'none'
    asc = "asc" if ascending else "desc"
    kwargs["order"] = ["%s %s" % (order_key, asc), "uuid %s" % asc]
    # Copy so that a tuple (or None) from the caller can still be
    # concatenated with the paging filters below.
    other_filters = list(kwargs.get("filters") or [])

    if kwargs.get("select") is not None:
        # Work on a copy: the caller's list must not be modified.  Both
        # "uuid" and the order key are read from every page to compute the
        # next page's filters, so they have to be part of the selection.
        select = list(kwargs["select"])
        for required in ("uuid", order_key):
            if required not in select:
                select.append(required)
        kwargs["select"] = select

    nextpage = []
    seen_prevpage = set()
    seen_thispage = set()
    lastitem = None
    prev_page_all_same_order_key = False

    while True:
        kwargs["filters"] = nextpage + other_filters
        items = fn(**kwargs).execute(num_retries=num_retries)

        if len(items["items"]) == 0:
            if prev_page_all_same_order_key:
                # We were paging by uuid within a single order key value
                # and ran out of rows for it; move on to the next value.
                nextpage = [[order_key, ">" if ascending else "<", lastitem[order_key]]]
                prev_page_all_same_order_key = False
                continue
            else:
                return

        seen_prevpage = seen_thispage
        seen_thispage = set()

        for i in items["items"]:
            # In cases where there's more than one record with the
            # same order key, the result could include records we
            # already saw in the last page.  Skip them.
            if i["uuid"] in seen_prevpage:
                continue
            seen_thispage.add(i["uuid"])
            yield i

        firstitem = items["items"][0]
        lastitem = items["items"][-1]

        if firstitem[order_key] == lastitem[order_key]:
            # Got a page where every item has the same order key.
            # Switch to using uuid for paging.
            nextpage = [[order_key, "=", lastitem[order_key]],
                        ["uuid", ">" if ascending else "<", lastitem["uuid"]]]
            prev_page_all_same_order_key = True
        else:
            # Start from the last order key seen, but skip the last
            # known uuid to avoid retrieving the same row twice.  If
            # there are multiple rows with the same order key it is
            # still likely we'll end up retrieving duplicate rows.
            # That's handled by tracking the "seen" rows for each page
            # so they can be skipped if they show up on the next page.
            nextpage = [[order_key, ">=" if ascending else "<=", lastitem[order_key]],
                        ["uuid", "!=", lastitem["uuid"]]]
            prev_page_all_same_order_key = False


def get_vocabulary_once(svc):
    """Return the vocabulary for ``svc``, fetching it at most once.

    The result of ``svc.vocabularies().get().execute()`` is cached on the
    ``svc`` object as ``_cached_vocabulary``.  An empty dict is returned
    (and nothing is cached) when the discovery document in
    ``svc._rootDesc`` lists no "vocabularies" resource.
    """
    resources = svc._rootDesc.get('resources') or {}
    if not resources.get('vocabularies'):
        # Old API server version, no vocabulary export endpoint
        return {}
    if not hasattr(svc, '_cached_vocabulary'):
        svc._cached_vocabulary = svc.vocabularies().get().execute()
    return svc._cached_vocabulary


def trim_name(collectionname):
    """
    trim_name takes a record name (collection name, project name, etc)
    and trims it to fit the 255 character name limit, with additional
    space for the timestamp added by ensure_unique_name, by removing
    excess characters from the middle and inserting an ellipsis.

    Names of up to 226 characters are returned unchanged; longer names
    come back exactly 227 characters long (226 kept characters plus the
    one-character ellipsis).
    """

    max_name_len = 254 - 28

    if len(collectionname) > max_name_len:
        over = len(collectionname) - max_name_len
        split = max_name_len // 2
        collectionname = collectionname[0:split] + "…" + collectionname[split+over:]

    return collectionname