From d791fa7adb14991c972b6166f39155ff314b7d1e Mon Sep 17 00:00:00 2001
From: Peter Amstutz
Date: Tue, 2 Aug 2022 10:46:54 -0400
Subject: [PATCH] 19280: More metrics

Arvados-DCO-1.1-Signed-off-by: Peter Amstutz
---
 sdk/cwl/arvados_cwl/runner.py | 78 +++++++++++++++++++----------------
 1 file changed, 43 insertions(+), 35 deletions(-)

diff --git a/sdk/cwl/arvados_cwl/runner.py b/sdk/cwl/arvados_cwl/runner.py
index b00a30ad5f..2582c0a3a3 100644
--- a/sdk/cwl/arvados_cwl/runner.py
+++ b/sdk/cwl/arvados_cwl/runner.py
@@ -311,17 +311,19 @@ def upload_dependencies(arvrunner, name, document_loader,
 
     metadata = scanobj
 
-    sc_result = scandeps(uri, scanobj,
-                         loadref_fields,
-                         set(("$include", "location")),
-                         loadref, urljoin=document_loader.fetcher.urljoin,
-                         nestdirs=False)
-
-    optional_deps = scandeps(uri, scanobj,
-                             loadref_fields,
-                             set(("$schemas",)),
-                             loadref, urljoin=document_loader.fetcher.urljoin,
-                             nestdirs=False)
+    with Perf(metrics, "scandeps include, location"):
+        sc_result = scandeps(uri, scanobj,
+                             loadref_fields,
+                             set(("$include", "location")),
+                             loadref, urljoin=document_loader.fetcher.urljoin,
+                             nestdirs=False)
+
+    with Perf(metrics, "scandeps $schemas"):
+        optional_deps = scandeps(uri, scanobj,
+                                 loadref_fields,
+                                 set(("$schemas",)),
+                                 loadref, urljoin=document_loader.fetcher.urljoin,
+                                 nestdirs=False)
 
     sc_result.extend(optional_deps)
 
@@ -351,30 +353,34 @@ def upload_dependencies(arvrunner, name, document_loader,
                 sc.append(obj)
         collect_uuids(obj)
 
-    visit_class(workflowobj, ("File", "Directory"), collect_uuids)
-    visit_class(sc_result, ("File", "Directory"), collect_uploads)
+    with Perf(metrics, "collect uuids"):
+        visit_class(workflowobj, ("File", "Directory"), collect_uuids)
+
+    with Perf(metrics, "collect uploads"):
+        visit_class(sc_result, ("File", "Directory"), collect_uploads)
 
     # Resolve any collection uuids we found to portable data hashes
     # and assign them to uuid_map
     uuid_map = {}
     fetch_uuids = list(uuids.keys())
-    while fetch_uuids:
-        # For a large number of fetch_uuids, API server may limit
-        # response size, so keep fetching from API server has nothing
-        # more to give us.
-        lookups = arvrunner.api.collections().list(
-            filters=[["uuid", "in", fetch_uuids]],
-            count="none",
-            select=["uuid", "portable_data_hash"]).execute(
-                num_retries=arvrunner.num_retries)
+    with Perf(metrics, "fetch_uuids"):
+        while fetch_uuids:
+            # For a large number of fetch_uuids, the API server may
+            # limit the response size, so keep fetching until the API
+            # server has nothing more to give us.
+            lookups = arvrunner.api.collections().list(
+                filters=[["uuid", "in", fetch_uuids]],
+                count="none",
+                select=["uuid", "portable_data_hash"]).execute(
+                    num_retries=arvrunner.num_retries)
 
-        if not lookups["items"]:
-            break
+            if not lookups["items"]:
+                break
 
-        for l in lookups["items"]:
-            uuid_map[l["uuid"]] = l["portable_data_hash"]
+            for l in lookups["items"]:
+                uuid_map[l["uuid"]] = l["portable_data_hash"]
 
-        fetch_uuids = [u for u in fetch_uuids if u not in uuid_map]
+            fetch_uuids = [u for u in fetch_uuids if u not in uuid_map]
 
     normalizeFilesDirs(sc)
 
@@ -420,12 +426,13 @@ def upload_dependencies(arvrunner, name, document_loader,
         else:
             del discovered[d]
 
-    mapper = ArvPathMapper(arvrunner, sc, "",
-                           "keep:%s",
-                           "keep:%s/%s",
-                           name=name,
-                           single_collection=True,
-                           optional_deps=optional_deps)
+    with Perf(metrics, "mapper"):
+        mapper = ArvPathMapper(arvrunner, sc, "",
+                               "keep:%s",
+                               "keep:%s/%s",
+                               name=name,
+                               single_collection=True,
+                               optional_deps=optional_deps)
 
     keeprefs = set()
     def addkeepref(k):
@@ -469,8 +476,9 @@ def upload_dependencies(arvrunner, name, document_loader,
                 p["location"] = "keep:%s%s" % (uuid_map[uuid], gp.groups()[1] if gp.groups()[1] else "")
                 p[collectionUUID] = uuid
 
-    visit_class(workflowobj, ("File", "Directory"), setloc)
-    visit_class(discovered, ("File", "Directory"), setloc)
+    with Perf(metrics, "setloc"):
+        visit_class(workflowobj, ("File", "Directory"), setloc)
+        visit_class(discovered, ("File", "Directory"), setloc)
 
     if discovered_secondaryfiles is not None:
         for d in discovered:
-- 
2.30.2
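For reference, the patch relies on a Perf context manager and a metrics
logger that are already defined elsewhere in arvados_cwl and imported by
runner.py. As a rough sketch of the idea (illustrative only; the logger
name and log format below are assumptions, not the actual
arvados_cwl.perf implementation), a timing context manager like this
records how long each wrapped block takes:

import logging
import time

# Assumed logger name for illustration; the real runner configures its
# own metrics logger.
metrics = logging.getLogger('arvados.cwl-runner.metrics')

class Perf(object):
    """Sketch: log the wall-clock time spent in the enclosed block."""

    def __init__(self, logger, name):
        self.logger = logger
        self.name = name

    def __enter__(self):
        self.start = time.time()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Log on exit even if the block raised, so partial runs still
        # produce timing data; returning False propagates the exception.
        self.logger.info("%s took %.3f seconds", self.name,
                         time.time() - self.start)
        return False

Each "with Perf(metrics, ...)" block added by the patch then emits one
timing entry in the metrics log, which makes it possible to see which
stage of upload_dependencies (dependency scanning, collection uuid
resolution, path mapping, or location rewriting) dominates the runtime.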