From d8eeb0f02b967153790e54284fd3213b648def20 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Wed, 20 Jul 2022 12:54:20 -0400 Subject: [PATCH] 19280: don't redundantly re-parse the file Arvados-DCO-1.1-Signed-off-by: Peter Amstutz --- sdk/cwl/arvados_cwl/runner.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/sdk/cwl/arvados_cwl/runner.py b/sdk/cwl/arvados_cwl/runner.py index 644713bce2..ed4abbe2ee 100644 --- a/sdk/cwl/arvados_cwl/runner.py +++ b/sdk/cwl/arvados_cwl/runner.py @@ -260,7 +260,8 @@ def discover_secondary_files(fsaccess, builder, inputs, job_order, discovered=No def upload_dependencies(arvrunner, name, document_loader, workflowobj, uri, loadref_run, runtimeContext, - include_primary=True, discovered_secondaryfiles=None): + include_primary=True, discovered_secondaryfiles=None, + cache=None): """Upload the dependencies of the workflowobj document to Keep. Returns a pathmapper object mapping local paths to keep references. Also @@ -279,6 +280,8 @@ def upload_dependencies(arvrunner, name, document_loader, defrg, _ = urllib.parse.urldefrag(joined) if defrg not in loaded: loaded.add(defrg) + if cache is not None and defrg in cache: + return cache[defrg] # Use fetch_text to get raw file (before preprocessing). text = document_loader.fetch_text(defrg) if isinstance(text, bytes): @@ -286,7 +289,10 @@ def upload_dependencies(arvrunner, name, document_loader, else: textIO = StringIO(text) yamlloader = YAML(typ='safe', pure=True) - return yamlloader.load(textIO) + result = yamlloader.load(textIO) + if cache is not None: + cache[defrg] = result + return result else: return {} @@ -652,7 +658,7 @@ def upload_workflow_deps(arvrunner, tool, runtimeContext): document_loader = tool.doc_loader merged_map = {} - + tool_dep_cache = {} def upload_tool_deps(deptool): if "id" in deptool: discovered_secondaryfiles = {} @@ -664,7 +670,8 @@ def upload_workflow_deps(arvrunner, tool, runtimeContext): False, runtimeContext, include_primary=False, - discovered_secondaryfiles=discovered_secondaryfiles) + discovered_secondaryfiles=discovered_secondaryfiles, + cache=tool_dep_cache) document_loader.idx[deptool["id"]] = deptool toolmap = {} for k,v in pm.items(): -- 2.30.2