From e2267bd99209651c61425f335230e515421b2ef4 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Fri, 28 Oct 2022 17:30:39 -0400 Subject: [PATCH] 19678: Fix for parameters called 'name' Also fix regression involving default file references appearing in nested processes (inline declaration of a tool within a workflow). Also fixed some dependency issues preventing arvados/jobs developer image from building. Arvados-DCO-1.1-Signed-off-by: Peter Amstutz --- sdk/cwl/arvados_cwl/runner.py | 53 +++++++++++++++++++-------------- sdk/cwl/setup.py | 3 +- sdk/cwl/tests/19678-name-id.cwl | 26 ++++++++++++++++ sdk/cwl/tests/arvados-tests.yml | 10 +++++++ sdk/dev-jobs.dockerfile | 6 ++++ 5 files changed, 75 insertions(+), 23 deletions(-) create mode 100644 sdk/cwl/tests/19678-name-id.cwl diff --git a/sdk/cwl/arvados_cwl/runner.py b/sdk/cwl/arvados_cwl/runner.py index a307cf7f66..4861039198 100644 --- a/sdk/cwl/arvados_cwl/runner.py +++ b/sdk/cwl/arvados_cwl/runner.py @@ -465,10 +465,7 @@ def upload_dependencies(arvrunner, name, document_loader, builder_job_order, discovered) - _jobloaderctx = jobloaderctx.copy() - loader = schema_salad.ref_resolver.Loader(_jobloaderctx, fetcher_constructor=document_loader.fetcher_constructor) - - copied, _ = loader.resolve_all(copy.deepcopy(cmap(workflowobj)), base_url=uri, checklinks=False) + copied, _ = document_loader.resolve_all(copy.deepcopy(cmap(workflowobj)), base_url=uri, checklinks=False) visit_class(copied, ("CommandLineTool", "Workflow"), discover_default_secondary_files) for d in list(discovered): @@ -698,9 +695,12 @@ def upload_job_order(arvrunner, name, tool, job_order, runtimeContext): tool.tool["inputs"], job_order) + _jobloaderctx = jobloaderctx.copy() + jobloader = schema_salad.ref_resolver.Loader(_jobloaderctx, fetcher_constructor=tool.doc_loader.fetcher_constructor) + jobmapper = upload_dependencies(arvrunner, name, - tool.doc_loader, + jobloader, job_order, job_order.get("id", "#"), False, @@ -728,28 +728,37 @@ def upload_workflow_deps(arvrunner, tool, runtimeContext): merged_map = {} tool_dep_cache = {} + + todo = [] + + # Standard traversal is top down, we want to go bottom up, so use + # the visitor to accumalate a list of nodes to visit, then + # visit them in reverse order. def upload_tool_deps(deptool): if "id" in deptool: - discovered_secondaryfiles = {} - with Perf(metrics, "upload_dependencies %s" % shortname(deptool["id"])): - pm = upload_dependencies(arvrunner, - "%s dependencies" % (shortname(deptool["id"])), - document_loader, - deptool, - deptool["id"], - False, - runtimeContext, - include_primary=False, - discovered_secondaryfiles=discovered_secondaryfiles, - cache=tool_dep_cache) - document_loader.idx[deptool["id"]] = deptool - toolmap = {} - for k,v in pm.items(): - toolmap[k] = v.resolved - merged_map[deptool["id"]] = FileUpdates(toolmap, discovered_secondaryfiles) + todo.append(deptool) tool.visit(upload_tool_deps) + for deptool in reversed(todo): + discovered_secondaryfiles = {} + with Perf(metrics, "upload_dependencies %s" % shortname(deptool["id"])): + pm = upload_dependencies(arvrunner, + "%s dependencies" % (shortname(deptool["id"])), + document_loader, + deptool, + deptool["id"], + False, + runtimeContext, + include_primary=False, + discovered_secondaryfiles=discovered_secondaryfiles, + cache=tool_dep_cache) + document_loader.idx[deptool["id"]] = deptool + toolmap = {} + for k,v in pm.items(): + toolmap[k] = v.resolved + merged_map[deptool["id"]] = FileUpdates(toolmap, discovered_secondaryfiles) + return merged_map def arvados_jobs_image(arvrunner, img, runtimeContext): diff --git a/sdk/cwl/setup.py b/sdk/cwl/setup.py index e70955c20b..33c1ffcbf1 100644 --- a/sdk/cwl/setup.py +++ b/sdk/cwl/setup.py @@ -42,7 +42,8 @@ setup(name='arvados-cwl-runner', 'setuptools', 'ciso8601 >= 2.0.0', 'networkx < 2.6', - 'msgpack==1.0.3' + 'msgpack==1.0.3', + 'importlib-metadata<5' ], data_files=[ ('share/doc/arvados-cwl-runner', ['LICENSE-2.0.txt', 'README.rst']), diff --git a/sdk/cwl/tests/19678-name-id.cwl b/sdk/cwl/tests/19678-name-id.cwl new file mode 100644 index 0000000000..afed34b930 --- /dev/null +++ b/sdk/cwl/tests/19678-name-id.cwl @@ -0,0 +1,26 @@ +# Copyright (C) The Arvados Authors. All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +class: Workflow +cwlVersion: v1.1 +inputs: + - type: + fields: + - name: first + type: string + - name: last + type: string + type: record + id: name +outputs: + - type: + fields: + - name: first + type: string + - name: last + type: string + type: record + id: processed_name + outputSource: name +steps: [] diff --git a/sdk/cwl/tests/arvados-tests.yml b/sdk/cwl/tests/arvados-tests.yml index 2f309cfe81..4ed4d4ac32 100644 --- a/sdk/cwl/tests/arvados-tests.yml +++ b/sdk/cwl/tests/arvados-tests.yml @@ -469,3 +469,13 @@ } tool: 19109-upload-secondary.cwl doc: "Test issue 19109 - correctly discover & upload secondary files" + +- job: 19678-name-id.yml + output: { + "processed_name": { + "first": "foo", + "last": "bar" + } + } + tool: 19678-name-id.cwl + doc: "Test issue 19678 - non-string type input parameter called 'name'" diff --git a/sdk/dev-jobs.dockerfile b/sdk/dev-jobs.dockerfile index b55b056b2d..60db4a8898 100644 --- a/sdk/dev-jobs.dockerfile +++ b/sdk/dev-jobs.dockerfile @@ -35,11 +35,17 @@ ADD cwl/salad_dist/$salad /tmp/ ADD cwl/cwltool_dist/$cwltool /tmp/ ADD cwl/dist/$runner /tmp/ +RUN $pipcmd install wheel RUN cd /tmp/arvados-python-client-* && $pipcmd install . RUN if test -d /tmp/schema-salad-* ; then cd /tmp/schema-salad-* && $pipcmd install . ; fi RUN if test -d /tmp/cwltool-* ; then cd /tmp/cwltool-* && $pipcmd install . ; fi RUN cd /tmp/arvados-cwl-runner-* && $pipcmd install . +# Sometimes Python dependencies install successfully but don't +# actually work. So run arvados-cwl-runner here to catch fun +# dependency errors like pkg_resources.DistributionNotFound. +RUN arvados-cwl-runner --version + # Install dependencies and set up system. RUN /usr/sbin/adduser --disabled-password \ --gecos 'Crunch execution user' crunch && \ -- 2.30.2