19678: Fix for parameters called 'name' 19678-job-loader
authorPeter Amstutz <peter.amstutz@curii.com>
Fri, 28 Oct 2022 21:30:39 +0000 (17:30 -0400)
committerPeter Amstutz <peter.amstutz@curii.com>
Fri, 28 Oct 2022 21:41:23 +0000 (17:41 -0400)
Also fix regression involving default file references appearing in
nested processes (inline declaration of a tool within a workflow).

Also fixed some dependency issues preventing arvados/jobs developer
image from building.

Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz@curii.com>

sdk/cwl/arvados_cwl/runner.py
sdk/cwl/setup.py
sdk/cwl/tests/19678-name-id.cwl [new file with mode: 0644]
sdk/cwl/tests/arvados-tests.yml
sdk/dev-jobs.dockerfile

index a307cf7f666b6e6f5bdda0203abea6f081a7ef86..4861039198a18c36dbd0ae6d805be060cff1e224 100644 (file)
@@ -465,10 +465,7 @@ def upload_dependencies(arvrunner, name, document_loader,
                                  builder_job_order,
                                  discovered)
 
                                  builder_job_order,
                                  discovered)
 
-    _jobloaderctx = jobloaderctx.copy()
-    loader = schema_salad.ref_resolver.Loader(_jobloaderctx, fetcher_constructor=document_loader.fetcher_constructor)
-
-    copied, _ = loader.resolve_all(copy.deepcopy(cmap(workflowobj)), base_url=uri, checklinks=False)
+    copied, _ = document_loader.resolve_all(copy.deepcopy(cmap(workflowobj)), base_url=uri, checklinks=False)
     visit_class(copied, ("CommandLineTool", "Workflow"), discover_default_secondary_files)
 
     for d in list(discovered):
     visit_class(copied, ("CommandLineTool", "Workflow"), discover_default_secondary_files)
 
     for d in list(discovered):
@@ -698,9 +695,12 @@ def upload_job_order(arvrunner, name, tool, job_order, runtimeContext):
                              tool.tool["inputs"],
                              job_order)
 
                              tool.tool["inputs"],
                              job_order)
 
+    _jobloaderctx = jobloaderctx.copy()
+    jobloader = schema_salad.ref_resolver.Loader(_jobloaderctx, fetcher_constructor=tool.doc_loader.fetcher_constructor)
+
     jobmapper = upload_dependencies(arvrunner,
                                     name,
     jobmapper = upload_dependencies(arvrunner,
                                     name,
-                                    tool.doc_loader,
+                                    jobloader,
                                     job_order,
                                     job_order.get("id", "#"),
                                     False,
                                     job_order,
                                     job_order.get("id", "#"),
                                     False,
@@ -728,28 +728,37 @@ def upload_workflow_deps(arvrunner, tool, runtimeContext):
 
     merged_map = {}
     tool_dep_cache = {}
 
     merged_map = {}
     tool_dep_cache = {}
+
+    todo = []
+
+    # Standard traversal is top down, we want to go bottom up, so use
+    # the visitor to accumalate a list of nodes to visit, then
+    # visit them in reverse order.
     def upload_tool_deps(deptool):
         if "id" in deptool:
     def upload_tool_deps(deptool):
         if "id" in deptool:
-            discovered_secondaryfiles = {}
-            with Perf(metrics, "upload_dependencies %s" % shortname(deptool["id"])):
-                pm = upload_dependencies(arvrunner,
-                                         "%s dependencies" % (shortname(deptool["id"])),
-                                         document_loader,
-                                         deptool,
-                                         deptool["id"],
-                                         False,
-                                         runtimeContext,
-                                         include_primary=False,
-                                         discovered_secondaryfiles=discovered_secondaryfiles,
-                                         cache=tool_dep_cache)
-            document_loader.idx[deptool["id"]] = deptool
-            toolmap = {}
-            for k,v in pm.items():
-                toolmap[k] = v.resolved
-            merged_map[deptool["id"]] = FileUpdates(toolmap, discovered_secondaryfiles)
+            todo.append(deptool)
 
     tool.visit(upload_tool_deps)
 
 
     tool.visit(upload_tool_deps)
 
+    for deptool in reversed(todo):
+        discovered_secondaryfiles = {}
+        with Perf(metrics, "upload_dependencies %s" % shortname(deptool["id"])):
+            pm = upload_dependencies(arvrunner,
+                                     "%s dependencies" % (shortname(deptool["id"])),
+                                     document_loader,
+                                     deptool,
+                                     deptool["id"],
+                                     False,
+                                     runtimeContext,
+                                     include_primary=False,
+                                     discovered_secondaryfiles=discovered_secondaryfiles,
+                                     cache=tool_dep_cache)
+        document_loader.idx[deptool["id"]] = deptool
+        toolmap = {}
+        for k,v in pm.items():
+            toolmap[k] = v.resolved
+        merged_map[deptool["id"]] = FileUpdates(toolmap, discovered_secondaryfiles)
+
     return merged_map
 
 def arvados_jobs_image(arvrunner, img, runtimeContext):
     return merged_map
 
 def arvados_jobs_image(arvrunner, img, runtimeContext):
index e70955c20bb9b359c2ff67db666209a3c99a74e8..33c1ffcbf1682471c31d6bfae46d3937139f9dde 100644 (file)
@@ -42,7 +42,8 @@ setup(name='arvados-cwl-runner',
           'setuptools',
           'ciso8601 >= 2.0.0',
           'networkx < 2.6',
           'setuptools',
           'ciso8601 >= 2.0.0',
           'networkx < 2.6',
-          'msgpack==1.0.3'
+          'msgpack==1.0.3',
+          'importlib-metadata<5'
       ],
       data_files=[
           ('share/doc/arvados-cwl-runner', ['LICENSE-2.0.txt', 'README.rst']),
       ],
       data_files=[
           ('share/doc/arvados-cwl-runner', ['LICENSE-2.0.txt', 'README.rst']),
diff --git a/sdk/cwl/tests/19678-name-id.cwl b/sdk/cwl/tests/19678-name-id.cwl
new file mode 100644 (file)
index 0000000..afed34b
--- /dev/null
@@ -0,0 +1,26 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+class: Workflow
+cwlVersion: v1.1
+inputs:
+  - type:
+      fields:
+        - name: first
+          type: string
+        - name: last
+          type: string
+      type: record
+    id: name
+outputs:
+  - type:
+      fields:
+        - name: first
+          type: string
+        - name: last
+          type: string
+      type: record
+    id: processed_name
+    outputSource: name
+steps: []
index 2f309cfe81e6aae5a26ebacdea842d957d07ab0b..4ed4d4ac32fcc014dcdf57b592e1661bb1188926 100644 (file)
   }
   tool: 19109-upload-secondary.cwl
   doc: "Test issue 19109 - correctly discover & upload secondary files"
   }
   tool: 19109-upload-secondary.cwl
   doc: "Test issue 19109 - correctly discover & upload secondary files"
+
+- job: 19678-name-id.yml
+  output: {
+    "processed_name": {
+        "first": "foo",
+        "last": "bar"
+    }
+  }
+  tool: 19678-name-id.cwl
+  doc: "Test issue 19678 - non-string type input parameter called 'name'"
index b55b056b2d38339fe4bb42ddd15ba267099975ea..60db4a889815c1f66e9c1070b35b1077fc5b1d8b 100644 (file)
@@ -35,11 +35,17 @@ ADD cwl/salad_dist/$salad /tmp/
 ADD cwl/cwltool_dist/$cwltool /tmp/
 ADD cwl/dist/$runner /tmp/
 
 ADD cwl/cwltool_dist/$cwltool /tmp/
 ADD cwl/dist/$runner /tmp/
 
+RUN $pipcmd install wheel
 RUN cd /tmp/arvados-python-client-* && $pipcmd install .
 RUN if test -d /tmp/schema-salad-* ; then cd /tmp/schema-salad-* && $pipcmd install . ; fi
 RUN if test -d /tmp/cwltool-* ; then cd /tmp/cwltool-* && $pipcmd install . ; fi
 RUN cd /tmp/arvados-cwl-runner-* && $pipcmd install .
 
 RUN cd /tmp/arvados-python-client-* && $pipcmd install .
 RUN if test -d /tmp/schema-salad-* ; then cd /tmp/schema-salad-* && $pipcmd install . ; fi
 RUN if test -d /tmp/cwltool-* ; then cd /tmp/cwltool-* && $pipcmd install . ; fi
 RUN cd /tmp/arvados-cwl-runner-* && $pipcmd install .
 
+# Sometimes Python dependencies install successfully but don't
+# actually work.  So run arvados-cwl-runner here to catch fun
+# dependency errors like pkg_resources.DistributionNotFound.
+RUN arvados-cwl-runner --version
+
 # Install dependencies and set up system.
 RUN /usr/sbin/adduser --disabled-password \
       --gecos 'Crunch execution user' crunch && \
 # Install dependencies and set up system.
 RUN /usr/sbin/adduser --disabled-password \
       --gecos 'Crunch execution user' crunch && \