Merge branch '12409-cwl-v1.2' refs #12409
authorPeter Amstutz <peter.amstutz@curii.com>
Wed, 18 Mar 2020 19:15:27 +0000 (15:15 -0400)
committerPeter Amstutz <peter.amstutz@curii.com>
Wed, 18 Mar 2020 19:15:27 +0000 (15:15 -0400)
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz@curii.com>

build/build-dev-docker-jobs-image.sh
build/run-tests.sh
sdk/cwl/arvados_cwl/__init__.py
sdk/cwl/arvados_cwl/arvworkflow.py
sdk/cwl/arvados_cwl/runner.py
sdk/cwl/setup.py
sdk/cwl/test_with_arvbox.sh
sdk/cwl/tests/test_container.py
sdk/cwl/tests/test_submit.py
sdk/cwl/tests/wf/scatter2_subwf.cwl
sdk/python/setup.py

index 52df80f58229c906858bf7a93be3c5e161cb287d..a3d439be6f37fd76affd7a95d7eff30f910a5a3e 100755 (executable)
@@ -16,7 +16,7 @@ Syntax:
 WORKSPACE=path         Path to the Arvados source tree to build packages from
 CWLTOOL=path           (optional) Path to cwltool git repository.
 SALAD=path             (optional) Path to schema_salad git repository.
-PYCMD=pythonexec       (optional) Specify the python executable to use in the docker image. Defaults to "python".
+PYCMD=pythonexec       (optional) Specify the python executable to use in the docker image. Defaults to "python3".
 
 EOF
 
@@ -36,13 +36,13 @@ fi
 
 cd "$WORKSPACE"
 
-py=python
+py=python3
 pipcmd=pip
 if [[ -n "$PYCMD" ]] ; then
     py="$PYCMD"
-    if [[ $py = python3 ]] ; then
-       pipcmd=pip3
-    fi
+fi
+if [[ $py = python3 ]] ; then
+    pipcmd=pip3
 fi
 
 (cd sdk/python && python setup.py sdist)
@@ -79,6 +79,7 @@ if [[ $python_sdk_ts -gt $cwl_runner_ts ]]; then
     cwl_runner_version=$(cd sdk/python && nohash_version_from_git 1.0)
 fi
 
-docker build --build-arg sdk=$sdk --build-arg runner=$runner --build-arg salad=$salad --build-arg cwltool=$cwltool --build-arg pythoncmd=$py --build-arg pipcmd=$pipcmd -f "$WORKSPACE/sdk/dev-jobs.dockerfile" -t arvados/jobs:$cwl_runner_version "$WORKSPACE/sdk"
+set -x
+docker build --no-cache --build-arg sdk=$sdk --build-arg runner=$runner --build-arg salad=$salad --build-arg cwltool=$cwltool --build-arg pythoncmd=$py --build-arg pipcmd=$pipcmd -f "$WORKSPACE/sdk/dev-jobs.dockerfile" -t arvados/jobs:$cwl_runner_version "$WORKSPACE/sdk"
 echo arv-keepdocker arvados/jobs $cwl_runner_version
 arv-keepdocker arvados/jobs $cwl_runner_version
index 7328fad45ee76025f6b8d503d379935ba000cd6a..7a3302de9bc98e968845ae6a42b83d3a51c0d663 100755 (executable)
@@ -127,7 +127,7 @@ sdk/go/blockdigest
 sdk/go/asyncbuf
 sdk/go/stats
 sdk/go/crunchrunner
-sdk/cwl
+sdk/cwl:py3
 sdk/R
 sdk/java-v2
 tools/sync-groups
@@ -995,7 +995,6 @@ pythonstuff=(
     sdk/pam
     sdk/python
     sdk/python:py3
-    sdk/cwl
     sdk/cwl:py3
     services/dockercleaner:py3
     services/fuse
@@ -1273,7 +1272,8 @@ else
                         ${verb}_${target}
                         ;;
                     *)
-                        testargs["$target"]="${opts}"
+                       argstarget=${target%:py3}
+                        testargs["$argstarget"]="${opts}"
                         tt="${testfuncargs[${target}]}"
                         tt="${tt:-$target}"
                         do_$verb $tt
index 2b2acd5688ff3475c24af8242ae0a7c5ef3ffcd9..3dd04040ab5d728b5eac21bab601225135ce810e 100644 (file)
@@ -16,43 +16,6 @@ import sys
 import re
 import pkg_resources  # part of setuptools
 
-### begin monkey patch ###
-# Monkey patch solution for bug #16169
-#
-# There is a bug in upstream cwltool where the version updater needs
-# to replace the document fragments in the loader index with the
-# updated ones, but actually it only does it for the root document.
-# Normally we just fix the bug in upstream but that's challenging
-# because current cwltool dropped support for Python 2.7 and we're
-# still supporting py2 in Arvados 2.0 (although py2 support will most
-# likely be dropped in Arvados 2.1).  Making a bugfix fork comes with
-# its own complications (it would need to be added to PyPi) so monkey
-# patching is the least disruptive fix (and is relatively safe because
-# our cwltool dependency is pinned to a specific version).  This
-# should be removed as soon as a bugfix goes into upstream cwltool and
-# we upgrade to it.
-#
-import cwltool.load_tool
-from cwltool.utils import visit_class
-from six.moves import urllib
-original_resolve_and_validate_document = cwltool.load_tool.resolve_and_validate_document
-def wrapped_resolve_and_validate_document(
-        loadingContext,            # type: LoadingContext
-        workflowobj,               # type: Union[CommentedMap, CommentedSeq]
-        uri,                       # type: Text
-        preprocess_only=False,     # type: bool
-        skip_schemas=None,         # type: Optional[bool]
-        ):
-    loadingContext, uri = original_resolve_and_validate_document(loadingContext, workflowobj, uri, preprocess_only, skip_schemas)
-    if loadingContext.do_update in (True, None):
-        fileuri = urllib.parse.urldefrag(uri)[0]
-        def update_index(pr):
-            loadingContext.loader.idx[pr["id"]] = pr
-        visit_class(loadingContext.loader.idx[fileuri], ("CommandLineTool", "Workflow", "ExpressionTool"), update_index)
-    return loadingContext, uri
-cwltool.load_tool.resolve_and_validate_document = wrapped_resolve_and_validate_document
-### end monkey patch ###
-
 from schema_salad.sourceline import SourceLine
 import schema_salad.validate as validate
 import cwltool.main
index 604ad39de78877b96121fae730eb12ea0da080d6..ddd3c00764c7b0fb42fe97adf50622bd1b23e9cb 100644 (file)
@@ -11,9 +11,10 @@ import copy
 import logging
 
 from schema_salad.sourceline import SourceLine, cmap
+import schema_salad.ref_resolver
 
 from cwltool.pack import pack
-from cwltool.load_tool import fetch_document
+from cwltool.load_tool import fetch_document, resolve_and_validate_document
 from cwltool.process import shortname
 from cwltool.workflow import Workflow, WorkflowException, WorkflowStep
 from cwltool.pathmapper import adjustFileObjs, adjustDirObjs, visit_class
@@ -172,7 +173,6 @@ class ArvadosWorkflow(Workflow):
         with SourceLine(self.tool, None, WorkflowException, logger.isEnabledFor(logging.DEBUG)):
             if "id" not in self.tool:
                 raise WorkflowException("%s object must have 'id'" % (self.tool["class"]))
-        document_loader, workflowobj, uri = (self.doc_loader, self.doc_loader.fetch(self.tool["id"]), self.tool["id"])
 
         discover_secondary_files(self.arvrunner.fs_access, builder,
                                  self.tool["inputs"], joborder)
@@ -180,18 +180,22 @@ class ArvadosWorkflow(Workflow):
         with Perf(metrics, "subworkflow upload_deps"):
             upload_dependencies(self.arvrunner,
                                 os.path.basename(joborder.get("id", "#")),
-                                document_loader,
+                                self.doc_loader,
                                 joborder,
                                 joborder.get("id", "#"),
                                 False)
 
             if self.wf_pdh is None:
-                workflowobj["requirements"] = dedup_reqs(self.requirements)
-                workflowobj["hints"] = dedup_reqs(self.hints)
+                packed = pack(self.loadingContext, self.tool["id"], loader=self.doc_loader)
 
-                packed = pack(document_loader, workflowobj, uri, self.metadata)
+                for p in packed["$graph"]:
+                    if p["id"] == "#main":
+                        p["requirements"] = dedup_reqs(self.requirements)
+                        p["hints"] = dedup_reqs(self.hints)
 
                 def visit(item):
+                    if "requirements" in item:
+                        item["requirements"] = [i for i in item["requirements"] if i["class"] != "DockerRequirement"]
                     for t in ("hints", "requirements"):
                         if t not in item:
                             continue
@@ -211,9 +215,6 @@ class ArvadosWorkflow(Workflow):
                                                     raise WorkflowException("Non-top-level ResourceRequirement in single container cannot have expressions")
                                 if not dyn:
                                     self.static_resource_req.append(req)
-                            if req["class"] == "DockerRequirement":
-                                if "http://arvados.org/cwl#dockerCollectionPDH" in req:
-                                    del req["http://arvados.org/cwl#dockerCollectionPDH"]
 
                 visit_class(packed["$graph"], ("Workflow", "CommandLineTool"), visit)
 
@@ -222,9 +223,9 @@ class ArvadosWorkflow(Workflow):
 
                 upload_dependencies(self.arvrunner,
                                     runtimeContext.name,
-                                    document_loader,
+                                    self.doc_loader,
                                     packed,
-                                    uri,
+                                    self.tool["id"],
                                     False)
 
                 # Discover files/directories referenced by the
@@ -301,6 +302,10 @@ class ArvadosWorkflow(Workflow):
             if job_res_reqs[0].get("ramMin", 1024) < 128:
                 job_res_reqs[0]["ramMin"] = 128
 
+        arguments = ["--no-container", "--move-outputs", "--preserve-entire-environment", "workflow.cwl", "cwl.input.yml"]
+        if runtimeContext.debug:
+            arguments.insert(0, '--debug')
+
         wf_runner = cmap({
             "class": "CommandLineTool",
             "baseCommand": "cwltool",
@@ -320,7 +325,7 @@ class ArvadosWorkflow(Workflow):
                     }]
             }],
             "hints": self.hints,
-            "arguments": ["--no-container", "--move-outputs", "--preserve-entire-environment", "workflow.cwl#main", "cwl.input.yml"],
+            "arguments": arguments,
             "id": "#"
         })
         return ArvadosCommandTool(self.arvrunner, wf_runner, self.loadingContext).job(joborder_resolved, output_callback, runtimeContext)
index 2239e0f9df952b9ab75a7e9a96ab46953ab29f94..f0be83032415bf64e0a64c793c8b0b7e9974bdd3 100644 (file)
@@ -424,8 +424,9 @@ def packed_workflow(arvrunner, tool, merged_map):
     A "packed" workflow is one where all the components have been combined into a single document."""
 
     rewrites = {}
-    packed = pack(tool.doc_loader, tool.doc_loader.fetch(tool.tool["id"]),
-                  tool.tool["id"], tool.metadata, rewrite_out=rewrites)
+    packed = pack(arvrunner.loadingContext, tool.tool["id"],
+                  rewrite_out=rewrites,
+                  loader=tool.doc_loader)
 
     rewrite_to_orig = {v: k for k,v in viewitems(rewrites)}
 
index d4bb6d102a935780e13921fe546119244c0364e1..95730a69b11199bff6f20f7051ab91141d9c1acd 100644 (file)
@@ -39,14 +39,11 @@ setup(name='arvados-cwl-runner',
       # file to determine what version of cwltool and schema-salad to
       # build.
       install_requires=[
-          'cwltool==1.0.20190831161204',
-          'schema-salad==4.5.20190815125611',
-          'typing >= 3.6.4',
-          'ruamel.yaml >=0.15.54, <=0.15.77',
+          'cwltool==3.0.20200317203547',
+          'schema-salad==5.0.20200302192450',
           'arvados-python-client{}'.format(pysdk_dep),
           'setuptools',
-          'ciso8601 >= 2.0.0',
-          'networkx < 2.3'
+          'ciso8601 >= 2.0.0'
       ],
       extras_require={
           ':os.name=="posix" and python_version<"3"': ['subprocess32 >= 3.5.1'],
@@ -55,8 +52,8 @@ setup(name='arvados-cwl-runner',
       data_files=[
           ('share/doc/arvados-cwl-runner', ['LICENSE-2.0.txt', 'README.rst']),
       ],
+      python_requires=">=3.5, <4",
       classifiers=[
-          'Programming Language :: Python :: 2',
           'Programming Language :: Python :: 3',
       ],
       test_suite='tests',
index 39c834ed60f93da64db19db3c744c8a1279231b1..76aa43d61180f44882409ac3fb513eeb41921661 100755 (executable)
@@ -12,8 +12,9 @@ fi
 reset_container=1
 leave_running=0
 config=dev
+devcwl=0
 tag="latest"
-pythoncmd=python
+pythoncmd=python3
 suite=conformance
 runapi=containers
 
@@ -40,6 +41,10 @@ while test -n "$1" ; do
             build=1
             shift
             ;;
+        --devcwl)
+            devcwl=1
+            shift
+            ;;
         --pythoncmd)
             pythoncmd=$2
             shift ; shift
@@ -53,7 +58,7 @@ while test -n "$1" ; do
             shift ; shift
             ;;
         -h|--help)
-            echo "$0 [--no-reset-container] [--leave-running] [--config dev|localdemo] [--tag docker_tag] [--build] [--pythoncmd python(2|3)] [--suite (integration|conformance-v1.0|conformance-v1.1)]"
+            echo "$0 [--no-reset-container] [--leave-running] [--config dev|localdemo] [--tag docker_tag] [--build] [--pythoncmd python(2|3)] [--suite (integration|conformance-v1.0|conformance-*)]"
             exit
             ;;
         *)
@@ -107,11 +112,12 @@ if [[ "$suite" = "conformance-v1.0" ]] ; then
      git clone https://github.com/common-workflow-language/common-workflow-language.git
    fi
    cd common-workflow-language
-elif [[ "$suite" = "conformance-v1.1" ]] ; then
-   if ! test -d cwl-v1.1 ; then
-     git clone https://github.com/common-workflow-language/cwl-v1.1.git
+elif [[ "$suite" =~ conformance-(.*) ]] ; then
+   version=\${BASH_REMATCH[1]}
+   if ! test -d cwl-\${version} ; then
+     git clone https://github.com/common-workflow-language/cwl-\${version}.git
    fi
-   cd cwl-v1.1
+   cd cwl-\${version}
 fi
 
 if [[ "$suite" != "integration" ]] ; then
@@ -145,12 +151,18 @@ exec arvados-cwl-runner --api=containers \\\$@
 EOF2
 chmod +x /tmp/cwltest/arv-cwl-containers
 
+EXTRA=--compute-checksum
+
+if [[ $devcwl == 1 ]] ; then
+   EXTRA="\$EXTRA --enable-dev"
+fi
+
 env
 if [[ "$suite" = "integration" ]] ; then
    cd /usr/src/arvados/sdk/cwl/tests
    exec ./arvados-tests.sh $@
 else
-   exec ./run_test.sh RUNNER=/tmp/cwltest/arv-cwl-${runapi} EXTRA=--compute-checksum $@
+   exec ./run_test.sh RUNNER=/tmp/cwltest/arv-cwl-${runapi} EXTRA="\$EXTRA" $@
 fi
 EOF
 
index cb6cfbadbdb00dcfe56872683b5851017eeb0f8f..d331e3552d8210aafeed636c43530114288a4239 100644 (file)
@@ -18,6 +18,7 @@ import os
 import functools
 import cwltool.process
 import cwltool.secrets
+from cwltool.update import INTERNAL_VERSION
 from schema_salad.ref_resolver import Loader
 from schema_salad.sourceline import cmap
 
@@ -60,7 +61,7 @@ class TestContainer(unittest.TestCase):
         cwltool.process._names = set()
 
     def helper(self, runner, enable_reuse=True):
-        document_loader, avsc_names, schema_metadata, metaschema_loader = cwltool.process.get_schema("v1.1")
+        document_loader, avsc_names, schema_metadata, metaschema_loader = cwltool.process.get_schema(INTERNAL_VERSION)
 
         make_fs_access=functools.partial(arvados_cwl.CollectionFsAccess,
                                          collection_cache=arvados_cwl.CollectionCache(runner.api, None, 0))
@@ -69,7 +70,7 @@ class TestContainer(unittest.TestCase):
              "basedir": "",
              "make_fs_access": make_fs_access,
              "loader": Loader({}),
-             "metadata": {"cwlVersion": "v1.1", "http://commonwl.org/cwltool#original_cwlVersion": "v1.0"}})
+             "metadata": {"cwlVersion": INTERNAL_VERSION, "http://commonwl.org/cwltool#original_cwlVersion": "v1.0"}})
         runtimeContext = arvados_cwl.context.ArvRuntimeContext(
             {"work_api": "containers",
              "basedir": "",
@@ -402,7 +403,7 @@ class TestContainer(unittest.TestCase):
         runner.api.collections().get().execute.return_value = {
             "portable_data_hash": "99999999999999999999999999999993+99"}
 
-        document_loader, avsc_names, schema_metadata, metaschema_loader = cwltool.process.get_schema("v1.1")
+        document_loader, avsc_names, schema_metadata, metaschema_loader = cwltool.process.get_schema(INTERNAL_VERSION)
 
         tool = cmap({
             "inputs": [],
@@ -848,7 +849,7 @@ class TestWorkflow(unittest.TestCase):
         cwltool.process._names = set()
 
     def helper(self, runner, enable_reuse=True):
-        document_loader, avsc_names, schema_metadata, metaschema_loader = cwltool.process.get_schema("v1.1")
+        document_loader, avsc_names, schema_metadata, metaschema_loader = cwltool.process.get_schema("v1.0")
 
         make_fs_access=functools.partial(arvados_cwl.CollectionFsAccess,
                                          collection_cache=arvados_cwl.CollectionCache(runner.api, None, 0))
@@ -863,7 +864,7 @@ class TestWorkflow(unittest.TestCase):
              "basedir": "",
              "make_fs_access": make_fs_access,
              "loader": document_loader,
-             "metadata": {"cwlVersion": "v1.1", "http://commonwl.org/cwltool#original_cwlVersion": "v1.0"},
+             "metadata": {"cwlVersion": INTERNAL_VERSION, "http://commonwl.org/cwltool#original_cwlVersion": "v1.0"},
              "construct_tool_object": runner.arv_make_tool})
         runtimeContext = arvados_cwl.context.ArvRuntimeContext(
             {"work_api": "containers",
@@ -928,7 +929,7 @@ class TestWorkflow(unittest.TestCase):
                     "--no-container",
                     "--move-outputs",
                     "--preserve-entire-environment",
-                    "workflow.cwl#main",
+                    "workflow.cwl",
                     "cwl.input.yml"
                 ],
                 "container_image": "99999999999999999999999999999993+99",
@@ -1076,7 +1077,7 @@ class TestWorkflow(unittest.TestCase):
                     u'--no-container',
                     u'--move-outputs',
                     u'--preserve-entire-environment',
-                    u'workflow.cwl#main',
+                    u'workflow.cwl',
                     u'cwl.input.yml'
                 ],
                 'use_existing': True,
index 397ae142253d26b41d11aee03586fbc3e352bb31..562664c698b34df87ec2f19eb64d867ec5461698 100644 (file)
@@ -18,6 +18,7 @@ import mock
 import sys
 import unittest
 import cwltool.process
+import re
 
 from io import BytesIO
 
@@ -1312,7 +1313,7 @@ class TestSubmit(unittest.TestCase):
 
                 self.assertEqual(exited, 1)
                 self.assertRegexpMatches(
-                    capture_stderr.getvalue(),
+                    re.sub(r'[ \n]+', ' ', capture_stderr.getvalue()),
                     r"Expected collection uuid zzzzz-4zz18-zzzzzzzzzzzzzzz to be 99999999999999999999999999999998\+99 but API server reported 99999999999999999999999999999997\+99")
             finally:
                 cwltool_logger.removeHandler(stderr_logger)
index 7a07de5bd23885b25a4627b6260b0e04ac82daba..c54e1707ff0ef7fdb741305ff6804bb19cfdbaac 100644 (file)
@@ -5,8 +5,11 @@
 {
   "$graph": [
     {
+      "$namespaces": {
+        "arv": "http://arvados.org/cwl#"
+      },
       "class": "Workflow",
-      "cwlVersion": "v1.1",
+      "cwlVersion": "v1.0",
       "hints": [],
       "id": "#main",
       "inputs": [
           "run": {
             "baseCommand": "sleep",
             "class": "CommandLineTool",
-            "id": "#main/sleep1/run/subtool",
+            "id": "#main/sleep1/subtool",
             "inputs": [
               {
-                "id": "#main/sleep1/run/subtool/sleeptime",
+                "id": "#main/sleep1/subtool/sleeptime",
                 "inputBinding": {
                   "position": 1
                 },
@@ -71,7 +74,7 @@
             ],
             "outputs": [
               {
-                "id": "#main/sleep1/run/subtool/out",
+                "id": "#main/sleep1/subtool/out",
                 "outputBinding": {
                   "outputEval": "out"
                 },
@@ -83,5 +86,5 @@
       ]
     }
   ],
-  "cwlVersion": "v1.1"
-}
+  "cwlVersion": "v1.0"
+}
\ No newline at end of file
index ff68e2a7fd8888d10d56b96e13a5a9bf2690a177..a726b49fe3814a7d51d7fcb32420ad98abb4150d 100644 (file)
@@ -51,7 +51,7 @@ setup(name='arvados-python-client',
           'google-api-python-client >=1.6.2, <1.7',
           'httplib2 >=0.9.2',
           'pycurl >=7.19.5.1',
-          'ruamel.yaml >=0.15.54, <=0.15.77',
+          'ruamel.yaml >=0.15.54, <=0.16.5',
           'setuptools',
           'ws4py >=0.4.2',
       ],