12409: Preserve CWL document version when using RunInSingleContainer

author Peter Amstutz <peter.amstutz@curii.com>

Wed, 11 Mar 2020 20:05:48 +0000 (16:05 -0400)

committer Peter Amstutz <peter.amstutz@curii.com>

Wed, 11 Mar 2020 20:05:48 +0000 (16:05 -0400)
author Peter Amstutz <peter.amstutz@curii.com>
Wed, 11 Mar 2020 20:05:48 +0000 (16:05 -0400)
committer Peter Amstutz <peter.amstutz@curii.com>
Wed, 11 Mar 2020 20:05:48 +0000 (16:05 -0400)
diff --git a/sdk/cwl/arvados_cwl/__init__.py b/sdk/cwl/arvados_cwl/__init__.py

index 2b2acd5688ff3475c24af8242ae0a7c5ef3ffcd9..3dd04040ab5d728b5eac21bab601225135ce810e 100644 (file)
--- a/sdk/cwl/arvados_cwl/__init__.py
+++ b/sdk/cwl/arvados_cwl/__init__.py
@@ -16,43 +16,6 @@ import sys
  import re
  import pkg_resources  # part of setuptools
  
-### begin monkey patch ###
-# Monkey patch solution for bug #16169
-#
-# There is a bug in upstream cwltool where the version updater needs
-# to replace the document fragments in the loader index with the
-# updated ones, but actually it only does it for the root document.
-# Normally we just fix the bug in upstream but that's challenging
-# because current cwltool dropped support for Python 2.7 and we're
-# still supporting py2 in Arvados 2.0 (although py2 support will most
-# likely be dropped in Arvados 2.1).  Making a bugfix fork comes with
-# its own complications (it would need to be added to PyPi) so monkey
-# patching is the least disruptive fix (and is relatively safe because
-# our cwltool dependency is pinned to a specific version).  This
-# should be removed as soon as a bugfix goes into upstream cwltool and
-# we upgrade to it.
-#
-import cwltool.load_tool
-from cwltool.utils import visit_class
-from six.moves import urllib
-original_resolve_and_validate_document = cwltool.load_tool.resolve_and_validate_document
-def wrapped_resolve_and_validate_document(
-        loadingContext,            # type: LoadingContext
-        workflowobj,               # type: Union[CommentedMap, CommentedSeq]
-        uri,                       # type: Text
-        preprocess_only=False,     # type: bool
-        skip_schemas=None,         # type: Optional[bool]
-        ):
-    loadingContext, uri = original_resolve_and_validate_document(loadingContext, workflowobj, uri, preprocess_only, skip_schemas)
-    if loadingContext.do_update in (True, None):
-        fileuri = urllib.parse.urldefrag(uri)[0]
-        def update_index(pr):
-            loadingContext.loader.idx[pr["id"]] = pr
-        visit_class(loadingContext.loader.idx[fileuri], ("CommandLineTool", "Workflow", "ExpressionTool"), update_index)
-    return loadingContext, uri
-cwltool.load_tool.resolve_and_validate_document = wrapped_resolve_and_validate_document
-### end monkey patch ###
-
  from schema_salad.sourceline import SourceLine
  import schema_salad.validate as validate
  import cwltool.main
diff --git a/sdk/cwl/arvados_cwl/arvworkflow.py b/sdk/cwl/arvados_cwl/arvworkflow.py

index 604ad39de78877b96121fae730eb12ea0da080d6..332dcb60708fc55b05c2d37732f4fa2eace97b3c 100644 (file)
--- a/sdk/cwl/arvados_cwl/arvworkflow.py
+++ b/sdk/cwl/arvados_cwl/arvworkflow.py
@@ -11,9 +11,10 @@ import copy
  import logging
  
  from schema_salad.sourceline import SourceLine, cmap
+import schema_salad.ref_resolver
  
  from cwltool.pack import pack
-from cwltool.load_tool import fetch_document
+from cwltool.load_tool import fetch_document, resolve_and_validate_document
  from cwltool.process import shortname
  from cwltool.workflow import Workflow, WorkflowException, WorkflowStep
  from cwltool.pathmapper import adjustFileObjs, adjustDirObjs, visit_class
@@ -172,7 +173,6 @@ class ArvadosWorkflow(Workflow):
          with SourceLine(self.tool, None, WorkflowException, logger.isEnabledFor(logging.DEBUG)):
              if "id" not in self.tool:
                  raise WorkflowException("%s object must have 'id'" % (self.tool["class"]))
-        document_loader, workflowobj, uri = (self.doc_loader, self.doc_loader.fetch(self.tool["id"]), self.tool["id"])
  
          discover_secondary_files(self.arvrunner.fs_access, builder,
                                   self.tool["inputs"], joborder)
@@ -180,16 +180,44 @@ class ArvadosWorkflow(Workflow):
          with Perf(metrics, "subworkflow upload_deps"):
              upload_dependencies(self.arvrunner,
                                  os.path.basename(joborder.get("id", "#")),
-                                document_loader,
+                                self.doc_loader,
                                  joborder,
                                  joborder.get("id", "#"),
                                  False)
  
              if self.wf_pdh is None:
+                ### We have to reload the document to get the original version.
+                #
+                # The workflow document we have in memory right now was
+                # updated to the internal CWL version.  We need to reload the
+                # document to go back to its original version, because the
+                # version of cwltool installed in the user's container is
+                # likely to reject our internal version, and we don't want to
+                # break existing workflows.
+                #
+                # What's going on here is that the updater replaces the
+                # documents/fragments in the index with updated ones, the
+                # index is also used as a cache, so we need to go through the
+                # loading process with an empty index and updating turned off
+                # so we have the original un-updated documents.
+                #
+                loadingContext = self.loadingContext.copy()
+                loadingContext.do_update = False
+                document_loader = schema_salad.ref_resolver.SubLoader(loadingContext.loader)
+                loadingContext.loader = document_loader
+                loadingContext.loader.idx = {}
+                uri = self.tool["id"]
+                loadingContext, workflowobj, uri = fetch_document(uri, loadingContext)
+                loadingContext, uri = resolve_and_validate_document(
+                    loadingContext, workflowobj, uri
+                )
+                workflowobj, metadata = loadingContext.loader.resolve_ref(uri)
+                ###
+
                  workflowobj["requirements"] = dedup_reqs(self.requirements)
                  workflowobj["hints"] = dedup_reqs(self.hints)
  
-                packed = pack(document_loader, workflowobj, uri, self.metadata)
+                packed = pack(document_loader, workflowobj, uri, metadata)
  
                  def visit(item):
                      for t in ("hints", "requirements"):
diff --git a/sdk/cwl/setup.py b/sdk/cwl/setup.py

index aaefa2afc58ad5f8fda9d2c9059ab226c3ded8d6..5475848786c9aeca90b00dc19599e410c8703e7b 100644 (file)
--- a/sdk/cwl/setup.py
+++ b/sdk/cwl/setup.py
@@ -39,7 +39,7 @@ setup(name='arvados-cwl-runner',
        # file to determine what version of cwltool and schema-salad to
        # build.
        install_requires=[
-          'cwltool==2.0.20200224214940',
+          'cwltool==2.0.20200303141624',
            'schema-salad==5.0.20200302192450',
            'arvados-python-client{}'.format(pysdk_dep),
            'setuptools',
author	Peter Amstutz <peter.amstutz@curii.com>
	Wed, 11 Mar 2020 20:05:48 +0000 (16:05 -0400)
committer	Peter Amstutz <peter.amstutz@curii.com>
	Wed, 11 Mar 2020 20:05:48 +0000 (16:05 -0400)
sdk/cwl/arvados_cwl/__init__.py		patch \| blob \| history
sdk/cwl/arvados_cwl/arvworkflow.py		patch \| blob \| history
sdk/cwl/setup.py		patch \| blob \| history