From 446dffef8db3b0df3367d84d6ab3da1b6c8bcc14 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Wed, 11 Mar 2020 16:05:48 -0400 Subject: [PATCH] 12409: Preserve CWL document version when using RunInSingleContainer Also update cwltool dependency and remove the workaround from #16169 Arvados-DCO-1.1-Signed-off-by: Peter Amstutz --- sdk/cwl/arvados_cwl/__init__.py | 37 ------------------------------ sdk/cwl/arvados_cwl/arvworkflow.py | 36 +++++++++++++++++++++++++---- sdk/cwl/setup.py | 2 +- 3 files changed, 33 insertions(+), 42 deletions(-) diff --git a/sdk/cwl/arvados_cwl/__init__.py b/sdk/cwl/arvados_cwl/__init__.py index 2b2acd5688..3dd04040ab 100644 --- a/sdk/cwl/arvados_cwl/__init__.py +++ b/sdk/cwl/arvados_cwl/__init__.py @@ -16,43 +16,6 @@ import sys import re import pkg_resources # part of setuptools -### begin monkey patch ### -# Monkey patch solution for bug #16169 -# -# There is a bug in upstream cwltool where the version updater needs -# to replace the document fragments in the loader index with the -# updated ones, but actually it only does it for the root document. -# Normally we just fix the bug in upstream but that's challenging -# because current cwltool dropped support for Python 2.7 and we're -# still supporting py2 in Arvados 2.0 (although py2 support will most -# likely be dropped in Arvados 2.1). Making a bugfix fork comes with -# its own complications (it would need to be added to PyPi) so monkey -# patching is the least disruptive fix (and is relatively safe because -# our cwltool dependency is pinned to a specific version). This -# should be removed as soon as a bugfix goes into upstream cwltool and -# we upgrade to it. -# -import cwltool.load_tool -from cwltool.utils import visit_class -from six.moves import urllib -original_resolve_and_validate_document = cwltool.load_tool.resolve_and_validate_document -def wrapped_resolve_and_validate_document( - loadingContext, # type: LoadingContext - workflowobj, # type: Union[CommentedMap, CommentedSeq] - uri, # type: Text - preprocess_only=False, # type: bool - skip_schemas=None, # type: Optional[bool] - ): - loadingContext, uri = original_resolve_and_validate_document(loadingContext, workflowobj, uri, preprocess_only, skip_schemas) - if loadingContext.do_update in (True, None): - fileuri = urllib.parse.urldefrag(uri)[0] - def update_index(pr): - loadingContext.loader.idx[pr["id"]] = pr - visit_class(loadingContext.loader.idx[fileuri], ("CommandLineTool", "Workflow", "ExpressionTool"), update_index) - return loadingContext, uri -cwltool.load_tool.resolve_and_validate_document = wrapped_resolve_and_validate_document -### end monkey patch ### - from schema_salad.sourceline import SourceLine import schema_salad.validate as validate import cwltool.main diff --git a/sdk/cwl/arvados_cwl/arvworkflow.py b/sdk/cwl/arvados_cwl/arvworkflow.py index 604ad39de7..332dcb6070 100644 --- a/sdk/cwl/arvados_cwl/arvworkflow.py +++ b/sdk/cwl/arvados_cwl/arvworkflow.py @@ -11,9 +11,10 @@ import copy import logging from schema_salad.sourceline import SourceLine, cmap +import schema_salad.ref_resolver from cwltool.pack import pack -from cwltool.load_tool import fetch_document +from cwltool.load_tool import fetch_document, resolve_and_validate_document from cwltool.process import shortname from cwltool.workflow import Workflow, WorkflowException, WorkflowStep from cwltool.pathmapper import adjustFileObjs, adjustDirObjs, visit_class @@ -172,7 +173,6 @@ class ArvadosWorkflow(Workflow): with SourceLine(self.tool, None, WorkflowException, logger.isEnabledFor(logging.DEBUG)): if "id" not in self.tool: raise WorkflowException("%s object must have 'id'" % (self.tool["class"])) - document_loader, workflowobj, uri = (self.doc_loader, self.doc_loader.fetch(self.tool["id"]), self.tool["id"]) discover_secondary_files(self.arvrunner.fs_access, builder, self.tool["inputs"], joborder) @@ -180,16 +180,44 @@ class ArvadosWorkflow(Workflow): with Perf(metrics, "subworkflow upload_deps"): upload_dependencies(self.arvrunner, os.path.basename(joborder.get("id", "#")), - document_loader, + self.doc_loader, joborder, joborder.get("id", "#"), False) if self.wf_pdh is None: + ### We have to reload the document to get the original version. + # + # The workflow document we have in memory right now was + # updated to the internal CWL version. We need to reload the + # document to go back to its original version, because the + # version of cwltool installed in the user's container is + # likely to reject our internal version, and we don't want to + # break existing workflows. + # + # What's going on here is that the updater replaces the + # documents/fragments in the index with updated ones, the + # index is also used as a cache, so we need to go through the + # loading process with an empty index and updating turned off + # so we have the original un-updated documents. + # + loadingContext = self.loadingContext.copy() + loadingContext.do_update = False + document_loader = schema_salad.ref_resolver.SubLoader(loadingContext.loader) + loadingContext.loader = document_loader + loadingContext.loader.idx = {} + uri = self.tool["id"] + loadingContext, workflowobj, uri = fetch_document(uri, loadingContext) + loadingContext, uri = resolve_and_validate_document( + loadingContext, workflowobj, uri + ) + workflowobj, metadata = loadingContext.loader.resolve_ref(uri) + ### + workflowobj["requirements"] = dedup_reqs(self.requirements) workflowobj["hints"] = dedup_reqs(self.hints) - packed = pack(document_loader, workflowobj, uri, self.metadata) + packed = pack(document_loader, workflowobj, uri, metadata) def visit(item): for t in ("hints", "requirements"): diff --git a/sdk/cwl/setup.py b/sdk/cwl/setup.py index aaefa2afc5..5475848786 100644 --- a/sdk/cwl/setup.py +++ b/sdk/cwl/setup.py @@ -39,7 +39,7 @@ setup(name='arvados-cwl-runner', # file to determine what version of cwltool and schema-salad to # build. install_requires=[ - 'cwltool==2.0.20200224214940', + 'cwltool==2.0.20200303141624', 'schema-salad==5.0.20200302192450', 'arvados-python-client{}'.format(pysdk_dep), 'setuptools', -- 2.39.5