15295: Check that keep references are well formed
authorPeter Amstutz <pamstutz@veritasgenetics.com>
Mon, 17 Jun 2019 19:08:53 +0000 (15:08 -0400)
committerPeter Amstutz <pamstutz@veritasgenetics.com>
Mon, 17 Jun 2019 19:08:53 +0000 (15:08 -0400)
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz@veritasgenetics.com>

sdk/cwl/arvados_cwl/pathmapper.py
sdk/cwl/arvados_cwl/runner.py
sdk/cwl/tests/15295-bad-keep-ref.cwl [new file with mode: 0644]
sdk/cwl/tests/arvados-tests.yml
sdk/cwl/tests/badkeep.yml [new file with mode: 0644]

index 56c15a4a4344d6c467c0c7dba74b8ae5b126e280..4cd204f7df83ba49197f2cdb6ab2a61673a40b28 100644 (file)
@@ -42,13 +42,13 @@ def trim_listing(obj):
     if obj.get("location", "").startswith("keep:") and "listing" in obj:
         del obj["listing"]
 
+collection_pdh_path = re.compile(r'^keep:[0-9a-f]{32}\+\d+/.+$')
+collection_pdh_pattern = re.compile(r'^keep:([0-9a-f]{32}\+\d+)(/.*)?')
+collection_uuid_pattern = re.compile(r'^keep:([a-z0-9]{5}-4zz18-[a-z0-9]{15})(/.*)?$')
 
 class ArvPathMapper(PathMapper):
     """Convert container-local paths to and from Keep collection ids."""
 
-    pdh_path = re.compile(r'^keep:[0-9a-f]{32}\+\d+/.+$')
-    pdh_dirpath = re.compile(r'^keep:[0-9a-f]{32}\+\d+(/.*)?$')
-
     def __init__(self, arvrunner, referenced_files, input_basedir,
                  collection_pattern, file_pattern, name=None, single_collection=False):
         self.arvrunner = arvrunner
@@ -66,13 +66,17 @@ class ArvPathMapper(PathMapper):
         if "#" in src:
             src = src[:src.index("#")]
 
-        if isinstance(src, basestring) and ArvPathMapper.pdh_dirpath.match(src):
-            self._pathmap[src] = MapperEnt(src, self.collection_pattern % urllib.parse.unquote(src[5:]), srcobj["class"], True)
-            if arvados_cwl.util.collectionUUID in srcobj:
-                self.pdh_to_uuid[src.split("/", 1)[0][5:]] = srcobj[arvados_cwl.util.collectionUUID]
-
         debug = logger.isEnabledFor(logging.DEBUG)
 
+        if isinstance(src, basestring) and src.startswith("keep:"):
+            if collection_pdh_pattern.match(src):
+                self._pathmap[src] = MapperEnt(src, self.collection_pattern % urllib.parse.unquote(src[5:]), srcobj["class"], True)
+                if arvados_cwl.util.collectionUUID in srcobj:
+                    self.pdh_to_uuid[src.split("/", 1)[0][5:]] = srcobj[arvados_cwl.util.collectionUUID]
+            elif not collection_uuid_pattern.match(src):
+                with SourceLine(srcobj, "location", WorkflowException, debug):
+                    raise WorkflowException("Invalid keep reference '%s'" % src)
+
         if src not in self._pathmap:
             if src.startswith("file:"):
                 # Local FS ref, may need to be uploaded or may be on keep
index 912faf0e875b45655de56355374cca025bcc7377..5e42df62413ebcbe63455005cbc6c9a5ae2e8b36 100644 (file)
@@ -44,7 +44,7 @@ from .util import collectionUUID
 import ruamel.yaml as yaml
 
 import arvados_cwl.arvdocker
-from .pathmapper import ArvPathMapper, trim_listing
+from .pathmapper import ArvPathMapper, trim_listing, collection_pdh_pattern, collection_uuid_pattern
 from ._version import __version__
 from . import done
 from . context import ArvRuntimeContext
@@ -194,9 +194,6 @@ def discover_secondary_files(fsaccess, builder, inputs, job_order, discovered=No
         if isinstance(primary, (Mapping, Sequence)):
             set_secondary(fsaccess, builder, inputschema, None, primary, discovered)
 
-collection_uuid_pattern = re.compile(r'^keep:([a-z0-9]{5}-4zz18-[a-z0-9]{15})(/.*)?$')
-collection_pdh_pattern = re.compile(r'^keep:([0-9a-f]{32}\+\d+)(/.*)?')
-
 def upload_dependencies(arvrunner, name, document_loader,
                         workflowobj, uri, loadref_run,
                         include_primary=True, discovered_secondaryfiles=None):
diff --git a/sdk/cwl/tests/15295-bad-keep-ref.cwl b/sdk/cwl/tests/15295-bad-keep-ref.cwl
new file mode 100644 (file)
index 0000000..53c73bb
--- /dev/null
@@ -0,0 +1,12 @@
+cwlVersion: v1.0
+class: CommandLineTool
+requirements:
+  - class: InlineJavascriptRequirement
+arguments:
+  - ls
+  - -l
+  - $(inputs.hello)
+inputs:
+  hello:
+    type: File
+outputs: []
index d649c3bf67706669ee93076a2640958f3194d734..0eb606d25c276f8a793293b3212785bccbf8c5e2 100644 (file)
   }
   tool: 15241-writable-dir.cwl
   doc: Test for writable collections
+
+- job: badkeep.yml
+  output: {}
+  should_fail: true
+  tool: 15295-bad-keep-ref.cwl
+  doc: Test checking for invalid keepref
diff --git a/sdk/cwl/tests/badkeep.yml b/sdk/cwl/tests/badkeep.yml
new file mode 100644 (file)
index 0000000..7f6378a
--- /dev/null
@@ -0,0 +1,3 @@
+hello:
+  class: File
+  location: keep:/4d8a70b1e63b2aad6984e40e338e2373+69/hello.txt