Merge branch '16923-auth-api-client' refs #16923
[arvados.git] / sdk / cwl / arvados_cwl / pathmapper.py
index f08d7d7b2a67a516118a087ca37f01ea9650c869..5bad290773be9f49ef2e87b10b2dac48e70ef75b 100644 (file)
@@ -42,13 +42,13 @@ def trim_listing(obj):
     if obj.get("location", "").startswith("keep:") and "listing" in obj:
         del obj["listing"]
 
+collection_pdh_path = re.compile(r'^keep:[0-9a-f]{32}\+\d+/.+$')
+collection_pdh_pattern = re.compile(r'^keep:([0-9a-f]{32}\+\d+)(/.*)?')
+collection_uuid_pattern = re.compile(r'^keep:([a-z0-9]{5}-4zz18-[a-z0-9]{15})(/.*)?$')
 
 class ArvPathMapper(PathMapper):
     """Convert container-local paths to and from Keep collection ids."""
 
-    pdh_path = re.compile(r'^keep:[0-9a-f]{32}\+\d+/.+$')
-    pdh_dirpath = re.compile(r'^keep:[0-9a-f]{32}\+\d+(/.*)?$')
-
     def __init__(self, arvrunner, referenced_files, input_basedir,
                  collection_pattern, file_pattern, name=None, single_collection=False):
         self.arvrunner = arvrunner
@@ -66,13 +66,17 @@ class ArvPathMapper(PathMapper):
         if "#" in src:
             src = src[:src.index("#")]
 
-        if isinstance(src, basestring) and ArvPathMapper.pdh_dirpath.match(src):
-            self._pathmap[src] = MapperEnt(src, self.collection_pattern % urllib.parse.unquote(src[5:]), srcobj["class"], True)
-            if arvados_cwl.util.collectionUUID in srcobj:
-                self.pdh_to_uuid[src.split("/", 1)[0][5:]] = srcobj[arvados_cwl.util.collectionUUID]
-
         debug = logger.isEnabledFor(logging.DEBUG)
 
+        if isinstance(src, basestring) and src.startswith("keep:"):
+            if collection_pdh_pattern.match(src):
+                self._pathmap[src] = MapperEnt(src, self.collection_pattern % urllib.parse.unquote(src[5:]), srcobj["class"], True)
+                if arvados_cwl.util.collectionUUID in srcobj:
+                    self.pdh_to_uuid[src.split("/", 1)[0][5:]] = srcobj[arvados_cwl.util.collectionUUID]
+            elif not collection_uuid_pattern.match(src):
+                with SourceLine(srcobj, "location", WorkflowException, debug):
+                    raise WorkflowException("Invalid keep reference '%s'" % src)
+
         if src not in self._pathmap:
             if src.startswith("file:"):
                 # Local FS ref, may need to be uploaded or may be on keep
@@ -265,6 +269,13 @@ class ArvPathMapper(PathMapper):
 
 
 class StagingPathMapper(PathMapper):
+    # Note that StagingPathMapper internally maps files from target to source.
+    # Specifically, the 'self._pathmap' dict keys are the target location and the
+    # values are 'MapperEnt' named tuples from which we use the 'resolved' attribute
+    # as the file identifier. This makes it possible to map an input file to multiple
+    # target directories. The exception is for file literals, which store the contents of
+    # the file in 'MapperEnt.resolved' and are therefore still mapped from source to target.
+
     _follow_dirs = True
 
     def __init__(self, referenced_files, basedir, stagedir, separateDirs=True):
@@ -274,6 +285,7 @@ class StagingPathMapper(PathMapper):
     def visit(self, obj, stagedir, basedir, copy=False, staged=False):
         # type: (Dict[unicode, Any], unicode, unicode, bool) -> None
         loc = obj["location"]
+        stagedir = obj.get("dirname") or stagedir
         tgt = os.path.join(stagedir, obj["basename"])
         basetgt, baseext = os.path.splitext(tgt)
 
@@ -307,10 +319,12 @@ class StagingPathMapper(PathMapper):
                     self._pathmap[tgt] = MapperEnt(loc, tgt, "File", staged)
                 self.visitlisting(obj.get("secondaryFiles", []), stagedir, basedir)
 
-    def mapper(self, src):  # type: (Text) -> MapperEnt
+    def mapper(self, src):  # type: (Text) -> MapperEnt.
+        # Overridden to maintain the use case of mapping by source (identifier) to
+        # target regardless of how the map is structured internally.
         def getMapperEnt(src):
             for k,v in viewitems(self._pathmap):
-                if v.resolved == src or (v.type == "CreateFile" and k == src):
+                if (v.type != "CreateFile" and v.resolved == src) or (v.type == "CreateFile" and k == src):
                     return v
 
         if u"#" in src: