18994: Add test case, correctly re-stage files when basename is changed.
[arvados.git] / sdk / cwl / arvados_cwl / pathmapper.py
index 5bad290773be9f49ef2e87b10b2dac48e70ef75b..ba209ade0ec63b0ee1e60b3bf2b0f9351d1c055e 100644 (file)
@@ -21,7 +21,9 @@ import arvados.collection
 from schema_salad.sourceline import SourceLine
 
 from arvados.errors import ApiError
-from cwltool.pathmapper import PathMapper, MapperEnt, abspath, adjustFileObjs, adjustDirObjs
+from cwltool.pathmapper import PathMapper, MapperEnt
+from cwltool.utils import adjustFileObjs, adjustDirObjs
+from cwltool.stdfsaccess import abspath
 from cwltool.workflow import WorkflowException
 
 from .http import http_to_keep
@@ -99,9 +101,12 @@ class ArvPathMapper(PathMapper):
                 if srcobj["class"] == "Directory" and "listing" not in srcobj:
                     raise WorkflowException("Directory literal '%s' is missing `listing`" % src)
             elif src.startswith("http:") or src.startswith("https:"):
-                keepref = http_to_keep(self.arvrunner.api, self.arvrunner.project_uuid, src)
-                logger.info("%s is %s", src, keepref)
-                self._pathmap[src] = MapperEnt(keepref, keepref, srcobj["class"], True)
+                try:
+                    keepref = http_to_keep(self.arvrunner.api, self.arvrunner.project_uuid, src)
+                    logger.info("%s is %s", src, keepref)
+                    self._pathmap[src] = MapperEnt(keepref, keepref, srcobj["class"], True)
+                except Exception as e:
+                    logger.warning(str(e))
             else:
                 self._pathmap[src] = MapperEnt(src, src, srcobj["class"], True)
 
@@ -144,15 +149,16 @@ class ArvPathMapper(PathMapper):
         loc = srcobj["location"]
         if loc.startswith("_:"):
             return True
-        if prefix:
-            if loc != prefix+srcobj["basename"]:
-                return True
-        else:
+        if not prefix:
             i = loc.rfind("/")
             if i > -1:
                 prefix = loc[:i+1]
             else:
                 prefix = loc+"/"
+
+        if loc != prefix+srcobj["basename"]:
+            return True
+
         if srcobj["class"] == "File" and loc not in self._pathmap:
             return True
         for s in srcobj.get("secondaryFiles", []):
@@ -193,6 +199,7 @@ class ArvPathMapper(PathMapper):
                                            "Directory" if os.path.isdir(ab) else "File", True)
 
         for srcobj in referenced_files:
+            print("na na na", srcobj, srcobj["location"].endswith("/"+srcobj["basename"]))
             remap = []
             if srcobj["class"] == "Directory" and srcobj["location"] not in self._pathmap:
                 c = arvados.collection.Collection(api_client=self.arvrunner.api,
@@ -212,16 +219,7 @@ class ArvPathMapper(PathMapper):
 
                 ab = self.collection_pattern % c.portable_data_hash()
                 self._pathmap[srcobj["location"]] = MapperEnt("keep:"+c.portable_data_hash(), ab, "Directory", True)
-            elif srcobj["class"] == "File" and (srcobj.get("secondaryFiles") or
-                (srcobj["location"].startswith("_:") and "contents" in srcobj)):
-
-                # If all secondary files/directories are located in
-                # the same collection as the primary file and the
-                # paths and names that are consistent with staging,
-                # don't create a new collection.
-                if not self.needs_new_collection(srcobj):
-                    continue
-
+            elif srcobj["class"] == "File" and self.needs_new_collection(srcobj):
                 c = arvados.collection.Collection(api_client=self.arvrunner.api,
                                                   keep_client=self.arvrunner.keep_client,
                                                   num_retries=self.arvrunner.num_retries                                                  )