X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/29e3fe60d22218d122a1a3448a144bfcfd64785a..540b72d62a94015f116ba077e279a5f10d666778:/sdk/cwl/arvados_cwl/pathmapper.py

diff --git a/sdk/cwl/arvados_cwl/pathmapper.py b/sdk/cwl/arvados_cwl/pathmapper.py
index 0cbf9eb56b..539188fddd 100644
--- a/sdk/cwl/arvados_cwl/pathmapper.py
+++ b/sdk/cwl/arvados_cwl/pathmapper.py
@@ -26,7 +26,7 @@ from cwltool.utils import adjustFileObjs, adjustDirObjs
 from cwltool.stdfsaccess import abspath
 from cwltool.workflow import WorkflowException
 
-from .http import http_to_keep
+from arvados.http_to_keep import http_to_keep
 
 logger = logging.getLogger('arvados.cwl-runner')
 
@@ -74,7 +74,6 @@ class ArvPathMapper(PathMapper):
 
         if isinstance(src, basestring) and src.startswith("keep:"):
             if collection_pdh_pattern.match(src):
-                #self._pathmap[src] = MapperEnt(src, self.collection_pattern % src[5:], srcobj["class"], True)
                 self._pathmap[src] = MapperEnt(src, self.collection_pattern % urllib.parse.unquote(src[5:]), srcobj["class"], True)
 
                 if arvados_cwl.util.collectionUUID in srcobj:
@@ -94,12 +93,9 @@ class ArvPathMapper(PathMapper):
                                                    raiseOSError=True)
                 with SourceLine(srcobj, "location", WorkflowException, debug):
                     if isinstance(st, arvados.commands.run.UploadFile):
-                        print("VV", (src, ab, st))
                         uploadfiles.add((src, ab, st))
                     elif isinstance(st, arvados.commands.run.ArvFile):
                         self._pathmap[src] = MapperEnt(st.fn, self.collection_pattern % urllib.parse.unquote(st.fn[5:]), "File", True)
-
-                        #self._pathmap[src] = MapperEnt(st.fn, self.collection_pattern % st.fn[5:], "File", True)
                     else:
                         raise WorkflowException("Input file path '%s' is invalid" % st)
             elif src.startswith("_:"):
@@ -109,11 +105,17 @@ class ArvPathMapper(PathMapper):
                     raise WorkflowException("Directory literal '%s' is missing `listing`" % src)
             elif src.startswith("http:") or src.startswith("https:"):
                 try:
-                    keepref = http_to_keep(self.arvrunner.api, self.arvrunner.project_uuid, src)
-                    logger.info("%s is %s", src, keepref)
-                    self._pathmap[src] = MapperEnt(keepref, keepref, srcobj["class"], True)
+                    if self.arvrunner.defer_downloads:
+                        # passthrough, we'll download it later.
+                        self._pathmap[src] = MapperEnt(src, src, srcobj["class"], True)
+                    else:
+                        keepref = "keep:%s/%s" % http_to_keep(self.arvrunner.api, self.arvrunner.project_uuid, src,
+                                                              varying_url_params=self.arvrunner.toplevel_runtimeContext.varying_url_params,
+                                                              prefer_cached_downloads=self.arvrunner.toplevel_runtimeContext.prefer_cached_downloads)
+                        logger.info("%s is %s", src, keepref)
+                        self._pathmap[src] = MapperEnt(keepref, keepref, srcobj["class"], True)
                 except Exception as e:
-                    logger.warning(str(e))
+                    logger.warning("Download error: %s", e)
             else:
                 self._pathmap[src] = MapperEnt(src, src, srcobj["class"], True)
 
@@ -125,7 +127,6 @@ class ArvPathMapper(PathMapper):
                 self.visit(l, uploadfiles)
 
     def addentry(self, obj, c, path, remap):
-        print(obj["location"], self._pathmap)
         if obj["location"] in self._pathmap:
             src, srcpath = self.arvrunner.fs_access.get_collection(self._pathmap[obj["location"]].resolved)
             if srcpath == "":
@@ -146,7 +147,7 @@ class ArvPathMapper(PathMapper):
             for opt in self.optional_deps:
                 if obj["location"] == opt["location"]:
                     return
-            raise SourceLine(obj, "location", WorkflowException).makeError("Don't know what to do with '%s'" % obj["location"])
+            raise SourceLine(obj, "location", WorkflowException).makeError("Can't handle '%s'" % obj["location"])
 
     def needs_new_collection(self, srcobj, prefix=""):
         """Check if files need to be staged into a new collection.
@@ -161,18 +162,26 @@ class ArvPathMapper(PathMapper):
         if loc.startswith("_:"):
             return True
 
-        if not prefix:
-            i = loc.rfind("/")
-            if i > -1:
-                prefix = loc[:i+1]
-                suffix = urllib.parse.quote(urllib.parse.unquote(loc[i+1:]), "/+@")
-            else:
-                prefix = loc+"/"
-                suffix = ""
+        if self.arvrunner.defer_downloads and (loc.startswith("http:") or loc.startswith("https:")):
+            return False
+
+        i = loc.rfind("/")
+        if i > -1:
+            loc_prefix = loc[:i+1]
+            if not prefix:
+                prefix = loc_prefix
+            # quote/unquote to ensure consistent quoting
+            suffix = urllib.parse.quote(urllib.parse.unquote(loc[i+1:]), "/+@")
+        else:
+            # no '/' found
+            loc_prefix = loc+"/"
+            prefix = loc+"/"
+            suffix = ""
+
+        if prefix != loc_prefix:
+            return True
 
-        print("LLL", prefix+suffix, prefix+urllib.parse.quote(srcobj["basename"], "/+@"))
-        if prefix+suffix != prefix+urllib.parse.quote(srcobj["basename"], "/+@"):
-            print("LLL -> needs new collection")
+        if "basename" in srcobj and suffix != urllib.parse.quote(srcobj["basename"], "/+@"):
             return True
 
         if srcobj["class"] == "File" and loc not in self._pathmap:
@@ -181,7 +190,7 @@ class ArvPathMapper(PathMapper):
             if self.needs_new_collection(s, prefix):
                 return True
         if srcobj.get("listing"):
-            prefix = "%s%s/" % (prefix, srcobj["basename"])
+            prefix = "%s%s/" % (prefix, urllib.parse.quote(srcobj.get("basename", suffix), "/+@"))
             for l in srcobj["listing"]:
                 if self.needs_new_collection(l, prefix):
                     return True
@@ -211,12 +220,9 @@ class ArvPathMapper(PathMapper):
                                          packed=False)
 
         for src, ab, st in uploadfiles:
-            print("BBBBB", src, ab, st.fn, urllib.parse.quote(st.fn, "/:+@"))
             self._pathmap[src] = MapperEnt(urllib.parse.quote(st.fn, "/:+@"), urllib.parse.quote(self.collection_pattern % st.fn[5:], "/:+@"),
                                            "Directory" if os.path.isdir(ab) else "File", True)
 
-        print("CCCCC", self._pathmap)
-
         for srcobj in referenced_files:
             remap = []
             if srcobj["class"] == "Directory" and srcobj["location"] not in self._pathmap: