21718: Replace .decode method with str(bytes, "utf-8")
[arvados.git] / sdk / cwl / arvados_cwl / pathmapper.py
index 64fdfa0d04032e97235dc581144d9cb74494c597..ac6df543ad054dcc35f282a8a8510872da4bf36c 100644 (file)
@@ -2,12 +2,6 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
-from future import standard_library
-standard_library.install_aliases()
-from builtins import str
-from past.builtins import basestring
-from future.utils import viewitems
-
 import re
 import logging
 import uuid
@@ -26,7 +20,7 @@ from cwltool.utils import adjustFileObjs, adjustDirObjs
 from cwltool.stdfsaccess import abspath
 from cwltool.workflow import WorkflowException
 
-from .http import http_to_keep
+from arvados.http_to_keep import http_to_keep
 
 logger = logging.getLogger('arvados.cwl-runner')
 
@@ -72,7 +66,7 @@ class ArvPathMapper(PathMapper):
 
         debug = logger.isEnabledFor(logging.DEBUG)
 
-        if isinstance(src, basestring) and src.startswith("keep:"):
+        if isinstance(src, str) and src.startswith("keep:"):
             if collection_pdh_pattern.match(src):
                 self._pathmap[src] = MapperEnt(src, self.collection_pattern % urllib.parse.unquote(src[5:]), srcobj["class"], True)
 
@@ -105,11 +99,18 @@ class ArvPathMapper(PathMapper):
                     raise WorkflowException("Directory literal '%s' is missing `listing`" % src)
             elif src.startswith("http:") or src.startswith("https:"):
                 try:
-                    keepref = http_to_keep(self.arvrunner.api, self.arvrunner.project_uuid, src)
-                    logger.info("%s is %s", src, keepref)
-                    self._pathmap[src] = MapperEnt(keepref, keepref, srcobj["class"], True)
+                    if self.arvrunner.defer_downloads:
+                        # passthrough, we'll download it later.
+                        self._pathmap[src] = MapperEnt(src, src, srcobj["class"], True)
+                    else:
+                        results = http_to_keep(self.arvrunner.api, self.arvrunner.project_uuid, src,
+                                                              varying_url_params=self.arvrunner.toplevel_runtimeContext.varying_url_params,
+                                                              prefer_cached_downloads=self.arvrunner.toplevel_runtimeContext.prefer_cached_downloads)
+                        keepref = "keep:%s/%s" % (results[0], results[1])
+                        logger.info("%s is %s", src, keepref)
+                        self._pathmap[src] = MapperEnt(keepref, keepref, srcobj["class"], True)
                 except Exception as e:
-                    logger.warning(str(e))
+                    logger.warning("Download error: %s", e)
             else:
                 self._pathmap[src] = MapperEnt(src, src, srcobj["class"], True)
 
@@ -141,7 +142,7 @@ class ArvPathMapper(PathMapper):
             for opt in self.optional_deps:
                 if obj["location"] == opt["location"]:
                     return
-            raise SourceLine(obj, "location", WorkflowException).makeError("Don't know what to do with '%s'" % obj["location"])
+            raise SourceLine(obj, "location", WorkflowException).makeError("Can't handle '%s'" % obj["location"])
 
     def needs_new_collection(self, srcobj, prefix=""):
         """Check if files need to be staged into a new collection.
@@ -156,6 +157,9 @@ class ArvPathMapper(PathMapper):
         if loc.startswith("_:"):
             return True
 
+        if self.arvrunner.defer_downloads and (loc.startswith("http:") or loc.startswith("https:")):
+            return False
+
         i = loc.rfind("/")
         if i > -1:
             loc_prefix = loc[:i+1]
@@ -336,7 +340,7 @@ class StagingPathMapper(PathMapper):
         # Overridden to maintain the use case of mapping by source (identifier) to
         # target regardless of how the map is structured interally.
         def getMapperEnt(src):
-            for k,v in viewitems(self._pathmap):
+            for k,v in self._pathmap.items():
                 if (v.type != "CreateFile" and v.resolved == src) or (v.type == "CreateFile" and k == src):
                     return v
 
@@ -355,7 +359,7 @@ class VwdPathMapper(StagingPathMapper):
         # with any secondary files.
         self.visitlisting(referenced_files, self.stagedir, basedir)
 
-        for path, (ab, tgt, type, staged) in viewitems(self._pathmap):
+        for path, (ab, tgt, type, staged) in self._pathmap.items():
             if type in ("File", "Directory") and ab.startswith("keep:"):
                 self._pathmap[path] = MapperEnt("$(task.keep)/%s" % ab[5:], tgt, type, staged)