X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/29e3fe60d22218d122a1a3448a144bfcfd64785a..540b72d62a94015f116ba077e279a5f10d666778:/sdk/cwl/arvados_cwl/pathmapper.py diff --git a/sdk/cwl/arvados_cwl/pathmapper.py b/sdk/cwl/arvados_cwl/pathmapper.py index 0cbf9eb56b..539188fddd 100644 --- a/sdk/cwl/arvados_cwl/pathmapper.py +++ b/sdk/cwl/arvados_cwl/pathmapper.py @@ -26,7 +26,7 @@ from cwltool.utils import adjustFileObjs, adjustDirObjs from cwltool.stdfsaccess import abspath from cwltool.workflow import WorkflowException -from .http import http_to_keep +from arvados.http_to_keep import http_to_keep logger = logging.getLogger('arvados.cwl-runner') @@ -74,7 +74,6 @@ class ArvPathMapper(PathMapper): if isinstance(src, basestring) and src.startswith("keep:"): if collection_pdh_pattern.match(src): - #self._pathmap[src] = MapperEnt(src, self.collection_pattern % src[5:], srcobj["class"], True) self._pathmap[src] = MapperEnt(src, self.collection_pattern % urllib.parse.unquote(src[5:]), srcobj["class"], True) if arvados_cwl.util.collectionUUID in srcobj: @@ -94,12 +93,9 @@ class ArvPathMapper(PathMapper): raiseOSError=True) with SourceLine(srcobj, "location", WorkflowException, debug): if isinstance(st, arvados.commands.run.UploadFile): - print("VV", (src, ab, st)) uploadfiles.add((src, ab, st)) elif isinstance(st, arvados.commands.run.ArvFile): self._pathmap[src] = MapperEnt(st.fn, self.collection_pattern % urllib.parse.unquote(st.fn[5:]), "File", True) - - #self._pathmap[src] = MapperEnt(st.fn, self.collection_pattern % st.fn[5:], "File", True) else: raise WorkflowException("Input file path '%s' is invalid" % st) elif src.startswith("_:"): @@ -109,11 +105,17 @@ class ArvPathMapper(PathMapper): raise WorkflowException("Directory literal '%s' is missing `listing`" % src) elif src.startswith("http:") or src.startswith("https:"): try: - keepref = http_to_keep(self.arvrunner.api, self.arvrunner.project_uuid, src) - logger.info("%s is %s", src, keepref) - self._pathmap[src] = MapperEnt(keepref, keepref, srcobj["class"], True) + if self.arvrunner.defer_downloads: + # passthrough, we'll download it later. + self._pathmap[src] = MapperEnt(src, src, srcobj["class"], True) + else: + keepref = "keep:%s/%s" % http_to_keep(self.arvrunner.api, self.arvrunner.project_uuid, src, + varying_url_params=self.arvrunner.toplevel_runtimeContext.varying_url_params, + prefer_cached_downloads=self.arvrunner.toplevel_runtimeContext.prefer_cached_downloads) + logger.info("%s is %s", src, keepref) + self._pathmap[src] = MapperEnt(keepref, keepref, srcobj["class"], True) except Exception as e: - logger.warning(str(e)) + logger.warning("Download error: %s", e) else: self._pathmap[src] = MapperEnt(src, src, srcobj["class"], True) @@ -125,7 +127,6 @@ class ArvPathMapper(PathMapper): self.visit(l, uploadfiles) def addentry(self, obj, c, path, remap): - print(obj["location"], self._pathmap) if obj["location"] in self._pathmap: src, srcpath = self.arvrunner.fs_access.get_collection(self._pathmap[obj["location"]].resolved) if srcpath == "": @@ -146,7 +147,7 @@ class ArvPathMapper(PathMapper): for opt in self.optional_deps: if obj["location"] == opt["location"]: return - raise SourceLine(obj, "location", WorkflowException).makeError("Don't know what to do with '%s'" % obj["location"]) + raise SourceLine(obj, "location", WorkflowException).makeError("Can't handle '%s'" % obj["location"]) def needs_new_collection(self, srcobj, prefix=""): """Check if files need to be staged into a new collection. @@ -161,18 +162,26 @@ class ArvPathMapper(PathMapper): if loc.startswith("_:"): return True - if not prefix: - i = loc.rfind("/") - if i > -1: - prefix = loc[:i+1] - suffix = urllib.parse.quote(urllib.parse.unquote(loc[i+1:]), "/+@") - else: - prefix = loc+"/" - suffix = "" + if self.arvrunner.defer_downloads and (loc.startswith("http:") or loc.startswith("https:")): + return False + + i = loc.rfind("/") + if i > -1: + loc_prefix = loc[:i+1] + if not prefix: + prefix = loc_prefix + # quote/unquote to ensure consistent quoting + suffix = urllib.parse.quote(urllib.parse.unquote(loc[i+1:]), "/+@") + else: + # no '/' found + loc_prefix = loc+"/" + prefix = loc+"/" + suffix = "" + + if prefix != loc_prefix: + return True - print("LLL", prefix+suffix, prefix+urllib.parse.quote(srcobj["basename"], "/+@")) - if prefix+suffix != prefix+urllib.parse.quote(srcobj["basename"], "/+@"): - print("LLL -> needs new collection") + if "basename" in srcobj and suffix != urllib.parse.quote(srcobj["basename"], "/+@"): return True if srcobj["class"] == "File" and loc not in self._pathmap: @@ -181,7 +190,7 @@ class ArvPathMapper(PathMapper): if self.needs_new_collection(s, prefix): return True if srcobj.get("listing"): - prefix = "%s%s/" % (prefix, srcobj["basename"]) + prefix = "%s%s/" % (prefix, urllib.parse.quote(srcobj.get("basename", suffix), "/+@")) for l in srcobj["listing"]: if self.needs_new_collection(l, prefix): return True @@ -211,12 +220,9 @@ class ArvPathMapper(PathMapper): packed=False) for src, ab, st in uploadfiles: - print("BBBBB", src, ab, st.fn, urllib.parse.quote(st.fn, "/:+@")) self._pathmap[src] = MapperEnt(urllib.parse.quote(st.fn, "/:+@"), urllib.parse.quote(self.collection_pattern % st.fn[5:], "/:+@"), "Directory" if os.path.isdir(ab) else "File", True) - print("CCCCC", self._pathmap) - for srcobj in referenced_files: remap = [] if srcobj["class"] == "Directory" and srcobj["location"] not in self._pathmap: