X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/454ee2b8f0385c542b6f1165a3baf2820425e1a3..c340eecc7a03dd066792e5f046f087b8b3dfced6:/sdk/cwl/arvados_cwl/pathmapper.py diff --git a/sdk/cwl/arvados_cwl/pathmapper.py b/sdk/cwl/arvados_cwl/pathmapper.py index 56c15a4a43..4a91a7a836 100644 --- a/sdk/cwl/arvados_cwl/pathmapper.py +++ b/sdk/cwl/arvados_cwl/pathmapper.py @@ -21,7 +21,9 @@ import arvados.collection from schema_salad.sourceline import SourceLine from arvados.errors import ApiError -from cwltool.pathmapper import PathMapper, MapperEnt, abspath, adjustFileObjs, adjustDirObjs +from cwltool.pathmapper import PathMapper, MapperEnt +from cwltool.utils import adjustFileObjs, adjustDirObjs +from cwltool.stdfsaccess import abspath from cwltool.workflow import WorkflowException from .http import http_to_keep @@ -42,13 +44,13 @@ def trim_listing(obj): if obj.get("location", "").startswith("keep:") and "listing" in obj: del obj["listing"] +collection_pdh_path = re.compile(r'^keep:[0-9a-f]{32}\+\d+/.+$') +collection_pdh_pattern = re.compile(r'^keep:([0-9a-f]{32}\+\d+)(/.*)?') +collection_uuid_pattern = re.compile(r'^keep:([a-z0-9]{5}-4zz18-[a-z0-9]{15})(/.*)?$') class ArvPathMapper(PathMapper): """Convert container-local paths to and from Keep collection ids.""" - pdh_path = re.compile(r'^keep:[0-9a-f]{32}\+\d+/.+$') - pdh_dirpath = re.compile(r'^keep:[0-9a-f]{32}\+\d+(/.*)?$') - def __init__(self, arvrunner, referenced_files, input_basedir, collection_pattern, file_pattern, name=None, single_collection=False): self.arvrunner = arvrunner @@ -66,13 +68,17 @@ class ArvPathMapper(PathMapper): if "#" in src: src = src[:src.index("#")] - if isinstance(src, basestring) and ArvPathMapper.pdh_dirpath.match(src): - self._pathmap[src] = MapperEnt(src, self.collection_pattern % urllib.parse.unquote(src[5:]), srcobj["class"], True) - if arvados_cwl.util.collectionUUID in srcobj: - self.pdh_to_uuid[src.split("/", 1)[0][5:]] = srcobj[arvados_cwl.util.collectionUUID] - debug = logger.isEnabledFor(logging.DEBUG) + if isinstance(src, basestring) and src.startswith("keep:"): + if collection_pdh_pattern.match(src): + self._pathmap[src] = MapperEnt(src, self.collection_pattern % urllib.parse.unquote(src[5:]), srcobj["class"], True) + if arvados_cwl.util.collectionUUID in srcobj: + self.pdh_to_uuid[src.split("/", 1)[0][5:]] = srcobj[arvados_cwl.util.collectionUUID] + elif not collection_uuid_pattern.match(src): + with SourceLine(srcobj, "location", WorkflowException, debug): + raise WorkflowException("Invalid keep reference '%s'" % src) + if src not in self._pathmap: if src.startswith("file:"): # Local FS ref, may need to be uploaded or may be on keep @@ -95,9 +101,12 @@ class ArvPathMapper(PathMapper): if srcobj["class"] == "Directory" and "listing" not in srcobj: raise WorkflowException("Directory literal '%s' is missing `listing`" % src) elif src.startswith("http:") or src.startswith("https:"): - keepref = http_to_keep(self.arvrunner.api, self.arvrunner.project_uuid, src) - logger.info("%s is %s", src, keepref) - self._pathmap[src] = MapperEnt(keepref, keepref, srcobj["class"], True) + try: + keepref = http_to_keep(self.arvrunner.api, self.arvrunner.project_uuid, src) + logger.info("%s is %s", src, keepref) + self._pathmap[src] = MapperEnt(keepref, keepref, srcobj["class"], True) + except Exception as e: + logger.warning(str(e)) else: self._pathmap[src] = MapperEnt(src, src, srcobj["class"], True) @@ -281,6 +290,7 @@ class StagingPathMapper(PathMapper): def visit(self, obj, stagedir, basedir, copy=False, staged=False): # type: (Dict[unicode, Any], unicode, unicode, bool) -> None loc = obj["location"] + stagedir = obj.get("dirname") or stagedir tgt = os.path.join(stagedir, obj["basename"]) basetgt, baseext = os.path.splitext(tgt)