import logging
import uuid
import os
+import urllib
import arvados.commands.run
import arvados.collection
logger = logging.getLogger('arvados.cwl-runner')
+def trim_listing(obj):
+ """Remove 'listing' field from Directory objects that are keep references.
+
+ When Directory objects represent Keep references, it is redundant and
+ potentially very expensive to pass fully enumerated Directory objects
+ between instances of cwl-runner (e.g. a submitting a job, or using the
+ RunInSingleContainer feature), so delete the 'listing' field when it is
+ safe to do so.
+
+ """
+
+ if obj.get("location", "").startswith("keep:") and "listing" in obj:
+ del obj["listing"]
+
+
class ArvPathMapper(PathMapper):
"""Convert container-local paths to and from Keep collection ids."""
pdh_path = re.compile(r'^keep:[0-9a-f]{32}\+\d+/.+$')
- pdh_dirpath = re.compile(r'^keep:[0-9a-f]{32}\+\d+(/.+)?$')
+ pdh_dirpath = re.compile(r'^keep:[0-9a-f]{32}\+\d+(/.*)?$')
def __init__(self, arvrunner, referenced_files, input_basedir,
collection_pattern, file_pattern, name=None, **kwargs):
self.collection_pattern = collection_pattern
self.file_pattern = file_pattern
self.name = name
+ self.referenced_files = [r["location"] for r in referenced_files]
super(ArvPathMapper, self).__init__(referenced_files, input_basedir, None)
def visit(self, srcobj, uploadfiles):
src = src[:src.index("#")]
if isinstance(src, basestring) and ArvPathMapper.pdh_dirpath.match(src):
- self._pathmap[src] = MapperEnt(src, self.collection_pattern % src[5:], srcobj["class"], True)
+ self._pathmap[src] = MapperEnt(src, self.collection_pattern % urllib.unquote(src[5:]), srcobj["class"], True)
if src not in self._pathmap:
if src.startswith("file:"):
if isinstance(st, arvados.commands.run.UploadFile):
uploadfiles.add((src, ab, st))
elif isinstance(st, arvados.commands.run.ArvFile):
- self._pathmap[src] = MapperEnt(st.fn, self.collection_pattern % st.fn[5:], "File", True)
+ self._pathmap[src] = MapperEnt(st.fn, self.collection_pattern % urllib.unquote(st.fn[5:]), "File", True)
else:
raise WorkflowException("Input file path '%s' is invalid" % st)
elif src.startswith("_:"):
- if "contents" in srcobj:
- pass
- else:
- raise WorkflowException("File literal '%s' is missing contents" % src)
+ if srcobj["class"] == "File" and "contents" not in srcobj:
+ raise WorkflowException("File literal '%s' is missing `contents`" % src)
+ if srcobj["class"] == "Directory" and "listing" not in srcobj:
+ raise WorkflowException("Directory literal '%s' is missing `listing`" % src)
else:
- self._pathmap[src] = MapperEnt(src, src, "File", True)
+ self._pathmap[src] = MapperEnt(src, src, srcobj["class"], True)
with SourceLine(srcobj, "secondaryFiles", WorkflowException):
for l in srcobj.get("secondaryFiles", []):
loc = k["location"]
if loc in already_uploaded:
v = already_uploaded[loc]
- self._pathmap[loc] = MapperEnt(v.resolved, self.collection_pattern % v.resolved[5:], "File", True)
+ self._pathmap[loc] = MapperEnt(v.resolved, self.collection_pattern % urllib.unquote(v.resolved[5:]), "File", True)
for srcobj in referenced_files:
self.visit(srcobj, uploadfiles)
project=self.arvrunner.project_uuid)
for src, ab, st in uploadfiles:
- self._pathmap[src] = MapperEnt(st.fn, self.collection_pattern % st.keepref,
+ self._pathmap[src] = MapperEnt(urllib.quote(st.fn, "/:+@"), self.collection_pattern % st.fn[5:],
"Directory" if os.path.isdir(ab) else "File", True)
self.arvrunner.add_uploaded(src, self._pathmap[src])