logger = logging.getLogger('arvados.cwl-runner')
-cwltool.draft2tool.ACCEPTLIST_RE = re.compile(r"^[a-zA-Z0-9._+-]+$")
+cwltool.draft2tool.ACCEPTLIST_RE = re.compile(r".*")
+
+def trim_listing(obj):
+    """Remove 'listing' field from Directory objects that are keep references.
+
+    When Directory objects represent Keep references, it is redundant and
+    potentially very expensive to pass fully enumerated Directory objects
+    between instances of cwl-runner (e.g. submitting a job, or using the
+    RunInSingleContainer feature), so delete the 'listing' field when it is
+    safe to do so.
+    """
+
+    # A "keep:" locator already fully identifies the Directory's contents,
+    # so the enumerated listing carries no extra information.
+    if obj.get("location", "").startswith("keep:") and "listing" in obj:
+        del obj["listing"]
+    # NOTE(review): "_:" looks like a placeholder/anonymous id; deleting it
+    # presumably lets a real location be assigned downstream — TODO confirm.
+    if obj.get("location", "").startswith("_:"):
+        del obj["location"]
def upload_dependencies(arvrunner, name, document_loader,
workflowobj, uri, loadref_run):
set(("$include", "$schemas", "location")),
loadref)
- files = []
- def visitFiles(path):
- files.append(path)
-
- adjustFileObjs(sc, visitFiles)
- adjustDirObjs(sc, visitFiles)
-
- normalizeFilesDirs(files)
+ normalizeFilesDirs(sc)
if "id" in workflowobj:
- files.append({"class": "File", "location": workflowobj["id"]})
+ sc.append({"class": "File", "location": workflowobj["id"]})
- mapper = ArvPathMapper(arvrunner, files, "",
+ mapper = ArvPathMapper(arvrunner, sc, "",
"keep:%s",
"keep:%s/%s",
name=name)
def setloc(p):
- p["location"] = mapper.mapper(p["location"]).target
+ if "location" in p and (not p["location"].startswith("_:")) and (not p["location"].startswith("keep:")):
+ p["location"] = mapper.mapper(p["location"]).resolved
adjustFileObjs(workflowobj, setloc)
adjustDirObjs(workflowobj, setloc)
for s in tool.steps:
upload_docker(arvrunner, s.embedded_tool)
+def upload_instance(arvrunner, name, tool, job_order):
+    """Upload a tool's Docker image and the dependencies of a workflow run.
+
+    Uploads dependencies referenced by the workflow document and by the job
+    order, trims redundant Directory listings, and strips the job order's
+    'id' field.  Returns the path mapper for the workflow document.
+    """
+    upload_docker(arvrunner, tool)
+
+    # loadref_run=True: follow "run" references while scanning the workflow
+    # document for dependencies.
+    workflowmapper = upload_dependencies(arvrunner,
+                                         name,
+                                         tool.doc_loader,
+                                         tool.tool,
+                                         tool.tool["id"],
+                                         True)
+
+    # NOTE(review): jobmapper is unused beyond the upload side effect.
+    jobmapper = upload_dependencies(arvrunner,
+                                    os.path.basename(job_order.get("id", "#")),
+                                    tool.doc_loader,
+                                    job_order,
+                                    job_order.get("id", "#"),
+                                    False)
+
+    # Drop redundant 'listing' fields from Keep-backed Directory objects.
+    adjustDirObjs(job_order, trim_listing)
+
+    # The job order's document id is a local artifact and should not be
+    # submitted along with the inputs.
+    if "id" in job_order:
+        del job_order["id"]
+
+    return workflowmapper
+
class Runner(object):
- def __init__(self, runner, tool, job_order, enable_reuse):
+ def __init__(self, runner, tool, job_order, enable_reuse, output_name):
self.arvrunner = runner
self.tool = tool
self.job_order = job_order
self.running = False
self.enable_reuse = enable_reuse
self.uuid = None
+ self.final_output = None
+ self.output_name = output_name
def update_pipeline_component(self, record):
pass
def arvados_job_spec(self, *args, **kwargs):
- upload_docker(self.arvrunner, self.tool)
-
self.name = os.path.basename(self.tool.tool["id"])
-
- workflowmapper = upload_dependencies(self.arvrunner,
- self.name,
- self.tool.doc_loader,
- self.tool.tool,
- self.tool.tool["id"],
- True)
-
- jobmapper = upload_dependencies(self.arvrunner,
- os.path.basename(self.job_order.get("id", "#")),
- self.tool.doc_loader,
- self.job_order,
- self.job_order.get("id", "#"),
- False)
-
- if "id" in self.job_order:
- del self.job_order["id"]
-
- return workflowmapper
-
+ return upload_instance(self.arvrunner, self.name, self.tool, self.job_order)
def done(self, record):
if record["state"] == "Complete":
outputs = None
try:
try:
- outc = arvados.collection.Collection(record["output"])
+ self.final_output = record["output"]
+ outc = arvados.collection.CollectionReader(self.final_output,
+ api_client=self.arvrunner.api,
+ keep_client=self.arvrunner.keep_client,
+ num_retries=self.arvrunner.num_retries)
with outc.open("cwl.output.json") as f:
outputs = json.load(f)
def keepify(fileobj):