Merge branch '9857-cwl-acceptlist-re' refs #9857
[arvados.git] / sdk / cwl / arvados_cwl / runner.py
index 155c6a05039ec65649975d038073185fbc5c4338..a4132ca3f762675ff9dccc0129f3f8139b313f1b 100644 (file)
@@ -21,7 +21,22 @@ from .pathmapper import ArvPathMapper
 
 logger = logging.getLogger('arvados.cwl-runner')
 
-cwltool.draft2tool.ACCEPTLIST_RE = re.compile(r"^[a-zA-Z0-9._+-]+$")
+cwltool.draft2tool.ACCEPTLIST_RE = re.compile(r".*")
+
+def trim_listing(obj):
+    """Remove 'listing' field from Directory objects that are keep references.
+
+    When Directory objects represent Keep references, it is redundant and
+    potentially very expensive to pass fully enumerated Directory objects
+    between instances of cwl-runner (e.g. when submitting a job, or using the
+    RunInSingleContainer feature), so delete the 'listing' field when it is
+    safe to do so.  Also removes any 'location' that starts with '_:'.
+    """
+
+    if obj.get("location", "").startswith("keep:") and "listing" in obj:
+        del obj["listing"]
+    if obj.get("location", "").startswith("_:"):
+        del obj["location"]
 
 def upload_dependencies(arvrunner, name, document_loader,
                         workflowobj, uri, loadref_run):
@@ -69,25 +84,19 @@ def upload_dependencies(arvrunner, name, document_loader,
                   set(("$include", "$schemas", "location")),
                   loadref)
 
-    files = []
-    def visitFiles(path):
-        files.append(path)
-
-    adjustFileObjs(sc, visitFiles)
-    adjustDirObjs(sc, visitFiles)
-
-    normalizeFilesDirs(files)
+    normalizeFilesDirs(sc)
 
     if "id" in workflowobj:
-        files.append({"class": "File", "location": workflowobj["id"]})
+        sc.append({"class": "File", "location": workflowobj["id"]})
 
-    mapper = ArvPathMapper(arvrunner, files, "",
+    mapper = ArvPathMapper(arvrunner, sc, "",
                            "keep:%s",
                            "keep:%s/%s",
                            name=name)
 
     def setloc(p):
-        p["location"] = mapper.mapper(p["location"]).target
+        if "location" in p and (not p["location"].startswith("_:")) and (not p["location"].startswith("keep:")):
+            p["location"] = mapper.mapper(p["location"]).resolved
     adjustFileObjs(workflowobj, setloc)
     adjustDirObjs(workflowobj, setloc)
 
@@ -135,6 +144,8 @@ class Runner(object):
                                         self.job_order.get("id", "#"),
                                         False)
 
+        adjustDirObjs(self.job_order, trim_listing)
+
         if "id" in self.job_order:
             del self.job_order["id"]