Update storage_classes support for arvados_cwl_runner to work correctly
[arvados.git] / sdk / cwl / arvados_cwl / runner.py
index fe1ef223a098f038ed4153e18489ccccf04fe16d..f907d33951c45a5d707bb15dd18c9154ae1b5bad 100644 (file)
@@ -122,11 +122,18 @@ def upload_dependencies(arvrunner, name, document_loader,
         # that external references in $include and $mixin are captured.
         scanobj = loadref("", workflowobj["id"])
 
-    sc = scandeps(uri, scanobj,
+    sc_result = scandeps(uri, scanobj,
                   loadref_fields,
                   set(("$include", "$schemas", "location")),
                   loadref, urljoin=document_loader.fetcher.urljoin)
 
+    sc = []
+    def only_real(obj):
+        if obj.get("location", "").startswith("file:"):
+            sc.append(obj)
+
+    visit_class(sc_result, ("File", "Directory"), only_real)
+
     normalizeFilesDirs(sc)
 
     if include_primary and "id" in workflowobj:
@@ -217,13 +224,13 @@ def packed_workflow(arvrunner, tool, merged_map):
     packed = pack(tool.doc_loader, tool.doc_loader.fetch(tool.tool["id"]),
                   tool.tool["id"], tool.metadata, rewrite_out=rewrites)
 
-    rewrite_to_orig = {}
-    for k,v in rewrites.items():
-        rewrite_to_orig[v] = k
+    rewrite_to_orig = {v: k for k,v in rewrites.items()}
 
     def visit(v, cur_id):
         if isinstance(v, dict):
             if v.get("class") in ("CommandLineTool", "Workflow"):
+                if "id" not in v:
+                    raise SourceLine(v, None, Exception).makeError("Embedded process object is missing required 'id' field")
                 cur_id = rewrite_to_orig.get(v["id"], v["id"])
             if "location" in v and not v["location"].startswith("keep:"):
                 v["location"] = merged_map[cur_id].resolved[v["location"]]
@@ -346,8 +353,8 @@ class Runner(object):
     def __init__(self, runner, tool, job_order, enable_reuse,
                  output_name, output_tags, submit_runner_ram=0,
                  name=None, on_error=None, submit_runner_image=None,
-                 intermediate_output_ttl=0, merged_map=None, priority=None,
-                 secret_store=None):
+                 intermediate_output_ttl=0, merged_map=None, default_storage_classes="default",
+                 priority=None, secret_store=None):
         self.arvrunner = runner
         self.tool = tool
         self.job_order = job_order
@@ -369,6 +376,7 @@ class Runner(object):
         self.intermediate_output_ttl = intermediate_output_ttl
         self.priority = priority
         self.secret_store = secret_store
+        self.default_storage_classes = default_storage_classes
 
         if submit_runner_ram:
             self.submit_runner_ram = submit_runner_ram