Merge branch 'master' into 11850-singlecontainer-max-requirements
author jiayong2 <jiayong@math.mit.edu>
Mon, 12 Feb 2018 20:05:29 +0000 (20:05 +0000)
committer jiayong2 <jiayong@math.mit.edu>
Mon, 12 Feb 2018 20:05:29 +0000 (20:05 +0000)
Arvados-DCO-1.1-Signed-off-by: Jiayong Li <jiayong@math.mit.edu>

sdk/cwl/arvados_cwl/arvworkflow.py
sdk/cwl/tests/test_job.py
sdk/cwl/tests/wf/echo-subwf.cwl [new file with mode: 0644]
sdk/cwl/tests/wf/echo-wf.cwl [new file with mode: 0644]
sdk/cwl/tests/wf/echo_a.cwl [new file with mode: 0644]
sdk/cwl/tests/wf/echo_b.cwl [new file with mode: 0644]

index f0f9c77f40fed6c5a27d8160abdfc1d710ddb9f1..15704dba66884e512d10afa5b87cfcfa2f40d78d 100644 (file)
@@ -14,6 +14,7 @@ from cwltool.load_tool import fetch_document
 from cwltool.process import shortname
 from cwltool.workflow import Workflow, WorkflowException
 from cwltool.pathmapper import adjustFileObjs, adjustDirObjs, visit_class
+from cwltool.builder import Builder
 
 import ruamel.yaml as yaml
 
@@ -71,6 +72,32 @@ def dedup_reqs(reqs):
             dedup[r["class"]] = r
     return [dedup[r] for r in sorted(dedup.keys())]
 
+def get_max_res_req(res_reqs):
+    """Take the max of a list of ResourceRequirement."""
+
+    total_res_req = {}
+    exception_msgs = []
+    for a in ("coresMin", "coresMax", "ramMin", "ramMax", "tmpdirMin", "tmpdirMax", "outdirMin", "outdirMax"):
+        total_res_req[a] = []
+        for res_req in res_reqs:
+            if a in res_req:
+                if isinstance(res_req[a], int): # integer check
+                    total_res_req[a].append(res_req[a])
+                else:
+                    msg = SourceLine(res_req).makeError(
+                    "Non-top-level ResourceRequirement in single container cannot have expressions")
+                    exception_msgs.append(msg)
+    if exception_msgs:
+        raise WorkflowException("\n".join(exception_msgs))
+    else:
+        max_res_req = {}
+        for a in total_res_req:
+            if total_res_req[a]:
+                max_res_req[a] = max(total_res_req[a])
+        if max_res_req:
+            max_res_req["class"] = "ResourceRequirement"
+        return cmap(max_res_req)
+
 class ArvadosWorkflow(Workflow):
     """Wrap cwltool Workflow to override selected methods."""
 
@@ -105,6 +132,39 @@ class ArvadosWorkflow(Workflow):
 
                     packed = pack(document_loader, workflowobj, uri, self.metadata)
 
+                    builder = Builder()
+                    builder.job = joborder
+                    builder.requirements = self.requirements
+                    builder.hints = self.hints
+                    builder.resources = {}
+
+                    res_reqs = {"requirements": [], "hints": []}
+                    for t in ("requirements", "hints"):
+                        for item in packed["$graph"]:
+                            if t in item:
+                                if item["id"] == "#main": # evaluate potential expressions in the top-level requirements/hints
+                                    for req in item[t]:
+                                        if req["class"] == "ResourceRequirement":
+                                            eval_req = {"class": "ResourceRequirement"}
+                                            for a in ("coresMin", "coresMax", "ramMin", "ramMax", "tmpdirMin", "tmpdirMax", "outdirMin", "outdirMax"):
+                                                if a in req:
+                                                    eval_req[a] = builder.do_eval(req[a])
+                                            res_reqs[t].append(eval_req)
+                                else:
+                                    for req in item[t]:
+                                        if req["class"] == "ResourceRequirement":
+                                            res_reqs[t].append(req)
+                    max_res_req = {"requirements": get_max_res_req(res_reqs["requirements"]),
+                                   "hints": get_max_res_req(res_reqs["hints"])}
+
+                    new_spec = {"requirements": self.requirements, "hints": self.hints}
+                    for t in ("requirements", "hints"):
+                        for req in new_spec[t]:
+                            if req["class"] == "ResourceRequirement":
+                                new_spec[t].remove(req)
+                        if max_res_req[t]:
+                            new_spec[t].append(max_res_req[t])
+
                     upload_dependencies(self.arvrunner,
                                         kwargs.get("name", ""),
                                         document_loader,
@@ -158,7 +218,7 @@ class ArvadosWorkflow(Workflow):
                 "inputs": self.tool["inputs"],
                 "outputs": self.tool["outputs"],
                 "stdout": "cwl.output.json",
-                "requirements": self.requirements+[
+                "requirements": new_spec["requirements"]+[
                     {
                     "class": "InitialWorkDirRequirement",
                     "listing": [{
@@ -172,7 +232,7 @@ class ArvadosWorkflow(Workflow):
                             "entry": json.dumps(joborder_keepmount, indent=2, sort_keys=True, separators=(',',': ')).replace("\\", "\\\\").replace('$(', '\$(').replace('${', '\${')
                         }]
                 }],
-                "hints": self.hints,
+                "hints": new_spec["hints"],
                 "arguments": ["--no-container", "--move-outputs", "--preserve-entire-environment", "workflow.cwl#main", "cwl.input.yml"],
                 "id": "#"
             })
index 309ac0996ac54cb0cbda7c7940cad937cc8ee688..85189869dba91173c5c5e4bc6f29dd59c06e890c 100644 (file)
@@ -392,6 +392,81 @@ class TestWorkflow(unittest.TestCase):
   "sleeptime": 5
 }''')])
 
+    # The test passes no builder.resources
+    # Hence the default resources will apply: {'cores': 1, 'ram': 1024, 'outdirSize': 1024, 'tmpdirSize': 1024}
+    @mock.patch("arvados.collection.CollectionReader")
+    @mock.patch("arvados.collection.Collection")
+    @mock.patch('arvados.commands.keepdocker.list_images_in_arv')
+    def test_max_resource_singlecontainer(self, list_images_in_arv, mockcollection, mockcollectionreader):
+        arvados_cwl.add_arv_hints()
+
+        api = mock.MagicMock()
+        api._rootDesc = get_rootDesc()
+
+        runner = arvados_cwl.ArvCwlRunner(api)
+        self.assertEqual(runner.work_api, 'jobs')
+
+        list_images_in_arv.return_value = [["zzzzz-4zz18-zzzzzzzzzzzzzzz"]]
+        runner.api.collections().get().execute.return_vaulue = {"portable_data_hash": "99999999999999999999999999999993+99"}
+        runner.api.collections().list().execute.return_vaulue = {"items": [{"portable_data_hash": "99999999999999999999999999999993+99"}]}
+
+        runner.project_uuid = "zzzzz-8i9sb-zzzzzzzzzzzzzzz"
+        runner.ignore_docker_for_reuse = False
+        runner.num_retries = 0
+        document_loader, avsc_names, schema_metadata, metaschema_loader = cwltool.process.get_schema("v1.0")
+
+        make_fs_access=functools.partial(arvados_cwl.CollectionFsAccess,
+                                         collection_cache=arvados_cwl.CollectionCache(runner.api, None, 0))
+        document_loader.fetcher_constructor = functools.partial(arvados_cwl.CollectionFetcher, api_client=api, fs_access=make_fs_access(""))
+        document_loader.fetcher = document_loader.fetcher_constructor(document_loader.cache, document_loader.session)
+        document_loader.fetch_text = document_loader.fetcher.fetch_text
+        document_loader.check_exists = document_loader.fetcher.check_exists
+
+        tool, metadata = document_loader.resolve_ref("tests/wf/echo-wf.cwl")
+        metadata["cwlVersion"] = tool["cwlVersion"]
+
+        mockcollection().portable_data_hash.return_value = "99999999999999999999999999999999+118"
+
+        arvtool = arvados_cwl.ArvadosWorkflow(runner, tool, work_api="jobs", avsc_names=avsc_names,
+                                              basedir="", make_fs_access=make_fs_access, loader=document_loader,
+                                              makeTool=runner.arv_make_tool, metadata=metadata)
+        arvtool.formatgraph = None
+        it = arvtool.job({}, mock.MagicMock(), basedir="", make_fs_access=make_fs_access)
+        it.next().run()
+        it.next().run()
+
+        with open("tests/wf/echo-subwf.cwl") as f:
+            subwf = StripYAMLComments(f.read())
+
+        runner.api.jobs().create.assert_called_with(
+            body=JsonDiffMatcher({
+                'minimum_script_version': 'a3f2cb186e437bfce0031b024b2157b73ed2717d',
+                'repository': 'arvados',
+                'script_version': 'master',
+                'script': 'crunchrunner',
+                'script_parameters': {
+                    'tasks': [{'task.env': {
+                        'HOME': '$(task.outdir)',
+                        'TMPDIR': '$(task.tmpdir)'},
+                               'task.vwd': {
+                                   'workflow.cwl': '$(task.keep)/99999999999999999999999999999999+118/workflow.cwl',
+                                   'cwl.input.yml': '$(task.keep)/99999999999999999999999999999999+118/cwl.input.yml'
+                               },
+                    'command': [u'cwltool', u'--no-container', u'--move-outputs', u'--preserve-entire-environment', u'workflow.cwl#main', u'cwl.input.yml'],
+                    'task.stdout': 'cwl.output.json'}]},
+                'runtime_constraints': {
+                    'min_scratch_mb_per_node': 2048,
+                    'min_cores_per_node': 3,
+                    'docker_image': 'arvados/jobs',
+                    'min_ram_mb_per_node': 1024
+                },
+                'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz'}),
+            filters=[['repository', '=', 'arvados'],
+                     ['script', '=', 'crunchrunner'],
+                     ['script_version', 'in git', 'a3f2cb186e437bfce0031b024b2157b73ed2717d'],
+                     ['docker_image_locator', 'in docker', 'arvados/jobs']],
+            find_or_create=True)
+
     def test_default_work_api(self):
         arvados_cwl.add_arv_hints()
 
diff --git a/sdk/cwl/tests/wf/echo-subwf.cwl b/sdk/cwl/tests/wf/echo-subwf.cwl
new file mode 100644 (file)
index 0000000..29dc3d6
--- /dev/null
@@ -0,0 +1,19 @@
+# Test fixture: a workflow that declares its own ResourceRequirement
+# (coresMin: 1) and runs echo_a.cwl and echo_b.cwl as two steps.
+cwlVersion: v1.0
+class: Workflow
+requirements:
+  ResourceRequirement:
+    coresMin: 1
+
+inputs: []
+
+outputs: []
+
+steps:
+  echo_a:
+    run: echo_a.cwl
+    in: []
+    out: []
+  echo_b:
+    run: echo_b.cwl
+    in: []
+    out: []
diff --git a/sdk/cwl/tests/wf/echo-wf.cwl b/sdk/cwl/tests/wf/echo-wf.cwl
new file mode 100644 (file)
index 0000000..63a5438
--- /dev/null
@@ -0,0 +1,18 @@
+# Test fixture: top-level workflow whose single step runs echo-subwf.cwl
+# with the arv:RunInSingleContainer requirement.
+cwlVersion: v1.0
+class: Workflow
+$namespaces:
+  arv: "http://arvados.org/cwl#"
+requirements:
+  SubworkflowFeatureRequirement: {}
+
+inputs: []
+
+outputs: []
+
+steps:
+  echo-subwf:
+    requirements:
+      arv:RunInSingleContainer: {}
+    run: echo-subwf.cwl
+    in: []
+    out: []
diff --git a/sdk/cwl/tests/wf/echo_a.cwl b/sdk/cwl/tests/wf/echo_a.cwl
new file mode 100644 (file)
index 0000000..f16bb09
--- /dev/null
@@ -0,0 +1,10 @@
+# Test fixture: CommandLineTool that runs `echo a` and asks for coresMin: 2.
+cwlVersion: v1.0
+class: CommandLineTool
+requirements:
+  ResourceRequirement:
+    coresMin: 2
+inputs: []
+outputs: []
+baseCommand: echo
+arguments:
+  - "a"
diff --git a/sdk/cwl/tests/wf/echo_b.cwl b/sdk/cwl/tests/wf/echo_b.cwl
new file mode 100644 (file)
index 0000000..1b22157
--- /dev/null
@@ -0,0 +1,10 @@
+# Test fixture: CommandLineTool that runs `echo b` and asks for coresMin: 3.
+cwlVersion: v1.0
+class: CommandLineTool
+requirements:
+  ResourceRequirement:
+    coresMin: 3
+inputs: []
+outputs: []
+baseCommand: echo
+arguments:
+  - "b"