From 80459d52161120ae8e33da140984d596271d5195 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Mon, 26 Jun 2017 10:45:16 -0400 Subject: [PATCH] 11095: Add arv:ReuseRequirement hint. Update tests & documentation. Arvados-DCO-1.1-Signed-off-by: Peter Amstutz --- doc/user/cwl/cwl-extensions.html.textile.liquid | 14 +++++++++++++- sdk/cwl/arvados_cwl/__init__.py | 3 ++- sdk/cwl/arvados_cwl/arv-cwl-schema.yml | 16 ++++++++++++++++ sdk/cwl/arvados_cwl/arvcontainer.py | 8 +++++++- sdk/cwl/arvados_cwl/arvjob.py | 8 +++++++- sdk/cwl/tests/arvados-tests.yml | 5 +++++ sdk/cwl/tests/noreuse.cwl | 16 ++++++++++++++++ sdk/cwl/tests/test_container.py | 7 +++++-- sdk/cwl/tests/test_job.py | 8 ++++++-- 9 files changed, 77 insertions(+), 8 deletions(-) create mode 100644 sdk/cwl/tests/noreuse.cwl diff --git a/doc/user/cwl/cwl-extensions.html.textile.liquid b/doc/user/cwl/cwl-extensions.html.textile.liquid index 8a6203496a..0ba1045670 100644 --- a/doc/user/cwl/cwl-extensions.html.textile.liquid +++ b/doc/user/cwl/cwl-extensions.html.textile.liquid @@ -14,7 +14,7 @@ $namespaces: cwltool: "http://commonwl.org/cwltool#" -Arvados extensions should go into the @hints@ section, for example: +For portability, Arvados extensions should go into the @hints@ section of your CWL file, for example:
 hints:
@@ -29,8 +29,12 @@ hints:
     loadListing: shallow_listing
   arv:IntermediateOutput:
     outputTTL: 3600
+  arv:ReuseRequirement:
+    enableReuse: false
 
+The one exception to this is @arv:APIRequirement@; see the note below. + h2. arv:RunInSingleContainer Indicates that a subworkflow should run in a single container and not be scheduled as separate steps. @@ -84,3 +88,11 @@ table(table table-bordered table-condensed). |_. Field |_. Type |_. Description | |outputTTL|int|If the value is greater than zero, consider intermediate output collections to be temporary and should be automatically trashed. Temporary collections will be trashed @outputTTL@ seconds after creation. A value of zero means intermediate output should be retained indefinitely (this is the default behavior). Note: arvados-cwl-runner currently does not take workflow dependencies into account when setting the TTL on an intermediate output collection. If the TTL is too short, it is possible for a collection to be trashed before downstream steps that consume it are started. The recommended minimum value for TTL is the expected duration of the entire the workflow.| + +h2. arv:ReuseRequirement + +Enable/disable work reuse for the current process. Default true (work reuse enabled). + +table(table table-bordered table-condensed). +|_. Field |_. Type |_. Description | +|enableReuse|boolean|Enable/disable work reuse for the current process. 
Default true (work reuse enabled).| diff --git a/sdk/cwl/arvados_cwl/__init__.py b/sdk/cwl/arvados_cwl/__init__.py index f7da563cd4..64ec4e2ef2 100644 --- a/sdk/cwl/arvados_cwl/__init__.py +++ b/sdk/cwl/arvados_cwl/__init__.py @@ -685,7 +685,8 @@ def add_arv_hints(): "http://arvados.org/cwl#PartitionRequirement", "http://arvados.org/cwl#APIRequirement", "http://commonwl.org/cwltool#LoadListingRequirement", - "http://arvados.org/cwl#IntermediateOutput" + "http://arvados.org/cwl#IntermediateOutput", + "http://arvados.org/cwl#ReuseRequirement" ]) def main(args, stdout, stderr, api_client=None, keep_client=None): diff --git a/sdk/cwl/arvados_cwl/arv-cwl-schema.yml b/sdk/cwl/arvados_cwl/arv-cwl-schema.yml index 6838c0f1ae..3107628958 100644 --- a/sdk/cwl/arvados_cwl/arv-cwl-schema.yml +++ b/sdk/cwl/arvados_cwl/arv-cwl-schema.yml @@ -149,3 +149,19 @@ $graph: be trashed before downstream steps that consume it are started. The recommended minimum value for TTL is the expected duration of the entire the workflow. + +- name: ReuseRequirement + type: record + extends: cwl:ProcessRequirement + inVocab: false + doc: | + Enable/disable work reuse for current process. Default true (work reuse enabled). 
+ fields: + - name: class + type: string + doc: "Always 'arv:ReuseRequirement'" + jsonldPredicate: + _id: "@type" + _type: "@vocab" + - name: enableReuse + type: boolean diff --git a/sdk/cwl/arvados_cwl/arvcontainer.py b/sdk/cwl/arvados_cwl/arvcontainer.py index b58a858c8d..bf4aab52fb 100644 --- a/sdk/cwl/arvados_cwl/arvcontainer.py +++ b/sdk/cwl/arvados_cwl/arvcontainer.py @@ -184,9 +184,15 @@ class ArvadosContainer(object): container_request["output_ttl"] = self.output_ttl container_request["mounts"] = mounts container_request["runtime_constraints"] = runtime_constraints - container_request["use_existing"] = kwargs.get("enable_reuse", True) container_request["scheduling_parameters"] = scheduling_parameters + enable_reuse = kwargs.get("enable_reuse", True) + if enable_reuse: + reuse_req, _ = get_feature(self, "http://arvados.org/cwl#ReuseRequirement") + if reuse_req: + enable_reuse = reuse_req["enableReuse"] + container_request["use_existing"] = enable_reuse + if kwargs.get("runnerjob", "").startswith("arvwf:"): wfuuid = kwargs["runnerjob"][6:kwargs["runnerjob"].index("#")] wfrecord = self.arvrunner.api.workflows().get(uuid=wfuuid).execute(num_retries=self.arvrunner.num_retries) diff --git a/sdk/cwl/arvados_cwl/arvjob.py b/sdk/cwl/arvados_cwl/arvjob.py index ab063867ac..877f4238a3 100644 --- a/sdk/cwl/arvados_cwl/arvjob.py +++ b/sdk/cwl/arvados_cwl/arvjob.py @@ -124,6 +124,12 @@ class ArvadosJob(object): if not self.arvrunner.ignore_docker_for_reuse: filters.append(["docker_image_locator", "in docker", runtime_constraints["docker_image"]]) + enable_reuse = kwargs.get("enable_reuse", True) + if enable_reuse: + reuse_req, _ = get_feature(self, "http://arvados.org/cwl#ReuseRequirement") + if reuse_req: + enable_reuse = reuse_req["enableReuse"] + try: with Perf(metrics, "create %s" % self.name): response = self.arvrunner.api.jobs().create( @@ -137,7 +143,7 @@ class ArvadosJob(object): "runtime_constraints": runtime_constraints }, filters=filters, - 
find_or_create=kwargs.get("enable_reuse", True) + find_or_create=enable_reuse ).execute(num_retries=self.arvrunner.num_retries) self.arvrunner.processes[response["uuid"]] = self diff --git a/sdk/cwl/tests/arvados-tests.yml b/sdk/cwl/tests/arvados-tests.yml index 87528b2ae5..c38fb0b90e 100644 --- a/sdk/cwl/tests/arvados-tests.yml +++ b/sdk/cwl/tests/arvados-tests.yml @@ -82,3 +82,8 @@ } tool: wf/listing_deep.cwl doc: test deep directory listing + +- job: null + output: {} + tool: noreuse.cwl + doc: "Test arv:ReuseRequirement" diff --git a/sdk/cwl/tests/noreuse.cwl b/sdk/cwl/tests/noreuse.cwl new file mode 100644 index 0000000000..46771d1101 --- /dev/null +++ b/sdk/cwl/tests/noreuse.cwl @@ -0,0 +1,16 @@ +cwlVersion: v1.0 +class: Workflow +$namespaces: + arv: "http://arvados.org/cwl#" +inputs: [] +outputs: [] +steps: + step1: + in: + message: + default: "hello world" + out: [output] + hints: + arv:ReuseRequirement: + enableReuse: false + run: stdout.cwl \ No newline at end of file diff --git a/sdk/cwl/tests/test_container.py b/sdk/cwl/tests/test_container.py index aab963dcb2..b1f5bfe901 100644 --- a/sdk/cwl/tests/test_container.py +++ b/sdk/cwl/tests/test_container.py @@ -116,6 +116,9 @@ class TestContainer(unittest.TestCase): }, { "class": "http://arvados.org/cwl#IntermediateOutput", "outputTTL": 7200 + }, { + "class": "http://arvados.org/cwl#ReuseRequirement", + "enableReuse": False }], "baseCommand": "ls" }) @@ -127,7 +130,7 @@ class TestContainer(unittest.TestCase): arvtool.formatgraph = None for j in arvtool.job({}, mock.MagicMock(), basedir="", name="test_resource_requirements", make_fs_access=make_fs_access, tmpdir="/tmp"): - j.run() + j.run(enable_reuse=True) call_args, call_kwargs = runner.api.container_requests().create.call_args @@ -143,7 +146,7 @@ class TestContainer(unittest.TestCase): 'keep_cache_ram': 536870912, 'API': True }, - 'use_existing': True, + 'use_existing': False, 'priority': 1, 'mounts': { '/tmp': {'kind': 'tmp', diff --git 
a/sdk/cwl/tests/test_job.py b/sdk/cwl/tests/test_job.py index a71d1d8e07..d64381b62c 100644 --- a/sdk/cwl/tests/test_job.py +++ b/sdk/cwl/tests/test_job.py @@ -133,6 +133,10 @@ class TestJob(unittest.TestCase): "outputDirType": "keep_output_dir" }, { "class": "http://arvados.org/cwl#APIRequirement", + }, + { + "class": "http://arvados.org/cwl#ReuseRequirement", + "enableReuse": False }], "baseCommand": "ls" } @@ -142,7 +146,7 @@ class TestJob(unittest.TestCase): make_fs_access=make_fs_access, loader=Loader({})) arvtool.formatgraph = None for j in arvtool.job({}, mock.MagicMock(), basedir="", make_fs_access=make_fs_access): - j.run() + j.run(enable_reuse=True) runner.api.jobs().create.assert_called_with( body=JsonDiffMatcher({ 'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz', @@ -166,7 +170,7 @@ class TestJob(unittest.TestCase): 'keep_cache_mb_per_task': 512 } }), - find_or_create=True, + find_or_create=False, filters=[['repository', '=', 'arvados'], ['script', '=', 'crunchrunner'], ['script_version', 'in git', 'a3f2cb186e437bfce0031b024b2157b73ed2717d'], -- 2.30.2