Merge branch '11095-cwl-control-reuse' closes #11095
author Peter Amstutz <peter.amstutz@curoverse.com>
Wed, 28 Jun 2017 13:23:13 +0000 (09:23 -0400)
committer Peter Amstutz <peter.amstutz@curoverse.com>
Wed, 28 Jun 2017 13:23:13 +0000 (09:23 -0400)
doc/user/cwl/cwl-extensions.html.textile.liquid
sdk/cwl/arvados_cwl/__init__.py
sdk/cwl/arvados_cwl/arv-cwl-schema.yml
sdk/cwl/arvados_cwl/arvcontainer.py
sdk/cwl/arvados_cwl/arvjob.py
sdk/cwl/tests/arvados-tests.yml
sdk/cwl/tests/noreuse.cwl [new file with mode: 0644]
sdk/cwl/tests/test_container.py
sdk/cwl/tests/test_job.py

index 8a6203496afc498f47910182dd1e7e26b55cfaca..0ba104567075a93bac4992eab503d6ad322b94cc 100644 (file)
@@ -14,7 +14,7 @@ $namespaces:
   cwltool: "http://commonwl.org/cwltool#"
 </pre>
 
-Arvados extensions should go into the @hints@ section, for example:
+For portability, Arvados extensions should go into the @hints@ section of your CWL file, for example:
 
 <pre>
 hints:
@@ -29,8 +29,12 @@ hints:
     loadListing: shallow_listing
   arv:IntermediateOutput:
     outputTTL: 3600
+  arv:ReuseRequirement:
+    enableReuse: false
 </pre>
 
+The one exception to this is @arv:APIRequirement@; see the note below.
+
 h2. arv:RunInSingleContainer
 
 Indicates that a subworkflow should run in a single container and not be scheduled as separate steps.
@@ -84,3 +88,11 @@ table(table table-bordered table-condensed).
 |_. Field |_. Type |_. Description |
 |outputTTL|int|If the value is greater than zero, intermediate output collections are considered temporary and will be automatically trashed. Temporary collections will be trashed @outputTTL@ seconds after creation.  A value of zero means intermediate output should be retained indefinitely (this is the default behavior).
 Note: arvados-cwl-runner currently does not take workflow dependencies into account when setting the TTL on an intermediate output collection. If the TTL is too short, it is possible for a collection to be trashed before downstream steps that consume it are started.  The recommended minimum value for TTL is the expected duration of the entire workflow.|
+
+h2. arv:ReuseRequirement
+
+Enable/disable work reuse for the current process.  Default true (work reuse enabled).
+
+table(table table-bordered table-condensed).
+|_. Field |_. Type |_. Description |
+|enableReuse|boolean|Enable/disable work reuse for the current process.  Default true (work reuse enabled).|
index f7da563cd4aca1bd650bb19d35dcd28cdab08757..64ec4e2ef2813851607fb2dc0726838fda9cc110 100644 (file)
@@ -685,7 +685,8 @@ def add_arv_hints():
         "http://arvados.org/cwl#PartitionRequirement",
         "http://arvados.org/cwl#APIRequirement",
         "http://commonwl.org/cwltool#LoadListingRequirement",
-        "http://arvados.org/cwl#IntermediateOutput"
+        "http://arvados.org/cwl#IntermediateOutput",
+        "http://arvados.org/cwl#ReuseRequirement"
     ])
 
 def main(args, stdout, stderr, api_client=None, keep_client=None):
index 6838c0f1ae9328878c0e638ae607b6cc3e5d9892..3107628958b3aa1101f419a1943949be2e7340c0 100644 (file)
@@ -149,3 +149,19 @@ $graph:
         be trashed before downstream steps that consume it are started.  The
         recommended minimum value for TTL is the expected duration of the
         entire workflow.
+
+- name: ReuseRequirement
+  type: record
+  extends: cwl:ProcessRequirement
+  inVocab: false
+  doc: |
+    Enable/disable work reuse for the current process.  Default true (work reuse enabled).
+  fields:
+    - name: class
+      type: string
+      doc: "Always 'arv:ReuseRequirement'"
+      jsonldPredicate:
+        _id: "@type"
+        _type: "@vocab"
+    - name: enableReuse
+      type: boolean
index b58a858c8d40775c227038ddb3fe43665b851b3c..bf4aab52fb4b637d2e48161422828f5f5545a021 100644 (file)
@@ -184,9 +184,15 @@ class ArvadosContainer(object):
         container_request["output_ttl"] = self.output_ttl
         container_request["mounts"] = mounts
         container_request["runtime_constraints"] = runtime_constraints
-        container_request["use_existing"] = kwargs.get("enable_reuse", True)
         container_request["scheduling_parameters"] = scheduling_parameters
 
+        enable_reuse = kwargs.get("enable_reuse", True)
+        if enable_reuse:
+            reuse_req, _ = get_feature(self, "http://arvados.org/cwl#ReuseRequirement")
+            if reuse_req:
+                enable_reuse = reuse_req["enableReuse"]
+        container_request["use_existing"] = enable_reuse
+
         if kwargs.get("runnerjob", "").startswith("arvwf:"):
             wfuuid = kwargs["runnerjob"][6:kwargs["runnerjob"].index("#")]
             wfrecord = self.arvrunner.api.workflows().get(uuid=wfuuid).execute(num_retries=self.arvrunner.num_retries)
index ab063867ac1c73bc6d04f20e148da45d9e21003f..877f4238a33e8f41952f37793c4d1c168da89ca0 100644 (file)
@@ -124,6 +124,12 @@ class ArvadosJob(object):
         if not self.arvrunner.ignore_docker_for_reuse:
             filters.append(["docker_image_locator", "in docker", runtime_constraints["docker_image"]])
 
+        enable_reuse = kwargs.get("enable_reuse", True)
+        if enable_reuse:
+            reuse_req, _ = get_feature(self, "http://arvados.org/cwl#ReuseRequirement")
+            if reuse_req:
+                enable_reuse = reuse_req["enableReuse"]
+
         try:
             with Perf(metrics, "create %s" % self.name):
                 response = self.arvrunner.api.jobs().create(
@@ -137,7 +143,7 @@ class ArvadosJob(object):
                         "runtime_constraints": runtime_constraints
                     },
                     filters=filters,
-                    find_or_create=kwargs.get("enable_reuse", True)
+                    find_or_create=enable_reuse
                 ).execute(num_retries=self.arvrunner.num_retries)
 
             self.arvrunner.processes[response["uuid"]] = self
index 87528b2ae595df94a73b24ff9b3e931336edf067..c38fb0b90e490210271d4871c08ae389102ec6f4 100644 (file)
@@ -82,3 +82,8 @@
   }
   tool: wf/listing_deep.cwl
   doc: test deep directory listing
+
+- job: null
+  output: {}
+  tool: noreuse.cwl
+  doc: "Test arv:ReuseRequirement"
diff --git a/sdk/cwl/tests/noreuse.cwl b/sdk/cwl/tests/noreuse.cwl
new file mode 100644 (file)
index 0000000..46771d1
--- /dev/null
@@ -0,0 +1,16 @@
+cwlVersion: v1.0
+class: Workflow
+$namespaces:
+  arv: "http://arvados.org/cwl#"
+inputs: []
+outputs: []
+steps:
+  step1:
+    in:
+      message:
+        default: "hello world"
+    out: [output]
+    hints:
+      arv:ReuseRequirement:
+        enableReuse: false
+    run: stdout.cwl
\ No newline at end of file
index aab963dcb23674ea7850a5fc6cd6f175f50cb3eb..b1f5bfe901bb2365192ea1dc810263bcfc076c2e 100644 (file)
@@ -116,6 +116,9 @@ class TestContainer(unittest.TestCase):
             }, {
                 "class": "http://arvados.org/cwl#IntermediateOutput",
                 "outputTTL": 7200
+            }, {
+                "class": "http://arvados.org/cwl#ReuseRequirement",
+                "enableReuse": False
             }],
             "baseCommand": "ls"
         })
@@ -127,7 +130,7 @@ class TestContainer(unittest.TestCase):
         arvtool.formatgraph = None
         for j in arvtool.job({}, mock.MagicMock(), basedir="", name="test_resource_requirements",
                              make_fs_access=make_fs_access, tmpdir="/tmp"):
-            j.run()
+            j.run(enable_reuse=True)
 
         call_args, call_kwargs = runner.api.container_requests().create.call_args
 
@@ -143,7 +146,7 @@ class TestContainer(unittest.TestCase):
                 'keep_cache_ram': 536870912,
                 'API': True
             },
-            'use_existing': True,
+            'use_existing': False,
             'priority': 1,
             'mounts': {
                 '/tmp': {'kind': 'tmp',
index a71d1d8e074e771757dd0ca12869a4190f5667bd..d64381b62cca44c01f47d20e5a96f8fe13e9b27f 100644 (file)
@@ -133,6 +133,10 @@ class TestJob(unittest.TestCase):
                 "outputDirType": "keep_output_dir"
             }, {
                 "class": "http://arvados.org/cwl#APIRequirement",
+            },
+            {
+                "class": "http://arvados.org/cwl#ReuseRequirement",
+                "enableReuse": False
             }],
             "baseCommand": "ls"
         }
@@ -142,7 +146,7 @@ class TestJob(unittest.TestCase):
                                                  make_fs_access=make_fs_access, loader=Loader({}))
         arvtool.formatgraph = None
         for j in arvtool.job({}, mock.MagicMock(), basedir="", make_fs_access=make_fs_access):
-            j.run()
+            j.run(enable_reuse=True)
         runner.api.jobs().create.assert_called_with(
             body=JsonDiffMatcher({
                 'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz',
@@ -166,7 +170,7 @@ class TestJob(unittest.TestCase):
                     'keep_cache_mb_per_task': 512
                 }
             }),
-            find_or_create=True,
+            find_or_create=False,
             filters=[['repository', '=', 'arvados'],
                      ['script', '=', 'crunchrunner'],
                      ['script_version', 'in git', 'a3f2cb186e437bfce0031b024b2157b73ed2717d'],