Merge branch '2411-check-copyright'
author Tom Clegg <tom@curoverse.com>
Thu, 29 Jun 2017 11:51:32 +0000 (07:51 -0400)
committer Tom Clegg <tom@curoverse.com>
Thu, 29 Jun 2017 11:51:32 +0000 (07:51 -0400)
refs #2411

Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom@curoverse.com>

doc/user/cwl/cwl-extensions.html.textile.liquid
sdk/cwl/arvados_cwl/__init__.py
sdk/cwl/arvados_cwl/arv-cwl-schema.yml
sdk/cwl/arvados_cwl/arvcontainer.py
sdk/cwl/arvados_cwl/arvjob.py
sdk/cwl/tests/arvados-tests.yml
sdk/cwl/tests/noreuse.cwl [new file with mode: 0644]
sdk/cwl/tests/test_container.py
sdk/cwl/tests/test_job.py

index 8e224f0305455a9f5eda3720d9aa71f399830030..95422b6bd32206019a8703e8a94a6c57735b92dd 100644 (file)
@@ -19,7 +19,7 @@ $namespaces:
   cwltool: "http://commonwl.org/cwltool#"
 </pre>
 
-Arvados extensions should go into the @hints@ section, for example:
+For portability, Arvados extensions should go into the @hints@ section of your CWL file, for example:
 
 <pre>
 hints:
@@ -34,8 +34,12 @@ hints:
     loadListing: shallow_listing
   arv:IntermediateOutput:
     outputTTL: 3600
+  arv:ReuseRequirement:
+    enableReuse: false
 </pre>
 
+The one exception to this is @arv:APIRequirement@; see the note below.
+
 h2. arv:RunInSingleContainer
 
 Indicates that a subworkflow should run in a single container and not be scheduled as separate steps.
@@ -89,3 +93,11 @@ table(table table-bordered table-condensed).
 |_. Field |_. Type |_. Description |
 |outputTTL|int|If the value is greater than zero, intermediate output collections are considered temporary and will be automatically trashed. Temporary collections will be trashed @outputTTL@ seconds after creation.  A value of zero means intermediate output should be retained indefinitely (this is the default behavior).
 Note: arvados-cwl-runner currently does not take workflow dependencies into account when setting the TTL on an intermediate output collection. If the TTL is too short, it is possible for a collection to be trashed before downstream steps that consume it are started.  The recommended minimum value for TTL is the expected duration of the entire workflow.|
+
+h2. arv:ReuseRequirement
+
+Enable/disable work reuse for the current process.  Default true (work reuse enabled).
+
+table(table table-bordered table-condensed).
+|_. Field |_. Type |_. Description |
+|enableReuse|boolean|Enable/disable work reuse for the current process.  Default true (work reuse enabled).|
index b79c453cfbeb115eae736f5fbd621453950d1a05..4584d955e6fcc6080d56dfe9a5dd277c7f6bb414 100644 (file)
@@ -688,7 +688,8 @@ def add_arv_hints():
         "http://arvados.org/cwl#PartitionRequirement",
         "http://arvados.org/cwl#APIRequirement",
         "http://commonwl.org/cwltool#LoadListingRequirement",
-        "http://arvados.org/cwl#IntermediateOutput"
+        "http://arvados.org/cwl#IntermediateOutput",
+        "http://arvados.org/cwl#ReuseRequirement"
     ])
 
 def main(args, stdout, stderr, api_client=None, keep_client=None):
index dc056ef18c3094c79e757b3d8a5730ded1363099..7ae2239e2e81dd4f9345e4b2a47743eb585987e9 100644 (file)
@@ -153,3 +153,19 @@ $graph:
         be trashed before downstream steps that consume it are started.  The
         recommended minimum value for TTL is the expected duration of the
         entire workflow.
+
+- name: ReuseRequirement
+  type: record
+  extends: cwl:ProcessRequirement
+  inVocab: false
+  doc: |
+    Enable/disable work reuse for current process.  Default true (work reuse enabled).
+  fields:
+    - name: class
+      type: string
+      doc: "Always 'arv:ReuseRequirement'"
+      jsonldPredicate:
+        _id: "@type"
+        _type: "@vocab"
+    - name: enableReuse
+      type: boolean
index d3bc7ae944c523f29f0f6857244b654d4b561f83..4ab65d9d8774708613787b3a694f64bf876004da 100644 (file)
@@ -188,9 +188,15 @@ class ArvadosContainer(object):
         container_request["output_ttl"] = self.output_ttl
         container_request["mounts"] = mounts
         container_request["runtime_constraints"] = runtime_constraints
-        container_request["use_existing"] = kwargs.get("enable_reuse", True)
         container_request["scheduling_parameters"] = scheduling_parameters
 
+        enable_reuse = kwargs.get("enable_reuse", True)
+        if enable_reuse:
+            reuse_req, _ = get_feature(self, "http://arvados.org/cwl#ReuseRequirement")
+            if reuse_req:
+                enable_reuse = reuse_req["enableReuse"]
+        container_request["use_existing"] = enable_reuse
+
         if kwargs.get("runnerjob", "").startswith("arvwf:"):
             wfuuid = kwargs["runnerjob"][6:kwargs["runnerjob"].index("#")]
             wfrecord = self.arvrunner.api.workflows().get(uuid=wfuuid).execute(num_retries=self.arvrunner.num_retries)
index aa55cac4b75f5d14bc3f3817dc57b12e20ffb76e..64cd2aa04e562ded1d081ab71c56088cae54edb3 100644 (file)
@@ -128,6 +128,12 @@ class ArvadosJob(object):
         if not self.arvrunner.ignore_docker_for_reuse:
             filters.append(["docker_image_locator", "in docker", runtime_constraints["docker_image"]])
 
+        enable_reuse = kwargs.get("enable_reuse", True)
+        if enable_reuse:
+            reuse_req, _ = get_feature(self, "http://arvados.org/cwl#ReuseRequirement")
+            if reuse_req:
+                enable_reuse = reuse_req["enableReuse"]
+
         try:
             with Perf(metrics, "create %s" % self.name):
                 response = self.arvrunner.api.jobs().create(
@@ -141,7 +147,7 @@ class ArvadosJob(object):
                         "runtime_constraints": runtime_constraints
                     },
                     filters=filters,
-                    find_or_create=kwargs.get("enable_reuse", True)
+                    find_or_create=enable_reuse
                 ).execute(num_retries=self.arvrunner.num_retries)
 
             self.arvrunner.processes[response["uuid"]] = self
index e8c708a1684362059f7e842efe2659991646b90e..d3bdefcd03f3d95fe585ae555ae8703e5588f312 100644 (file)
@@ -86,3 +86,8 @@
   }
   tool: wf/listing_deep.cwl
   doc: test deep directory listing
+
+- job: null
+  output: {}
+  tool: noreuse.cwl
+  doc: "Test arv:ReuseRequirement"
diff --git a/sdk/cwl/tests/noreuse.cwl b/sdk/cwl/tests/noreuse.cwl
new file mode 100644 (file)
index 0000000..46771d1
--- /dev/null
@@ -0,0 +1,16 @@
+cwlVersion: v1.0
+class: Workflow
+$namespaces:
+  arv: "http://arvados.org/cwl#"
+inputs: []
+outputs: []
+steps:
+  step1:
+    in:
+      message:
+        default: "hello world"
+    out: [output]
+    hints:
+      arv:ReuseRequirement:
+        enableReuse: false
+    run: stdout.cwl
\ No newline at end of file
index cec0848ccd4697de3088048beb8189862320e5da..c516d7b35340b15ac1f4eaac1a7fcbc645b95eda 100644 (file)
@@ -120,6 +120,9 @@ class TestContainer(unittest.TestCase):
             }, {
                 "class": "http://arvados.org/cwl#IntermediateOutput",
                 "outputTTL": 7200
+            }, {
+                "class": "http://arvados.org/cwl#ReuseRequirement",
+                "enableReuse": False
             }],
             "baseCommand": "ls"
         })
@@ -131,7 +134,7 @@ class TestContainer(unittest.TestCase):
         arvtool.formatgraph = None
         for j in arvtool.job({}, mock.MagicMock(), basedir="", name="test_resource_requirements",
                              make_fs_access=make_fs_access, tmpdir="/tmp"):
-            j.run()
+            j.run(enable_reuse=True)
 
         call_args, call_kwargs = runner.api.container_requests().create.call_args
 
@@ -147,7 +150,7 @@ class TestContainer(unittest.TestCase):
                 'keep_cache_ram': 536870912,
                 'API': True
             },
-            'use_existing': True,
+            'use_existing': False,
             'priority': 1,
             'mounts': {
                 '/tmp': {'kind': 'tmp',
index fa7a134755aabaa89be5d748dae736ed9ec6b586..5d140ba6b9d56a052f3a75eb0f4697f29b8dad79 100644 (file)
@@ -137,6 +137,10 @@ class TestJob(unittest.TestCase):
                 "outputDirType": "keep_output_dir"
             }, {
                 "class": "http://arvados.org/cwl#APIRequirement",
+            },
+            {
+                "class": "http://arvados.org/cwl#ReuseRequirement",
+                "enableReuse": False
             }],
             "baseCommand": "ls"
         }
@@ -146,7 +150,7 @@ class TestJob(unittest.TestCase):
                                                  make_fs_access=make_fs_access, loader=Loader({}))
         arvtool.formatgraph = None
         for j in arvtool.job({}, mock.MagicMock(), basedir="", make_fs_access=make_fs_access):
-            j.run()
+            j.run(enable_reuse=True)
         runner.api.jobs().create.assert_called_with(
             body=JsonDiffMatcher({
                 'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz',
@@ -170,7 +174,7 @@ class TestJob(unittest.TestCase):
                     'keep_cache_mb_per_task': 512
                 }
             }),
-            find_or_create=True,
+            find_or_create=False,
             filters=[['repository', '=', 'arvados'],
                      ['script', '=', 'crunchrunner'],
                      ['script_version', 'in git', 'a3f2cb186e437bfce0031b024b2157b73ed2717d'],