9308: Set task.keepTmpOutput: true when outputDirType is keep_output_dir.
authorPeter Amstutz <peter.amstutz@curoverse.com>
Wed, 5 Oct 2016 15:08:40 +0000 (11:08 -0400)
committerPeter Amstutz <peter.amstutz@curoverse.com>
Wed, 5 Oct 2016 17:53:46 +0000 (13:53 -0400)
sdk/cwl/arvados_cwl/arv-cwl-schema.yml
sdk/cwl/arvados_cwl/arvjob.py
sdk/cwl/tests/test_job.py

index 44b1b06a1a09b11848119743f9f48f5a63e3240a..fe3eadd8e53163e8b545da97b7c85c50ed6f05f0 100644 (file)
@@ -13,6 +13,29 @@ $graph:
         _id: "@type"
         _type: "@vocab"
 
+- name: OutputDirType
+  type: enum
+  symbols:
+    - local_output_dir
+    - keep_output_dir
+  doc:
+    - |
+      local_output_dir: Use regular file system local to the compute node.
+      There must be sufficient local scratch space to store entire output;
+      specify this with `outdirMin` of `ResourceRequirement`.  Files are
+      batch uploaded to Keep when the process completes.  Most compatible, but
+      upload step can be time consuming for very large files.
+    - |
+      keep_output_dir: Use writable Keep mount.  Files are streamed to Keep as
+      they are written.  Does not consume local scratch space, but does consume
+      RAM for output buffers (up to 192 MiB per file simultaneously open for
+      writing.)  Best suited to processes which produce sequential output of
+      large files (non-sequential writes may produce fragmented file
+      manifests).  Supports regular files and directories, does not support
+      special files such as symlinks, hard links, named pipes, named sockets,
+      or device nodes.
+
+
 - name: RuntimeConstraints
   type: record
   doc: |
@@ -31,6 +54,11 @@ $graph:
         MiB. Increase this to reduce cache thrashing in situations such as
         accessing multiple large (64+ MiB) files at the same time, or
         performing random access on a large file.
+    - name: outputDirType
+      type: OutputDirType?
+      doc: |
+        Preferred backing store for output staging.  If not specified, the
+        system may choose which one to use.
 
 - name: APIRequirement
   type: record
index 0818d5d62b4dcd023153daf1e69ace6b34175655..4ede88d7ca2b8cbb3c9c57a420ab39b28407d484 100644 (file)
@@ -95,7 +95,13 @@ class ArvadosJob(object):
 
         runtime_req, _ = get_feature(self, "http://arvados.org/cwl#RuntimeConstraints")
         if runtime_req:
-            runtime_constraints["keep_cache_mb_per_task"] = runtime_req["keep_cache"]
+            if "keep_cache" in runtime_req:
+                runtime_constraints["keep_cache_mb_per_task"] = runtime_req["keep_cache"]
+            if "outputDirType" in runtime_req:
+                if runtime_req["outputDirType"] == "local_output_dir":
+                    script_parameters["task.keepTmpOutput"] = False
+                elif runtime_req["outputDirType"] == "keep_output_dir":
+                    script_parameters["task.keepTmpOutput"] = True
 
         filters = [["repository", "=", "arvados"],
                    ["script", "=", "crunchrunner"],
index 7f31520454662540aea97cbcfcbb254b0dccee8f..99e34d35074bed1030ec2abd5af49c68fe5cc487 100644 (file)
@@ -82,7 +82,8 @@ class TestJob(unittest.TestCase):
                 "tmpdirMin": 4000
             }, {
                 "class": "http://arvados.org/cwl#RuntimeConstraints",
-                "keep_cache": 512
+                "keep_cache": 512,
+                "outputDirType": "keep_output_dir"
             }, {
                 "class": "http://arvados.org/cwl#APIRequirement",
             }],
@@ -101,6 +102,7 @@ class TestJob(unittest.TestCase):
                 'script_parameters': {
                     'tasks': [{
                         'task.env': {'HOME': '$(task.outdir)', 'TMPDIR': '$(task.tmpdir)'},
+                        'task.keepTmpOutput': True,
                         'command': ['ls']
                     }]
             },