14510: Setting collection cache wip
authorPeter Amstutz <pamstutz@veritasgenetics.com>
Tue, 27 Nov 2018 20:53:07 +0000 (15:53 -0500)
committerPeter Amstutz <pamstutz@veritasgenetics.com>
Tue, 27 Nov 2018 21:01:38 +0000 (16:01 -0500)
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz@veritasgenetics.com>

apps/workbench/app/controllers/work_units_controller.rb
doc/user/cwl/cwl-extensions.html.textile.liquid
sdk/cwl/arvados_cwl/__init__.py
sdk/cwl/arvados_cwl/arvcontainer.py
sdk/cwl/arvados_cwl/context.py
sdk/cwl/arvados_cwl/executor.py
sdk/cwl/arvados_cwl/runner.py
sdk/cwl/tests/test_submit.py
sdk/cwl/tests/wf/submit_wf_runner_resources.cwl

index 8527b4d48cb717b941ab376b68255e917c5797a3..767762c81e3cd3d899bda0b3bce873cc97c390b9 100644 (file)
@@ -85,12 +85,6 @@ class WorkUnitsController < ApplicationController
       attrs['state'] = "Uncommitted"
 
       # required
-      attrs['command'] = ["arvados-cwl-runner",
-                          "--local",
-                          "--api=containers",
-                          "--project-uuid=#{params['work_unit']['owner_uuid']}",
-                          "/var/lib/cwl/workflow.json#main",
-                          "/var/lib/cwl/cwl.input.json"]
       attrs['container_image'] = "arvados/jobs"
       attrs['cwd'] = "/var/spool/cwl"
       attrs['output_path'] = "/var/spool/cwl"
@@ -102,6 +96,7 @@ class WorkUnitsController < ApplicationController
         "API" => true
       }
 
+      keep_cache = 256
       input_defaults = {}
       if wf_json
         main = get_cwl_main(wf_json)
@@ -119,11 +114,22 @@ class WorkUnitsController < ApplicationController
               if hint[:ramMin]
                 runtime_constraints["ram"] = hint[:ramMin] * 1024 * 1024
               end
+              if hint[:keep_cache]
+                keep_cache = hint[:keep_cache]
+              end
             end
           end
         end
       end
 
+      attrs['command'] = ["arvados-cwl-runner",
+                          "--local",
+                          "--api=containers",
+                          "--project-uuid=#{params['work_unit']['owner_uuid']}",
+                          "--collection-keep-cache=#{keep_cache}",
+                          "/var/lib/cwl/workflow.json#main",
+                          "/var/lib/cwl/cwl.input.json"]
+
       # mounts
       mounts = {
         "/var/lib/cwl/cwl.input.json" => {
index 7abc794e198d6910eb64bbf88da79d218c7e2b25..f2dd937d95976d3a770215dcd57faec1a0010e70 100644 (file)
@@ -43,6 +43,7 @@ hints:
   arv:WorkflowRunnerResources:
     ramMin: 2048
     coresMin: 2
+    keep_cache: 512
   arv:ClusterTarget:
     cluster_id: clsr1
     project_uuid: clsr1-j7d0g-qxc4jcji7n4lafx
@@ -137,7 +138,7 @@ table(table table-bordered table-condensed).
 |_. Field |_. Type |_. Description |
 |ramMin|int|RAM, in mebibytes, to reserve for the arvados-cwl-runner process. Default 1 GiB|
 |coresMin|int|Number of cores to reserve to the arvados-cwl-runner process. Default 1 core.|
-
+|keep_cache|int|RAM, in mebibytes, to reserve for caching keep collection metadata. Default 256 MiB|
 h2(#clustertarget). arv:ClusterTarget
 
 Specify which Arvados cluster should execute a container or subworkflow, and the parent project for the container request.
index ce22219d7a71ea9d5434f50023d0394ba964cc57..225741f947e396e3dcc86ad67ca1cdc056e03cfc 100644 (file)
@@ -159,9 +159,9 @@ def arg_parser():  # type: () -> argparse.ArgumentParser
                          default=None,
                          metavar="CLUSTER_ID")
 
-    parser.add_argument("--collection-cache", type=int,
-                        default=256*1024*1024,
-                        help="Collection caches size.")
+    parser.add_argument("--collection-cache-size", type=int,
+                        default=None,
+                        help="Collection cache size (in MiB, default 256).")
 
     parser.add_argument("--name", type=str,
                         help="Name to use for workflow execution instance.",
index f1ae65fc0f97e4b29f69fed0ca1fa3cf9ac78de2..4c49a449b2a68fdf1eaaa5cd674129ae257dfc6e 100644 (file)
@@ -407,7 +407,7 @@ class RunnerContainer(Runner):
             "secret_mounts": secret_mounts,
             "runtime_constraints": {
                 "vcpus": math.ceil(self.submit_runner_cores),
-                "ram": math.ceil(1024*1024 * self.submit_runner_ram),
+                "ram": 1024*1024 * (math.ceil(self.submit_runner_ram) + math.ceil(self.collection_cache_size)),
                 "API": True
             },
             "use_existing": self.enable_reuse,
@@ -441,6 +441,7 @@ class RunnerContainer(Runner):
         # --eval-timeout is the timeout for javascript invocation
         # --parallel-task-count is the number of threads to use for job submission
         # --enable/disable-reuse sets desired job reuse
+        # --collection-cache-size sets aside memory to store collections
         command = ["arvados-cwl-runner",
                    "--local",
                    "--api=containers",
@@ -448,7 +449,8 @@ class RunnerContainer(Runner):
                    "--disable-validate",
                    "--eval-timeout=%s" % self.arvrunner.eval_timeout,
                    "--thread-count=%s" % self.arvrunner.thread_count,
-                   "--enable-reuse" if self.enable_reuse else "--disable-reuse"]
+                   "--enable-reuse" if self.enable_reuse else "--disable-reuse",
+                   "--collection-cache-size=%s" % self.collection_cache_size]
 
         if self.output_name:
             command.append("--output-name=" + self.output_name)
index 7831e1cfd0822abbcac5a77c460a33e8ff492714..8cfe22ad7b6619f1f02d95eaf71153e44e52fd01 100644 (file)
@@ -34,6 +34,7 @@ class ArvRuntimeContext(RuntimeContext):
         self.submit_runner_cluster = None
         self.cluster_target_id = 0
         self.always_submit_runner = False
+        self.collection_cache_size = 256
 
         super(ArvRuntimeContext, self).__init__(kwargs)
 
index ff8ff6ff89fc76bc40359194025f7b4c5c31fec1..3589fad276c25fa4e666a8f51effcd485eaf8bf8 100644 (file)
@@ -122,8 +122,13 @@ class ArvCwlExecutor(object):
         else:
             self.keep_client = arvados.keep.KeepClient(api_client=self.api, num_retries=self.num_retries)
 
+        if arvargs.collection_cache_size:
+            collection_cache_size = arvargs.collection_cache_size*1024*1024
+        else:
+            collection_cache_size = 256*1024*1024
+
         self.collection_cache = CollectionCache(self.api, self.keep_client, self.num_retries,
-                                                cap=arvargs.collection_cache)
+                                                cap=collection_cache_size)
 
         self.fetcher_constructor = partial(CollectionFetcher,
                                            api_client=self.api,
@@ -607,7 +612,8 @@ http://doc.arvados.org/install/install-api-server.html#disable_api_methods
                                                 intermediate_output_ttl=runtimeContext.intermediate_output_ttl,
                                                 merged_map=merged_map,
                                                 priority=runtimeContext.priority,
-                                                secret_store=self.secret_store)
+                                                secret_store=self.secret_store,
+                                                collection_cache_size=runtimeContext.collection_cache_size)
             elif self.work_api == "jobs":
                 runnerjob = RunnerJob(self, tool, job_order, runtimeContext.enable_reuse,
                                       self.output_name,
index a846f2b0016931dcd6a938c47f9572083963d2bc..c1a98e745672b65829326afdf6c2fb6313db1bbc 100644 (file)
@@ -364,7 +364,7 @@ class Runner(object):
                  output_name, output_tags, submit_runner_ram=0,
                  name=None, on_error=None, submit_runner_image=None,
                  intermediate_output_ttl=0, merged_map=None,
-                 priority=None, secret_store=None):
+                 priority=None, secret_store=None, collection_cache_size=None):
         self.arvrunner = runner
         self.tool = tool
         self.job_order = job_order
@@ -389,6 +389,7 @@ class Runner(object):
 
         self.submit_runner_cores = 1
         self.submit_runner_ram = 1024  # defaut 1 GiB
+        self.collection_cache_size = 256
 
         runner_resource_req, _ = self.tool.get_requirement("http://arvados.org/cwl#WorkflowRunnerResources")
         if runner_resource_req:
@@ -396,11 +397,17 @@ class Runner(object):
                 self.submit_runner_cores = runner_resource_req["coresMin"]
             if runner_resource_req.get("ramMin"):
                 self.submit_runner_ram = runner_resource_req["ramMin"]
+            if runner_resource_req.get("keep_cache"):
+                self.collection_cache_size = runner_resource_req["keep_cache"]
 
         if submit_runner_ram:
             # Command line / initializer overrides default and/or spec from workflow
             self.submit_runner_ram = submit_runner_ram
 
+        if collection_cache_size:
+            # Command line / initializer overrides default and/or spec from workflow
+            self.collection_cache_size = collection_cache_size
+
         if self.submit_runner_ram <= 0:
             raise Exception("Value of submit-runner-ram must be greater than zero")
 
index 1b892a9836f209857995fbf4e94002e015dcd1cf..bf2791d728286c7ccd66431766e34d9a77b66392 100644 (file)
@@ -1204,7 +1204,8 @@ class TestSubmit(unittest.TestCase):
             {
                 "class": "http://arvados.org/cwl#WorkflowRunnerResources",
                 "coresMin": 2,
-                "ramMin": 2000
+                "ramMin": 2000,
+                "keep_cache": 512
             }
         ]
         expect_container["mounts"]["/var/lib/cwl/workflow.json"]["content"]["$graph"][0]["$namespaces"] = {
index 9e2712194950627d87c148b76fae14d00f5fac2b..814cd07ab5d0833a5a374e503b6ee1feae00ef87 100644 (file)
@@ -15,6 +15,7 @@ hints:
   arv:WorkflowRunnerResources:
     ramMin: 2000
     coresMin: 2
+    keep_cache: 512
 inputs:
   - id: x
     type: File