From eb58fd945645f5a670c761f7046b10885941167e Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Tue, 27 Nov 2018 15:53:07 -0500 Subject: [PATCH 1/1] 14510: Setting collection cache wip Arvados-DCO-1.1-Signed-off-by: Peter Amstutz --- .../app/controllers/work_units_controller.rb | 18 ++++++++++++------ .../cwl/cwl-extensions.html.textile.liquid | 3 ++- sdk/cwl/arvados_cwl/__init__.py | 6 +++--- sdk/cwl/arvados_cwl/arvcontainer.py | 6 ++++-- sdk/cwl/arvados_cwl/context.py | 1 + sdk/cwl/arvados_cwl/executor.py | 10 ++++++++-- sdk/cwl/arvados_cwl/runner.py | 9 ++++++++- sdk/cwl/tests/test_submit.py | 3 ++- .../tests/wf/submit_wf_runner_resources.cwl | 1 + 9 files changed, 41 insertions(+), 16 deletions(-) diff --git a/apps/workbench/app/controllers/work_units_controller.rb b/apps/workbench/app/controllers/work_units_controller.rb index 8527b4d48c..767762c81e 100644 --- a/apps/workbench/app/controllers/work_units_controller.rb +++ b/apps/workbench/app/controllers/work_units_controller.rb @@ -85,12 +85,6 @@ class WorkUnitsController < ApplicationController attrs['state'] = "Uncommitted" # required - attrs['command'] = ["arvados-cwl-runner", - "--local", - "--api=containers", - "--project-uuid=#{params['work_unit']['owner_uuid']}", - "/var/lib/cwl/workflow.json#main", - "/var/lib/cwl/cwl.input.json"] attrs['container_image'] = "arvados/jobs" attrs['cwd'] = "/var/spool/cwl" attrs['output_path'] = "/var/spool/cwl" @@ -102,6 +96,7 @@ class WorkUnitsController < ApplicationController "API" => true } + keep_cache = 256 input_defaults = {} if wf_json main = get_cwl_main(wf_json) @@ -119,11 +114,22 @@ class WorkUnitsController < ApplicationController if hint[:ramMin] runtime_constraints["ram"] = hint[:ramMin] * 1024 * 1024 end + if hint[:keep_cache] + keep_cache = hint[:keep_cache] + end end end end end + attrs['command'] = ["arvados-cwl-runner", + "--local", + "--api=containers", + "--project-uuid=#{params['work_unit']['owner_uuid']}", + 
"--collection-cache-size=#{keep_cache}", + "/var/lib/cwl/workflow.json#main", + "/var/lib/cwl/cwl.input.json"] + # mounts mounts = { "/var/lib/cwl/cwl.input.json" => { diff --git a/doc/user/cwl/cwl-extensions.html.textile.liquid b/doc/user/cwl/cwl-extensions.html.textile.liquid index 7abc794e19..f2dd937d95 100644 --- a/doc/user/cwl/cwl-extensions.html.textile.liquid +++ b/doc/user/cwl/cwl-extensions.html.textile.liquid @@ -43,6 +43,7 @@ hints: arv:WorkflowRunnerResources: ramMin: 2048 coresMin: 2 + keep_cache: 512 arv:ClusterTarget: cluster_id: clsr1 project_uuid: clsr1-j7d0g-qxc4jcji7n4lafx @@ -137,7 +138,7 @@ table(table table-bordered table-condensed). |_. Field |_. Type |_. Description | |ramMin|int|RAM, in mebibytes, to reserve for the arvados-cwl-runner process. Default 1 GiB| |coresMin|int|Number of cores to reserve to the arvados-cwl-runner process. Default 1 core.| - +|keep_cache|int|RAM, in mebibytes, to reserve for caching keep collection metadata. Default 256 MiB| h2(#clustertarget). arv:ClusterTarget Specify which Arvados cluster should execute a container or subworkflow, and the parent project for the container request. 
diff --git a/sdk/cwl/arvados_cwl/__init__.py b/sdk/cwl/arvados_cwl/__init__.py index ce22219d7a..225741f947 100644 --- a/sdk/cwl/arvados_cwl/__init__.py +++ b/sdk/cwl/arvados_cwl/__init__.py @@ -159,9 +159,9 @@ def arg_parser(): # type: () -> argparse.ArgumentParser default=None, metavar="CLUSTER_ID") - parser.add_argument("--collection-cache", type=int, - default=256*1024*1024, - help="Collection caches size.") + parser.add_argument("--collection-cache-size", type=int, + default=None, + help="Collection cache size (in MiB, default 256).") parser.add_argument("--name", type=str, help="Name to use for workflow execution instance.", diff --git a/sdk/cwl/arvados_cwl/arvcontainer.py b/sdk/cwl/arvados_cwl/arvcontainer.py index f1ae65fc0f..4c49a449b2 100644 --- a/sdk/cwl/arvados_cwl/arvcontainer.py +++ b/sdk/cwl/arvados_cwl/arvcontainer.py @@ -407,7 +407,7 @@ class RunnerContainer(Runner): "secret_mounts": secret_mounts, "runtime_constraints": { "vcpus": math.ceil(self.submit_runner_cores), - "ram": math.ceil(1024*1024 * self.submit_runner_ram), + "ram": 1024*1024 * (math.ceil(self.submit_runner_ram) + math.ceil(self.collection_cache_size)), "API": True }, "use_existing": self.enable_reuse, @@ -441,6 +441,7 @@ class RunnerContainer(Runner): # --eval-timeout is the timeout for javascript invocation # --parallel-task-count is the number of threads to use for job submission # --enable/disable-reuse sets desired job reuse + # --collection-cache-size sets aside memory to store collections command = ["arvados-cwl-runner", "--local", "--api=containers", @@ -448,7 +449,8 @@ class RunnerContainer(Runner): "--disable-validate", "--eval-timeout=%s" % self.arvrunner.eval_timeout, "--thread-count=%s" % self.arvrunner.thread_count, - "--enable-reuse" if self.enable_reuse else "--disable-reuse"] + "--enable-reuse" if self.enable_reuse else "--disable-reuse", + "--collection-cache-size=%s" % self.collection_cache_size] if self.output_name: command.append("--output-name=" + 
self.output_name) diff --git a/sdk/cwl/arvados_cwl/context.py b/sdk/cwl/arvados_cwl/context.py index 7831e1cfd0..8cfe22ad7b 100644 --- a/sdk/cwl/arvados_cwl/context.py +++ b/sdk/cwl/arvados_cwl/context.py @@ -34,6 +34,7 @@ class ArvRuntimeContext(RuntimeContext): self.submit_runner_cluster = None self.cluster_target_id = 0 self.always_submit_runner = False + self.collection_cache_size = 256 super(ArvRuntimeContext, self).__init__(kwargs) diff --git a/sdk/cwl/arvados_cwl/executor.py b/sdk/cwl/arvados_cwl/executor.py index ff8ff6ff89..3589fad276 100644 --- a/sdk/cwl/arvados_cwl/executor.py +++ b/sdk/cwl/arvados_cwl/executor.py @@ -122,8 +122,13 @@ class ArvCwlExecutor(object): else: self.keep_client = arvados.keep.KeepClient(api_client=self.api, num_retries=self.num_retries) + if arvargs.collection_cache_size: + collection_cache_size = arvargs.collection_cache_size*1024*1024 + else: + collection_cache_size = 256*1024*1024 + self.collection_cache = CollectionCache(self.api, self.keep_client, self.num_retries, - cap=arvargs.collection_cache) + cap=collection_cache_size) self.fetcher_constructor = partial(CollectionFetcher, api_client=self.api, @@ -607,7 +612,8 @@ http://doc.arvados.org/install/install-api-server.html#disable_api_methods intermediate_output_ttl=runtimeContext.intermediate_output_ttl, merged_map=merged_map, priority=runtimeContext.priority, - secret_store=self.secret_store) + secret_store=self.secret_store, + collection_cache_size=runtimeContext.collection_cache_size) elif self.work_api == "jobs": runnerjob = RunnerJob(self, tool, job_order, runtimeContext.enable_reuse, self.output_name, diff --git a/sdk/cwl/arvados_cwl/runner.py b/sdk/cwl/arvados_cwl/runner.py index a846f2b001..c1a98e7456 100644 --- a/sdk/cwl/arvados_cwl/runner.py +++ b/sdk/cwl/arvados_cwl/runner.py @@ -364,7 +364,7 @@ class Runner(object): output_name, output_tags, submit_runner_ram=0, name=None, on_error=None, submit_runner_image=None, intermediate_output_ttl=0, merged_map=None, - 
priority=None, secret_store=None): + priority=None, secret_store=None, collection_cache_size=None): self.arvrunner = runner self.tool = tool self.job_order = job_order @@ -389,6 +389,7 @@ class Runner(object): self.submit_runner_cores = 1 self.submit_runner_ram = 1024 # defaut 1 GiB + self.collection_cache_size = 256 runner_resource_req, _ = self.tool.get_requirement("http://arvados.org/cwl#WorkflowRunnerResources") if runner_resource_req: @@ -396,11 +397,17 @@ class Runner(object): self.submit_runner_cores = runner_resource_req["coresMin"] if runner_resource_req.get("ramMin"): self.submit_runner_ram = runner_resource_req["ramMin"] + if runner_resource_req.get("keep_cache"): + self.collection_cache_size = runner_resource_req["keep_cache"] if submit_runner_ram: # Command line / initializer overrides default and/or spec from workflow self.submit_runner_ram = submit_runner_ram + if collection_cache_size: + # Command line / initializer overrides default and/or spec from workflow + self.collection_cache_size = collection_cache_size + if self.submit_runner_ram <= 0: raise Exception("Value of submit-runner-ram must be greater than zero") diff --git a/sdk/cwl/tests/test_submit.py b/sdk/cwl/tests/test_submit.py index 1b892a9836..bf2791d728 100644 --- a/sdk/cwl/tests/test_submit.py +++ b/sdk/cwl/tests/test_submit.py @@ -1204,7 +1204,8 @@ class TestSubmit(unittest.TestCase): { "class": "http://arvados.org/cwl#WorkflowRunnerResources", "coresMin": 2, - "ramMin": 2000 + "ramMin": 2000, + "keep_cache": 512 } ] expect_container["mounts"]["/var/lib/cwl/workflow.json"]["content"]["$graph"][0]["$namespaces"] = { diff --git a/sdk/cwl/tests/wf/submit_wf_runner_resources.cwl b/sdk/cwl/tests/wf/submit_wf_runner_resources.cwl index 9e27121949..814cd07ab5 100644 --- a/sdk/cwl/tests/wf/submit_wf_runner_resources.cwl +++ b/sdk/cwl/tests/wf/submit_wf_runner_resources.cwl @@ -15,6 +15,7 @@ hints: arv:WorkflowRunnerResources: ramMin: 2000 coresMin: 2 + keep_cache: 512 inputs: - id: x type: 
File -- 2.30.2