From 2bb2e562faaa5ae3160469cf3ef5b36f1b884504 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Tue, 25 Oct 2022 11:21:47 -0400 Subject: [PATCH] 18842: Initial work adding disk cache option to configuration Arvados-DCO-1.1-Signed-off-by: Peter Amstutz --- lib/config/config.default.yml | 11 +++++++++-- lib/config/export.go | 1 + lib/crunchrun/crunchrun.go | 6 +++++- sdk/cwl/arvados_cwl/arvcontainer.py | 6 +++++- sdk/go/arvados/container.go | 11 ++++++----- services/api/app/models/arvados_model.rb | 1 + services/api/app/models/container.rb | 4 ++++ 7 files changed, 31 insertions(+), 9 deletions(-) diff --git a/lib/config/config.default.yml b/lib/config/config.default.yml index 9b5547248e..88154f05e7 100644 --- a/lib/config/config.default.yml +++ b/lib/config/config.default.yml @@ -953,8 +953,15 @@ Clusters: # troubleshooting purposes. LogReuseDecisions: false - # Default value for keep_cache_ram of a container's runtime_constraints. - DefaultKeepCacheRAM: 268435456 + # Default value for keep_cache_ram of a container's + # runtime_constraints. Note: this gets added to the RAM request + # used to allocate a VM or submit an HPC job + DefaultKeepCacheRAM: 0 + + # Default value for keep_cache_disk of a container's + # runtime_constraints. Note: this gets added to the disk + # request used to allocate a VM or submit an HPC job + DefaultKeepCacheDisk: 8589934592 # Number of times a container can be unlocked before being # automatically cancelled. 
diff --git a/lib/config/export.go b/lib/config/export.go index e7cf094eb0..9877b85a3a 100644 --- a/lib/config/export.go +++ b/lib/config/export.go @@ -121,6 +121,7 @@ var whitelist = map[string]bool{ "Containers.CrunchRunArgumentsList": false, "Containers.CrunchRunCommand": false, "Containers.DefaultKeepCacheRAM": true, + "Containers.DefaultKeepCacheDisk": true, "Containers.DispatchPrivateKey": false, "Containers.JobsAPI": true, "Containers.JobsAPI.Enable": true, diff --git a/lib/crunchrun/crunchrun.go b/lib/crunchrun/crunchrun.go index ee9115d8d8..67454cd0ff 100644 --- a/lib/crunchrun/crunchrun.go +++ b/lib/crunchrun/crunchrun.go @@ -427,7 +427,11 @@ func (runner *ContainerRunner) SetupMounts() (map[string]bindmount, error) { arvMountCmd = append(arvMountCmd, "--allow-other") } - if runner.Container.RuntimeConstraints.KeepCacheRAM > 0 { + if runner.Container.RuntimeConstraints.KeepCacheDisk > 0 { + keepcachedir, err = runner.MkTempDir(runner.parentTemp, "keepcache") + arvMountCmd = append(arvMountCmd, "--disk-cache", "--disk-cache-dir", keepcachedir, "--file-cache", fmt.Sprintf("%d", runner.Container.RuntimeConstraints.KeepCacheDisk)) + } + else if runner.Container.RuntimeConstraints.KeepCacheRAM > 0 { arvMountCmd = append(arvMountCmd, "--file-cache", fmt.Sprintf("%d", runner.Container.RuntimeConstraints.KeepCacheRAM)) } diff --git a/sdk/cwl/arvados_cwl/arvcontainer.py b/sdk/cwl/arvados_cwl/arvcontainer.py index e9b58bc83b..9b5f322338 100644 --- a/sdk/cwl/arvados_cwl/arvcontainer.py +++ b/sdk/cwl/arvados_cwl/arvcontainer.py @@ -265,7 +265,11 @@ class ArvadosContainer(JobBase): runtime_req, _ = self.get_requirement("http://arvados.org/cwl#RuntimeConstraints") if runtime_req: if "keep_cache" in runtime_req: - runtime_constraints["keep_cache_ram"] = math.ceil(runtime_req["keep_cache"] * 2**20) + if self.arvrunner.api.config()["Containers"].get("DefaultKeepCacheDisk", 0) > 0: + # If DefaultKeepCacheDisk is non-zero it means we should use disk cache. 
+ runtime_constraints["keep_cache_disk"] = math.ceil(runtime_req["keep_cache"] * 2**20) + else: + runtime_constraints["keep_cache_ram"] = math.ceil(runtime_req["keep_cache"] * 2**20) if "outputDirType" in runtime_req: if runtime_req["outputDirType"] == "local_output_dir": # Currently the default behavior. diff --git a/sdk/go/arvados/container.go b/sdk/go/arvados/container.go index 45f92017c4..165c8112e8 100644 --- a/sdk/go/arvados/container.go +++ b/sdk/go/arvados/container.go @@ -107,11 +107,12 @@ type CUDARuntimeConstraints struct { // RuntimeConstraints specify a container's compute resources (RAM, // CPU) and network connectivity. type RuntimeConstraints struct { - API bool `json:"API"` - RAM int64 `json:"ram"` - VCPUs int `json:"vcpus"` - KeepCacheRAM int64 `json:"keep_cache_ram"` - CUDA CUDARuntimeConstraints `json:"cuda"` + API bool `json:"API"` + RAM int64 `json:"ram"` + VCPUs int `json:"vcpus"` + KeepCacheRAM int64 `json:"keep_cache_ram"` + KeepCacheDisk int64 `json:"keep_cache_disk"` + CUDA CUDARuntimeConstraints `json:"cuda"` } // SchedulingParameters specify a container's scheduling parameters diff --git a/services/api/app/models/arvados_model.rb b/services/api/app/models/arvados_model.rb index c2725506c0..7143378d2f 100644 --- a/services/api/app/models/arvados_model.rb +++ b/services/api/app/models/arvados_model.rb @@ -959,6 +959,7 @@ class ArvadosModel < ApplicationRecord 'hardware_capability' => '', }, 'keep_cache_ram' => 0, + 'keep_cache_disk' => 0, 'ram' => 0, 'vcpus' => 0, }.merge(attributes['runtime_constraints'] || {}) diff --git a/services/api/app/models/container.rb b/services/api/app/models/container.rb index 3e3f73b838..3334fdcac9 100644 --- a/services/api/app/models/container.rb +++ b/services/api/app/models/container.rb @@ -227,6 +227,10 @@ class Container < ArvadosModel if rc['keep_cache_ram'] == 0 rc['keep_cache_ram'] = Rails.configuration.Containers.DefaultKeepCacheRAM end + if rc['keep_cache_disk'] == 0 and rc['keep_cache_ram'] == 0 + 
# Only apply the default disk cache when keep_cache_ram is still 0 after its own default (DefaultKeepCacheRAM) was applied above. + rc['keep_cache_disk'] = Rails.configuration.Containers.DefaultKeepCacheDisk + end rc end -- 2.30.2