18842: Initial work adding disk cache option to configuration
authorPeter Amstutz <peter.amstutz@curii.com>
Tue, 25 Oct 2022 15:21:47 +0000 (11:21 -0400)
committerPeter Amstutz <peter.amstutz@curii.com>
Tue, 25 Oct 2022 15:21:47 +0000 (11:21 -0400)
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz@curii.com>

lib/config/config.default.yml
lib/config/export.go
lib/crunchrun/crunchrun.go
sdk/cwl/arvados_cwl/arvcontainer.py
sdk/go/arvados/container.go
services/api/app/models/arvados_model.rb
services/api/app/models/container.rb

index 9b5547248e7c8ab84adc245cdd266deb07f8da2f..88154f05e7cea12fbe002654dc5a7921f6f0f588 100644 (file)
@@ -953,8 +953,15 @@ Clusters:
       # troubleshooting purposes.
       LogReuseDecisions: false
 
-      # Default value for keep_cache_ram of a container's runtime_constraints.
-      DefaultKeepCacheRAM: 268435456
+      # Default value for keep_cache_ram of a container's
+      # runtime_constraints.  Note: this amount is added to the RAM
+      # request used to allocate a VM or submit an HPC job.
+      DefaultKeepCacheRAM: 0
+
+      # Default value for keep_cache_disk of a container's
+      # runtime_constraints.  Note: this amount is added to the disk
+      # request used to allocate a VM or submit an HPC job.
+      DefaultKeepCacheDisk: 8589934592
 
       # Number of times a container can be unlocked before being
       # automatically cancelled.
index e7cf094eb02b63f8a613b881696940e2cec505ab..9877b85a3ac8861cd2a096aee342b83507679645 100644 (file)
@@ -121,6 +121,7 @@ var whitelist = map[string]bool{
        "Containers.CrunchRunArgumentsList":        false,
        "Containers.CrunchRunCommand":              false,
        "Containers.DefaultKeepCacheRAM":           true,
+       "Containers.DefaultKeepCacheDisk":          true,
        "Containers.DispatchPrivateKey":            false,
        "Containers.JobsAPI":                       true,
        "Containers.JobsAPI.Enable":                true,
index ee9115d8d809903be17cbaa10dc4010d1b7d87dc..67454cd0ff79dced34d87311e7744a2cdeab86d8 100644 (file)
@@ -427,7 +427,14 @@ func (runner *ContainerRunner) SetupMounts() (map[string]bindmount, error) {
                arvMountCmd = append(arvMountCmd, "--allow-other")
        }
 
-       if runner.Container.RuntimeConstraints.KeepCacheRAM > 0 {
+       // A disk cache request takes precedence; the RAM cache size is only passed when no disk cache is requested.
+       if runner.Container.RuntimeConstraints.KeepCacheDisk > 0 {
+               keepcachedir, err := runner.MkTempDir(runner.parentTemp, "keepcache")
+               if err != nil {
+                       return nil, fmt.Errorf("while creating keep cache temp dir: %v", err)
+               }
+               arvMountCmd = append(arvMountCmd, "--disk-cache", "--disk-cache-dir", keepcachedir, "--file-cache", fmt.Sprintf("%d", runner.Container.RuntimeConstraints.KeepCacheDisk))
+       } else if runner.Container.RuntimeConstraints.KeepCacheRAM > 0 {
                arvMountCmd = append(arvMountCmd, "--file-cache", fmt.Sprintf("%d", runner.Container.RuntimeConstraints.KeepCacheRAM))
        }
 
index e9b58bc83b2fb4b655676acab301a6528f170a77..9b5f322338275fee56035d03909ee5bf23819b20 100644 (file)
@@ -265,7 +265,11 @@ class ArvadosContainer(JobBase):
         runtime_req, _ = self.get_requirement("http://arvados.org/cwl#RuntimeConstraints")
         if runtime_req:
             if "keep_cache" in runtime_req:
-                runtime_constraints["keep_cache_ram"] = math.ceil(runtime_req["keep_cache"] * 2**20)
+                if self.arvrunner.api.config()["Containers"].get("DefaultKeepCacheDisk", 0) > 0:
+                    # If DefaultKeepCacheDisk is non-zero it means we should use disk cache.
+                    runtime_constraints["keep_cache_disk"] = math.ceil(runtime_req["keep_cache"] * 2**20)
+                else:
+                    runtime_constraints["keep_cache_ram"] = math.ceil(runtime_req["keep_cache"] * 2**20)
             if "outputDirType" in runtime_req:
                 if runtime_req["outputDirType"] == "local_output_dir":
                     # Currently the default behavior.
index 45f92017c4d02be4a6d4063439ea8cd515dbd268..165c8112e8f1ed39cde40e2b6a913072ced0fe32 100644 (file)
@@ -107,11 +107,12 @@ type CUDARuntimeConstraints struct {
 // RuntimeConstraints specify a container's compute resources (RAM,
 // CPU) and network connectivity.
 type RuntimeConstraints struct {
-       API          bool                   `json:"API"`
-       RAM          int64                  `json:"ram"`
-       VCPUs        int                    `json:"vcpus"`
-       KeepCacheRAM int64                  `json:"keep_cache_ram"`
-       CUDA         CUDARuntimeConstraints `json:"cuda"`
+       API           bool                   `json:"API"`
+       RAM           int64                  `json:"ram"`
+       VCPUs         int                    `json:"vcpus"`
+       KeepCacheRAM  int64                  `json:"keep_cache_ram"`
+       KeepCacheDisk int64                  `json:"keep_cache_disk"`
+       CUDA          CUDARuntimeConstraints `json:"cuda"`
 }
 
 // SchedulingParameters specify a container's scheduling parameters
index c2725506c02ef75a85dee2a7c3a11fbd8db7e119..7143378d2ff3779e50be6c7d766c4655f6e45564 100644 (file)
@@ -959,6 +959,7 @@ class ArvadosModel < ApplicationRecord
         'hardware_capability' => '',
       },
       'keep_cache_ram' => 0,
+      'keep_cache_disk' => 0,
       'ram' => 0,
       'vcpus' => 0,
     }.merge(attributes['runtime_constraints'] || {})
index 3e3f73b838dab5f4809bef12cd8c3d3dc1b02b08..3334fdcac99802c0c370e51eb4621a48b87cbcb4 100644 (file)
@@ -227,6 +227,10 @@ class Container < ArvadosModel
     if rc['keep_cache_ram'] == 0
       rc['keep_cache_ram'] = Rails.configuration.Containers.DefaultKeepCacheRAM
     end
+    if rc['keep_cache_disk'] == 0 and rc['keep_cache_ram'] == 0
+      # Only set if keep_cache_ram isn't set.
+      rc['keep_cache_disk'] = Rails.configuration.Containers.DefaultKeepCacheDisk
+    end
     rc
   end