Merge branch 'main' into 18324-lsf-gpu
author Peter Amstutz <peter.amstutz@curii.com>
Fri, 7 Jan 2022 22:19:54 +0000 (17:19 -0500)
committer Peter Amstutz <peter.amstutz@curii.com>
Fri, 7 Jan 2022 23:14:33 +0000 (18:14 -0500)
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz@curii.com>

1  2 
lib/config/config.default.yml
sdk/go/arvados/config.go

index 002acd03aa3db6e129e3618ae38df2cd0b23d166,21e39a8a873403e801aa02504175f83a8aa6521f..17bba5410bb4f31efceb8b1b6ed74eb372d183b8
@@@ -899,10 -899,20 +899,20 @@@ Clusters
        # go down.
        MaxComputeVMs: 64
  
-       # Preemptible instance support (e.g. AWS Spot Instances)
-       # When true, child containers will get created with the preemptible
-       # scheduling parameter parameter set.
-       UsePreemptibleInstances: false
+       # Schedule all child containers on preemptible instances (e.g. AWS
+       # Spot Instances) even if not requested by the submitter.
+       #
+       # If false, containers are scheduled on preemptible instances
+       # only when requested by the submitter.
+       #
+       # Note that arvados-cwl-runner does not currently offer a
+       # feature to request preemptible instances, so this value
+       # effectively acts as a cluster-wide decision about whether to
+       # use preemptible instances.
+       #
+       # This flag is ignored if no preemptible instance types are
+       # configured, and has no effect on top-level containers.
+       AlwaysUsePreemptibleInstances: true
  
        # PEM encoded SSH key (RSA, DSA, or ECDSA) used by the
        # cloud dispatcher for executing containers on worker VMs.
          # %C number of VCPUs
          # %M memory in MB
          # %T tmp in MB
 +        # %G number of GPU devices (runtime_constraints.cuda.device_count)
          #
          # Use %% to express a literal %. The %%J in the default will be changed
          # to %J, which is interpreted by bsub itself.
          # from /tmp, or adjust the "-o" and "-e" arguments accordingly.
          BsubArgumentsList: ["-o", "/tmp/crunch-run.%%J.out", "-e", "/tmp/crunch-run.%%J.err", "-J", "%U", "-n", "%C", "-D", "%MMB", "-R", "rusage[mem=%MMB:tmp=%TMB] span[hosts=1]", "-R", "select[mem>=%MMB]", "-R", "select[tmp>=%TMB]", "-R", "select[ncpus>=%C]"]
  
 +        # Arguments that will be appended to the bsub command line
 +        # when submitting Arvados containers as LSF jobs with
 +        # runtime_constraints.cuda.device_count > 0
 +        BsubCUDAArguments: ["-gpu", "num=%G"]
 +
          # Use sudo to switch to this user account when submitting LSF
          # jobs.
          #
diff --combined sdk/go/arvados/config.go
index e367c9202c6692d77c52f8fc210a808040471acf,a1ab713e4a1eb43864922afecb11ee41139c0252..b8c8269f12acba74feb00edc07ec7949e0db5fc4
@@@ -431,23 -431,23 +431,23 @@@ type InstanceType struct 
  }
  
  type ContainersConfig struct {
-       CloudVMs                    CloudVMsConfig
-       CrunchRunCommand            string
-       CrunchRunArgumentsList      []string
-       DefaultKeepCacheRAM         ByteSize
-       DispatchPrivateKey          string
-       LogReuseDecisions           bool
-       MaxComputeVMs               int
-       MaxDispatchAttempts         int
-       MaxRetryAttempts            int
-       MinRetryPeriod              Duration
-       ReserveExtraRAM             ByteSize
-       StaleLockTimeout            Duration
-       SupportedDockerImageFormats StringSet
-       UsePreemptibleInstances     bool
-       RuntimeEngine               string
-       LocalKeepBlobBuffersPerVCPU int
-       LocalKeepLogsToContainerLog string
+       CloudVMs                      CloudVMsConfig
+       CrunchRunCommand              string
+       CrunchRunArgumentsList        []string
+       DefaultKeepCacheRAM           ByteSize
+       DispatchPrivateKey            string
+       LogReuseDecisions             bool
+       MaxComputeVMs                 int
+       MaxDispatchAttempts           int
+       MaxRetryAttempts              int
+       MinRetryPeriod                Duration
+       ReserveExtraRAM               ByteSize
+       StaleLockTimeout              Duration
+       SupportedDockerImageFormats   StringSet
+       AlwaysUsePreemptibleInstances bool
+       RuntimeEngine                 string
+       LocalKeepBlobBuffersPerVCPU   int
+       LocalKeepLogsToContainerLog   string
  
        JobsAPI struct {
                Enable         string
        LSF struct {
                BsubSudoUser      string
                BsubArgumentsList []string
 +              BsubCUDAArguments []string
        }
  }