From: Peter Amstutz Date: Fri, 7 Jan 2022 22:19:54 +0000 (-0500) Subject: Merge branch 'main' into 18324-lsf-gpu X-Git-Tag: 2.4.0~117^2 X-Git-Url: https://git.arvados.org/arvados.git/commitdiff_plain/15f6e58f3b7c158aa735ce78597cf22c6ef3543b?hp=-c Merge branch 'main' into 18324-lsf-gpu Arvados-DCO-1.1-Signed-off-by: Peter Amstutz --- 15f6e58f3b7c158aa735ce78597cf22c6ef3543b diff --combined lib/config/config.default.yml index 002acd03aa,21e39a8a87..17bba5410b --- a/lib/config/config.default.yml +++ b/lib/config/config.default.yml @@@ -899,10 -899,20 +899,20 @@@ Clusters # go down. MaxComputeVMs: 64 - # Preemptible instance support (e.g. AWS Spot Instances) - # When true, child containers will get created with the preemptible - # scheduling parameter parameter set. - UsePreemptibleInstances: false + # Schedule all child containers on preemptible instances (e.g. AWS + # Spot Instances) even if not requested by the submitter. + # + # If false, containers are scheduled on preemptible instances + # only when requested by the submitter. + # + # Note that arvados-cwl-runner does not currently offer a + # feature to request preemptible instances, so this value + # effectively acts as a cluster-wide decision about whether to + # use preemptible instances. + # + # This flag is ignored if no preemptible instance types are + # configured, and has no effect on top-level containers. + AlwaysUsePreemptibleInstances: true # PEM encoded SSH key (RSA, DSA, or ECDSA) used by the # cloud dispatcher for executing containers on worker VMs. @@@ -1089,7 -1099,6 +1099,7 @@@ # %C number of VCPUs # %M memory in MB # %T tmp in MB + # %G number of GPU devices (runtime_constraints.cuda.device_count) # # Use %% to express a literal %. The %%J in the default will be changed # to %J, which is interpreted by bsub itself. @@@ -1100,11 -1109,6 +1110,11 @@@ # from /tmp, or adjust the "-o" and "-e" arguments accordingly. BsubArgumentsList: ["-o", "/tmp/crunch-run.%%J.out", "-e", "/tmp/crunch-run.%%J.err", "-J", "%U", "-n", "%C", "-D", "%MMB", "-R", "rusage[mem=%MMB:tmp=%TMB] span[hosts=1]", "-R", "select[mem>=%MMB]", "-R", "select[tmp>=%TMB]", "-R", "select[ncpus>=%C]"] + # Arguments that will be appended to the bsub command line + # when submitting Arvados containers as LSF jobs with + # runtime_constraints.cuda.device_count > 0 + BsubCUDAArguments: ["-gpu", "num=%G"] + # Use sudo to switch to this user account when submitting LSF # jobs. # diff --combined sdk/go/arvados/config.go index e367c9202c,a1ab713e4a..b8c8269f12 --- a/sdk/go/arvados/config.go +++ b/sdk/go/arvados/config.go @@@ -431,23 -431,23 +431,23 @@@ type InstanceType struct } type ContainersConfig struct { - CloudVMs CloudVMsConfig - CrunchRunCommand string - CrunchRunArgumentsList []string - DefaultKeepCacheRAM ByteSize - DispatchPrivateKey string - LogReuseDecisions bool - MaxComputeVMs int - MaxDispatchAttempts int - MaxRetryAttempts int - MinRetryPeriod Duration - ReserveExtraRAM ByteSize - StaleLockTimeout Duration - SupportedDockerImageFormats StringSet - UsePreemptibleInstances bool - RuntimeEngine string - LocalKeepBlobBuffersPerVCPU int - LocalKeepLogsToContainerLog string + CloudVMs CloudVMsConfig + CrunchRunCommand string + CrunchRunArgumentsList []string + DefaultKeepCacheRAM ByteSize + DispatchPrivateKey string + LogReuseDecisions bool + MaxComputeVMs int + MaxDispatchAttempts int + MaxRetryAttempts int + MinRetryPeriod Duration + ReserveExtraRAM ByteSize + StaleLockTimeout Duration + SupportedDockerImageFormats StringSet + AlwaysUsePreemptibleInstances bool + RuntimeEngine string + LocalKeepBlobBuffersPerVCPU int + LocalKeepLogsToContainerLog string JobsAPI struct { Enable string @@@ -486,7 -486,6 +486,7 @@@ LSF struct { BsubSudoUser string BsubArgumentsList []string + BsubCUDAArguments []string } }