X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/927524f1be454de021180b74999d682780b8cb6b..dac27775017bab861a9fe3e3f85766df7cd8f80d:/lib/lsf/dispatch.go diff --git a/lib/lsf/dispatch.go b/lib/lsf/dispatch.go index 537d52a072..0d9324784d 100644 --- a/lib/lsf/dispatch.go +++ b/lib/lsf/dispatch.go @@ -119,7 +119,7 @@ func (disp *dispatcher) init() { disp.lsfcli.logger = disp.logger disp.lsfqueue = lsfqueue{ logger: disp.logger, - period: time.Duration(disp.Cluster.Containers.CloudVMs.PollInterval), + period: disp.Cluster.Containers.CloudVMs.PollInterval.Duration(), lsfcli: &disp.lsfcli, } disp.ArvClient.AuthToken = disp.AuthToken @@ -256,7 +256,7 @@ func (disp *dispatcher) runContainer(_ *dispatch.Dispatcher, ctr arvados.Contain // Try "bkill" every few seconds until the LSF job disappears // from the queue. - ticker := time.NewTicker(5 * time.Second) + ticker := time.NewTicker(disp.Cluster.Containers.CloudVMs.PollInterval.Duration() / 2) defer ticker.Stop() for qent, ok := disp.lsfqueue.Lookup(ctr.UUID); ok; _, ok = disp.lsfqueue.Lookup(ctr.UUID) { err := disp.lsfcli.Bkill(qent.ID) @@ -306,11 +306,16 @@ func (disp *dispatcher) bsubArgs(container arvados.Container) ([]string, error) "%M": fmt.Sprintf("%d", mem), "%T": fmt.Sprintf("%d", tmp), "%U": container.UUID, + "%G": fmt.Sprintf("%d", container.RuntimeConstraints.CUDA.DeviceCount), } re := regexp.MustCompile(`%.`) var substitutionErrors string - for _, a := range disp.Cluster.Containers.LSF.BsubArgumentsList { + argumentTemplate := disp.Cluster.Containers.LSF.BsubArgumentsList + if container.RuntimeConstraints.CUDA.DeviceCount > 0 { + argumentTemplate = append(argumentTemplate, disp.Cluster.Containers.LSF.BsubCUDAArguments...) + } + for _, a := range argumentTemplate { args = append(args, re.ReplaceAllStringFunc(a, func(s string) string { subst := repl[s] if len(subst) == 0 {