disp.lsfcli.logger = disp.logger
disp.lsfqueue = lsfqueue{
logger: disp.logger,
- period: time.Duration(disp.Cluster.Containers.CloudVMs.PollInterval),
+ period: disp.Cluster.Containers.CloudVMs.PollInterval.Duration(),
lsfcli: &disp.lsfcli,
}
disp.ArvClient.AuthToken = disp.AuthToken
// Try "bkill" repeatedly, twice per configured poll interval,
// until the LSF job disappears from the queue.
- ticker := time.NewTicker(5 * time.Second)
+ ticker := time.NewTicker(disp.Cluster.Containers.CloudVMs.PollInterval.Duration() / 2)
defer ticker.Stop()
for qent, ok := disp.lsfqueue.Lookup(ctr.UUID); ok; _, ok = disp.lsfqueue.Lookup(ctr.UUID) {
err := disp.lsfcli.Bkill(qent.ID)
"%M": fmt.Sprintf("%d", mem),
"%T": fmt.Sprintf("%d", tmp),
"%U": container.UUID,
+ "%G": fmt.Sprintf("%d", container.RuntimeConstraints.CUDA.DeviceCount),
}
re := regexp.MustCompile(`%.`)
var substitutionErrors string
- for _, a := range disp.Cluster.Containers.LSF.BsubArgumentsList {
+ argumentTemplate := disp.Cluster.Containers.LSF.BsubArgumentsList
+ if container.RuntimeConstraints.CUDA.DeviceCount > 0 {
+ argumentTemplate = append(argumentTemplate, disp.Cluster.Containers.LSF.BsubCUDAArguments...)
+ }
+ for _, a := range argumentTemplate {
args = append(args, re.ReplaceAllStringFunc(a, func(s string) string {
subst := repl[s]
if len(subst) == 0 {