From 499208bc547b8151fcf3a4230fab28720f94b13c Mon Sep 17 00:00:00 2001
From: "Joshua C. Randall"
Date: Thu, 23 Aug 2018 19:49:44 +0000
Subject: [PATCH] limit concurrent slurm commands

use a semaphore channel to limit concurrent sbatch/scancel/scontrol
commands to 3. squeue is already limited to one at a time.

fixes 14110

Arvados-DCO-1.1-Signed-off-by: Joshua C. Randall
---
 .../crunch-dispatch-slurm/crunch-dispatch-slurm.go |  2 +-
 services/crunch-dispatch-slurm/slurm.go            | 12 +++++++++++-
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/services/crunch-dispatch-slurm/crunch-dispatch-slurm.go b/services/crunch-dispatch-slurm/crunch-dispatch-slurm.go
index 36ef264963..16d9fd18db 100644
--- a/services/crunch-dispatch-slurm/crunch-dispatch-slurm.go
+++ b/services/crunch-dispatch-slurm/crunch-dispatch-slurm.go
@@ -159,7 +159,7 @@ func (disp *Dispatcher) setup() {
 	}
 	arv.Retries = 25
 
-	disp.slurm = &slurmCLI{}
+	disp.slurm = NewSlurmCLI()
 	disp.sqCheck = &SqueueChecker{
 		Period:         time.Duration(disp.PollPeriod),
 		PrioritySpread: disp.PrioritySpread,
diff --git a/services/crunch-dispatch-slurm/slurm.go b/services/crunch-dispatch-slurm/slurm.go
index 9e9f45270f..782be7d8c4 100644
--- a/services/crunch-dispatch-slurm/slurm.go
+++ b/services/crunch-dispatch-slurm/slurm.go
@@ -20,7 +20,15 @@ type Slurm interface {
 	Renice(name string, nice int64) error
 }
 
-type slurmCLI struct{}
+type slurmCLI struct{
+	runSemaphore chan bool
+}
+
+func NewSlurmCLI() *slurmCLI {
+	return &slurmCLI{
+		runSemaphore: make(chan bool, 3),
+	}
+}
 
 func (scli *slurmCLI) Batch(script io.Reader, args []string) error {
 	return scli.run(script, "sbatch", args)
@@ -64,6 +72,8 @@ func (scli *slurmCLI) Renice(name string, nice int64) error {
 }
 
 func (scli *slurmCLI) run(stdin io.Reader, prog string, args []string) error {
+	scli.runSemaphore <- true
+	defer func() { <-scli.runSemaphore }()
 	cmd := exec.Command(prog, args...)
 	cmd.Stdin = stdin
 	out, err := cmd.CombinedOutput()
-- 
2.30.2