1 // Copyright (C) The Arvados Authors. All rights reserved.
3 // SPDX-License-Identifier: AGPL-3.0
// Slurm describes the SLURM job-control operations offered by this
// file. *slurmCLI satisfies it by invoking the SLURM command-line
// tools.
type Slurm interface {
	// Batch submits a batch job described by script and args.
	// slurmCLI hands both to the "sbatch" command.
	Batch(script io.Reader, args []string) error

	// Cancel cancels the job(s) with the given name. slurmCLI uses
	// "scancel --name=<name>" for this.
	Cancel(name string) error

	// QueueCommand returns a command, not yet started, for
	// inspecting the queue. slurmCLI builds a "squeue" command with
	// args as its arguments.
	QueueCommand(args []string) *exec.Cmd

	// Release releases the job with the given name. slurmCLI runs
	// "scontrol release Name=<name>".
	Release(name string) error

	// Renice changes the nice value of the job with the given name.
	// slurmCLI runs "scontrol update JobName=<name> Nice=<nice>".
	Renice(name string, nice int64) error
}
// slurmCLI holds the state shared by the methods that shell out to
// the SLURM command-line tools.
type slurmCLI struct {
	// runSemaphore throttles run: each call sends a value before
	// starting its command and receives one when it finishes, so the
	// channel's buffer size caps the number of commands in flight.
	runSemaphore chan bool
}
27 func NewSlurmCLI() *slurmCLI {
29 runSemaphore: make(chan bool, 3),
33 func (scli *slurmCLI) Batch(script io.Reader, args []string) error {
34 return scli.run(script, "sbatch", args)
// Cancel runs "scancel --name=<name>" through scli.run once for each
// argument set in the list below. The argument sets visible here add
// "--batch --signal=TERM" and select jobs by "--state=running" and
// "--state=suspended" respectively.
//
// NOTE(review): some lines of this function are not visible in the
// reviewed excerpt (at least one earlier argument set, the error check
// around the scancel call, and the return statements). Confirm those
// against the full file before relying on this summary.
37 func (scli *slurmCLI) Cancel(name string) error {
38 for _, args := range [][]string{
39 // If the slurm job hasn't started yet, remove it from
42 // If the slurm job has started, send SIGTERM. If we
43 // cancel a running job without a --signal argument,
44 // slurm will send SIGTERM and then (after some
45 // site-configured interval) SIGKILL. This would kill
46 // crunch-run without stopping the container, which we
48 {"--batch", "--signal=TERM", "--state=running"},
49 {"--batch", "--signal=TERM", "--state=suspended"},
// Prefix the current argument set with the job-name filter; scancel
// gets nil as run's stdin argument.
51 err := scli.run(nil, "scancel", append([]string{"--name=" + name}, args...))
53 // scancel exits 0 if no job matches the given
54 // name and state. Any error from scancel here
55 // really indicates something is wrong.
62 func (scli *slurmCLI) QueueCommand(args []string) *exec.Cmd {
63 return exec.Command("squeue", args...)
66 func (scli *slurmCLI) Release(name string) error {
67 return scli.run(nil, "scontrol", []string{"release", "Name=" + name})
70 func (scli *slurmCLI) Renice(name string, nice int64) error {
71 return scli.run(nil, "scontrol", []string{"update", "JobName=" + name, fmt.Sprintf("Nice=%d", nice)})
// run executes prog with args via os/exec and captures the command's
// combined standard output and standard error.
//
// Before starting the command it sends on scli.runSemaphore, and a
// deferred receive takes the value back when run returns, so a call
// blocks while the channel's buffer is full.
//
// If the command fails or produces any output, the command path, its
// arguments and the whitespace-trimmed output are logged. A non-nil
// error is rewrapped as "<path>: <err> (<output>)". The rewrap uses %s
// rather than %w, so the original error is not reachable through
// errors.Is or errors.As.
//
// NOTE(review): the lines that use stdin and the final return are not
// visible in the reviewed excerpt. Presumably stdin becomes the
// command's standard input and the (possibly rewrapped) err is
// returned; confirm against the full file.
74 func (scli *slurmCLI) run(stdin io.Reader, prog string, args []string) error {
// Take a slot in the semaphore; give it back when run returns.
75 scli.runSemaphore <- true
76 defer func() { <-scli.runSemaphore }()
77 cmd := exec.Command(prog, args...)
// CombinedOutput starts the command and waits for it to finish.
79 out, err := cmd.CombinedOutput()
80 outTrim := strings.TrimSpace(string(out))
// The test uses the untrimmed output, so whitespace-only output is
// still logged (as an empty quoted string).
81 if err != nil || len(out) > 0 {
82 log.Printf("%q %q: %q", cmd.Path, cmd.Args, outTrim)
85 err = fmt.Errorf("%s: %s (%q)", cmd.Path, err, outTrim)