// Dispatcher service for Crunch that submits containers to the slurm queue.
import (
+ "bytes"
"context"
"flag"
"fmt"
}
}
-func (disp *Dispatcher) sbatchArgs(container arvados.Container) ([]string, error) {
+func (disp *Dispatcher) slurmConstraintArgs(container arvados.Container) []string {
mem := int64(math.Ceil(float64(container.RuntimeConstraints.RAM+container.RuntimeConstraints.KeepCacheRAM+disp.ReserveExtraRAM) / float64(1048576)))
var disk int64
}
}
disk = int64(math.Ceil(float64(disk) / float64(1048576)))
-
- var args []string
- args = append(args, disp.SbatchArguments...)
- args = append(args,
- fmt.Sprintf("--job-name=%s", container.UUID),
- fmt.Sprintf("--nice=%d", initialNiceValue))
-
- constraintArgs := []string{
+ return []string{
fmt.Sprintf("--mem=%d", mem),
fmt.Sprintf("--cpus-per-task=%d", container.RuntimeConstraints.VCPUs),
fmt.Sprintf("--tmp=%d", disk),
}
+}
+
+func (disp *Dispatcher) sbatchArgs(container arvados.Container) ([]string, error) {
+ var args []string
+ args = append(args, disp.SbatchArguments...)
+ args = append(args, "--job-name="+container.UUID, fmt.Sprintf("--nice=%d", initialNiceValue))
+
if disp.cluster == nil {
// no instance types configured
+ args = append(args, disp.slurmConstraintArgs(container)...)
} else if it, err := dispatchcloud.ChooseInstanceType(disp.cluster, &container); err == dispatchcloud.ErrInstanceTypesNotConfigured {
// ditto
+ args = append(args, disp.slurmConstraintArgs(container)...)
} else if err != nil {
return nil, err
} else {
// use instancetype constraint instead of slurm mem/cpu/tmp specs
- constraintArgs = []string{"--constraint=instancetype=" + it.Name}
+ args = append(args, "--constraint=instancetype="+it.Name)
}
- args = append(args, constraintArgs...)
if len(container.SchedulingParameters.Partitions) > 0 {
- args = append(args, fmt.Sprintf("--partition=%s", strings.Join(container.SchedulingParameters.Partitions, ",")))
+ args = append(args, "--partition="+strings.Join(container.SchedulingParameters.Partitions, ","))
}
return args, nil
log.Printf("Submitting container %s to slurm", ctr.UUID)
if err := disp.submit(ctr, disp.CrunchRunCommand); err != nil {
var text string
- if err == dispatchcloud.ErrConstraintsNotSatisfiable {
- text = fmt.Sprintf("cannot run container %s: %s", ctr.UUID, err)
+ if err, ok := err.(dispatchcloud.ConstraintsNotSatisfiableError); ok {
+ var logBuf bytes.Buffer
+ fmt.Fprintf(&logBuf, "cannot run container %s: %s\n", ctr.UUID, err)
+ if len(err.AvailableTypes) == 0 {
+ fmt.Fprint(&logBuf, "No instance types are configured.\n")
+ } else {
+ fmt.Fprint(&logBuf, "Available instance types:\n")
+ for _, t := range err.AvailableTypes {
+ fmt.Fprintf(&logBuf,
+ "Type %q: %d VCPUs, %d RAM, %d Scratch, %f Price\n",
+ t.Name, t.VCPUs, t.RAM, t.Scratch, t.Price,
+ )
+ }
+ }
+ text = logBuf.String()
disp.UpdateState(ctr.UUID, dispatch.Cancelled)
} else {
text = fmt.Sprintf("Error submitting container %s to slurm: %s", ctr.UUID, err)