// Dispatcher service for Crunch that submits containers to the slurm queue.
import (
+ "bytes"
"context"
"flag"
"fmt"
// Minimum time between two attempts to run the same container
MinRetryPeriod arvados.Duration
+
+ // Batch size for container queries
+ BatchSize int64
}
func main() {
}
disp.Dispatcher = &dispatch.Dispatcher{
Arv: arv,
+ BatchSize: disp.BatchSize,
RunContainer: disp.runContainer,
PollPeriod: time.Duration(disp.PollPeriod),
MinRetryPeriod: time.Duration(disp.MinRetryPeriod),
}
}
-func (disp *Dispatcher) sbatchArgs(container arvados.Container) ([]string, error) {
+func (disp *Dispatcher) slurmConstraintArgs(container arvados.Container) []string {
mem := int64(math.Ceil(float64(container.RuntimeConstraints.RAM+container.RuntimeConstraints.KeepCacheRAM+disp.ReserveExtraRAM) / float64(1048576)))
var disk int64
}
}
disk = int64(math.Ceil(float64(disk) / float64(1048576)))
-
- var sbatchArgs []string
- sbatchArgs = append(sbatchArgs, disp.SbatchArguments...)
- sbatchArgs = append(sbatchArgs, fmt.Sprintf("--job-name=%s", container.UUID))
- sbatchArgs = append(sbatchArgs, fmt.Sprintf("--mem=%d", mem))
- sbatchArgs = append(sbatchArgs, fmt.Sprintf("--cpus-per-task=%d", container.RuntimeConstraints.VCPUs))
- sbatchArgs = append(sbatchArgs, fmt.Sprintf("--tmp=%d", disk))
- sbatchArgs = append(sbatchArgs, fmt.Sprintf("--nice=%d", initialNiceValue))
- if len(container.SchedulingParameters.Partitions) > 0 {
- sbatchArgs = append(sbatchArgs, fmt.Sprintf("--partition=%s", strings.Join(container.SchedulingParameters.Partitions, ",")))
+ return []string{
+ fmt.Sprintf("--mem=%d", mem),
+ fmt.Sprintf("--cpus-per-task=%d", container.RuntimeConstraints.VCPUs),
+ fmt.Sprintf("--tmp=%d", disk),
}
+}
+
+func (disp *Dispatcher) sbatchArgs(container arvados.Container) ([]string, error) {
+ var args []string
+ args = append(args, disp.SbatchArguments...)
+ args = append(args, "--job-name="+container.UUID, fmt.Sprintf("--nice=%d", initialNiceValue))
if disp.cluster == nil {
// no instance types configured
+ args = append(args, disp.slurmConstraintArgs(container)...)
} else if it, err := dispatchcloud.ChooseInstanceType(disp.cluster, &container); err == dispatchcloud.ErrInstanceTypesNotConfigured {
// ditto
+ args = append(args, disp.slurmConstraintArgs(container)...)
} else if err != nil {
return nil, err
} else {
- sbatchArgs = append(sbatchArgs, "--constraint=instancetype="+it.Name)
+ // use instancetype constraint instead of slurm mem/cpu/tmp specs
+ args = append(args, "--constraint=instancetype="+it.Name)
}
- return sbatchArgs, nil
+ if len(container.SchedulingParameters.Partitions) > 0 {
+ args = append(args, "--partition="+strings.Join(container.SchedulingParameters.Partitions, ","))
+ }
+
+ return args, nil
}
func (disp *Dispatcher) submit(container arvados.Container, crunchRunCommand []string) error {
log.Printf("Submitting container %s to slurm", ctr.UUID)
if err := disp.submit(ctr, disp.CrunchRunCommand); err != nil {
var text string
- if err == dispatchcloud.ErrConstraintsNotSatisfiable {
- text = fmt.Sprintf("cannot run container %s: %s", ctr.UUID, err)
+ if err, ok := err.(dispatchcloud.ConstraintsNotSatisfiableError); ok {
+ var logBuf bytes.Buffer
+ fmt.Fprintf(&logBuf, "cannot run container %s: %s\n", ctr.UUID, err)
+ if len(err.AvailableTypes) == 0 {
+ fmt.Fprint(&logBuf, "No instance types are configured.\n")
+ } else {
+ fmt.Fprint(&logBuf, "Available instance types:\n")
+ for _, t := range err.AvailableTypes {
+ fmt.Fprintf(&logBuf,
+ "Type %q: %d VCPUs, %d RAM, %d Scratch, %f Price\n",
+ t.Name, t.VCPUs, t.RAM, t.Scratch, t.Price,
+ )
+ }
+ }
+ text = logBuf.String()
disp.UpdateState(ctr.UUID, dispatch.Cancelled)
} else {
text = fmt.Sprintf("Error submitting container %s to slurm: %s", ctr.UUID, err)