}
type slurmFake struct {
- didBatch [][]string
- didCancel []string
- didRelease []string
- didRenice [][]string
- queue string
+ didBatch [][]string
+ didCancel []string
+ didRelease []string
+ didRenice [][]string
+ queue string
+ rejectNice10K bool
// If non-nil, run this func during the 2nd+ call to Cancel()
onCancel func()
// Error returned by Batch()
// Renice is the fake's stand-in for a slurm renice call. It appends
// {name, nice formatted as a decimal string} to sf.didRenice and returns
// nil, unless rejectNice10K is set and nice is above 10000, in which case
// it returns an "Invalid nice value" error.
func (sf *slurmFake) Renice(name string, nice int64) error {
sf.didRenice = append(sf.didRenice, []string{name, fmt.Sprintf("%d", nice)}) // recorded before the range check, so rejected calls are logged too
+ if sf.rejectNice10K && nice > 10000 { // only the upper bound is enforced here, even though the message below names both bounds
+ return errors.New("scontrol: error: Invalid nice value, must be between -10000 and 10000")
+ }
return nil
}
"time"
)
+const slurm15NiceLimit int64 = 10000 // cap applied to a job's requested nice value once Renice has rejected a larger one as invalid; NOTE(review): name suggests this is the slurm 15.x limit — confirm
+
// slurmJob is the per-job record that the renice loop reads and updates.
type slurmJob struct {
uuid string // passed to Renice as the job name
wantPriority int64
priority int64 // current slurm priority (incorporates nice value)
nice int64 // current slurm nice value
+ hitNiceLimit bool // set after Renice fails with "Invalid nice value" for a nice above slurm15NiceLimit; while set, requested nice values are clamped to that limit
}
// Squeue implements asynchronous polling monitor of the SLURM queue using the
})
renice := wantNice(jobs, sqc.PrioritySpread)
for i, job := range jobs {
- if renice[i] == job.nice {
+ niceNew := renice[i]
+ if job.hitNiceLimit && niceNew > slurm15NiceLimit {
+ niceNew = slurm15NiceLimit
+ }
+ if niceNew == job.nice {
continue
}
- sqc.Slurm.Renice(job.uuid, renice[i])
+ err := sqc.Slurm.Renice(job.uuid, niceNew)
+ if err != nil && niceNew > slurm15NiceLimit && strings.Contains(err.Error(), "Invalid nice value") {
+ log.Printf("container %q clamping nice values at %d, priority order will not be correct", job.uuid, slurm15NiceLimit)
+ job.hitNiceLimit = true
+ }
}
}
}
}
+// If slurm rejects a nice value above 10K as invalid, later renice
+// attempts for that job are clamped to 10K instead of repeating the
+// failing request; values below 10K are still applied unchanged.
+func (s *SqueueSuite) TestReniceInvalidNiceValue(c *C) {
+ uuids := []string{"zzzzz-dz642-fake0fake0fake0", "zzzzz-dz642-fake1fake1fake1", "zzzzz-dz642-fake2fake2fake2"} // only uuids[0] and uuids[1] are used below
+ slurm := &slurmFake{
+ queue: uuids[0] + " 0 4294000222 PENDING Resources\n" + uuids[1] + " 0 4294555222 PENDING Resources\n", // fields appear to be "uuid nice priority state reason" — TODO confirm against the squeue parser
+ rejectNice10K: true, // the fake's Renice returns an "Invalid nice value" error for nice > 10000
+ }
+ sqc := &SqueueChecker{
+ Slurm: slurm,
+ PrioritySpread: 1,
+ Period: time.Hour, // NOTE(review): presumably long enough that no background poll fires; the test calls check() and reniceAll() by hand
+ }
+ sqc.startOnce.Do(sqc.start)
+ sqc.check()
+ sqc.SetPriority(uuids[0], 2)
+ sqc.SetPriority(uuids[1], 1)
+
+ // First attempt should renice to 555001 (4294555222 - 4294000221,
+ // i.e. one below uuids[0]'s priority), which will fail
+ sqc.reniceAll()
+ c.Check(slurm.didRenice, DeepEquals, [][]string{{uuids[1], "555001"}}) // the fake records the call even though it returned an error
+
+ // Next attempt should renice to 10K, which will succeed
+ sqc.reniceAll()
+ c.Check(slurm.didRenice, DeepEquals, [][]string{{uuids[1], "555001"}, {uuids[1], "10000"}})
+ // ...so we'll change the squeue response to reflect the
+ // updated priority+nice (nice 10000, priority lowered by 10000),
+ // and make sure sqc sees that...
+ slurm.queue = uuids[0] + " 0 4294000222 PENDING Resources\n" + uuids[1] + " 10000 4294545222 PENDING Resources\n"
+ sqc.check()
+
+ // Next attempt should leave nice alone because it's already
+ // at the 10K limit (didRenice is unchanged from the previous check)
+ sqc.reniceAll()
+ c.Check(slurm.didRenice, DeepEquals, [][]string{{uuids[1], "555001"}, {uuids[1], "10000"}})
+
+ // Back to normal if desired nice value falls below 10K:
+ // (4294000111 + 10000) - 4294000221 = 9890
+ slurm.queue = uuids[0] + " 0 4294000222 PENDING Resources\n" + uuids[1] + " 10000 4294000111 PENDING Resources\n"
+ sqc.check()
+ sqc.reniceAll()
+ c.Check(slurm.didRenice, DeepEquals, [][]string{{uuids[1], "555001"}, {uuids[1], "10000"}, {uuids[1], "9890"}})
+
+ sqc.Stop()
+}
+
// If the given UUID isn't in the slurm queue yet, SetPriority()
// should wait for it to appear on the very next poll, then give up.
func (s *SqueueSuite) TestSetPriorityBeforeQueued(c *C) {