Refactor the multi-host salt install page.
[arvados.git] / services / crunch-dispatch-slurm / squeue_test.go
index 11f7c482b9ac2754ad66d05fed12bc0fd471aa5b..d41e1982b4a91986259e09ea5fc985d7de6e45ce 100644 (file)
@@ -2,11 +2,12 @@
 //
 // SPDX-License-Identifier: AGPL-3.0
 
-package main
+package dispatchslurm
 
 import (
        "time"
 
+       "github.com/sirupsen/logrus"
        . "gopkg.in/check.v1"
 )
 
@@ -14,6 +15,37 @@ var _ = Suite(&SqueueSuite{})
 
 type SqueueSuite struct{}
 
+func (s *SqueueSuite) TestReleasePending(c *C) {
+       uuids := []string{
+               "zzzzz-dz642-fake0fake0fake0",
+               "zzzzz-dz642-fake1fake1fake1",
+               "zzzzz-dz642-fake2fake2fake2",
+       }
+       slurm := &slurmFake{
+               queue: uuids[0] + " 10000 4294000000 PENDING Resources\n" + uuids[1] + " 10000 4294000111 PENDING Resources\n" + uuids[2] + " 10000 0 PENDING BadConstraints\n",
+       }
+       sqc := &SqueueChecker{
+               Logger: logrus.StandardLogger(),
+               Slurm:  slurm,
+               Period: time.Hour,
+       }
+       sqc.startOnce.Do(sqc.start)
+       defer sqc.Stop()
+
+       done := make(chan struct{})
+       go func() {
+               for _, u := range uuids {
+                       sqc.SetPriority(u, 1)
+               }
+               close(done)
+       }()
+       callUntilReady(sqc.check, done)
+
+       slurm.didRelease = nil
+       sqc.check()
+       c.Check(slurm.didRelease, DeepEquals, []string{uuids[2]})
+}
+
 func (s *SqueueSuite) TestReniceAll(c *C) {
        uuids := []string{"zzzzz-dz642-fake0fake0fake0", "zzzzz-dz642-fake1fake1fake1", "zzzzz-dz642-fake2fake2fake2"}
        for _, test := range []struct {
@@ -24,31 +56,31 @@ func (s *SqueueSuite) TestReniceAll(c *C) {
        }{
                {
                        spread: 1,
-                       squeue: uuids[0] + " 10000 4294000000\n",
+                       squeue: uuids[0] + " 10000 4294000000 PENDING Resources\n",
                        want:   map[string]int64{uuids[0]: 1},
                        expect: [][]string{{uuids[0], "0"}},
                },
                { // fake0 priority is too high
                        spread: 1,
-                       squeue: uuids[0] + " 10000 4294000777\n" + uuids[1] + " 10000 4294000444\n",
+                       squeue: uuids[0] + " 10000 4294000777 PENDING Resources\n" + uuids[1] + " 10000 4294000444 PENDING Resources\n",
                        want:   map[string]int64{uuids[0]: 1, uuids[1]: 999},
                        expect: [][]string{{uuids[1], "0"}, {uuids[0], "334"}},
                },
                { // specify spread
                        spread: 100,
-                       squeue: uuids[0] + " 10000 4294000777\n" + uuids[1] + " 10000 4294000444\n",
+                       squeue: uuids[0] + " 10000 4294000777 PENDING Resources\n" + uuids[1] + " 10000 4294000444 PENDING Resources\n",
                        want:   map[string]int64{uuids[0]: 1, uuids[1]: 999},
                        expect: [][]string{{uuids[1], "0"}, {uuids[0], "433"}},
                },
                { // ignore fake2 because SetPriority() not called
                        spread: 1,
-                       squeue: uuids[0] + " 10000 4294000000\n" + uuids[1] + " 10000 4294000111\n" + uuids[2] + " 10000 4294000222\n",
+                       squeue: uuids[0] + " 10000 4294000000 PENDING Resources\n" + uuids[1] + " 10000 4294000111 PENDING Resources\n" + uuids[2] + " 10000 4294000222 PENDING Resources\n",
                        want:   map[string]int64{uuids[0]: 999, uuids[1]: 1},
                        expect: [][]string{{uuids[0], "0"}, {uuids[1], "112"}},
                },
                { // ignore fake2 because slurm priority=0
                        spread: 1,
-                       squeue: uuids[0] + " 10000 4294000000\n" + uuids[1] + " 10000 4294000111\n" + uuids[2] + " 10000 0\n",
+                       squeue: uuids[0] + " 10000 4294000000 PENDING Resources\n" + uuids[1] + " 10000 4294000111 PENDING Resources\n" + uuids[2] + " 10000 0 PENDING Resources\n",
                        want:   map[string]int64{uuids[0]: 999, uuids[1]: 1, uuids[2]: 997},
                        expect: [][]string{{uuids[0], "0"}, {uuids[1], "112"}},
                },
@@ -58,6 +90,7 @@ func (s *SqueueSuite) TestReniceAll(c *C) {
                        queue: test.squeue,
                }
                sqc := &SqueueChecker{
+                       Logger:         logrus.StandardLogger(),
                        Slurm:          slurm,
                        PrioritySpread: test.spread,
                        Period:         time.Hour,
@@ -73,6 +106,51 @@ func (s *SqueueSuite) TestReniceAll(c *C) {
        }
 }
 
+// If a limited nice range prevents desired priority adjustments, give
+// up and clamp nice to 10K.
+func (s *SqueueSuite) TestReniceInvalidNiceValue(c *C) {
+       uuids := []string{"zzzzz-dz642-fake0fake0fake0", "zzzzz-dz642-fake1fake1fake1", "zzzzz-dz642-fake2fake2fake2"}
+       slurm := &slurmFake{
+               queue:         uuids[0] + " 0 4294000222 PENDING Resources\n" + uuids[1] + " 0 4294555222 PENDING Resources\n",
+               rejectNice10K: true,
+       }
+       sqc := &SqueueChecker{
+               Logger:         logrus.StandardLogger(),
+               Slurm:          slurm,
+               PrioritySpread: 1,
+               Period:         time.Hour,
+       }
+       sqc.startOnce.Do(sqc.start)
+       sqc.check()
+       sqc.SetPriority(uuids[0], 2)
+       sqc.SetPriority(uuids[1], 1)
+
+       // First attempt should renice to 555001, which will fail
+       sqc.reniceAll()
+       c.Check(slurm.didRenice, DeepEquals, [][]string{{uuids[1], "555001"}})
+
+       // Next attempt should renice to 10K, which will succeed
+       sqc.reniceAll()
+       c.Check(slurm.didRenice, DeepEquals, [][]string{{uuids[1], "555001"}, {uuids[1], "10000"}})
+       // ...so we'll change the squeue response to reflect the
+       // updated priority+nice, and make sure sqc sees that...
+       slurm.queue = uuids[0] + " 0 4294000222 PENDING Resources\n" + uuids[1] + " 10000 4294545222 PENDING Resources\n"
+       sqc.check()
+
+       // Next attempt should leave nice alone because it's already
+       // at the 10K limit
+       sqc.reniceAll()
+       c.Check(slurm.didRenice, DeepEquals, [][]string{{uuids[1], "555001"}, {uuids[1], "10000"}})
+
+       // Back to normal if desired nice value falls below 10K
+       slurm.queue = uuids[0] + " 0 4294000222 PENDING Resources\n" + uuids[1] + " 10000 4294000111 PENDING Resources\n"
+       sqc.check()
+       sqc.reniceAll()
+       c.Check(slurm.didRenice, DeepEquals, [][]string{{uuids[1], "555001"}, {uuids[1], "10000"}, {uuids[1], "9890"}})
+
+       sqc.Stop()
+}
+
 // If the given UUID isn't in the slurm queue yet, SetPriority()
 // should wait for it to appear on the very next poll, then give up.
 func (s *SqueueSuite) TestSetPriorityBeforeQueued(c *C) {
@@ -81,6 +159,7 @@ func (s *SqueueSuite) TestSetPriorityBeforeQueued(c *C) {
 
        slurm := &slurmFake{}
        sqc := &SqueueChecker{
+               Logger: logrus.StandardLogger(),
                Slurm:  slurm,
                Period: time.Hour,
        }
@@ -103,7 +182,7 @@ func (s *SqueueSuite) TestSetPriorityBeforeQueued(c *C) {
        for {
                select {
                case <-tick.C:
-                       slurm.queue = uuidGood + " 0 12345\n"
+                       slurm.queue = uuidGood + " 0 12345 PENDING Resources\n"
                        sqc.check()
 
                        // Avoid immediately selecting this case again
@@ -123,3 +202,16 @@ func (s *SqueueSuite) TestSetPriorityBeforeQueued(c *C) {
                }
        }
 }
+
+func callUntilReady(fn func(), done <-chan struct{}) {
+       tick := time.NewTicker(time.Millisecond)
+       defer tick.Stop()
+       for {
+               select {
+               case <-done:
+                       return
+               case <-tick.C:
+                       fn()
+               }
+       }
+}