13493: Merge branch 'master' into 13493-federation-proxy
[arvados.git] / services / crunch-dispatch-slurm / squeue_test.go
1 // Copyright (C) The Arvados Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: AGPL-3.0
4
5 package main
6
7 import (
8         "time"
9
10         . "gopkg.in/check.v1"
11 )
12
13 var _ = Suite(&SqueueSuite{})
14
15 type SqueueSuite struct{}
16
17 func (s *SqueueSuite) TestReleasePending(c *C) {
18         uuids := []string{
19                 "zzzzz-dz642-fake0fake0fake0",
20                 "zzzzz-dz642-fake1fake1fake1",
21                 "zzzzz-dz642-fake2fake2fake2",
22         }
23         slurm := &slurmFake{
24                 queue: uuids[0] + " 10000 4294000000 PENDING Resources\n" + uuids[1] + " 10000 4294000111 PENDING Resources\n" + uuids[2] + " 10000 0 PENDING BadConstraints\n",
25         }
26         sqc := &SqueueChecker{
27                 Slurm:  slurm,
28                 Period: time.Hour,
29         }
30         sqc.startOnce.Do(sqc.start)
31         defer sqc.Stop()
32
33         done := make(chan struct{})
34         go func() {
35                 for _, u := range uuids {
36                         sqc.SetPriority(u, 1)
37                 }
38                 close(done)
39         }()
40         callUntilReady(sqc.check, done)
41
42         slurm.didRelease = nil
43         sqc.check()
44         c.Check(slurm.didRelease, DeepEquals, []string{uuids[2]})
45 }
46
47 func (s *SqueueSuite) TestReniceAll(c *C) {
48         uuids := []string{"zzzzz-dz642-fake0fake0fake0", "zzzzz-dz642-fake1fake1fake1", "zzzzz-dz642-fake2fake2fake2"}
49         for _, test := range []struct {
50                 spread int64
51                 squeue string
52                 want   map[string]int64
53                 expect [][]string
54         }{
55                 {
56                         spread: 1,
57                         squeue: uuids[0] + " 10000 4294000000 PENDING Resources\n",
58                         want:   map[string]int64{uuids[0]: 1},
59                         expect: [][]string{{uuids[0], "0"}},
60                 },
61                 { // fake0 priority is too high
62                         spread: 1,
63                         squeue: uuids[0] + " 10000 4294000777 PENDING Resources\n" + uuids[1] + " 10000 4294000444 PENDING Resources\n",
64                         want:   map[string]int64{uuids[0]: 1, uuids[1]: 999},
65                         expect: [][]string{{uuids[1], "0"}, {uuids[0], "334"}},
66                 },
67                 { // specify spread
68                         spread: 100,
69                         squeue: uuids[0] + " 10000 4294000777 PENDING Resources\n" + uuids[1] + " 10000 4294000444 PENDING Resources\n",
70                         want:   map[string]int64{uuids[0]: 1, uuids[1]: 999},
71                         expect: [][]string{{uuids[1], "0"}, {uuids[0], "433"}},
72                 },
73                 { // ignore fake2 because SetPriority() not called
74                         spread: 1,
75                         squeue: uuids[0] + " 10000 4294000000 PENDING Resources\n" + uuids[1] + " 10000 4294000111 PENDING Resources\n" + uuids[2] + " 10000 4294000222 PENDING Resources\n",
76                         want:   map[string]int64{uuids[0]: 999, uuids[1]: 1},
77                         expect: [][]string{{uuids[0], "0"}, {uuids[1], "112"}},
78                 },
79                 { // ignore fake2 because slurm priority=0
80                         spread: 1,
81                         squeue: uuids[0] + " 10000 4294000000 PENDING Resources\n" + uuids[1] + " 10000 4294000111 PENDING Resources\n" + uuids[2] + " 10000 0 PENDING Resources\n",
82                         want:   map[string]int64{uuids[0]: 999, uuids[1]: 1, uuids[2]: 997},
83                         expect: [][]string{{uuids[0], "0"}, {uuids[1], "112"}},
84                 },
85         } {
86                 c.Logf("spread=%d squeue=%q want=%v -> expect=%v", test.spread, test.squeue, test.want, test.expect)
87                 slurm := &slurmFake{
88                         queue: test.squeue,
89                 }
90                 sqc := &SqueueChecker{
91                         Slurm:          slurm,
92                         PrioritySpread: test.spread,
93                         Period:         time.Hour,
94                 }
95                 sqc.startOnce.Do(sqc.start)
96                 sqc.check()
97                 for uuid, pri := range test.want {
98                         sqc.SetPriority(uuid, pri)
99                 }
100                 sqc.reniceAll()
101                 c.Check(slurm.didRenice, DeepEquals, test.expect)
102                 sqc.Stop()
103         }
104 }
105
106 // If a limited nice range prevents desired priority adjustments, give
107 // up and clamp nice to 10K.
108 func (s *SqueueSuite) TestReniceInvalidNiceValue(c *C) {
109         uuids := []string{"zzzzz-dz642-fake0fake0fake0", "zzzzz-dz642-fake1fake1fake1", "zzzzz-dz642-fake2fake2fake2"}
110         slurm := &slurmFake{
111                 queue:         uuids[0] + " 0 4294000222 PENDING Resources\n" + uuids[1] + " 0 4294555222 PENDING Resources\n",
112                 rejectNice10K: true,
113         }
114         sqc := &SqueueChecker{
115                 Slurm:          slurm,
116                 PrioritySpread: 1,
117                 Period:         time.Hour,
118         }
119         sqc.startOnce.Do(sqc.start)
120         sqc.check()
121         sqc.SetPriority(uuids[0], 2)
122         sqc.SetPriority(uuids[1], 1)
123
124         // First attempt should renice to 555001, which will fail
125         sqc.reniceAll()
126         c.Check(slurm.didRenice, DeepEquals, [][]string{{uuids[1], "555001"}})
127
128         // Next attempt should renice to 10K, which will succeed
129         sqc.reniceAll()
130         c.Check(slurm.didRenice, DeepEquals, [][]string{{uuids[1], "555001"}, {uuids[1], "10000"}})
131         // ...so we'll change the squeue response to reflect the
132         // updated priority+nice, and make sure sqc sees that...
133         slurm.queue = uuids[0] + " 0 4294000222 PENDING Resources\n" + uuids[1] + " 10000 4294545222 PENDING Resources\n"
134         sqc.check()
135
136         // Next attempt should leave nice alone because it's already
137         // at the 10K limit
138         sqc.reniceAll()
139         c.Check(slurm.didRenice, DeepEquals, [][]string{{uuids[1], "555001"}, {uuids[1], "10000"}})
140
141         // Back to normal if desired nice value falls below 10K
142         slurm.queue = uuids[0] + " 0 4294000222 PENDING Resources\n" + uuids[1] + " 10000 4294000111 PENDING Resources\n"
143         sqc.check()
144         sqc.reniceAll()
145         c.Check(slurm.didRenice, DeepEquals, [][]string{{uuids[1], "555001"}, {uuids[1], "10000"}, {uuids[1], "9890"}})
146
147         sqc.Stop()
148 }
149
150 // If the given UUID isn't in the slurm queue yet, SetPriority()
151 // should wait for it to appear on the very next poll, then give up.
152 func (s *SqueueSuite) TestSetPriorityBeforeQueued(c *C) {
153         uuidGood := "zzzzz-dz642-fake0fake0fake0"
154         uuidBad := "zzzzz-dz642-fake1fake1fake1"
155
156         slurm := &slurmFake{}
157         sqc := &SqueueChecker{
158                 Slurm:  slurm,
159                 Period: time.Hour,
160         }
161         sqc.startOnce.Do(sqc.start)
162         sqc.Stop()
163         sqc.check()
164
165         done := make(chan struct{})
166         go func() {
167                 sqc.SetPriority(uuidGood, 123)
168                 sqc.SetPriority(uuidBad, 345)
169                 close(done)
170         }()
171         c.Check(sqc.queue[uuidGood], IsNil)
172         c.Check(sqc.queue[uuidBad], IsNil)
173         timeout := time.NewTimer(time.Second)
174         defer timeout.Stop()
175         tick := time.NewTicker(time.Millisecond)
176         defer tick.Stop()
177         for {
178                 select {
179                 case <-tick.C:
180                         slurm.queue = uuidGood + " 0 12345 PENDING Resources\n"
181                         sqc.check()
182
183                         // Avoid immediately selecting this case again
184                         // on the next iteration if check() took
185                         // longer than one tick.
186                         select {
187                         case <-tick.C:
188                         default:
189                         }
190                 case <-timeout.C:
191                         c.Fatal("timed out")
192                 case <-done:
193                         c.Assert(sqc.queue[uuidGood], NotNil)
194                         c.Check(sqc.queue[uuidGood].wantPriority, Equals, int64(123))
195                         c.Check(sqc.queue[uuidBad], IsNil)
196                         return
197                 }
198         }
199 }
200
// callUntilReady calls fn on every tick of a one-millisecond ticker
// until a receive from done succeeds (e.g. done is closed), and then
// returns. The ticker is stopped on return.
func callUntilReady(fn func(), done <-chan struct{}) {
	ticker := time.NewTicker(time.Millisecond)
	defer ticker.Stop()
	for ready := false; !ready; {
		select {
		case <-ticker.C:
			fn()
		case <-done:
			ready = true
		}
	}
}