16811: Add a test that system users/groups can't be deleted.
[arvados.git] / lib / dispatchcloud / scheduler / run_queue_test.go
1 // Copyright (C) The Arvados Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: AGPL-3.0
4
5 package scheduler
6
7 import (
8         "context"
9         "sync"
10         "time"
11
12         "git.arvados.org/arvados.git/lib/dispatchcloud/test"
13         "git.arvados.org/arvados.git/lib/dispatchcloud/worker"
14         "git.arvados.org/arvados.git/sdk/go/arvados"
15         "git.arvados.org/arvados.git/sdk/go/ctxlog"
16         check "gopkg.in/check.v1"
17 )
18
19 var (
20         // arbitrary example container UUIDs
21         uuids = func() (r []string) {
22                 for i := 0; i < 16; i++ {
23                         r = append(r, test.ContainerUUID(i))
24                 }
25                 return
26         }()
27 )
28
// stubQuotaError wraps an error value and additionally identifies
// itself as a quota error through IsQuotaError.
type stubQuotaError struct {
	error
}

// IsQuotaError unconditionally reports true.
func (stubQuotaError) IsQuotaError() bool {
	return true
}
34
35 type stubPool struct {
36         notify    <-chan struct{}
37         unalloc   map[arvados.InstanceType]int // idle+booting+unknown
38         idle      map[arvados.InstanceType]int
39         unknown   map[arvados.InstanceType]int
40         running   map[string]time.Time
41         quota     int
42         canCreate int
43         creates   []arvados.InstanceType
44         starts    []string
45         shutdowns int
46         sync.Mutex
47 }
48
49 func (p *stubPool) AtQuota() bool {
50         p.Lock()
51         defer p.Unlock()
52         return len(p.unalloc)+len(p.running)+len(p.unknown) >= p.quota
53 }
54 func (p *stubPool) Subscribe() <-chan struct{}  { return p.notify }
55 func (p *stubPool) Unsubscribe(<-chan struct{}) {}
56 func (p *stubPool) Running() map[string]time.Time {
57         p.Lock()
58         defer p.Unlock()
59         r := map[string]time.Time{}
60         for k, v := range p.running {
61                 r[k] = v
62         }
63         return r
64 }
65 func (p *stubPool) Unallocated() map[arvados.InstanceType]int {
66         p.Lock()
67         defer p.Unlock()
68         r := map[arvados.InstanceType]int{}
69         for it, n := range p.unalloc {
70                 r[it] = n - p.unknown[it]
71         }
72         return r
73 }
74 func (p *stubPool) Create(it arvados.InstanceType) bool {
75         p.Lock()
76         defer p.Unlock()
77         p.creates = append(p.creates, it)
78         if p.canCreate < 1 {
79                 return false
80         }
81         p.canCreate--
82         p.unalloc[it]++
83         return true
84 }
85 func (p *stubPool) ForgetContainer(uuid string) {
86 }
87 func (p *stubPool) KillContainer(uuid, reason string) bool {
88         p.Lock()
89         defer p.Unlock()
90         defer delete(p.running, uuid)
91         t, ok := p.running[uuid]
92         return ok && t.IsZero()
93 }
94 func (p *stubPool) Shutdown(arvados.InstanceType) bool {
95         p.shutdowns++
96         return false
97 }
98 func (p *stubPool) CountWorkers() map[worker.State]int {
99         p.Lock()
100         defer p.Unlock()
101         return map[worker.State]int{
102                 worker.StateBooting: len(p.unalloc) - len(p.idle),
103                 worker.StateIdle:    len(p.idle),
104                 worker.StateRunning: len(p.running),
105                 worker.StateUnknown: len(p.unknown),
106         }
107 }
108 func (p *stubPool) StartContainer(it arvados.InstanceType, ctr arvados.Container) bool {
109         p.Lock()
110         defer p.Unlock()
111         p.starts = append(p.starts, ctr.UUID)
112         if p.idle[it] == 0 {
113                 return false
114         }
115         p.idle[it]--
116         p.unalloc[it]--
117         p.running[ctr.UUID] = time.Time{}
118         return true
119 }
120
121 func chooseType(ctr *arvados.Container) (arvados.InstanceType, error) {
122         return test.InstanceType(ctr.RuntimeConstraints.VCPUs), nil
123 }
124
125 var _ = check.Suite(&SchedulerSuite{})
126
127 type SchedulerSuite struct{}
128
129 // Assign priority=4 container to idle node. Create new instances for
130 // the priority=3, 2, 1 containers.
131 func (*SchedulerSuite) TestUseIdleWorkers(c *check.C) {
132         ctx := ctxlog.Context(context.Background(), ctxlog.TestLogger(c))
133         queue := test.Queue{
134                 ChooseType: chooseType,
135                 Containers: []arvados.Container{
136                         {
137                                 UUID:     test.ContainerUUID(1),
138                                 Priority: 1,
139                                 State:    arvados.ContainerStateLocked,
140                                 RuntimeConstraints: arvados.RuntimeConstraints{
141                                         VCPUs: 1,
142                                         RAM:   1 << 30,
143                                 },
144                         },
145                         {
146                                 UUID:     test.ContainerUUID(2),
147                                 Priority: 2,
148                                 State:    arvados.ContainerStateLocked,
149                                 RuntimeConstraints: arvados.RuntimeConstraints{
150                                         VCPUs: 1,
151                                         RAM:   1 << 30,
152                                 },
153                         },
154                         {
155                                 UUID:     test.ContainerUUID(3),
156                                 Priority: 3,
157                                 State:    arvados.ContainerStateLocked,
158                                 RuntimeConstraints: arvados.RuntimeConstraints{
159                                         VCPUs: 1,
160                                         RAM:   1 << 30,
161                                 },
162                         },
163                         {
164                                 UUID:     test.ContainerUUID(4),
165                                 Priority: 4,
166                                 State:    arvados.ContainerStateLocked,
167                                 RuntimeConstraints: arvados.RuntimeConstraints{
168                                         VCPUs: 1,
169                                         RAM:   1 << 30,
170                                 },
171                         },
172                 },
173         }
174         queue.Update()
175         pool := stubPool{
176                 quota: 1000,
177                 unalloc: map[arvados.InstanceType]int{
178                         test.InstanceType(1): 1,
179                         test.InstanceType(2): 2,
180                 },
181                 idle: map[arvados.InstanceType]int{
182                         test.InstanceType(1): 1,
183                         test.InstanceType(2): 2,
184                 },
185                 running:   map[string]time.Time{},
186                 canCreate: 0,
187         }
188         New(ctx, &queue, &pool, time.Millisecond, time.Millisecond).runQueue()
189         c.Check(pool.creates, check.DeepEquals, []arvados.InstanceType{test.InstanceType(1), test.InstanceType(1), test.InstanceType(1)})
190         c.Check(pool.starts, check.DeepEquals, []string{test.ContainerUUID(4)})
191         c.Check(pool.running, check.HasLen, 1)
192         for uuid := range pool.running {
193                 c.Check(uuid, check.Equals, uuids[4])
194         }
195 }
196
197 // If pool.AtQuota() is true, shutdown some unalloc nodes, and don't
198 // call Create().
199 func (*SchedulerSuite) TestShutdownAtQuota(c *check.C) {
200         ctx := ctxlog.Context(context.Background(), ctxlog.TestLogger(c))
201         for quota := 1; quota < 3; quota++ {
202                 c.Logf("quota=%d", quota)
203                 shouldCreate := []arvados.InstanceType{}
204                 for i := 1; i < quota; i++ {
205                         shouldCreate = append(shouldCreate, test.InstanceType(3))
206                 }
207                 queue := test.Queue{
208                         ChooseType: chooseType,
209                         Containers: []arvados.Container{
210                                 {
211                                         UUID:     test.ContainerUUID(2),
212                                         Priority: 2,
213                                         State:    arvados.ContainerStateLocked,
214                                         RuntimeConstraints: arvados.RuntimeConstraints{
215                                                 VCPUs: 2,
216                                                 RAM:   2 << 30,
217                                         },
218                                 },
219                                 {
220                                         UUID:     test.ContainerUUID(3),
221                                         Priority: 3,
222                                         State:    arvados.ContainerStateLocked,
223                                         RuntimeConstraints: arvados.RuntimeConstraints{
224                                                 VCPUs: 3,
225                                                 RAM:   3 << 30,
226                                         },
227                                 },
228                         },
229                 }
230                 queue.Update()
231                 pool := stubPool{
232                         quota: quota,
233                         unalloc: map[arvados.InstanceType]int{
234                                 test.InstanceType(2): 2,
235                         },
236                         idle: map[arvados.InstanceType]int{
237                                 test.InstanceType(2): 2,
238                         },
239                         running:   map[string]time.Time{},
240                         creates:   []arvados.InstanceType{},
241                         starts:    []string{},
242                         canCreate: 0,
243                 }
244                 New(ctx, &queue, &pool, time.Millisecond, time.Millisecond).runQueue()
245                 c.Check(pool.creates, check.DeepEquals, shouldCreate)
246                 if len(shouldCreate) == 0 {
247                         c.Check(pool.starts, check.DeepEquals, []string{})
248                         c.Check(pool.shutdowns, check.Not(check.Equals), 0)
249                 } else {
250                         c.Check(pool.starts, check.DeepEquals, []string{test.ContainerUUID(2)})
251                         c.Check(pool.shutdowns, check.Equals, 0)
252                 }
253         }
254 }
255
256 // Start lower-priority containers while waiting for new/existing
257 // workers to come up for higher-priority containers.
258 func (*SchedulerSuite) TestStartWhileCreating(c *check.C) {
259         ctx := ctxlog.Context(context.Background(), ctxlog.TestLogger(c))
260         pool := stubPool{
261                 quota: 1000,
262                 unalloc: map[arvados.InstanceType]int{
263                         test.InstanceType(1): 2,
264                         test.InstanceType(2): 2,
265                 },
266                 idle: map[arvados.InstanceType]int{
267                         test.InstanceType(1): 1,
268                         test.InstanceType(2): 1,
269                 },
270                 running:   map[string]time.Time{},
271                 canCreate: 4,
272         }
273         queue := test.Queue{
274                 ChooseType: chooseType,
275                 Containers: []arvados.Container{
276                         {
277                                 // create a new worker
278                                 UUID:     test.ContainerUUID(1),
279                                 Priority: 1,
280                                 State:    arvados.ContainerStateLocked,
281                                 RuntimeConstraints: arvados.RuntimeConstraints{
282                                         VCPUs: 1,
283                                         RAM:   1 << 30,
284                                 },
285                         },
286                         {
287                                 // tentatively map to unalloc worker
288                                 UUID:     test.ContainerUUID(2),
289                                 Priority: 2,
290                                 State:    arvados.ContainerStateLocked,
291                                 RuntimeConstraints: arvados.RuntimeConstraints{
292                                         VCPUs: 1,
293                                         RAM:   1 << 30,
294                                 },
295                         },
296                         {
297                                 // start now on idle worker
298                                 UUID:     test.ContainerUUID(3),
299                                 Priority: 3,
300                                 State:    arvados.ContainerStateLocked,
301                                 RuntimeConstraints: arvados.RuntimeConstraints{
302                                         VCPUs: 1,
303                                         RAM:   1 << 30,
304                                 },
305                         },
306                         {
307                                 // create a new worker
308                                 UUID:     test.ContainerUUID(4),
309                                 Priority: 4,
310                                 State:    arvados.ContainerStateLocked,
311                                 RuntimeConstraints: arvados.RuntimeConstraints{
312                                         VCPUs: 2,
313                                         RAM:   2 << 30,
314                                 },
315                         },
316                         {
317                                 // tentatively map to unalloc worker
318                                 UUID:     test.ContainerUUID(5),
319                                 Priority: 5,
320                                 State:    arvados.ContainerStateLocked,
321                                 RuntimeConstraints: arvados.RuntimeConstraints{
322                                         VCPUs: 2,
323                                         RAM:   2 << 30,
324                                 },
325                         },
326                         {
327                                 // start now on idle worker
328                                 UUID:     test.ContainerUUID(6),
329                                 Priority: 6,
330                                 State:    arvados.ContainerStateLocked,
331                                 RuntimeConstraints: arvados.RuntimeConstraints{
332                                         VCPUs: 2,
333                                         RAM:   2 << 30,
334                                 },
335                         },
336                 },
337         }
338         queue.Update()
339         New(ctx, &queue, &pool, time.Millisecond, time.Millisecond).runQueue()
340         c.Check(pool.creates, check.DeepEquals, []arvados.InstanceType{test.InstanceType(2), test.InstanceType(1)})
341         c.Check(pool.starts, check.DeepEquals, []string{uuids[6], uuids[5], uuids[3], uuids[2]})
342         running := map[string]bool{}
343         for uuid, t := range pool.running {
344                 if t.IsZero() {
345                         running[uuid] = false
346                 } else {
347                         running[uuid] = true
348                 }
349         }
350         c.Check(running, check.DeepEquals, map[string]bool{uuids[3]: false, uuids[6]: false})
351 }
352
353 func (*SchedulerSuite) TestKillNonexistentContainer(c *check.C) {
354         ctx := ctxlog.Context(context.Background(), ctxlog.TestLogger(c))
355         pool := stubPool{
356                 quota: 1000,
357                 unalloc: map[arvados.InstanceType]int{
358                         test.InstanceType(2): 0,
359                 },
360                 idle: map[arvados.InstanceType]int{
361                         test.InstanceType(2): 0,
362                 },
363                 running: map[string]time.Time{
364                         test.ContainerUUID(2): {},
365                 },
366         }
367         queue := test.Queue{
368                 ChooseType: chooseType,
369                 Containers: []arvados.Container{
370                         {
371                                 // create a new worker
372                                 UUID:     test.ContainerUUID(1),
373                                 Priority: 1,
374                                 State:    arvados.ContainerStateLocked,
375                                 RuntimeConstraints: arvados.RuntimeConstraints{
376                                         VCPUs: 1,
377                                         RAM:   1 << 30,
378                                 },
379                         },
380                 },
381         }
382         queue.Update()
383         sch := New(ctx, &queue, &pool, time.Millisecond, time.Millisecond)
384         c.Check(pool.running, check.HasLen, 1)
385         sch.sync()
386         for deadline := time.Now().Add(time.Second); len(pool.Running()) > 0 && time.Now().Before(deadline); time.Sleep(time.Millisecond) {
387         }
388         c.Check(pool.Running(), check.HasLen, 0)
389 }