14324: Use logrus in Azure driver. Fix Sirupsen->sirupsen in imports
[arvados.git] / lib / dispatchcloud / scheduler / run_queue_test.go
1 // Copyright (C) The Arvados Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: AGPL-3.0
4
5 package scheduler
6
7 import (
8         "errors"
9         "time"
10
11         "git.curoverse.com/arvados.git/lib/dispatchcloud/test"
12         "git.curoverse.com/arvados.git/lib/dispatchcloud/worker"
13         "git.curoverse.com/arvados.git/sdk/go/arvados"
14         "github.com/sirupsen/logrus"
15         check "gopkg.in/check.v1"
16 )
17
18 var (
19         logger = logrus.StandardLogger()
20
21         // arbitrary example container UUIDs
22         uuids = func() (r []string) {
23                 for i := 0; i < 16; i++ {
24                         r = append(r, test.ContainerUUID(i))
25                 }
26                 return
27         }()
28 )
29
// stubQuotaError decorates an embedded error with an IsQuotaError
// method that always answers true. stubPool.Create returns it when no
// more instances may be created.
type stubQuotaError struct{ error }

// IsQuotaError always reports true.
func (stubQuotaError) IsQuotaError() bool {
	return true
}
35
36 type stubPool struct {
37         notify    <-chan struct{}
38         unalloc   map[arvados.InstanceType]int // idle+booting+unknown
39         idle      map[arvados.InstanceType]int
40         running   map[string]time.Time
41         atQuota   bool
42         canCreate int
43         creates   []arvados.InstanceType
44         starts    []string
45         shutdowns int
46 }
47
48 func (p *stubPool) AtQuota() bool                 { return p.atQuota }
49 func (p *stubPool) Subscribe() <-chan struct{}    { return p.notify }
50 func (p *stubPool) Unsubscribe(<-chan struct{})   {}
51 func (p *stubPool) Running() map[string]time.Time { return p.running }
52 func (p *stubPool) Unallocated() map[arvados.InstanceType]int {
53         r := map[arvados.InstanceType]int{}
54         for it, n := range p.unalloc {
55                 r[it] = n
56         }
57         return r
58 }
59 func (p *stubPool) Create(it arvados.InstanceType) error {
60         p.creates = append(p.creates, it)
61         if p.canCreate < 1 {
62                 return stubQuotaError{errors.New("quota")}
63         }
64         p.canCreate--
65         p.unalloc[it]++
66         return nil
67 }
68 func (p *stubPool) KillContainer(uuid string) {
69         p.running[uuid] = time.Now()
70 }
71 func (p *stubPool) Shutdown(arvados.InstanceType) bool {
72         p.shutdowns++
73         return false
74 }
75 func (p *stubPool) CountWorkers() map[worker.State]int {
76         return map[worker.State]int{
77                 worker.StateBooting: len(p.unalloc) - len(p.idle),
78                 worker.StateIdle:    len(p.idle),
79                 worker.StateRunning: len(p.running),
80         }
81 }
82 func (p *stubPool) StartContainer(it arvados.InstanceType, ctr arvados.Container) bool {
83         p.starts = append(p.starts, ctr.UUID)
84         if p.idle[it] == 0 {
85                 return false
86         }
87         p.idle[it]--
88         p.unalloc[it]--
89         p.running[ctr.UUID] = time.Time{}
90         return true
91 }
92
93 var _ = check.Suite(&SchedulerSuite{})
94
95 type SchedulerSuite struct{}
96
97 // Assign priority=4 container to idle node. Create a new instance for
98 // the priority=3 container. Don't try to start any priority<3
99 // containers because priority=3 container didn't start
100 // immediately. Don't try to create any other nodes after the failed
101 // create.
102 func (*SchedulerSuite) TestUseIdleWorkers(c *check.C) {
103         queue := test.Queue{
104                 ChooseType: func(ctr *arvados.Container) (arvados.InstanceType, error) {
105                         return test.InstanceType(ctr.RuntimeConstraints.VCPUs), nil
106                 },
107                 Containers: []arvados.Container{
108                         {
109                                 UUID:     test.ContainerUUID(1),
110                                 Priority: 1,
111                                 State:    arvados.ContainerStateLocked,
112                                 RuntimeConstraints: arvados.RuntimeConstraints{
113                                         VCPUs: 1,
114                                         RAM:   1 << 30,
115                                 },
116                         },
117                         {
118                                 UUID:     test.ContainerUUID(2),
119                                 Priority: 2,
120                                 State:    arvados.ContainerStateLocked,
121                                 RuntimeConstraints: arvados.RuntimeConstraints{
122                                         VCPUs: 1,
123                                         RAM:   1 << 30,
124                                 },
125                         },
126                         {
127                                 UUID:     test.ContainerUUID(3),
128                                 Priority: 3,
129                                 State:    arvados.ContainerStateLocked,
130                                 RuntimeConstraints: arvados.RuntimeConstraints{
131                                         VCPUs: 1,
132                                         RAM:   1 << 30,
133                                 },
134                         },
135                         {
136                                 UUID:     test.ContainerUUID(4),
137                                 Priority: 4,
138                                 State:    arvados.ContainerStateLocked,
139                                 RuntimeConstraints: arvados.RuntimeConstraints{
140                                         VCPUs: 1,
141                                         RAM:   1 << 30,
142                                 },
143                         },
144                 },
145         }
146         queue.Update()
147         pool := stubPool{
148                 unalloc: map[arvados.InstanceType]int{
149                         test.InstanceType(1): 1,
150                         test.InstanceType(2): 2,
151                 },
152                 idle: map[arvados.InstanceType]int{
153                         test.InstanceType(1): 1,
154                         test.InstanceType(2): 2,
155                 },
156                 running:   map[string]time.Time{},
157                 canCreate: 0,
158         }
159         New(logger, &queue, &pool, time.Millisecond, time.Millisecond).runQueue()
160         c.Check(pool.creates, check.DeepEquals, []arvados.InstanceType{test.InstanceType(1)})
161         c.Check(pool.starts, check.DeepEquals, []string{test.ContainerUUID(4)})
162         c.Check(pool.running, check.HasLen, 1)
163         for uuid := range pool.running {
164                 c.Check(uuid, check.Equals, uuids[4])
165         }
166 }
167
168 // If Create() fails, shutdown some nodes, and don't call Create()
169 // again.  Don't call Create() at all if AtQuota() is true.
170 func (*SchedulerSuite) TestShutdownAtQuota(c *check.C) {
171         for quota := 0; quota < 2; quota++ {
172                 c.Logf("quota=%d", quota)
173                 shouldCreate := []arvados.InstanceType{}
174                 for i := 0; i < quota; i++ {
175                         shouldCreate = append(shouldCreate, test.InstanceType(3))
176                 }
177                 queue := test.Queue{
178                         ChooseType: func(ctr *arvados.Container) (arvados.InstanceType, error) {
179                                 return test.InstanceType(ctr.RuntimeConstraints.VCPUs), nil
180                         },
181                         Containers: []arvados.Container{
182                                 {
183                                         UUID:     test.ContainerUUID(2),
184                                         Priority: 2,
185                                         State:    arvados.ContainerStateLocked,
186                                         RuntimeConstraints: arvados.RuntimeConstraints{
187                                                 VCPUs: 2,
188                                                 RAM:   2 << 30,
189                                         },
190                                 },
191                                 {
192                                         UUID:     test.ContainerUUID(3),
193                                         Priority: 3,
194                                         State:    arvados.ContainerStateLocked,
195                                         RuntimeConstraints: arvados.RuntimeConstraints{
196                                                 VCPUs: 3,
197                                                 RAM:   3 << 30,
198                                         },
199                                 },
200                         },
201                 }
202                 queue.Update()
203                 pool := stubPool{
204                         atQuota: quota == 0,
205                         unalloc: map[arvados.InstanceType]int{
206                                 test.InstanceType(2): 2,
207                         },
208                         idle: map[arvados.InstanceType]int{
209                                 test.InstanceType(2): 2,
210                         },
211                         running:   map[string]time.Time{},
212                         creates:   []arvados.InstanceType{},
213                         starts:    []string{},
214                         canCreate: 0,
215                 }
216                 New(logger, &queue, &pool, time.Millisecond, time.Millisecond).runQueue()
217                 c.Check(pool.creates, check.DeepEquals, shouldCreate)
218                 c.Check(pool.starts, check.DeepEquals, []string{})
219                 c.Check(pool.shutdowns, check.Not(check.Equals), 0)
220         }
221 }
222
223 // Start lower-priority containers while waiting for new/existing
224 // workers to come up for higher-priority containers.
225 func (*SchedulerSuite) TestStartWhileCreating(c *check.C) {
226         pool := stubPool{
227                 unalloc: map[arvados.InstanceType]int{
228                         test.InstanceType(1): 2,
229                         test.InstanceType(2): 2,
230                 },
231                 idle: map[arvados.InstanceType]int{
232                         test.InstanceType(1): 1,
233                         test.InstanceType(2): 1,
234                 },
235                 running:   map[string]time.Time{},
236                 canCreate: 4,
237         }
238         queue := test.Queue{
239                 ChooseType: func(ctr *arvados.Container) (arvados.InstanceType, error) {
240                         return test.InstanceType(ctr.RuntimeConstraints.VCPUs), nil
241                 },
242                 Containers: []arvados.Container{
243                         {
244                                 // create a new worker
245                                 UUID:     test.ContainerUUID(1),
246                                 Priority: 1,
247                                 State:    arvados.ContainerStateLocked,
248                                 RuntimeConstraints: arvados.RuntimeConstraints{
249                                         VCPUs: 1,
250                                         RAM:   1 << 30,
251                                 },
252                         },
253                         {
254                                 // tentatively map to unalloc worker
255                                 UUID:     test.ContainerUUID(2),
256                                 Priority: 2,
257                                 State:    arvados.ContainerStateLocked,
258                                 RuntimeConstraints: arvados.RuntimeConstraints{
259                                         VCPUs: 1,
260                                         RAM:   1 << 30,
261                                 },
262                         },
263                         {
264                                 // start now on idle worker
265                                 UUID:     test.ContainerUUID(3),
266                                 Priority: 3,
267                                 State:    arvados.ContainerStateLocked,
268                                 RuntimeConstraints: arvados.RuntimeConstraints{
269                                         VCPUs: 1,
270                                         RAM:   1 << 30,
271                                 },
272                         },
273                         {
274                                 // create a new worker
275                                 UUID:     test.ContainerUUID(4),
276                                 Priority: 4,
277                                 State:    arvados.ContainerStateLocked,
278                                 RuntimeConstraints: arvados.RuntimeConstraints{
279                                         VCPUs: 2,
280                                         RAM:   2 << 30,
281                                 },
282                         },
283                         {
284                                 // tentatively map to unalloc worker
285                                 UUID:     test.ContainerUUID(5),
286                                 Priority: 5,
287                                 State:    arvados.ContainerStateLocked,
288                                 RuntimeConstraints: arvados.RuntimeConstraints{
289                                         VCPUs: 2,
290                                         RAM:   2 << 30,
291                                 },
292                         },
293                         {
294                                 // start now on idle worker
295                                 UUID:     test.ContainerUUID(6),
296                                 Priority: 6,
297                                 State:    arvados.ContainerStateLocked,
298                                 RuntimeConstraints: arvados.RuntimeConstraints{
299                                         VCPUs: 2,
300                                         RAM:   2 << 30,
301                                 },
302                         },
303                 },
304         }
305         queue.Update()
306         New(logger, &queue, &pool, time.Millisecond, time.Millisecond).runQueue()
307         c.Check(pool.creates, check.DeepEquals, []arvados.InstanceType{test.InstanceType(2), test.InstanceType(1)})
308         c.Check(pool.starts, check.DeepEquals, []string{uuids[6], uuids[5], uuids[3], uuids[2]})
309         running := map[string]bool{}
310         for uuid, t := range pool.running {
311                 if t.IsZero() {
312                         running[uuid] = false
313                 } else {
314                         running[uuid] = true
315                 }
316         }
317         c.Check(running, check.DeepEquals, map[string]bool{uuids[3]: false, uuids[6]: false})
318 }