17339: Merge branch 'main' into 17339-s3aws-driver-memory-footprint
[arvados.git] / lib / dispatchcloud / scheduler / sync_test.go
1 // Copyright (C) The Arvados Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: AGPL-3.0
4
5 package scheduler
6
7 import (
8         "context"
9         "time"
10
11         "git.arvados.org/arvados.git/lib/dispatchcloud/test"
12         "git.arvados.org/arvados.git/sdk/go/arvados"
13         "git.arvados.org/arvados.git/sdk/go/ctxlog"
14         check "gopkg.in/check.v1"
15 )
16
17 // Ensure the scheduler expunges containers from the queue when they
18 // are no longer relevant (completed and not running, queued with
19 // priority 0, etc).
20 func (*SchedulerSuite) TestForgetIrrelevantContainers(c *check.C) {
21         ctx := ctxlog.Context(context.Background(), ctxlog.TestLogger(c))
22         pool := stubPool{}
23         queue := test.Queue{
24                 ChooseType: chooseType,
25                 Containers: []arvados.Container{
26                         {
27                                 UUID:     test.ContainerUUID(1),
28                                 Priority: 0,
29                                 State:    arvados.ContainerStateQueued,
30                                 RuntimeConstraints: arvados.RuntimeConstraints{
31                                         VCPUs: 1,
32                                         RAM:   1 << 30,
33                                 },
34                         },
35                         {
36                                 UUID:     test.ContainerUUID(2),
37                                 Priority: 12345,
38                                 State:    arvados.ContainerStateComplete,
39                                 RuntimeConstraints: arvados.RuntimeConstraints{
40                                         VCPUs: 1,
41                                         RAM:   1 << 30,
42                                 },
43                         },
44                 },
45         }
46         queue.Update()
47
48         ents, _ := queue.Entries()
49         c.Check(ents, check.HasLen, 1)
50
51         sch := New(ctx, &queue, &pool, nil, time.Millisecond, time.Millisecond)
52         sch.sync()
53
54         ents, _ = queue.Entries()
55         c.Check(ents, check.HasLen, 0)
56 }
57
58 func (*SchedulerSuite) TestCancelOrphanedContainers(c *check.C) {
59         ctx := ctxlog.Context(context.Background(), ctxlog.TestLogger(c))
60         pool := stubPool{
61                 unalloc: map[arvados.InstanceType]int{test.InstanceType(1): 1},
62                 unknown: map[arvados.InstanceType]int{test.InstanceType(1): 1},
63         }
64         queue := test.Queue{
65                 ChooseType: chooseType,
66                 Containers: []arvados.Container{
67                         {
68                                 UUID:     test.ContainerUUID(1),
69                                 Priority: 0,
70                                 State:    arvados.ContainerStateRunning,
71                                 RuntimeConstraints: arvados.RuntimeConstraints{
72                                         VCPUs: 1,
73                                         RAM:   1 << 30,
74                                 },
75                         },
76                 },
77         }
78         queue.Update()
79
80         ents, _ := queue.Entries()
81         c.Check(ents, check.HasLen, 1)
82
83         sch := New(ctx, &queue, &pool, nil, time.Millisecond, time.Millisecond)
84
85         // Sync shouldn't cancel the container because it might be
86         // running on the VM with state=="unknown".
87         //
88         // (Cancel+forget happens asynchronously and requires multiple
89         // sync() calls, so even after 10x sync-and-sleep iterations,
90         // we aren't 100% confident that sync isn't trying to
91         // cancel. But in the test environment, the goroutines started
92         // by sync() access stubs and therefore run quickly, so it
93         // works fine in practice. We accept that if the code is
94         // broken, the test will still pass occasionally.)
95         for i := 0; i < 10; i++ {
96                 sch.sync()
97                 time.Sleep(time.Millisecond)
98         }
99         ents, _ = queue.Entries()
100         c.Check(ents, check.HasLen, 1)
101         c.Check(ents[test.ContainerUUID(1)].Container.State, check.Equals, arvados.ContainerStateRunning)
102
103         // Sync should cancel & forget the container when the
104         // "unknown" node goes away.
105         //
106         // (As above, cancel+forget is async and requires multiple
107         // sync() calls, but stubs are fast so in practice this takes
108         // much less than 1s to complete.)
109         pool.unknown = nil
110         for deadline := time.Now().Add(time.Second); ; time.Sleep(time.Millisecond) {
111                 sch.sync()
112                 ents, _ = queue.Entries()
113                 if len(ents) == 0 || time.Now().After(deadline) {
114                         break
115                 }
116         }
117         c.Check(ents, check.HasLen, 0)
118 }