Merge branch '14860-fix-collection-versioning'
[arvados.git] / lib / dispatchcloud / worker / worker_test.go
1 // Copyright (C) The Arvados Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: AGPL-3.0
4
5 package worker
6
7 import (
8         "errors"
9         "io"
10         "time"
11
12         "git.curoverse.com/arvados.git/lib/cloud"
13         "git.curoverse.com/arvados.git/lib/dispatchcloud/test"
14         "git.curoverse.com/arvados.git/sdk/go/arvados"
15         check "gopkg.in/check.v1"
16 )
17
18 var _ = check.Suite(&WorkerSuite{})
19
20 type WorkerSuite struct{}
21
22 func (suite *WorkerSuite) TestProbeAndUpdate(c *check.C) {
23         logger := test.Logger()
24         bootTimeout := time.Minute
25         probeTimeout := time.Second
26
27         is, err := (&test.StubDriver{}).InstanceSet(nil, "", logger)
28         c.Assert(err, check.IsNil)
29         inst, err := is.Create(arvados.InstanceType{}, "", nil, nil)
30         c.Assert(err, check.IsNil)
31
32         type trialT struct {
33                 testCaseComment string // displayed in test output to help identify failure case
34                 age             time.Duration
35                 state           State
36                 running         int
37                 starting        int
38                 respBoot        stubResp // zero value is success
39                 respRun         stubResp // zero value is success + nothing running
40                 expectState     State
41                 expectRunning   int
42         }
43
44         errFail := errors.New("failed")
45         respFail := stubResp{"", "command failed\n", errFail}
46         respContainerRunning := stubResp{"zzzzz-dz642-abcdefghijklmno\n", "", nil}
47         for _, trial := range []trialT{
48                 {
49                         testCaseComment: "Unknown, probes fail",
50                         state:           StateUnknown,
51                         respBoot:        respFail,
52                         respRun:         respFail,
53                         expectState:     StateUnknown,
54                 },
55                 {
56                         testCaseComment: "Unknown, boot probe fails, but one container is running",
57                         state:           StateUnknown,
58                         respBoot:        respFail,
59                         respRun:         respContainerRunning,
60                         expectState:     StateUnknown,
61                         expectRunning:   1,
62                 },
63                 {
64                         testCaseComment: "Unknown, boot probe fails, previously running container has exited",
65                         state:           StateUnknown,
66                         running:         1,
67                         respBoot:        respFail,
68                         expectState:     StateUnknown,
69                         expectRunning:   0,
70                 },
71                 {
72                         testCaseComment: "Unknown, boot timeout exceeded, boot probe fails",
73                         state:           StateUnknown,
74                         age:             bootTimeout + time.Second,
75                         respBoot:        respFail,
76                         respRun:         respFail,
77                         expectState:     StateShutdown,
78                 },
79                 {
80                         testCaseComment: "Unknown, boot timeout exceeded, boot probe succeeds but crunch-run fails",
81                         state:           StateUnknown,
82                         age:             bootTimeout * 2,
83                         respRun:         respFail,
84                         expectState:     StateShutdown,
85                 },
86                 {
87                         testCaseComment: "Unknown, boot timeout exceeded, boot probe fails but crunch-run succeeds",
88                         state:           StateUnknown,
89                         age:             bootTimeout * 2,
90                         respBoot:        respFail,
91                         expectState:     StateShutdown,
92                 },
93                 {
94                         testCaseComment: "Unknown, boot timeout exceeded, boot probe fails but container is running",
95                         state:           StateUnknown,
96                         age:             bootTimeout * 2,
97                         respBoot:        respFail,
98                         respRun:         respContainerRunning,
99                         expectState:     StateUnknown,
100                         expectRunning:   1,
101                 },
102                 {
103                         testCaseComment: "Booting, boot probe fails, run probe fails",
104                         state:           StateBooting,
105                         respBoot:        respFail,
106                         respRun:         respFail,
107                         expectState:     StateBooting,
108                 },
109                 {
110                         testCaseComment: "Booting, boot probe fails, run probe succeeds (but isn't expected to be called)",
111                         state:           StateBooting,
112                         respBoot:        respFail,
113                         expectState:     StateBooting,
114                 },
115                 {
116                         testCaseComment: "Booting, boot probe succeeds, run probe fails",
117                         state:           StateBooting,
118                         respRun:         respFail,
119                         expectState:     StateBooting,
120                 },
121                 {
122                         testCaseComment: "Booting, boot probe succeeds, run probe succeeds",
123                         state:           StateBooting,
124                         expectState:     StateIdle,
125                 },
126                 {
127                         testCaseComment: "Booting, boot probe succeeds, run probe succeeds, container is running",
128                         state:           StateBooting,
129                         respRun:         respContainerRunning,
130                         expectState:     StateRunning,
131                         expectRunning:   1,
132                 },
133                 {
134                         testCaseComment: "Booting, boot timeout exceeded",
135                         state:           StateBooting,
136                         age:             bootTimeout * 2,
137                         respRun:         respFail,
138                         expectState:     StateShutdown,
139                 },
140                 {
141                         testCaseComment: "Idle, probe timeout exceeded, one container running",
142                         state:           StateIdle,
143                         age:             probeTimeout * 2,
144                         respRun:         respContainerRunning,
145                         expectState:     StateRunning,
146                         expectRunning:   1,
147                 },
148                 {
149                         testCaseComment: "Idle, probe timeout exceeded, one container running, probe fails",
150                         state:           StateIdle,
151                         age:             probeTimeout * 2,
152                         running:         1,
153                         respRun:         respFail,
154                         expectState:     StateShutdown,
155                         expectRunning:   1,
156                 },
157                 {
158                         testCaseComment: "Idle, probe timeout exceeded, nothing running, probe fails",
159                         state:           StateIdle,
160                         age:             probeTimeout * 2,
161                         respRun:         respFail,
162                         expectState:     StateShutdown,
163                 },
164                 {
165                         testCaseComment: "Running, one container still running",
166                         state:           StateRunning,
167                         running:         1,
168                         respRun:         respContainerRunning,
169                         expectState:     StateRunning,
170                         expectRunning:   1,
171                 },
172                 {
173                         testCaseComment: "Running, container has exited",
174                         state:           StateRunning,
175                         running:         1,
176                         expectState:     StateIdle,
177                         expectRunning:   0,
178                 },
179                 {
180                         testCaseComment: "Running, probe timeout exceeded, nothing running, new container being started",
181                         state:           StateRunning,
182                         age:             probeTimeout * 2,
183                         starting:        1,
184                         expectState:     StateRunning,
185                 },
186         } {
187                 c.Logf("------- %#v", trial)
188                 ctime := time.Now().Add(-trial.age)
189                 exr := stubExecutor{
190                         "bootprobe":         trial.respBoot,
191                         "crunch-run --list": trial.respRun,
192                 }
193                 wp := &Pool{
194                         newExecutor:      func(cloud.Instance) Executor { return exr },
195                         bootProbeCommand: "bootprobe",
196                         timeoutBooting:   bootTimeout,
197                         timeoutProbe:     probeTimeout,
198                         exited:           map[string]time.Time{},
199                 }
200                 wkr := &worker{
201                         logger:   logger,
202                         executor: exr,
203                         wp:       wp,
204                         mtx:      &wp.mtx,
205                         state:    trial.state,
206                         instance: inst,
207                         appeared: ctime,
208                         busy:     ctime,
209                         probed:   ctime,
210                         updated:  ctime,
211                 }
212                 if trial.running > 0 {
213                         wkr.running = map[string]struct{}{"zzzzz-dz642-abcdefghijklmno": struct{}{}}
214                 }
215                 if trial.starting > 0 {
216                         wkr.starting = map[string]struct{}{"zzzzz-dz642-abcdefghijklmno": struct{}{}}
217                 }
218                 wkr.probeAndUpdate()
219                 c.Check(wkr.state, check.Equals, trial.expectState)
220                 c.Check(len(wkr.running), check.Equals, trial.expectRunning)
221         }
222 }
223
type (
	// stubResp is a canned command outcome: the stdout and stderr
	// text and the error that stubExecutor.Execute hands back.
	stubResp struct {
		stdout, stderr string
		err            error
	}

	// stubExecutor maps a command string to the stubResp that
	// Execute returns for it.
	stubExecutor map[string]stubResp
)
230
231 func (se stubExecutor) SetTarget(cloud.ExecutorTarget) {}
232 func (se stubExecutor) Close()                         {}
233 func (se stubExecutor) Execute(env map[string]string, cmd string, stdin io.Reader) (stdout, stderr []byte, err error) {
234         resp, ok := se[cmd]
235         if !ok {
236                 return nil, []byte("command not found\n"), errors.New("command not found")
237         }
238         return []byte(resp.stdout), []byte(resp.stderr), resp.err
239 }