Bump loofah from 2.2.3 to 2.3.1 in /apps/workbench
[arvados.git] / lib / dispatchcloud / worker / worker_test.go
1 // Copyright (C) The Arvados Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: AGPL-3.0
4
5 package worker
6
7 import (
8         "errors"
9         "io"
10         "time"
11
12         "git.curoverse.com/arvados.git/lib/cloud"
13         "git.curoverse.com/arvados.git/lib/dispatchcloud/test"
14         "git.curoverse.com/arvados.git/sdk/go/arvados"
15         "git.curoverse.com/arvados.git/sdk/go/ctxlog"
16         check "gopkg.in/check.v1"
17 )
18
19 var _ = check.Suite(&WorkerSuite{})
20
21 type WorkerSuite struct{}
22
23 func (suite *WorkerSuite) TestProbeAndUpdate(c *check.C) {
24         logger := ctxlog.TestLogger(c)
25         bootTimeout := time.Minute
26         probeTimeout := time.Second
27
28         ac := arvados.NewClientFromEnv()
29         is, err := (&test.StubDriver{}).InstanceSet(nil, "test-instance-set-id", nil, logger)
30         c.Assert(err, check.IsNil)
31         inst, err := is.Create(arvados.InstanceType{}, "", nil, "echo InitCommand", nil)
32         c.Assert(err, check.IsNil)
33
34         type trialT struct {
35                 testCaseComment string // displayed in test output to help identify failure case
36                 age             time.Duration
37                 state           State
38                 running         int
39                 starting        int
40                 respBoot        stubResp // zero value is success
41                 respRun         stubResp // zero value is success + nothing running
42                 expectState     State
43                 expectRunning   int
44         }
45
46         errFail := errors.New("failed")
47         respFail := stubResp{"", "command failed\n", errFail}
48         respContainerRunning := stubResp{"zzzzz-dz642-abcdefghijklmno\n", "", nil}
49         for _, trial := range []trialT{
50                 {
51                         testCaseComment: "Unknown, probes fail",
52                         state:           StateUnknown,
53                         respBoot:        respFail,
54                         respRun:         respFail,
55                         expectState:     StateUnknown,
56                 },
57                 {
58                         testCaseComment: "Unknown, boot probe fails, but one container is running",
59                         state:           StateUnknown,
60                         respBoot:        respFail,
61                         respRun:         respContainerRunning,
62                         expectState:     StateUnknown,
63                         expectRunning:   1,
64                 },
65                 {
66                         testCaseComment: "Unknown, boot probe fails, previously running container has exited",
67                         state:           StateUnknown,
68                         running:         1,
69                         respBoot:        respFail,
70                         expectState:     StateUnknown,
71                         expectRunning:   0,
72                 },
73                 {
74                         testCaseComment: "Unknown, boot timeout exceeded, boot probe fails",
75                         state:           StateUnknown,
76                         age:             bootTimeout + time.Second,
77                         respBoot:        respFail,
78                         respRun:         respFail,
79                         expectState:     StateShutdown,
80                 },
81                 {
82                         testCaseComment: "Unknown, boot timeout exceeded, boot probe succeeds but crunch-run fails",
83                         state:           StateUnknown,
84                         age:             bootTimeout * 2,
85                         respRun:         respFail,
86                         expectState:     StateShutdown,
87                 },
88                 {
89                         testCaseComment: "Unknown, boot timeout exceeded, boot probe fails but crunch-run succeeds",
90                         state:           StateUnknown,
91                         age:             bootTimeout * 2,
92                         respBoot:        respFail,
93                         expectState:     StateShutdown,
94                 },
95                 {
96                         testCaseComment: "Unknown, boot timeout exceeded, boot probe fails but container is running",
97                         state:           StateUnknown,
98                         age:             bootTimeout * 2,
99                         respBoot:        respFail,
100                         respRun:         respContainerRunning,
101                         expectState:     StateUnknown,
102                         expectRunning:   1,
103                 },
104                 {
105                         testCaseComment: "Booting, boot probe fails, run probe fails",
106                         state:           StateBooting,
107                         respBoot:        respFail,
108                         respRun:         respFail,
109                         expectState:     StateBooting,
110                 },
111                 {
112                         testCaseComment: "Booting, boot probe fails, run probe succeeds (but isn't expected to be called)",
113                         state:           StateBooting,
114                         respBoot:        respFail,
115                         expectState:     StateBooting,
116                 },
117                 {
118                         testCaseComment: "Booting, boot probe succeeds, run probe fails",
119                         state:           StateBooting,
120                         respRun:         respFail,
121                         expectState:     StateBooting,
122                 },
123                 {
124                         testCaseComment: "Booting, boot probe succeeds, run probe succeeds",
125                         state:           StateBooting,
126                         expectState:     StateIdle,
127                 },
128                 {
129                         testCaseComment: "Booting, boot probe succeeds, run probe succeeds, container is running",
130                         state:           StateBooting,
131                         respRun:         respContainerRunning,
132                         expectState:     StateRunning,
133                         expectRunning:   1,
134                 },
135                 {
136                         testCaseComment: "Booting, boot timeout exceeded",
137                         state:           StateBooting,
138                         age:             bootTimeout * 2,
139                         respRun:         respFail,
140                         expectState:     StateShutdown,
141                 },
142                 {
143                         testCaseComment: "Idle, probe timeout exceeded, one container running",
144                         state:           StateIdle,
145                         age:             probeTimeout * 2,
146                         respRun:         respContainerRunning,
147                         expectState:     StateRunning,
148                         expectRunning:   1,
149                 },
150                 {
151                         testCaseComment: "Idle, probe timeout exceeded, one container running, probe fails",
152                         state:           StateIdle,
153                         age:             probeTimeout * 2,
154                         running:         1,
155                         respRun:         respFail,
156                         expectState:     StateShutdown,
157                         expectRunning:   1,
158                 },
159                 {
160                         testCaseComment: "Idle, probe timeout exceeded, nothing running, probe fails",
161                         state:           StateIdle,
162                         age:             probeTimeout * 2,
163                         respRun:         respFail,
164                         expectState:     StateShutdown,
165                 },
166                 {
167                         testCaseComment: "Running, one container still running",
168                         state:           StateRunning,
169                         running:         1,
170                         respRun:         respContainerRunning,
171                         expectState:     StateRunning,
172                         expectRunning:   1,
173                 },
174                 {
175                         testCaseComment: "Running, container has exited",
176                         state:           StateRunning,
177                         running:         1,
178                         expectState:     StateIdle,
179                         expectRunning:   0,
180                 },
181                 {
182                         testCaseComment: "Running, probe timeout exceeded, nothing running, new container being started",
183                         state:           StateRunning,
184                         age:             probeTimeout * 2,
185                         starting:        1,
186                         expectState:     StateRunning,
187                 },
188         } {
189                 c.Logf("------- %#v", trial)
190                 ctime := time.Now().Add(-trial.age)
191                 exr := stubExecutor{
192                         "bootprobe":         trial.respBoot,
193                         "crunch-run --list": trial.respRun,
194                 }
195                 wp := &Pool{
196                         arvClient:        ac,
197                         newExecutor:      func(cloud.Instance) Executor { return exr },
198                         bootProbeCommand: "bootprobe",
199                         timeoutBooting:   bootTimeout,
200                         timeoutProbe:     probeTimeout,
201                         exited:           map[string]time.Time{},
202                 }
203                 wkr := &worker{
204                         logger:   logger,
205                         executor: exr,
206                         wp:       wp,
207                         mtx:      &wp.mtx,
208                         state:    trial.state,
209                         instance: inst,
210                         appeared: ctime,
211                         busy:     ctime,
212                         probed:   ctime,
213                         updated:  ctime,
214                         running:  map[string]*remoteRunner{},
215                         starting: map[string]*remoteRunner{},
216                         probing:  make(chan struct{}, 1),
217                 }
218                 if trial.running > 0 {
219                         uuid := "zzzzz-dz642-abcdefghijklmno"
220                         wkr.running = map[string]*remoteRunner{uuid: newRemoteRunner(uuid, wkr)}
221                 }
222                 if trial.starting > 0 {
223                         uuid := "zzzzz-dz642-bcdefghijklmnop"
224                         wkr.starting = map[string]*remoteRunner{uuid: newRemoteRunner(uuid, wkr)}
225                 }
226                 wkr.probeAndUpdate()
227                 c.Check(wkr.state, check.Equals, trial.expectState)
228                 c.Check(len(wkr.running), check.Equals, trial.expectRunning)
229         }
230 }
231
// stubResp is a canned command result: the text to return as stdout
// and stderr, and the error (if any) to return alongside them.
type stubResp struct {
	stdout, stderr string
	err            error
}

// stubExecutor maps a command string to the stubResp that Execute
// should return for it.
type stubExecutor map[string]stubResp
238
239 func (se stubExecutor) SetTarget(cloud.ExecutorTarget) {}
240 func (se stubExecutor) Close()                         {}
241 func (se stubExecutor) Execute(env map[string]string, cmd string, stdin io.Reader) (stdout, stderr []byte, err error) {
242         resp, ok := se[cmd]
243         if !ok {
244                 return nil, []byte("command not found\n"), errors.New("command not found")
245         }
246         return []byte(resp.stdout), []byte(resp.stderr), resp.err
247 }