1 // Copyright (C) The Arvados Authors. All rights reserved.
3 // SPDX-License-Identifier: AGPL-3.0
16 "git.arvados.org/arvados.git/lib/cloud"
17 "git.arvados.org/arvados.git/lib/dispatchcloud/test"
18 "git.arvados.org/arvados.git/sdk/go/arvados"
19 "git.arvados.org/arvados.git/sdk/go/ctxlog"
20 check "gopkg.in/check.v1"
// Register WorkerSuite with gocheck so that its test methods are run.
23 var _ = check.Suite(&WorkerSuite{})
// WorkerSuite groups the worker tests in this file; it carries no state.
25 type WorkerSuite struct{}
// TestProbeAndUpdate drives a table of trials through a worker whose
// executor is a stubExecutor. Each trial supplies canned responses for
// the boot probe, the runner deployment command and the
// "crunch-run --list" probe, then checks the resulting worker state,
// the number of running containers and the bytes the executor received
// on stdin.
//
// NOTE(review): the statement that actually triggers the probe sits
// between the worker setup and the c.Check calls and is not shown
// here; presumably it is a call to the worker's ProbeAndUpdate method.
// Confirm against the full file.
27 func (suite *WorkerSuite) TestProbeAndUpdate(c *check.C) {
28 logger := ctxlog.TestLogger(c)
29 bootTimeout := time.Minute
30 probeTimeout := time.Second
32 ac := arvados.NewClientFromEnv()
// Create a stub instance set and one instance in it.
33 is, err := (&test.StubDriver{}).InstanceSet(nil, "test-instance-set-id", nil, logger)
34 c.Assert(err, check.IsNil)
35 inst, err := is.Create(arvados.InstanceType{}, "", nil, "echo InitCommand", nil)
36 c.Assert(err, check.IsNil)
39 testCaseComment string // displayed in test output to help identify failure case
// Canned responses, keyed below by the command the executor is asked
// to run ("bootprobe", "{deploy}", "crunch-run --list").
44 respBoot stubResp // zero value is success
45 respDeploy stubResp // zero value is success
46 respRun stubResp // zero value is success + nothing running
// Response for "<deployed runner path> --list"; it is only registered
// with the stub executor when the trial sets deployRunner.
47 respRunDeployed stubResp
// Responses shared by several trials: respFail carries a non-nil error,
// respContainerRunning carries one container UUID (the same UUID that
// is seeded into wkr.running below).
54 errFail := errors.New("failed")
55 respFail := stubResp{"", "command failed\n", errFail}
56 respContainerRunning := stubResp{"zzzzz-dz642-abcdefghijklmno\n", "", nil}
57 for idx, trial := range []trialT{
59 testCaseComment: "Unknown, probes fail",
63 expectState: StateUnknown,
66 testCaseComment: "Unknown, boot probe fails, but one container is running",
69 respRun: respContainerRunning,
70 expectState: StateUnknown,
74 testCaseComment: "Unknown, boot probe fails, previously running container has exited",
78 expectState: StateUnknown,
82 testCaseComment: "Unknown, boot timeout exceeded, boot probe fails",
84 age: bootTimeout + time.Second,
87 expectState: StateShutdown,
90 testCaseComment: "Unknown, boot timeout exceeded, boot probe succeeds but crunch-run fails",
94 expectState: StateShutdown,
97 testCaseComment: "Unknown, boot timeout exceeded, boot probe fails but crunch-run succeeds",
101 expectState: StateShutdown,
104 testCaseComment: "Unknown, boot timeout exceeded, boot probe fails but container is running",
106 age: bootTimeout * 2,
108 respRun: respContainerRunning,
109 expectState: StateUnknown,
113 testCaseComment: "Booting, boot probe fails, run probe fails",
117 expectState: StateBooting,
120 testCaseComment: "Booting, boot probe fails, run probe succeeds (but isn't expected to be called)",
123 expectState: StateBooting,
126 testCaseComment: "Booting, boot probe succeeds, run probe fails",
129 expectState: StateBooting,
132 testCaseComment: "Booting, boot probe succeeds, run probe succeeds",
134 expectState: StateIdle,
137 testCaseComment: "Booting, boot probe succeeds, run probe succeeds, container is running",
139 respRun: respContainerRunning,
140 expectState: StateRunning,
144 testCaseComment: "Booting, boot timeout exceeded",
146 age: bootTimeout * 2,
148 expectState: StateShutdown,
151 testCaseComment: "Idle, probe timeout exceeded, one container running",
153 age: probeTimeout * 2,
154 respRun: respContainerRunning,
155 expectState: StateRunning,
159 testCaseComment: "Idle, probe timeout exceeded, one container running, probe fails",
161 age: probeTimeout * 2,
164 expectState: StateShutdown,
168 testCaseComment: "Idle, probe timeout exceeded, nothing running, probe fails",
170 age: probeTimeout * 2,
172 expectState: StateShutdown,
175 testCaseComment: "Running, one container still running",
178 respRun: respContainerRunning,
179 expectState: StateRunning,
183 testCaseComment: "Running, container has exited",
186 expectState: StateIdle,
190 testCaseComment: "Running, probe timeout exceeded, nothing running, new container being started",
192 age: probeTimeout * 2,
194 expectState: StateRunning,
197 testCaseComment: "Booting, boot probe succeeds, deployRunner succeeds, run probe succeeds",
199 deployRunner: []byte("ELF"),
200 expectStdin: []byte("ELF"),
202 respRunDeployed: respContainerRunning,
204 expectState: StateRunning,
207 testCaseComment: "Booting, boot probe succeeds, deployRunner fails",
209 deployRunner: []byte("ELF"),
210 respDeploy: respFail,
211 expectStdin: []byte("ELF"),
212 expectState: StateBooting,
215 testCaseComment: "Booting, boot probe succeeds, deployRunner skipped, run probe succeeds",
218 respDeploy: respFail,
219 expectState: StateIdle,
222 c.Logf("------- trial %d: %#v", idx, trial)
// Backdate a timestamp by the trial's age. NOTE(review): where ctime is
// consumed is not shown here; presumably it is the worker's start/probe
// time used for the boot and probe timeout checks.
223 ctime := time.Now().Add(-trial.age)
// Stub executor for this trial: maps each command line to its canned
// response.
224 exr := &stubExecutor{
225 response: map[string]stubResp{
226 "bootprobe": trial.respBoot,
227 "crunch-run --list": trial.respRun,
228 "{deploy}": trial.respDeploy,
233 newExecutor: func(cloud.Instance) Executor { return exr },
234 bootProbeCommand: "bootprobe",
235 timeoutBooting: bootTimeout,
236 timeoutProbe: probeTimeout,
237 exited: map[string]time.Time{},
238 runnerCmd: "crunch-run",
239 runnerData: trial.deployRunner,
240 runnerMD5: md5.Sum(trial.deployRunner),
// When the trial supplies a runner binary, the runner command becomes a
// path that embeds the binary's MD5 sum, and the run probe response for
// that command is registered with the stub executor.
242 if trial.deployRunner != nil {
243 svHash := md5.Sum(trial.deployRunner)
244 wp.runnerCmd = fmt.Sprintf("/var/run/arvados/crunch-run~%x", svHash)
245 exr.response[wp.runnerCmd+" --list"] = trial.respRunDeployed
258 running: map[string]*remoteRunner{},
259 starting: map[string]*remoteRunner{},
260 probing: make(chan struct{}, 1),
// Seed the worker with one running and/or one starting container when
// the trial asks for it.
262 if trial.running > 0 {
263 uuid := "zzzzz-dz642-abcdefghijklmno"
264 wkr.running = map[string]*remoteRunner{uuid: newRemoteRunner(uuid, wkr)}
266 if trial.starting > 0 {
267 uuid := "zzzzz-dz642-bcdefghijklmnop"
268 wkr.starting = map[string]*remoteRunner{uuid: newRemoteRunner(uuid, wkr)}
// Compare the outcome with the trial's expectations: worker state,
// number of running containers, and data written to the executor's
// stdin.
271 c.Check(wkr.state, check.Equals, trial.expectState)
272 c.Check(len(wkr.running), check.Equals, trial.expectRunning)
273 c.Check(exr.stdin.String(), check.Equals, string(trial.expectStdin))
// stubResp is a canned command result handed back by
// stubExecutor.Execute: stdout text, stderr text and an error.
277 type stubResp struct {
// stubExecutor is a test double for Executor that answers commands from
// a fixed table rather than executing them.
283 type stubExecutor struct {
// response maps a command line to the canned result Execute returns
// for it.
284 response map[string]stubResp
// SetTarget is a no-op; the stub ignores the executor target.
288 func (se *stubExecutor) SetTarget(cloud.ExecutorTarget) {}
// Close is a no-op.
289 func (se *stubExecutor) Close() {}
// Execute looks up cmd in se.response and returns the canned stdout,
// stderr and error instead of running anything.
//
// Data supplied on stdin is copied into se.stdin so that tests can
// inspect it; on a copy failure the copy error is returned, with its
// message as stderr. A command that has no entry of its own but
// contains `; cat >"$dstfile"` is answered with the "{deploy}" entry.
// When no entry applies, a "command not found" error is returned with a
// matching stderr message. env is not referenced in the lines shown
// here.
//
// NOTE(review): the conditions guarding the two early returns are not
// shown here; confirm them against the full file.
290 func (se *stubExecutor) Execute(env map[string]string, cmd string, stdin io.Reader) (stdout, stderr []byte, err error) {
// Capture whatever the caller feeds to the command.
292 _, err = io.Copy(&se.stdin, stdin)
294 return nil, []byte(err.Error()), err
297 resp, ok := se.response[cmd]
// The deploy command line varies, so it is recognized by a fragment and
// mapped to the fixed "{deploy}" key.
298 if !ok && strings.Contains(cmd, `; cat >"$dstfile"`) {
299 resp, ok = se.response["{deploy}"]
302 return nil, []byte(fmt.Sprintf("%s: command not found\n", cmd)), errors.New("command not found")
304 return []byte(resp.stdout), []byte(resp.stderr), resp.err