1 // Copyright (C) The Arvados Authors. All rights reserved.
3 // SPDX-License-Identifier: AGPL-3.0
12 "git.curoverse.com/arvados.git/lib/cloud"
13 "git.curoverse.com/arvados.git/lib/dispatchcloud/test"
14 "git.curoverse.com/arvados.git/sdk/go/arvados"
15 "git.curoverse.com/arvados.git/sdk/go/ctxlog"
16 check "gopkg.in/check.v1"
// Register WorkerSuite with the gocheck runner so its Test* methods are run.
19 var _ = check.Suite(&WorkerSuite{})
// WorkerSuite is the receiver for the worker tests; it has no fields.
21 type WorkerSuite struct{}
// TestProbeAndUpdate walks a table of trials. Each trial builds a worker
// whose executor returns canned responses for the boot probe and the
// "crunch-run --list" command, then checks the worker's resulting state and
// its number of running containers against the trial's expectations.
// NOTE(review): several lines of this function are not visible in this view
// (struct headers, some fields, the call under test); the comments below
// describe only what the visible lines show.
23 func (suite *WorkerSuite) TestProbeAndUpdate(c *check.C) {
24 logger := ctxlog.TestLogger(c)
25 bootTimeout := time.Minute
26 probeTimeout := time.Second
28 ac := arvados.NewClientFromEnv()
// Obtain an instance set from the stub driver and create one instance in it;
// c.Assert stops the test immediately if either step returns an error.
29 is, err := (&test.StubDriver{}).InstanceSet(nil, "test-instance-set-id", nil, logger)
30 c.Assert(err, check.IsNil)
31 inst, err := is.Create(arvados.InstanceType{}, "", nil, "echo InitCommand", nil)
32 c.Assert(err, check.IsNil)
35 testCaseComment string // displayed in test output to help identify failure case
40 respBoot stubResp // zero value is success
41 respRun stubResp // zero value is success + nothing running
// Canned responses shared by the trials: respFail carries stderr text and a
// non-nil error; respContainerRunning carries one container UUID on stdout
// and a nil error.
46 errFail := errors.New("failed")
47 respFail := stubResp{"", "command failed\n", errFail}
48 respContainerRunning := stubResp{"zzzzz-dz642-abcdefghijklmno\n", "", nil}
// Trial table. Each testCaseComment names the starting state and the probe
// outcomes being simulated; expectState is the state asserted at the end of
// the trial.
49 for _, trial := range []trialT{
51 testCaseComment: "Unknown, probes fail",
55 expectState: StateUnknown,
58 testCaseComment: "Unknown, boot probe fails, but one container is running",
61 respRun: respContainerRunning,
62 expectState: StateUnknown,
66 testCaseComment: "Unknown, boot probe fails, previously running container has exited",
70 expectState: StateUnknown,
74 testCaseComment: "Unknown, boot timeout exceeded, boot probe fails",
76 age: bootTimeout + time.Second,
79 expectState: StateShutdown,
82 testCaseComment: "Unknown, boot timeout exceeded, boot probe succeeds but crunch-run fails",
86 expectState: StateShutdown,
89 testCaseComment: "Unknown, boot timeout exceeded, boot probe fails but crunch-run succeeds",
93 expectState: StateShutdown,
96 testCaseComment: "Unknown, boot timeout exceeded, boot probe fails but container is running",
100 respRun: respContainerRunning,
101 expectState: StateUnknown,
105 testCaseComment: "Booting, boot probe fails, run probe fails",
109 expectState: StateBooting,
112 testCaseComment: "Booting, boot probe fails, run probe succeeds (but isn't expected to be called)",
115 expectState: StateBooting,
118 testCaseComment: "Booting, boot probe succeeds, run probe fails",
121 expectState: StateBooting,
124 testCaseComment: "Booting, boot probe succeeds, run probe succeeds",
126 expectState: StateIdle,
129 testCaseComment: "Booting, boot probe succeeds, run probe succeeds, container is running",
131 respRun: respContainerRunning,
132 expectState: StateRunning,
136 testCaseComment: "Booting, boot timeout exceeded",
138 age: bootTimeout * 2,
140 expectState: StateShutdown,
143 testCaseComment: "Idle, probe timeout exceeded, one container running",
145 age: probeTimeout * 2,
146 respRun: respContainerRunning,
147 expectState: StateRunning,
151 testCaseComment: "Idle, probe timeout exceeded, one container running, probe fails",
153 age: probeTimeout * 2,
156 expectState: StateShutdown,
160 testCaseComment: "Idle, probe timeout exceeded, nothing running, probe fails",
162 age: probeTimeout * 2,
164 expectState: StateShutdown,
167 testCaseComment: "Running, one container still running",
170 respRun: respContainerRunning,
171 expectState: StateRunning,
175 testCaseComment: "Running, container has exited",
178 expectState: StateIdle,
182 testCaseComment: "Running, probe timeout exceeded, nothing running, new container being started",
184 age: probeTimeout * 2,
186 expectState: StateRunning,
// Log the whole trial so a failing check can be matched to its table entry.
189 c.Logf("------- %#v", trial)
// ctime lies trial.age in the past.
190 ctime := time.Now().Add(-trial.age)
// Per-command canned responses for this trial.
192 "bootprobe": trial.respBoot,
193 "crunch-run --list": trial.respRun,
// Every instance gets the same stub executor, exr.
197 newExecutor: func(cloud.Instance) Executor { return exr },
198 bootProbeCommand: "bootprobe",
199 timeoutBooting: bootTimeout,
200 timeoutProbe: probeTimeout,
201 exited: map[string]time.Time{},
214 running: map[string]*remoteRunner{},
215 starting: map[string]*remoteRunner{},
216 probing: make(chan struct{}, 1),
// A positive trial.running / trial.starting seeds exactly one runner entry
// in the corresponding map, whatever the actual count is.
218 if trial.running > 0 {
219 uuid := "zzzzz-dz642-abcdefghijklmno"
220 wkr.running = map[string]*remoteRunner{uuid: newRemoteRunner(uuid, wkr)}
222 if trial.starting > 0 {
223 uuid := "zzzzz-dz642-bcdefghijklmnop"
224 wkr.starting = map[string]*remoteRunner{uuid: newRemoteRunner(uuid, wkr)}
// Compare the worker's final state and running-container count with the
// trial's expectations; c.Check records a failure without stopping the test.
227 c.Check(wkr.state, check.Equals, trial.expectState)
228 c.Check(len(wkr.running), check.Equals, trial.expectRunning)
// stubResp is one canned command result handed back by stubExecutor.Execute.
// NOTE(review): the field lines are not visible in this view; Execute reads
// stdout, stderr and err from it — confirm against the full definition.
232 type stubResp struct {
// stubExecutor is a table of canned responses keyed by string.
// NOTE(review): keys appear to be the command strings passed to Execute
// (e.g. "bootprobe", "crunch-run --list") — confirm against the lookup.
237 type stubExecutor map[string]stubResp
// SetTarget does nothing; the stub ignores the target it is given.
239 func (se stubExecutor) SetTarget(cloud.ExecutorTarget) {}
// Close does nothing.
240 func (se stubExecutor) Close() {}
// Execute returns a canned stdout/stderr/error triple for cmd.
// NOTE(review): the lines that look cmd up are not visible in this view.
// Presumably a missing entry yields the "command not found" result and a
// present entry yields the stored stubResp — confirm. env and stdin are not
// referenced in the visible lines.
241 func (se stubExecutor) Execute(env map[string]string, cmd string, stdin io.Reader) (stdout, stderr []byte, err error) {
244 return nil, []byte("command not found\n"), errors.New("command not found")
246 return []byte(resp.stdout), []byte(resp.stderr), resp.err