1 // Copyright (C) The Arvados Authors. All rights reserved.
3 // SPDX-License-Identifier: AGPL-3.0
23 "git.curoverse.com/arvados.git/sdk/go/arvados"
24 "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
25 "git.curoverse.com/arvados.git/sdk/go/arvadostest"
26 "git.curoverse.com/arvados.git/sdk/go/dispatch"
30 // Gocheck boilerplate
// Test is the "go test" entry point for this package. Per the boilerplate
// note, it exists to bridge the standard testing package to gocheck.
// NOTE(review): presumably this is what runs the suites registered with
// Suite below — confirm against the function body.
31 func Test(t *testing.T) {
// Register both suites through Suite. The result is assigned to the blank
// identifier so the call runs as a package-level initializer.
35 var _ = Suite(&TestSuite{})
36 var _ = Suite(&MockArvadosServerSuite{})
// TestSuite holds the integration tests; its SetUpTest fixture calls
// arvadostest.StartAPI before each test.
38 type TestSuite struct{}
// MockArvadosServerSuite holds the tests that run against a stubbed HTTP
// server or exercise only config parsing and sbatch argument building.
39 type MockArvadosServerSuite struct{}
// NOTE(review): initialArgs presumably keeps a copy of the original
// command-line arguments so fixtures can restore them — confirm.
41 var initialArgs []string
// SetUpSuite is the suite-level setup fixture for TestSuite.
// NOTE(review): by gocheck's fixture naming this runs once before the
// suite's first test — confirm what state it prepares.
43 func (s *TestSuite) SetUpSuite(c *C) {
// TearDownSuite is the suite-level teardown fixture for TestSuite.
// NOTE(review): by gocheck's fixture naming this runs once after the
// suite's last test — confirm what state it releases.
47 func (s *TestSuite) TearDownSuite(c *C) {
// SetUpTest is the per-test setup fixture for TestSuite. It builds an
// argument list for the dispatcher program, calls arvadostest.StartAPI, and
// exports the dispatcher's API token through the environment.
50 func (s *TestSuite) SetUpTest(c *C) {
// Start from the program name only.
51 args := []string{"crunch-dispatch-slurm"}
54 arvadostest.StartAPI()
// Clients created later in the test pick up the Dispatch1 fixture token
// from ARVADOS_API_TOKEN.
55 os.Setenv("ARVADOS_API_TOKEN", arvadostest.Dispatch1Token)
// TearDownTest is the per-test teardown fixture for TestSuite. Among its
// cleanup steps it calls arvadostest.ResetEnv to undo environment changes
// such as the token exported by SetUpTest.
58 func (s *TestSuite) TearDownTest(c *C) {
60 arvadostest.ResetEnv()
// TearDownTest is the per-test teardown fixture for MockArvadosServerSuite;
// it calls arvadostest.ResetEnv after each test.
64 func (s *MockArvadosServerSuite) TearDownTest(c *C) {
65 arvadostest.ResetEnv()
// slurmFake is a test double installed as theConfig.slurm. Its methods
// record the calls they receive (didBatch, didCancel, didRenice) and serve
// canned data (queue, errBatch) so tests can assert on dispatcher behavior
// without a real slurm installation.
68 type slurmFake struct {
73 // If non-nil, run this func during the 2nd+ call to Cancel()
75 // Error returned by Batch()
// Batch records the argument list of each submission in didBatch, which
// tests later compare against their expected batches.
// NOTE(review): per the struct comment the returned error is the configured
// errBatch — confirm.
79 func (sf *slurmFake) Batch(script io.Reader, args []string) error {
80 sf.didBatch = append(sf.didBatch, args)
// QueueCommand returns an "echo" command whose output is the canned queue
// string, so the caller parses sf.queue as if it were the queue listing.
// The args parameter is not used by the returned command.
84 func (sf *slurmFake) QueueCommand(args []string) *exec.Cmd {
85 return exec.Command("echo", sf.queue)
// Renice records each request in didRenice as a {name, nice} pair, with the
// nice value formatted as a decimal string.
88 func (sf *slurmFake) Renice(name string, nice int) error {
89 sf.didRenice = append(sf.didRenice, []string{name, fmt.Sprintf("%d", nice)})
// Cancel records the job name in didCancel. The first call always returns
// an error so that callers' retry behavior is exercised; later calls run
// the optional onCancel hook.
93 func (sf *slurmFake) Cancel(name string) error {
94 sf.didCancel = append(sf.didCancel, name)
// didCancel has exactly one entry only during the very first call.
95 if len(sf.didCancel) == 1 {
96 // simulate error on first attempt
97 return errors.New("something terrible happened")
// Second and later attempts: give the test a chance to intervene, e.g. to
// block until it is ready for the cancel to proceed.
99 if sf.onCancel != nil {
// integrationTest runs a dispatcher against the test API server with the
// given fake slurm and returns the fixture container
// "zzzzz-dz642-queuedcontainer" as fetched after the run.
//
// slurm is installed as theConfig.slurm for the duration of the call.
// expectBatch is compared with slurm.didBatch once the dispatcher stops.
// runContainer is invoked from the dispatcher's RunContainer hook with the
// dispatcher and the container being run.
105 func (s *TestSuite) integrationTest(c *C, slurm *slurmFake,
106 expectBatch [][]string,
107 runContainer func(*dispatch.Dispatcher, arvados.Container)) arvados.Container {
108 arvadostest.ResetEnv()
110 arv, err := arvadosclient.MakeArvadosClient()
// Swap in the fake slurm and restore the previous implementation on return.
113 defer func(orig Slurm) {
114 theConfig.slurm = orig
116 theConfig.slurm = slurm
118 // There should be one queued container
119 params := arvadosclient.Dict{
120 "filters": [][]string{{"state", "=", "Queued"}},
122 var containers arvados.ContainerList
123 err = arv.List("containers", params, &containers)
125 c.Check(len(containers.Items), Equals, 1)
// Use "echo" as the crunch-run command instead of the real program.
127 theConfig.CrunchRunCommand = []string{"echo"}
129 ctx, cancel := context.WithCancel(context.Background())
130 doneRun := make(chan struct{})
132 dispatcher := dispatch.Dispatcher{
134 PollPeriod: time.Duration(1) * time.Second,
// The hook drives the caller's scenario via runContainer, signals doneRun
// when the scenario has finished, and delegates to the package's run
// function for the container.
135 RunContainer: func(disp *dispatch.Dispatcher, ctr arvados.Container, status <-chan arvados.Container) {
137 runContainer(disp, ctr)
139 doneRun <- struct{}{}
141 run(disp, ctr, status)
// Refresh the queue state twice per second.
146 sqCheck = &SqueueChecker{Period: 500 * time.Millisecond}
148 err = dispatcher.Run(ctx)
// Run is expected to stop only because the test cancelled ctx.
150 c.Assert(err, Equals, context.Canceled)
154 c.Check(slurm.didBatch, DeepEquals, expectBatch)
156 // There should be no queued containers now
157 err = arv.List("containers", params, &containers)
159 c.Check(len(containers.Items), Equals, 0)
161 // Previously "Queued" container should now be in "Complete" state
// NOTE(review): the final state is not always Complete — callers such as
// TestIntegrationCancel expect Cancelled. The container is fetched here so
// that each caller can check the state it expects.
162 var container arvados.Container
163 err = arv.Get("containers", "zzzzz-dz642-queuedcontainer", nil, &container)
// TestIntegrationNormal covers the ordinary path: the container is listed in
// the fake queue, the scenario marks it Running, waits, marks it Complete,
// and the container must end up in the Complete state.
168 func (s *TestSuite) TestIntegrationNormal(c *C) {
169 container := s.integrationTest(c,
// Fake queue entry for the fixture container.
170 &slurmFake{queue: "zzzzz-dz642-queuedcontainer 9990 100\n"},
172 func(dispatcher *dispatch.Dispatcher, container arvados.Container) {
173 dispatcher.UpdateState(container.UUID, dispatch.Running)
// Stay in Running for a few seconds before reporting completion.
174 time.Sleep(3 * time.Second)
175 dispatcher.UpdateState(container.UUID, dispatch.Complete)
177 c.Check(container.State, Equals, arvados.ContainerStateComplete)
// TestIntegrationCancel checks cancellation: once the container is Running
// its priority is set to 0 through the API, and the test expects the
// container to end up Cancelled after at least two Cancel calls on the fake
// (the fake always fails the first one).
180 func (s *TestSuite) TestIntegrationCancel(c *C) {
181 slurm := &slurmFake{queue: "zzzzz-dz642-queuedcontainer 9990 100\n"}
182 readyToCancel := make(chan bool)
// Hold the second and later Cancel calls until the scenario signals that
// the priority update has been issued.
183 slurm.onCancel = func() { <-readyToCancel }
184 container := s.integrationTest(c,
187 func(dispatcher *dispatch.Dispatcher, container arvados.Container) {
188 dispatcher.UpdateState(container.UUID, dispatch.Running)
189 time.Sleep(time.Second)
// Request cancellation by dropping the priority to zero.
190 dispatcher.Arv.Update("containers", container.UUID,
192 "container": arvadosclient.Dict{"priority": 0}},
// Unblock the Cancel call that is waiting in onCancel.
194 readyToCancel <- true
197 c.Check(container.State, Equals, arvados.ContainerStateCancelled)
// The failed first attempt must have been retried, and both attempts must
// name the fixture container.
198 c.Check(len(slurm.didCancel) > 1, Equals, true)
199 c.Check(slurm.didCancel[:2], DeepEquals, []string{"zzzzz-dz642-queuedcontainer", "zzzzz-dz642-queuedcontainer"})
// TestIntegrationMissingFromSqueue runs with an empty fake queue, so the
// container never appears in the queue listing. It expects one batch
// submission with the listed arguments and expects the container to end up
// Cancelled, even though the scenario itself reports Complete.
202 func (s *TestSuite) TestIntegrationMissingFromSqueue(c *C) {
// A zero-value slurmFake has an empty queue string.
203 container := s.integrationTest(c, &slurmFake{},
// Expected submission arguments for the fixture container.
205 fmt.Sprintf("--job-name=%s", "zzzzz-dz642-queuedcontainer"),
206 fmt.Sprintf("--mem=%d", 11445),
207 fmt.Sprintf("--cpus-per-task=%d", 4),
208 fmt.Sprintf("--tmp=%d", 45777),
209 fmt.Sprintf("--nice=%d", 9990)}},
210 func(dispatcher *dispatch.Dispatcher, container arvados.Container) {
211 dispatcher.UpdateState(container.UUID, dispatch.Running)
212 time.Sleep(3 * time.Second)
213 dispatcher.UpdateState(container.UUID, dispatch.Complete)
215 c.Check(container.State, Equals, arvados.ContainerStateCancelled)
// TestSbatchFail makes the fake's Batch fail via errBatch. It still expects
// one submission attempt with the standard arguments, and afterwards expects
// exactly one "dispatch" log entry for the container.
218 func (s *TestSuite) TestSbatchFail(c *C) {
219 container := s.integrationTest(c,
220 &slurmFake{errBatch: errors.New("something terrible happened")},
221 [][]string{{"--job-name=zzzzz-dz642-queuedcontainer", "--mem=11445", "--cpus-per-task=4", "--tmp=45777", "--nice=9990"}},
222 func(dispatcher *dispatch.Dispatcher, container arvados.Container) {
223 dispatcher.UpdateState(container.UUID, dispatch.Running)
224 dispatcher.UpdateState(container.UUID, dispatch.Complete)
226 c.Check(container.State, Equals, arvados.ContainerStateComplete)
// Use a fresh client to look for the log entry recorded for this container.
228 arv, err := arvadosclient.MakeArvadosClient()
231 var ll arvados.LogList
232 err = arv.List("logs", arvadosclient.Dict{"filters": [][]string{
233 {"object_uuid", "=", container.UUID},
234 {"event_type", "=", "dispatch"},
236 c.Assert(len(ll.Items), Equals, 1)
// TestAPIErrorGettingContainers stubs the API so that the current-token
// lookup succeeds but the containers endpoint answers 500, and expects the
// dispatcher to log "Error getting list of containers".
239 func (s *MockArvadosServerSuite) TestAPIErrorGettingContainers(c *C) {
240 apiStubResponses := make(map[string]arvadostest.StubResponse)
// Successful response identifying the dispatcher's authorization.
241 apiStubResponses["/arvados/v1/api_client_authorizations/current"] = arvadostest.StubResponse{200, `{"uuid":"` + arvadostest.Dispatch1AuthUUID + `"}`}
// Server error for the container listing.
242 apiStubResponses["/arvados/v1/containers"] = arvadostest.StubResponse{500, string(`{}`)}
244 testWithServerStub(c, apiStubResponses, "echo", "Error getting list of containers")
// testWithServerStub runs a dispatcher against an httptest server that
// serves apiStubResponses, captures the log output, and checks that the
// expected text shows up in it.
//
// crunchCmd becomes the sole element of theConfig.CrunchRunCommand.
// expected is the log text to wait for. It is spliced into a regular
// expression unescaped, so it must not contain regex metacharacters that
// would change its meaning.
247 func testWithServerStub(c *C, apiStubResponses map[string]arvadostest.StubResponse, crunchCmd string, expected string) {
248 apiStub := arvadostest.ServerStub{apiStubResponses}
250 api := httptest.NewServer(&apiStub)
253 arv := &arvadosclient.ArvadosClient{
// Drop the first 7 characters of the URL, i.e. the "http://" prefix, so
// only host:port remains.
// NOTE(review): this assumes the scheme is always "http://".
255 ApiServer: api.URL[7:],
257 Client: &http.Client{Transport: &http.Transport{}},
// Tee log output into buf (for assertions) and stderr (for the test log);
// restore plain stderr logging on return.
261 buf := bytes.NewBuffer(nil)
262 log.SetOutput(io.MultiWriter(buf, os.Stderr))
263 defer log.SetOutput(os.Stderr)
265 theConfig.CrunchRunCommand = []string{crunchCmd}
267 ctx, cancel := context.WithCancel(context.Background())
268 dispatcher := dispatch.Dispatcher{
270 PollPeriod: time.Duration(1) * time.Second,
// If a container does get dispatched, walk it through Running and Complete
// after a short delay, then delegate to the package's run function.
271 RunContainer: func(disp *dispatch.Dispatcher, ctr arvados.Container, status <-chan arvados.Container) {
273 time.Sleep(1 * time.Second)
274 disp.UpdateState(ctr.UUID, dispatch.Running)
275 disp.UpdateState(ctr.UUID, dispatch.Complete)
277 run(disp, ctr, status)
// Poll the captured log for the expected text: up to 80 checks spaced
// 100ms apart, i.e. roughly 8 seconds at most.
283 for i := 0; i < 80 && !strings.Contains(buf.String(), expected); i++ {
284 time.Sleep(100 * time.Millisecond)
// Run must end because the context was cancelled, not because of another
// error.
289 err := dispatcher.Run(ctx)
290 c.Assert(err, Equals, context.Canceled)
// (?ms) lets "." span newlines, so the text may appear anywhere in the log.
292 c.Check(buf.String(), Matches, `(?ms).*`+expected+`.*`)
// TestNoSuchConfigFile checks that readConfig reports an error when the
// config path does not exist.
295 func (s *MockArvadosServerSuite) TestNoSuchConfigFile(c *C) {
297 err := readConfig(&config, "/nosuchdir89j7879/8hjwr7ojgyy7")
298 c.Assert(err, NotNil)
// TestBadSbatchArgsConfig checks that readConfig rejects a config whose
// SbatchArguments value is a plain string instead of an array of strings.
301 func (s *MockArvadosServerSuite) TestBadSbatchArgsConfig(c *C) {
// Write the malformed config to a temporary file that is removed afterwards.
304 tmpfile, err := ioutil.TempFile(os.TempDir(), "config")
306 defer os.Remove(tmpfile.Name())
308 _, err = tmpfile.Write([]byte(`{"SbatchArguments": "oops this is not a string array"}`))
311 err = readConfig(&config, tmpfile.Name())
312 c.Assert(err, NotNil)
// TestNoSuchArgInConfigIgnored loads a config containing only an unknown key
// and checks that SbatchArguments stays empty.
315 func (s *MockArvadosServerSuite) TestNoSuchArgInConfigIgnored(c *C) {
318 tmpfile, err := ioutil.TempFile(os.TempDir(), "config")
320 defer os.Remove(tmpfile.Name())
322 _, err = tmpfile.Write([]byte(`{"NoSuchArg": "Nobody loves me, not one tiny hunk."}`))
325 err = readConfig(&config, tmpfile.Name())
// NOTE(review): gocheck takes the obtained value first; here the expected
// value comes first. The check still works, but a failure message would
// label the two values the wrong way round.
327 c.Check(0, Equals, len(config.SbatchArguments))
// TestReadConfig writes a config with three SbatchArguments entries and
// checks that readConfig loads them in the same order.
330 func (s *MockArvadosServerSuite) TestReadConfig(c *C) {
333 tmpfile, err := ioutil.TempFile(os.TempDir(), "config")
335 defer os.Remove(tmpfile.Name())
// args is the expected parse result of the JSON text in argsS.
337 args := []string{"--arg1=v1", "--arg2", "--arg3=v3"}
338 argsS := `{"SbatchArguments": ["--arg1=v1", "--arg2", "--arg3=v3"]}`
339 _, err = tmpfile.Write([]byte(argsS))
342 err = readConfig(&config, tmpfile.Name())
// NOTE(review): both checks pass the expected value in the obtained
// position; results are unaffected, only failure messages are.
344 c.Check(3, Equals, len(config.SbatchArguments))
345 c.Check(args, DeepEquals, config.SbatchArguments)
// TestSbatchFuncWithNoConfigArgs runs the shared sbatchArgs check with no
// configured extra arguments (nil slice).
348 func (s *MockArvadosServerSuite) TestSbatchFuncWithNoConfigArgs(c *C) {
349 testSbatchFuncWithArgs(c, nil)
// TestSbatchFuncWithEmptyConfigArgs runs the shared sbatchArgs check with an
// empty, non-nil slice of configured arguments.
352 func (s *MockArvadosServerSuite) TestSbatchFuncWithEmptyConfigArgs(c *C) {
353 testSbatchFuncWithArgs(c, []string{})
// TestSbatchFuncWithConfigArgs runs the shared sbatchArgs check with two
// configured extra arguments.
356 func (s *MockArvadosServerSuite) TestSbatchFuncWithConfigArgs(c *C) {
357 testSbatchFuncWithArgs(c, []string{"--arg1=v1", "--arg2"})
// testSbatchFuncWithArgs installs args as theConfig.SbatchArguments and
// checks that sbatchArgs returns those arguments first, followed by the
// per-container arguments.
360 func testSbatchFuncWithArgs(c *C, args []string) {
// Reset the shared config on return so later tests start without extra
// arguments.
361 defer func() { theConfig.SbatchArguments = nil }()
// Appending relies on SbatchArguments being empty on entry.
362 theConfig.SbatchArguments = append(theConfig.SbatchArguments, args...)
364 container := arvados.Container{
// NOTE(review): 250000000 bytes is about 238.4 MiB, so the expected
// "--mem=239" below suggests sbatchArgs rounds up to whole MiB — confirm.
366 RuntimeConstraints: arvados.RuntimeConstraints{RAM: 250000000, VCPUs: 2},
// Expected result: configured arguments, then the generated ones.
369 var expected []string
370 expected = append(expected, theConfig.SbatchArguments...)
371 expected = append(expected, "--job-name=123", "--mem=239", "--cpus-per-task=2", "--tmp=0", "--nice=9990")
372 c.Check(sbatchArgs(container), DeepEquals, expected)
// TestSbatchPartition checks that a container's scheduling partitions are
// passed as one comma-separated "--partition" argument after the standard
// arguments.
375 func (s *MockArvadosServerSuite) TestSbatchPartition(c *C) {
376 container := arvados.Container{
378 RuntimeConstraints: arvados.RuntimeConstraints{RAM: 250000000, VCPUs: 1},
379 SchedulingParameters: arvados.SchedulingParameters{Partitions: []string{"blurb", "b2"}},
382 c.Check(sbatchArgs(container), DeepEquals, []string{
383 "--job-name=123", "--mem=239", "--cpus-per-task=1", "--tmp=0", "--nice=9990",
// Both partitions joined into a single argument, in their original order.
384 "--partition=blurb,b2",
// TestIntegrationChangePriority raises the container's priority to 600 while
// it is Running and expects the dispatcher to renice the job. The container
// must end up Complete and the last recorded Renice call must request nice
// 4000 for the fixture container.
388 func (s *TestSuite) TestIntegrationChangePriority(c *C) {
389 slurm := &slurmFake{queue: "zzzzz-dz642-queuedcontainer 9990 100\n"}
// nil expectBatch: the container is already listed in the fake queue.
// NOTE(review): presumably no submission happens in that case — confirm.
390 container := s.integrationTest(c, slurm, nil,
391 func(dispatcher *dispatch.Dispatcher, container arvados.Container) {
392 dispatcher.UpdateState(container.UUID, dispatch.Running)
393 time.Sleep(time.Second)
394 dispatcher.Arv.Update("containers", container.UUID,
396 "container": arvadosclient.Dict{"priority": 600}},
// Leave time for the priority change to be noticed before completing.
398 time.Sleep(time.Second)
399 dispatcher.UpdateState(container.UUID, dispatch.Complete)
401 c.Check(container.State, Equals, arvados.ContainerStateComplete)
// Assert (not Check) so the index expression below cannot run on an empty
// slice.
402 c.Assert(len(slurm.didRenice), Not(Equals), 0)
403 c.Check(slurm.didRenice[len(slurm.didRenice)-1], DeepEquals, []string{"zzzzz-dz642-queuedcontainer", "4000"})