8784: Fix test for latest firefox.
[arvados.git] / services / crunch-dispatch-slurm / crunch-dispatch-slurm_test.go
1 package main
2
3 import (
4         "bytes"
5         "context"
6         "fmt"
7         "io"
8         "io/ioutil"
9         "log"
10         "net/http"
11         "net/http/httptest"
12         "os"
13         "os/exec"
14         "strings"
15         "testing"
16         "time"
17
18         "git.curoverse.com/arvados.git/sdk/go/arvados"
19         "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
20         "git.curoverse.com/arvados.git/sdk/go/arvadostest"
21         "git.curoverse.com/arvados.git/sdk/go/dispatch"
22         . "gopkg.in/check.v1"
23 )
24
25 // Gocheck boilerplate
26 func Test(t *testing.T) {
27         TestingT(t)
28 }
29
30 var _ = Suite(&TestSuite{})
31 var _ = Suite(&MockArvadosServerSuite{})
32
33 type TestSuite struct{}
34 type MockArvadosServerSuite struct{}
35
36 var initialArgs []string
37
38 func (s *TestSuite) SetUpSuite(c *C) {
39         initialArgs = os.Args
40 }
41
42 func (s *TestSuite) TearDownSuite(c *C) {
43 }
44
45 func (s *TestSuite) SetUpTest(c *C) {
46         args := []string{"crunch-dispatch-slurm"}
47         os.Args = args
48
49         arvadostest.StartAPI()
50         os.Setenv("ARVADOS_API_TOKEN", arvadostest.Dispatch1Token)
51 }
52
53 func (s *TestSuite) TearDownTest(c *C) {
54         os.Args = initialArgs
55         arvadostest.ResetEnv()
56         arvadostest.StopAPI()
57 }
58
59 func (s *MockArvadosServerSuite) TearDownTest(c *C) {
60         arvadostest.ResetEnv()
61 }
62
63 func (s *TestSuite) TestIntegrationNormal(c *C) {
64         done := false
65         container := s.integrationTest(c,
66                 func() *exec.Cmd {
67                         if done {
68                                 return exec.Command("true")
69                         } else {
70                                 return exec.Command("echo", "zzzzz-dz642-queuedcontainer")
71                         }
72                 },
73                 nil,
74                 nil,
75                 []string(nil),
76                 func(dispatcher *dispatch.Dispatcher, container arvados.Container) {
77                         dispatcher.UpdateState(container.UUID, dispatch.Running)
78                         time.Sleep(3 * time.Second)
79                         dispatcher.UpdateState(container.UUID, dispatch.Complete)
80                         done = true
81                 })
82         c.Check(container.State, Equals, arvados.ContainerStateComplete)
83 }
84
85 func (s *TestSuite) TestIntegrationCancel(c *C) {
86         var cmd *exec.Cmd
87         var scancelCmdLine []string
88         attempt := 0
89
90         container := s.integrationTest(c,
91                 func() *exec.Cmd {
92                         if cmd != nil && cmd.ProcessState != nil {
93                                 return exec.Command("true")
94                         } else {
95                                 return exec.Command("echo", "zzzzz-dz642-queuedcontainer")
96                         }
97                 },
98                 func(container arvados.Container) *exec.Cmd {
99                         if attempt++; attempt == 1 {
100                                 return exec.Command("false")
101                         } else {
102                                 scancelCmdLine = scancelFunc(container).Args
103                                 cmd = exec.Command("echo")
104                                 return cmd
105                         }
106                 },
107                 nil,
108                 []string(nil),
109                 func(dispatcher *dispatch.Dispatcher, container arvados.Container) {
110                         dispatcher.UpdateState(container.UUID, dispatch.Running)
111                         time.Sleep(1 * time.Second)
112                         dispatcher.Arv.Update("containers", container.UUID,
113                                 arvadosclient.Dict{
114                                         "container": arvadosclient.Dict{"priority": 0}},
115                                 nil)
116                 })
117         c.Check(container.State, Equals, arvados.ContainerStateCancelled)
118         c.Check(scancelCmdLine, DeepEquals, []string{"scancel", "--name=zzzzz-dz642-queuedcontainer"})
119 }
120
121 func (s *TestSuite) TestIntegrationMissingFromSqueue(c *C) {
122         container := s.integrationTest(c,
123                 func() *exec.Cmd { return exec.Command("echo") },
124                 nil,
125                 nil,
126                 []string{"sbatch",
127                         fmt.Sprintf("--job-name=%s", "zzzzz-dz642-queuedcontainer"),
128                         fmt.Sprintf("--mem=%d", 11445),
129                         fmt.Sprintf("--cpus-per-task=%d", 4),
130                         fmt.Sprintf("--tmp=%d", 45777)},
131                 func(dispatcher *dispatch.Dispatcher, container arvados.Container) {
132                         dispatcher.UpdateState(container.UUID, dispatch.Running)
133                         time.Sleep(3 * time.Second)
134                         dispatcher.UpdateState(container.UUID, dispatch.Complete)
135                 })
136         c.Check(container.State, Equals, arvados.ContainerStateCancelled)
137 }
138
139 func (s *TestSuite) TestSbatchFail(c *C) {
140         container := s.integrationTest(c,
141                 func() *exec.Cmd { return exec.Command("echo") },
142                 nil,
143                 func(container arvados.Container) *exec.Cmd {
144                         return exec.Command("false")
145                 },
146                 []string(nil),
147                 func(dispatcher *dispatch.Dispatcher, container arvados.Container) {
148                         dispatcher.UpdateState(container.UUID, dispatch.Running)
149                         dispatcher.UpdateState(container.UUID, dispatch.Complete)
150                 })
151         c.Check(container.State, Equals, arvados.ContainerStateComplete)
152
153         arv, err := arvadosclient.MakeArvadosClient()
154         c.Assert(err, IsNil)
155
156         var ll arvados.LogList
157         err = arv.List("logs", arvadosclient.Dict{"filters": [][]string{
158                 []string{"object_uuid", "=", container.UUID},
159                 []string{"event_type", "=", "dispatch"},
160         }}, &ll)
161         c.Assert(len(ll.Items), Equals, 1)
162 }
163
164 func (s *TestSuite) integrationTest(c *C,
165         newSqueueCmd func() *exec.Cmd,
166         newScancelCmd func(arvados.Container) *exec.Cmd,
167         newSbatchCmd func(arvados.Container) *exec.Cmd,
168         sbatchCmdComps []string,
169         runContainer func(*dispatch.Dispatcher, arvados.Container)) arvados.Container {
170         arvadostest.ResetEnv()
171
172         arv, err := arvadosclient.MakeArvadosClient()
173         c.Assert(err, IsNil)
174
175         var sbatchCmdLine []string
176
177         // Override sbatchCmd
178         defer func(orig func(arvados.Container) *exec.Cmd) {
179                 sbatchCmd = orig
180         }(sbatchCmd)
181
182         if newSbatchCmd != nil {
183                 sbatchCmd = newSbatchCmd
184         } else {
185                 sbatchCmd = func(container arvados.Container) *exec.Cmd {
186                         sbatchCmdLine = sbatchFunc(container).Args
187                         return exec.Command("sh")
188                 }
189         }
190
191         // Override squeueCmd
192         defer func(orig func() *exec.Cmd) {
193                 squeueCmd = orig
194         }(squeueCmd)
195         squeueCmd = newSqueueCmd
196
197         // Override scancel
198         defer func(orig func(arvados.Container) *exec.Cmd) {
199                 scancelCmd = orig
200         }(scancelCmd)
201         scancelCmd = newScancelCmd
202
203         // There should be one queued container
204         params := arvadosclient.Dict{
205                 "filters": [][]string{{"state", "=", "Queued"}},
206         }
207         var containers arvados.ContainerList
208         err = arv.List("containers", params, &containers)
209         c.Check(err, IsNil)
210         c.Check(len(containers.Items), Equals, 1)
211
212         theConfig.CrunchRunCommand = []string{"echo"}
213
214         ctx, cancel := context.WithCancel(context.Background())
215         doneRun := make(chan struct{})
216
217         dispatcher := dispatch.Dispatcher{
218                 Arv:        arv,
219                 PollPeriod: time.Duration(1) * time.Second,
220                 RunContainer: func(disp *dispatch.Dispatcher, ctr arvados.Container, status <-chan arvados.Container) {
221                         go func() {
222                                 runContainer(disp, ctr)
223                                 doneRun <- struct{}{}
224                         }()
225                         run(disp, ctr, status)
226                         cancel()
227                 },
228         }
229
230         sqCheck = &SqueueChecker{Period: 500 * time.Millisecond}
231
232         err = dispatcher.Run(ctx)
233         <-doneRun
234         c.Assert(err, Equals, context.Canceled)
235
236         sqCheck.Stop()
237
238         c.Check(sbatchCmdLine, DeepEquals, sbatchCmdComps)
239
240         // There should be no queued containers now
241         err = arv.List("containers", params, &containers)
242         c.Check(err, IsNil)
243         c.Check(len(containers.Items), Equals, 0)
244
245         // Previously "Queued" container should now be in "Complete" state
246         var container arvados.Container
247         err = arv.Get("containers", "zzzzz-dz642-queuedcontainer", nil, &container)
248         c.Check(err, IsNil)
249         return container
250 }
251
252 func (s *MockArvadosServerSuite) TestAPIErrorGettingContainers(c *C) {
253         apiStubResponses := make(map[string]arvadostest.StubResponse)
254         apiStubResponses["/arvados/v1/api_client_authorizations/current"] = arvadostest.StubResponse{200, `{"uuid":"` + arvadostest.Dispatch1AuthUUID + `"}`}
255         apiStubResponses["/arvados/v1/containers"] = arvadostest.StubResponse{500, string(`{}`)}
256
257         testWithServerStub(c, apiStubResponses, "echo", "Error getting list of containers")
258 }
259
260 func testWithServerStub(c *C, apiStubResponses map[string]arvadostest.StubResponse, crunchCmd string, expected string) {
261         apiStub := arvadostest.ServerStub{apiStubResponses}
262
263         api := httptest.NewServer(&apiStub)
264         defer api.Close()
265
266         arv := &arvadosclient.ArvadosClient{
267                 Scheme:    "http",
268                 ApiServer: api.URL[7:],
269                 ApiToken:  "abc123",
270                 Client:    &http.Client{Transport: &http.Transport{}},
271                 Retries:   0,
272         }
273
274         buf := bytes.NewBuffer(nil)
275         log.SetOutput(io.MultiWriter(buf, os.Stderr))
276         defer log.SetOutput(os.Stderr)
277
278         theConfig.CrunchRunCommand = []string{crunchCmd}
279
280         ctx, cancel := context.WithCancel(context.Background())
281         dispatcher := dispatch.Dispatcher{
282                 Arv:        arv,
283                 PollPeriod: time.Duration(1) * time.Second,
284                 RunContainer: func(disp *dispatch.Dispatcher, ctr arvados.Container, status <-chan arvados.Container) {
285                         go func() {
286                                 time.Sleep(1 * time.Second)
287                                 disp.UpdateState(ctr.UUID, dispatch.Running)
288                                 disp.UpdateState(ctr.UUID, dispatch.Complete)
289                         }()
290                         run(disp, ctr, status)
291                         cancel()
292                 },
293         }
294
295         go func() {
296                 for i := 0; i < 80 && !strings.Contains(buf.String(), expected); i++ {
297                         time.Sleep(100 * time.Millisecond)
298                 }
299                 cancel()
300         }()
301
302         err := dispatcher.Run(ctx)
303         c.Assert(err, Equals, context.Canceled)
304
305         c.Check(buf.String(), Matches, `(?ms).*`+expected+`.*`)
306 }
307
308 func (s *MockArvadosServerSuite) TestNoSuchConfigFile(c *C) {
309         var config Config
310         err := readConfig(&config, "/nosuchdir89j7879/8hjwr7ojgyy7")
311         c.Assert(err, NotNil)
312 }
313
314 func (s *MockArvadosServerSuite) TestBadSbatchArgsConfig(c *C) {
315         var config Config
316
317         tmpfile, err := ioutil.TempFile(os.TempDir(), "config")
318         c.Check(err, IsNil)
319         defer os.Remove(tmpfile.Name())
320
321         _, err = tmpfile.Write([]byte(`{"SbatchArguments": "oops this is not a string array"}`))
322         c.Check(err, IsNil)
323
324         err = readConfig(&config, tmpfile.Name())
325         c.Assert(err, NotNil)
326 }
327
328 func (s *MockArvadosServerSuite) TestNoSuchArgInConfigIgnored(c *C) {
329         var config Config
330
331         tmpfile, err := ioutil.TempFile(os.TempDir(), "config")
332         c.Check(err, IsNil)
333         defer os.Remove(tmpfile.Name())
334
335         _, err = tmpfile.Write([]byte(`{"NoSuchArg": "Nobody loves me, not one tiny hunk."}`))
336         c.Check(err, IsNil)
337
338         err = readConfig(&config, tmpfile.Name())
339         c.Assert(err, IsNil)
340         c.Check(0, Equals, len(config.SbatchArguments))
341 }
342
343 func (s *MockArvadosServerSuite) TestReadConfig(c *C) {
344         var config Config
345
346         tmpfile, err := ioutil.TempFile(os.TempDir(), "config")
347         c.Check(err, IsNil)
348         defer os.Remove(tmpfile.Name())
349
350         args := []string{"--arg1=v1", "--arg2", "--arg3=v3"}
351         argsS := `{"SbatchArguments": ["--arg1=v1",  "--arg2", "--arg3=v3"]}`
352         _, err = tmpfile.Write([]byte(argsS))
353         c.Check(err, IsNil)
354
355         err = readConfig(&config, tmpfile.Name())
356         c.Assert(err, IsNil)
357         c.Check(3, Equals, len(config.SbatchArguments))
358         c.Check(args, DeepEquals, config.SbatchArguments)
359 }
360
361 func (s *MockArvadosServerSuite) TestSbatchFuncWithNoConfigArgs(c *C) {
362         testSbatchFuncWithArgs(c, nil)
363 }
364
365 func (s *MockArvadosServerSuite) TestSbatchFuncWithEmptyConfigArgs(c *C) {
366         testSbatchFuncWithArgs(c, []string{})
367 }
368
369 func (s *MockArvadosServerSuite) TestSbatchFuncWithConfigArgs(c *C) {
370         testSbatchFuncWithArgs(c, []string{"--arg1=v1", "--arg2"})
371 }
372
373 func testSbatchFuncWithArgs(c *C, args []string) {
374         theConfig.SbatchArguments = append(theConfig.SbatchArguments, args...)
375
376         container := arvados.Container{UUID: "123", RuntimeConstraints: arvados.RuntimeConstraints{RAM: 250000000, VCPUs: 2}}
377         sbatchCmd := sbatchFunc(container)
378
379         var expected []string
380         expected = append(expected, "sbatch")
381         expected = append(expected, theConfig.SbatchArguments...)
382         expected = append(expected, "--job-name=123", "--mem=239", "--cpus-per-task=2", "--tmp=0")
383
384         c.Check(sbatchCmd.Args, DeepEquals, expected)
385 }
386
387 func (s *MockArvadosServerSuite) TestSbatchPartition(c *C) {
388         theConfig.SbatchArguments = nil
389         container := arvados.Container{UUID: "123", RuntimeConstraints: arvados.RuntimeConstraints{RAM: 250000000, VCPUs: 1}, SchedulingParameters: arvados.SchedulingParameters{Partitions: []string{"blurb", "b2"}}}
390         sbatchCmd := sbatchFunc(container)
391
392         var expected []string
393         expected = append(expected, "sbatch")
394         expected = append(expected, "--job-name=123", "--mem=239", "--cpus-per-task=1", "--tmp=0", "--partition=blurb,b2")
395
396         c.Check(sbatchCmd.Args, DeepEquals, expected)
397 }