18287: Remove superfluous {ExternalURL: "-"} values.
[arvados.git] / lib / lsf / dispatch_test.go
1 // Copyright (C) The Arvados Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: AGPL-3.0
4
5 package lsf
6
7 import (
8         "context"
9         "fmt"
10         "math/rand"
11         "os/exec"
12         "strconv"
13         "sync"
14         "testing"
15         "time"
16
17         "git.arvados.org/arvados.git/lib/config"
18         "git.arvados.org/arvados.git/sdk/go/arvados"
19         "git.arvados.org/arvados.git/sdk/go/arvadostest"
20         "git.arvados.org/arvados.git/sdk/go/ctxlog"
21         "github.com/prometheus/client_golang/prometheus"
22         "gopkg.in/check.v1"
23 )
24
25 func Test(t *testing.T) {
26         check.TestingT(t)
27 }
28
29 var _ = check.Suite(&suite{})
30
31 type suite struct {
32         disp *dispatcher
33 }
34
35 func (s *suite) TearDownTest(c *check.C) {
36         arvados.NewClientFromEnv().RequestAndDecode(nil, "POST", "database/reset", nil, nil)
37 }
38
39 func (s *suite) SetUpTest(c *check.C) {
40         cfg, err := config.NewLoader(nil, ctxlog.TestLogger(c)).Load()
41         c.Assert(err, check.IsNil)
42         cluster, err := cfg.GetCluster("")
43         c.Assert(err, check.IsNil)
44         cluster.Containers.CloudVMs.PollInterval = arvados.Duration(time.Second)
45         s.disp = newHandler(context.Background(), cluster, arvadostest.Dispatch1Token, prometheus.NewRegistry()).(*dispatcher)
46         s.disp.lsfcli.stubCommand = func(string, ...string) *exec.Cmd {
47                 return exec.Command("bash", "-c", "echo >&2 unimplemented stub; false")
48         }
49 }
50
// lsfstub configures the fake LSF command runner built by
// stubCommand.
type lsfstub struct {
	// sudoUser, if non-empty, is the user name expected in a
	// leading "sudo -E -u <user>" wrapper, which the stub strips
	// before interpreting the command.
	sudoUser string
	// errorRate is compared against rand.Float64() on every
	// invocation; when the random value is lower, the stub returns
	// a command that fails.
	errorRate float64
}
55
56 func (stub lsfstub) stubCommand(s *suite, c *check.C) func(prog string, args ...string) *exec.Cmd {
57         mtx := sync.Mutex{}
58         nextjobid := 100
59         fakejobq := map[int]string{}
60         return func(prog string, args ...string) *exec.Cmd {
61                 c.Logf("stubCommand: %q %q", prog, args)
62                 if rand.Float64() < stub.errorRate {
63                         return exec.Command("bash", "-c", "echo >&2 'stub random failure' && false")
64                 }
65                 if stub.sudoUser != "" && len(args) > 3 &&
66                         prog == "sudo" &&
67                         args[0] == "-E" &&
68                         args[1] == "-u" &&
69                         args[2] == stub.sudoUser {
70                         prog, args = args[3], args[4:]
71                 }
72                 switch prog {
73                 case "bsub":
74                         defaultArgs := s.disp.Cluster.Containers.LSF.BsubArgumentsList
75                         c.Assert(len(args) > len(defaultArgs), check.Equals, true)
76                         c.Check(args[:len(defaultArgs)], check.DeepEquals, defaultArgs)
77                         args = args[len(defaultArgs):]
78
79                         c.Check(args[0], check.Equals, "-J")
80                         switch args[1] {
81                         case arvadostest.LockedContainerUUID:
82                                 c.Check(args, check.DeepEquals, []string{
83                                         "-J", arvadostest.LockedContainerUUID,
84                                         "-n", "4",
85                                         "-D", "11701MB",
86                                         "-R", "rusage[mem=11701MB:tmp=0MB] span[hosts=1]"})
87                                 mtx.Lock()
88                                 fakejobq[nextjobid] = args[1]
89                                 nextjobid++
90                                 mtx.Unlock()
91                         case arvadostest.QueuedContainerUUID:
92                                 c.Check(args, check.DeepEquals, []string{
93                                         "-J", arvadostest.QueuedContainerUUID,
94                                         "-n", "4",
95                                         "-D", "11701MB",
96                                         "-R", "rusage[mem=11701MB:tmp=45777MB] span[hosts=1]"})
97                                 mtx.Lock()
98                                 fakejobq[nextjobid] = args[1]
99                                 nextjobid++
100                                 mtx.Unlock()
101                         default:
102                                 c.Errorf("unexpected uuid passed to bsub: args %q", args)
103                                 return exec.Command("false")
104                         }
105                         return exec.Command("echo", "submitted job")
106                 case "bjobs":
107                         c.Check(args, check.DeepEquals, []string{"-u", "all", "-noheader", "-o", "jobid stat job_name:30"})
108                         out := ""
109                         for jobid, uuid := range fakejobq {
110                                 out += fmt.Sprintf(`%d %s %s\n`, jobid, "RUN", uuid)
111                         }
112                         c.Logf("bjobs out: %q", out)
113                         return exec.Command("printf", out)
114                 case "bkill":
115                         killid, _ := strconv.Atoi(args[0])
116                         if uuid, ok := fakejobq[killid]; !ok {
117                                 return exec.Command("bash", "-c", fmt.Sprintf("printf >&2 'Job <%d>: No matching job found\n'", killid))
118                         } else if uuid == "" {
119                                 return exec.Command("bash", "-c", fmt.Sprintf("printf >&2 'Job <%d>: Job has already finished\n'", killid))
120                         } else {
121                                 go func() {
122                                         time.Sleep(time.Millisecond)
123                                         mtx.Lock()
124                                         delete(fakejobq, killid)
125                                         mtx.Unlock()
126                                 }()
127                                 return exec.Command("bash", "-c", fmt.Sprintf("printf 'Job <%d> is being terminated\n'", killid))
128                         }
129                 default:
130                         return exec.Command("bash", "-c", fmt.Sprintf("echo >&2 'stub: command not found: %+q'", prog))
131                 }
132         }
133 }
134
135 func (s *suite) TestSubmit(c *check.C) {
136         s.disp.lsfcli.stubCommand = lsfstub{
137                 errorRate: 0.1,
138                 sudoUser:  s.disp.Cluster.Containers.LSF.BsubSudoUser,
139         }.stubCommand(s, c)
140         s.disp.Start()
141         deadline := time.Now().Add(20 * time.Second)
142         for range time.NewTicker(time.Second).C {
143                 if time.Now().After(deadline) {
144                         c.Error("timed out")
145                         break
146                 }
147                 // "queuedcontainer" should be running
148                 if _, ok := s.disp.lsfqueue.JobID(arvadostest.QueuedContainerUUID); !ok {
149                         continue
150                 }
151                 // "lockedcontainer" should be cancelled because it
152                 // has priority 0 (no matching container requests)
153                 if _, ok := s.disp.lsfqueue.JobID(arvadostest.LockedContainerUUID); ok {
154                         continue
155                 }
156                 var ctr arvados.Container
157                 if err := s.disp.arvDispatcher.Arv.Get("containers", arvadostest.LockedContainerUUID, nil, &ctr); err != nil {
158                         c.Logf("error getting container state for %s: %s", arvadostest.LockedContainerUUID, err)
159                         continue
160                 }
161                 if ctr.State != arvados.ContainerStateQueued {
162                         c.Logf("LockedContainer is not in the LSF queue but its arvados record has not been updated to state==Queued (state is %q)", ctr.State)
163                         continue
164                 }
165                 c.Log("reached desired state")
166                 break
167         }
168 }