19146: Remove unneeded special case checks, explain the needed one.
[arvados.git] / lib / lsf / dispatch_test.go
index c044df09f65d42f5f4aad7903b60e27160d5ec98..a99983f34a8ae4163f9a91ba59c43ab9e57c3e00 100644 (file)
@@ -30,8 +30,9 @@ func Test(t *testing.T) {
 var _ = check.Suite(&suite{})
 
 type suite struct {
-       disp     *dispatcher
-       crTooBig arvados.ContainerRequest
+       disp          *dispatcher
+       crTooBig      arvados.ContainerRequest
+       crCUDARequest arvados.ContainerRequest
 }
 
 func (s *suite) TearDownTest(c *check.C) {
@@ -43,7 +44,8 @@ func (s *suite) SetUpTest(c *check.C) {
        c.Assert(err, check.IsNil)
        cluster, err := cfg.GetCluster("")
        c.Assert(err, check.IsNil)
-       cluster.Containers.CloudVMs.PollInterval = arvados.Duration(time.Second)
+       cluster.Containers.CloudVMs.PollInterval = arvados.Duration(time.Second / 4)
+       cluster.Containers.MinRetryPeriod = arvados.Duration(time.Second / 4)
        s.disp = newHandler(context.Background(), cluster, arvadostest.Dispatch1Token, prometheus.NewRegistry()).(*dispatcher)
        s.disp.lsfcli.stubCommand = func(string, ...string) *exec.Cmd {
                return exec.Command("bash", "-c", "echo >&2 unimplemented stub; false")
@@ -64,6 +66,29 @@ func (s *suite) SetUpTest(c *check.C) {
                },
        })
        c.Assert(err, check.IsNil)
+
+       err = arvados.NewClientFromEnv().RequestAndDecode(&s.crCUDARequest, "POST", "arvados/v1/container_requests", nil, map[string]interface{}{
+               "container_request": map[string]interface{}{
+                       "runtime_constraints": arvados.RuntimeConstraints{
+                               RAM:   16000000,
+                               VCPUs: 1,
+                               CUDA: arvados.CUDARuntimeConstraints{
+                                       DeviceCount:        1,
+                                       DriverVersion:      "11.0",
+                                       HardwareCapability: "8.0",
+                               },
+                       },
+                       "container_image":     arvadostest.DockerImage112PDH,
+                       "command":             []string{"sleep", "1"},
+                       "mounts":              map[string]arvados.Mount{"/mnt/out": {Kind: "tmp", Capacity: 1000}},
+                       "output_path":         "/mnt/out",
+                       "state":               arvados.ContainerRequestStateCommitted,
+                       "priority":            1,
+                       "container_count_max": 1,
+               },
+       })
+       c.Assert(err, check.IsNil)
+
 }
 
 type lsfstub struct {
@@ -90,7 +115,11 @@ func (stub lsfstub) stubCommand(s *suite, c *check.C) func(prog string, args ...
                switch prog {
                case "bsub":
                        defaultArgs := s.disp.Cluster.Containers.LSF.BsubArgumentsList
-                       c.Assert(len(args), check.Equals, len(defaultArgs))
+                       if args[5] == s.crCUDARequest.ContainerUUID {
+                               c.Assert(len(args), check.Equals, len(defaultArgs)+len(s.disp.Cluster.Containers.LSF.BsubCUDAArguments))
+                       } else {
+                               c.Assert(len(args), check.Equals, len(defaultArgs))
+                       }
                        // %%J must have been rewritten to %J
                        c.Check(args[1], check.Equals, "/tmp/crunch-run.%J.out")
                        args = args[4:]
@@ -134,6 +163,20 @@ func (stub lsfstub) stubCommand(s *suite, c *check.C) func(prog string, args ...
                                fakejobq[nextjobid] = args[1]
                                nextjobid++
                                mtx.Unlock()
+                       case s.crCUDARequest.ContainerUUID:
+                               c.Check(args, check.DeepEquals, []string{
+                                       "-J", s.crCUDARequest.ContainerUUID,
+                                       "-n", "1",
+                                       "-D", "528MB",
+                                       "-R", "rusage[mem=528MB:tmp=256MB] span[hosts=1]",
+                                       "-R", "select[mem>=528MB]",
+                                       "-R", "select[tmp>=256MB]",
+                                       "-R", "select[ncpus>=1]",
+                                       "-gpu", "num=1"})
+                               mtx.Lock()
+                               fakejobq[nextjobid] = args[1]
+                               nextjobid++
+                               mtx.Unlock()
                        default:
                                c.Errorf("unexpected uuid passed to bsub: args %q", args)
                                return exec.Command("false")
@@ -201,16 +244,19 @@ func (s *suite) TestSubmit(c *check.C) {
                }
                // "queuedcontainer" should be running
                if _, ok := s.disp.lsfqueue.Lookup(arvadostest.QueuedContainerUUID); !ok {
+                       c.Log("Lookup(queuedcontainer) == false")
                        continue
                }
                // "lockedcontainer" should be cancelled because it
                // has priority 0 (no matching container requests)
-               if _, ok := s.disp.lsfqueue.Lookup(arvadostest.LockedContainerUUID); ok {
+               if ent, ok := s.disp.lsfqueue.Lookup(arvadostest.LockedContainerUUID); ok {
+                       c.Logf("Lookup(lockedcontainer) == true, ent = %#v", ent)
                        continue
                }
                // "crTooBig" should be cancelled because lsf stub
                // reports there is no suitable instance type
-               if _, ok := s.disp.lsfqueue.Lookup(s.crTooBig.ContainerUUID); ok {
+               if ent, ok := s.disp.lsfqueue.Lookup(s.crTooBig.ContainerUUID); ok {
+                       c.Logf("Lookup(crTooBig) == true, ent = %#v", ent)
                        continue
                }
                var ctr arvados.Container