16723: Don't lock after requeue until old crunch-run exits.
[arvados.git] / lib / dispatchcloud / container / queue_test.go
1 // Copyright (C) The Arvados Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: AGPL-3.0
4
5 package container
6
7 import (
8         "errors"
9         "os"
10         "sync"
11         "testing"
12         "time"
13
14         "git.arvados.org/arvados.git/sdk/go/arvados"
15         "git.arvados.org/arvados.git/sdk/go/arvadostest"
16         "github.com/sirupsen/logrus"
17         check "gopkg.in/check.v1"
18 )
19
20 // Gocheck boilerplate
21 func Test(t *testing.T) {
22         check.TestingT(t)
23 }
24
25 var _ = check.Suite(&IntegrationSuite{})
26
27 func logger() logrus.FieldLogger {
28         logger := logrus.StandardLogger()
29         if os.Getenv("ARVADOS_DEBUG") != "" {
30                 logger.SetLevel(logrus.DebugLevel)
31         }
32         return logger
33 }
34
// IntegrationSuite is the gocheck suite holding the queue tests in
// this file. It has no fields.
type IntegrationSuite struct{}
36
37 func (suite *IntegrationSuite) TearDownTest(c *check.C) {
38         err := arvados.NewClientFromEnv().RequestAndDecode(nil, "POST", "database/reset", nil, nil)
39         c.Check(err, check.IsNil)
40 }
41
42 func (suite *IntegrationSuite) TestGetLockUnlockCancel(c *check.C) {
43         typeChooser := func(ctr *arvados.Container) (arvados.InstanceType, error) {
44                 return arvados.InstanceType{Name: "testType"}, nil
45         }
46
47         client := arvados.NewClientFromEnv()
48         cq := NewQueue(logger(), nil, typeChooser, client)
49
50         err := cq.Update()
51         c.Check(err, check.IsNil)
52
53         ents, threshold := cq.Entries()
54         c.Check(len(ents), check.Not(check.Equals), 0)
55         c.Check(time.Since(threshold) < time.Minute, check.Equals, true)
56         c.Check(time.Since(threshold) > 0, check.Equals, true)
57
58         _, ok := ents[arvadostest.QueuedContainerUUID]
59         c.Check(ok, check.Equals, true)
60
61         var wg sync.WaitGroup
62         for uuid, ent := range ents {
63                 c.Check(ent.Container.UUID, check.Equals, uuid)
64                 c.Check(ent.InstanceType.Name, check.Equals, "testType")
65                 c.Check(ent.Container.State, check.Equals, arvados.ContainerStateQueued)
66                 c.Check(ent.Container.Priority > 0, check.Equals, true)
67
68                 ctr, ok := cq.Get(uuid)
69                 c.Check(ok, check.Equals, true)
70                 c.Check(ctr.UUID, check.Equals, uuid)
71
72                 wg.Add(1)
73                 go func(uuid string) {
74                         defer wg.Done()
75                         err := cq.Unlock(uuid)
76                         c.Check(err, check.NotNil)
77                         c.Check(err, check.ErrorMatches, ".*cannot unlock when Queued.*")
78
79                         err = cq.Lock(uuid)
80                         c.Check(err, check.IsNil)
81                         ctr, ok := cq.Get(uuid)
82                         c.Check(ok, check.Equals, true)
83                         c.Check(ctr.State, check.Equals, arvados.ContainerStateLocked)
84                         err = cq.Lock(uuid)
85                         c.Check(err, check.NotNil)
86
87                         err = cq.Unlock(uuid)
88                         c.Check(err, check.IsNil)
89                         ctr, ok = cq.Get(uuid)
90                         c.Check(ok, check.Equals, true)
91                         c.Check(ctr.State, check.Equals, arvados.ContainerStateQueued)
92                         err = cq.Unlock(uuid)
93                         c.Check(err, check.NotNil)
94
95                         err = cq.Cancel(uuid)
96                         c.Check(err, check.IsNil)
97                         ctr, ok = cq.Get(uuid)
98                         c.Check(ok, check.Equals, true)
99                         c.Check(ctr.State, check.Equals, arvados.ContainerStateCancelled)
100                         err = cq.Lock(uuid)
101                         c.Check(err, check.NotNil)
102                 }(uuid)
103         }
104         wg.Wait()
105 }
106
107 func (suite *IntegrationSuite) TestCancelIfNoInstanceType(c *check.C) {
108         errorTypeChooser := func(ctr *arvados.Container) (arvados.InstanceType, error) {
109                 // Make sure the relevant container fields are
110                 // actually populated.
111                 c.Check(ctr.ContainerImage, check.Equals, "test")
112                 c.Check(ctr.RuntimeConstraints.VCPUs, check.Equals, 4)
113                 c.Check(ctr.RuntimeConstraints.RAM, check.Equals, int64(12000000000))
114                 c.Check(ctr.Mounts["/tmp"].Capacity, check.Equals, int64(24000000000))
115                 c.Check(ctr.Mounts["/var/spool/cwl"].Capacity, check.Equals, int64(24000000000))
116                 return arvados.InstanceType{}, errors.New("no suitable instance type")
117         }
118
119         client := arvados.NewClientFromEnv()
120         cq := NewQueue(logger(), nil, errorTypeChooser, client)
121
122         ch := cq.Subscribe()
123         go func() {
124                 defer cq.Unsubscribe(ch)
125                 for range ch {
126                         // Container should never be added to
127                         // queue. Note that polling the queue this way
128                         // doesn't guarantee a bug (container being
129                         // incorrectly added to the queue) will cause
130                         // a test failure.
131                         _, ok := cq.Get(arvadostest.QueuedContainerUUID)
132                         if !c.Check(ok, check.Equals, false) {
133                                 // Don't spam the log with more failures
134                                 break
135                         }
136                 }
137         }()
138
139         var ctr arvados.Container
140         err := client.RequestAndDecode(&ctr, "GET", "arvados/v1/containers/"+arvadostest.QueuedContainerUUID, nil, nil)
141         c.Check(err, check.IsNil)
142         c.Check(ctr.State, check.Equals, arvados.ContainerStateQueued)
143
144         go cq.Update()
145
146         // Wait for the cancel operation to take effect. Container
147         // will have state=Cancelled or just disappear from the queue.
148         suite.waitfor(c, time.Second, func() bool {
149                 err := client.RequestAndDecode(&ctr, "GET", "arvados/v1/containers/"+arvadostest.QueuedContainerUUID, nil, nil)
150                 return err == nil && ctr.State == arvados.ContainerStateCancelled
151         })
152         c.Check(ctr.RuntimeStatus["error"], check.Equals, `no suitable instance type`)
153 }
154
155 func (suite *IntegrationSuite) waitfor(c *check.C, timeout time.Duration, fn func() bool) {
156         defer func() {
157                 c.Check(fn(), check.Equals, true)
158         }()
159         deadline := time.Now().Add(timeout)
160         for !fn() && time.Now().Before(deadline) {
161                 time.Sleep(timeout / 1000)
162         }
163 }