21314: Cancel container if it cannot even be loaded.
[arvados.git] / lib / dispatchcloud / container / queue_test.go
1 // Copyright (C) The Arvados Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: AGPL-3.0
4
5 package container
6
7 import (
8         "context"
9         "errors"
10         "os"
11         "path/filepath"
12         "sync"
13         "testing"
14         "time"
15
16         "git.arvados.org/arvados.git/lib/ctrlctx"
17         "git.arvados.org/arvados.git/sdk/go/arvados"
18         "git.arvados.org/arvados.git/sdk/go/arvadostest"
19         "github.com/sirupsen/logrus"
20         check "gopkg.in/check.v1"
21 )
22
23 // Gocheck boilerplate
24 func Test(t *testing.T) {
25         check.TestingT(t)
26 }
27
28 var _ = check.Suite(&IntegrationSuite{})
29
30 func logger() logrus.FieldLogger {
31         logger := logrus.StandardLogger()
32         if os.Getenv("ARVADOS_DEBUG") != "" {
33                 logger.SetLevel(logrus.DebugLevel)
34         }
35         return logger
36 }
37
// IntegrationSuite is the gocheck suite that holds the tests in this
// file. It carries no state of its own; each test builds its own
// client and queue.
type IntegrationSuite struct{}
39
40 func (suite *IntegrationSuite) TearDownTest(c *check.C) {
41         err := arvados.NewClientFromEnv().RequestAndDecode(nil, "POST", "database/reset", nil, nil)
42         c.Check(err, check.IsNil)
43 }
44
45 func (suite *IntegrationSuite) TestGetLockUnlockCancel(c *check.C) {
46         typeChooser := func(ctr *arvados.Container) ([]arvados.InstanceType, error) {
47                 c.Check(ctr.Mounts["/tmp"].Capacity, check.Equals, int64(24000000000))
48                 return []arvados.InstanceType{{Name: "testType"}}, nil
49         }
50
51         client := arvados.NewClientFromEnv()
52         cq := NewQueue(logger(), nil, typeChooser, client)
53
54         err := cq.Update()
55         c.Check(err, check.IsNil)
56
57         ents, threshold := cq.Entries()
58         c.Check(len(ents), check.Not(check.Equals), 0)
59         c.Check(time.Since(threshold) < time.Minute, check.Equals, true)
60         c.Check(time.Since(threshold) > 0, check.Equals, true)
61
62         _, ok := ents[arvadostest.QueuedContainerUUID]
63         c.Check(ok, check.Equals, true)
64
65         var wg sync.WaitGroup
66         for uuid, ent := range ents {
67                 c.Check(ent.Container.UUID, check.Equals, uuid)
68                 c.Check(ent.InstanceTypes, check.HasLen, 1)
69                 c.Check(ent.InstanceTypes[0].Name, check.Equals, "testType")
70                 c.Check(ent.Container.State, check.Equals, arvados.ContainerStateQueued)
71                 c.Check(ent.Container.Priority > 0, check.Equals, true)
72                 // Mounts should be deleted to avoid wasting memory
73                 c.Check(ent.Container.Mounts, check.IsNil)
74
75                 ctr, ok := cq.Get(uuid)
76                 c.Check(ok, check.Equals, true)
77                 c.Check(ctr.UUID, check.Equals, uuid)
78
79                 wg.Add(1)
80                 go func(uuid string) {
81                         defer wg.Done()
82                         err := cq.Unlock(uuid)
83                         c.Check(err, check.NotNil)
84                         c.Check(err, check.ErrorMatches, ".*cannot unlock when Queued.*")
85
86                         err = cq.Lock(uuid)
87                         c.Check(err, check.IsNil)
88                         ctr, ok := cq.Get(uuid)
89                         c.Check(ok, check.Equals, true)
90                         c.Check(ctr.State, check.Equals, arvados.ContainerStateLocked)
91                         err = cq.Lock(uuid)
92                         c.Check(err, check.NotNil)
93
94                         err = cq.Unlock(uuid)
95                         c.Check(err, check.IsNil)
96                         ctr, ok = cq.Get(uuid)
97                         c.Check(ok, check.Equals, true)
98                         c.Check(ctr.State, check.Equals, arvados.ContainerStateQueued)
99                         err = cq.Unlock(uuid)
100                         c.Check(err, check.NotNil)
101
102                         err = cq.Cancel(uuid)
103                         c.Check(err, check.IsNil)
104                         ctr, ok = cq.Get(uuid)
105                         c.Check(ok, check.Equals, true)
106                         c.Check(ctr.State, check.Equals, arvados.ContainerStateCancelled)
107                         err = cq.Lock(uuid)
108                         c.Check(err, check.NotNil)
109                 }(uuid)
110         }
111         wg.Wait()
112 }
113
114 func (suite *IntegrationSuite) TestCancel_NoInstanceType(c *check.C) {
115         errorTypeChooser := func(ctr *arvados.Container) ([]arvados.InstanceType, error) {
116                 // Make sure the relevant container fields are
117                 // actually populated.
118                 c.Check(ctr.ContainerImage, check.Equals, "test")
119                 c.Check(ctr.RuntimeConstraints.VCPUs, check.Equals, 4)
120                 c.Check(ctr.RuntimeConstraints.RAM, check.Equals, int64(12000000000))
121                 c.Check(ctr.Mounts["/tmp"].Capacity, check.Equals, int64(24000000000))
122                 c.Check(ctr.Mounts["/var/spool/cwl"].Capacity, check.Equals, int64(24000000000))
123                 return nil, errors.New("no suitable instance type")
124         }
125
126         client := arvados.NewClientFromEnv()
127         cq := NewQueue(logger(), nil, errorTypeChooser, client)
128
129         go failIfContainerAppearsInQueue(c, cq, arvadostest.QueuedContainerUUID)
130
131         var ctr arvados.Container
132         err := client.RequestAndDecode(&ctr, "GET", "arvados/v1/containers/"+arvadostest.QueuedContainerUUID, nil, nil)
133         c.Check(err, check.IsNil)
134         c.Check(ctr.State, check.Equals, arvados.ContainerStateQueued)
135
136         go cq.Update()
137
138         // Wait for the cancel operation to take effect. Container
139         // will have state=Cancelled or just disappear from the queue.
140         suite.waitfor(c, time.Second, func() bool {
141                 err := client.RequestAndDecode(&ctr, "GET", "arvados/v1/containers/"+arvadostest.QueuedContainerUUID, nil, nil)
142                 return err == nil && ctr.State == arvados.ContainerStateCancelled
143         })
144         c.Check(ctr.RuntimeStatus["error"], check.Equals, `no suitable instance type`)
145 }
146
147 func (suite *IntegrationSuite) TestCancel_InvalidMountsField(c *check.C) {
148         cfg, err := arvados.GetConfig(filepath.Join(os.Getenv("WORKSPACE"), "tmp", "arvados.yml"))
149         c.Assert(err, check.IsNil)
150         cc, err := cfg.GetCluster("zzzzz")
151         c.Assert(err, check.IsNil)
152         db, err := (&ctrlctx.DBConnector{PostgreSQL: cc.PostgreSQL}).GetDB(context.Background())
153         c.Assert(err, check.IsNil)
154         _, err = db.Exec(`update containers set mounts=$1 where uuid=$2`, `{"stdin":["bork"]}`, arvadostest.QueuedContainerUUID)
155         c.Assert(err, check.IsNil)
156         // Note this setup gets cleaned up by the database reset in
157         // TearDownTest.
158
159         typeChooser := func(ctr *arvados.Container) ([]arvados.InstanceType, error) {
160                 return []arvados.InstanceType{}, nil
161         }
162         client := arvados.NewClientFromEnv()
163         cq := NewQueue(logger(), nil, typeChooser, client)
164
165         go failIfContainerAppearsInQueue(c, cq, arvadostest.QueuedContainerUUID)
166
167         var ctr arvados.Container
168         err = client.RequestAndDecode(&ctr, "GET", "arvados/v1/containers/"+arvadostest.QueuedContainerUUID, nil, arvados.GetOptions{Select: []string{"state"}})
169         c.Check(err, check.IsNil)
170         c.Check(ctr.State, check.Equals, arvados.ContainerStateQueued)
171
172         go cq.Update()
173
174         // Wait for the cancel operation to take effect. Container
175         // will have state=Cancelled or just disappear from the queue.
176         suite.waitfor(c, time.Second, func() bool {
177                 err := client.RequestAndDecode(&ctr, "GET", "arvados/v1/containers/"+arvadostest.QueuedContainerUUID, nil, arvados.GetOptions{Select: []string{"state", "runtime_status"}})
178                 return err == nil && ctr.State == arvados.ContainerStateCancelled
179         })
180         c.Logf("runtime_status: %v", ctr.RuntimeStatus)
181         c.Check(ctr.RuntimeStatus["error"], check.Matches, `error getting mounts from container record: json: cannot unmarshal .*`)
182 }
183
184 func failIfContainerAppearsInQueue(c *check.C, cq *Queue, uuid string) {
185         ch := cq.Subscribe()
186         defer cq.Unsubscribe(ch)
187         for range ch {
188                 // Container should never be added to
189                 // queue. Note that polling the queue this way
190                 // doesn't guarantee a bug (container being
191                 // incorrectly added to the queue) will cause
192                 // a test failure.
193                 _, ok := cq.Get(uuid)
194                 if !c.Check(ok, check.Equals, false) {
195                         // Don't spam the log with more failures
196                         break
197                 }
198         }
199 }
200
201 func (suite *IntegrationSuite) waitfor(c *check.C, timeout time.Duration, fn func() bool) {
202         defer func() {
203                 c.Check(fn(), check.Equals, true)
204         }()
205         deadline := time.Now().Add(timeout)
206         for !fn() && time.Now().Before(deadline) {
207                 time.Sleep(timeout / 1000)
208         }
209 }