Merge branch '20457-logs-and-mem-usage'
[arvados.git] / lib / dispatchcloud / container / queue_test.go
1 // Copyright (C) The Arvados Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: AGPL-3.0
4
5 package container
6
7 import (
8         "errors"
9         "os"
10         "sync"
11         "testing"
12         "time"
13
14         "git.arvados.org/arvados.git/sdk/go/arvados"
15         "git.arvados.org/arvados.git/sdk/go/arvadostest"
16         "github.com/sirupsen/logrus"
17         check "gopkg.in/check.v1"
18 )
19
20 // Gocheck boilerplate
21 func Test(t *testing.T) {
22         check.TestingT(t)
23 }
24
25 var _ = check.Suite(&IntegrationSuite{})
26
27 func logger() logrus.FieldLogger {
28         logger := logrus.StandardLogger()
29         if os.Getenv("ARVADOS_DEBUG") != "" {
30                 logger.SetLevel(logrus.DebugLevel)
31         }
32         return logger
33 }
34
35 type IntegrationSuite struct{}
36
37 func (suite *IntegrationSuite) TearDownTest(c *check.C) {
38         err := arvados.NewClientFromEnv().RequestAndDecode(nil, "POST", "database/reset", nil, nil)
39         c.Check(err, check.IsNil)
40 }
41
42 func (suite *IntegrationSuite) TestGetLockUnlockCancel(c *check.C) {
43         typeChooser := func(ctr *arvados.Container) (arvados.InstanceType, error) {
44                 c.Check(ctr.Mounts["/tmp"].Capacity, check.Equals, int64(24000000000))
45                 return arvados.InstanceType{Name: "testType"}, nil
46         }
47
48         client := arvados.NewClientFromEnv()
49         cq := NewQueue(logger(), nil, typeChooser, client)
50
51         err := cq.Update()
52         c.Check(err, check.IsNil)
53
54         ents, threshold := cq.Entries()
55         c.Check(len(ents), check.Not(check.Equals), 0)
56         c.Check(time.Since(threshold) < time.Minute, check.Equals, true)
57         c.Check(time.Since(threshold) > 0, check.Equals, true)
58
59         _, ok := ents[arvadostest.QueuedContainerUUID]
60         c.Check(ok, check.Equals, true)
61
62         var wg sync.WaitGroup
63         for uuid, ent := range ents {
64                 c.Check(ent.Container.UUID, check.Equals, uuid)
65                 c.Check(ent.InstanceType.Name, check.Equals, "testType")
66                 c.Check(ent.Container.State, check.Equals, arvados.ContainerStateQueued)
67                 c.Check(ent.Container.Priority > 0, check.Equals, true)
68                 // Mounts should be deleted to avoid wasting memory
69                 c.Check(ent.Container.Mounts, check.IsNil)
70
71                 ctr, ok := cq.Get(uuid)
72                 c.Check(ok, check.Equals, true)
73                 c.Check(ctr.UUID, check.Equals, uuid)
74
75                 wg.Add(1)
76                 go func(uuid string) {
77                         defer wg.Done()
78                         err := cq.Unlock(uuid)
79                         c.Check(err, check.NotNil)
80                         c.Check(err, check.ErrorMatches, ".*cannot unlock when Queued.*")
81
82                         err = cq.Lock(uuid)
83                         c.Check(err, check.IsNil)
84                         ctr, ok := cq.Get(uuid)
85                         c.Check(ok, check.Equals, true)
86                         c.Check(ctr.State, check.Equals, arvados.ContainerStateLocked)
87                         err = cq.Lock(uuid)
88                         c.Check(err, check.NotNil)
89
90                         err = cq.Unlock(uuid)
91                         c.Check(err, check.IsNil)
92                         ctr, ok = cq.Get(uuid)
93                         c.Check(ok, check.Equals, true)
94                         c.Check(ctr.State, check.Equals, arvados.ContainerStateQueued)
95                         err = cq.Unlock(uuid)
96                         c.Check(err, check.NotNil)
97
98                         err = cq.Cancel(uuid)
99                         c.Check(err, check.IsNil)
100                         ctr, ok = cq.Get(uuid)
101                         c.Check(ok, check.Equals, true)
102                         c.Check(ctr.State, check.Equals, arvados.ContainerStateCancelled)
103                         err = cq.Lock(uuid)
104                         c.Check(err, check.NotNil)
105                 }(uuid)
106         }
107         wg.Wait()
108 }
109
110 func (suite *IntegrationSuite) TestCancelIfNoInstanceType(c *check.C) {
111         errorTypeChooser := func(ctr *arvados.Container) (arvados.InstanceType, error) {
112                 // Make sure the relevant container fields are
113                 // actually populated.
114                 c.Check(ctr.ContainerImage, check.Equals, "test")
115                 c.Check(ctr.RuntimeConstraints.VCPUs, check.Equals, 4)
116                 c.Check(ctr.RuntimeConstraints.RAM, check.Equals, int64(12000000000))
117                 c.Check(ctr.Mounts["/tmp"].Capacity, check.Equals, int64(24000000000))
118                 c.Check(ctr.Mounts["/var/spool/cwl"].Capacity, check.Equals, int64(24000000000))
119                 return arvados.InstanceType{}, errors.New("no suitable instance type")
120         }
121
122         client := arvados.NewClientFromEnv()
123         cq := NewQueue(logger(), nil, errorTypeChooser, client)
124
125         ch := cq.Subscribe()
126         go func() {
127                 defer cq.Unsubscribe(ch)
128                 for range ch {
129                         // Container should never be added to
130                         // queue. Note that polling the queue this way
131                         // doesn't guarantee a bug (container being
132                         // incorrectly added to the queue) will cause
133                         // a test failure.
134                         _, ok := cq.Get(arvadostest.QueuedContainerUUID)
135                         if !c.Check(ok, check.Equals, false) {
136                                 // Don't spam the log with more failures
137                                 break
138                         }
139                 }
140         }()
141
142         var ctr arvados.Container
143         err := client.RequestAndDecode(&ctr, "GET", "arvados/v1/containers/"+arvadostest.QueuedContainerUUID, nil, nil)
144         c.Check(err, check.IsNil)
145         c.Check(ctr.State, check.Equals, arvados.ContainerStateQueued)
146
147         go cq.Update()
148
149         // Wait for the cancel operation to take effect. Container
150         // will have state=Cancelled or just disappear from the queue.
151         suite.waitfor(c, time.Second, func() bool {
152                 err := client.RequestAndDecode(&ctr, "GET", "arvados/v1/containers/"+arvadostest.QueuedContainerUUID, nil, nil)
153                 return err == nil && ctr.State == arvados.ContainerStateCancelled
154         })
155         c.Check(ctr.RuntimeStatus["error"], check.Equals, `no suitable instance type`)
156 }
157
158 func (suite *IntegrationSuite) waitfor(c *check.C, timeout time.Duration, fn func() bool) {
159         defer func() {
160                 c.Check(fn(), check.Equals, true)
161         }()
162         deadline := time.Now().Add(timeout)
163         for !fn() && time.Now().Before(deadline) {
164                 time.Sleep(timeout / 1000)
165         }
166 }