21811: Merge branch 'main' into 21811-side-favorites-test
[arvados.git] / lib / crunchrun / crunchrun_test.go
1 // Copyright (C) The Arvados Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: AGPL-3.0
4
5 package crunchrun
6
7 import (
8         "bytes"
9         "context"
10         "crypto/md5"
11         "encoding/json"
12         "errors"
13         "fmt"
14         "io"
15         "io/fs"
16         "io/ioutil"
17         "math/rand"
18         "net/http"
19         "net/http/httptest"
20         "net/http/httputil"
21         "net/url"
22         "os"
23         "os/exec"
24         "path"
25         "regexp"
26         "runtime/pprof"
27         "strings"
28         "sync"
29         "sync/atomic"
30         "syscall"
31         "testing"
32         "time"
33
34         "git.arvados.org/arvados.git/lib/cloud"
35         "git.arvados.org/arvados.git/lib/cmd"
36         "git.arvados.org/arvados.git/sdk/go/arvados"
37         "git.arvados.org/arvados.git/sdk/go/arvadosclient"
38         "git.arvados.org/arvados.git/sdk/go/arvadostest"
39
40         . "gopkg.in/check.v1"
41 )
42
43 // Gocheck boilerplate
44 func TestCrunchExec(t *testing.T) {
45         TestingT(t)
46 }
47
48 const logLineStart = `(?m)(.*\n)*\d{4}-\d\d-\d\dT\d\d:\d\d:\d\d\.\d+Z `
49
50 var _ = Suite(&TestSuite{})
51
52 type TestSuite struct {
53         client                   *arvados.Client
54         api                      *ArvTestClient
55         runner                   *ContainerRunner
56         executor                 *stubExecutor
57         keepmount                string
58         keepmountTmp             []string
59         testDispatcherKeepClient KeepTestClient
60         testContainerKeepClient  KeepTestClient
61         debian12MemoryCurrent    int64
62         debian12SwapCurrent      int64
63 }
64
65 func (s *TestSuite) SetUpSuite(c *C) {
66         buf, err := os.ReadFile("../crunchstat/testdata/debian12/sys/fs/cgroup/user.slice/user-1000.slice/session-4.scope/memory.current")
67         c.Assert(err, IsNil)
68         _, err = fmt.Sscanf(string(buf), "%d", &s.debian12MemoryCurrent)
69         c.Assert(err, IsNil)
70
71         buf, err = os.ReadFile("../crunchstat/testdata/debian12/sys/fs/cgroup/user.slice/user-1000.slice/session-4.scope/memory.swap.current")
72         c.Assert(err, IsNil)
73         _, err = fmt.Sscanf(string(buf), "%d", &s.debian12SwapCurrent)
74         c.Assert(err, IsNil)
75 }
76
77 func (s *TestSuite) SetUpTest(c *C) {
78         s.client = arvados.NewClientFromEnv()
79         s.executor = &stubExecutor{}
80         var err error
81         s.api = &ArvTestClient{}
82         s.runner, err = NewContainerRunner(s.client, s.api, &s.testDispatcherKeepClient, "zzzzz-zzzzz-zzzzzzzzzzzzzzz")
83         c.Assert(err, IsNil)
84         s.runner.executor = s.executor
85         s.runner.MkArvClient = func(token string) (IArvadosClient, IKeepClient, *arvados.Client, error) {
86                 return s.api, &s.testContainerKeepClient, s.client, nil
87         }
88         s.runner.RunArvMount = func(cmd []string, tok string) (*exec.Cmd, error) {
89                 s.runner.ArvMountPoint = s.keepmount
90                 for i, opt := range cmd {
91                         if opt == "--mount-tmp" {
92                                 err := os.Mkdir(s.keepmount+"/"+cmd[i+1], 0700)
93                                 if err != nil {
94                                         return nil, err
95                                 }
96                                 s.keepmountTmp = append(s.keepmountTmp, cmd[i+1])
97                         }
98                 }
99                 return nil, nil
100         }
101         s.keepmount = c.MkDir()
102         err = os.Mkdir(s.keepmount+"/by_id", 0755)
103         s.keepmountTmp = nil
104         c.Assert(err, IsNil)
105         err = os.Mkdir(s.keepmount+"/by_id/"+arvadostest.DockerImage112PDH, 0755)
106         c.Assert(err, IsNil)
107         err = ioutil.WriteFile(s.keepmount+"/by_id/"+arvadostest.DockerImage112PDH+"/"+arvadostest.DockerImage112Filename, []byte("#notarealtarball"), 0644)
108         err = os.Mkdir(s.keepmount+"/by_id/"+fakeInputCollectionPDH, 0755)
109         c.Assert(err, IsNil)
110         err = ioutil.WriteFile(s.keepmount+"/by_id/"+fakeInputCollectionPDH+"/input.json", []byte(`{"input":true}`), 0644)
111         c.Assert(err, IsNil)
112         s.runner.ArvMountPoint = s.keepmount
113         os.Setenv("InstanceType", `{"ProviderType":"a1.2xlarge","Price":1.2}`)
114 }
115
116 type ArvTestClient struct {
117         Total   int64
118         Calls   int
119         Content []arvadosclient.Dict
120         arvados.Container
121         secretMounts []byte
122         sync.Mutex
123         WasSetRunning bool
124         callraw       bool
125 }
126
127 type KeepTestClient struct {
128         Called         bool
129         StorageClasses []string
130         blocks         sync.Map
131 }
132
133 type stubExecutor struct {
134         imageLoaded bool
135         loaded      string
136         loadErr     error
137         exitCode    int
138         createErr   error
139         created     containerSpec
140         startErr    error
141         waitSleep   time.Duration
142         waitErr     error
143         stopErr     error
144         stopped     bool
145         closed      bool
146         runFunc     func() int
147         exit        chan int
148 }
149
150 func (e *stubExecutor) LoadImage(imageId string, tarball string, container arvados.Container, keepMount string,
151         containerClient *arvados.Client) error {
152         e.loaded = tarball
153         return e.loadErr
154 }
155 func (e *stubExecutor) Runtime() string                 { return "stub" }
156 func (e *stubExecutor) Version() string                 { return "stub " + cmd.Version.String() }
157 func (e *stubExecutor) Create(spec containerSpec) error { e.created = spec; return e.createErr }
158 func (e *stubExecutor) Start() error {
159         e.exit = make(chan int, 1)
160         go func() { e.exit <- e.runFunc() }()
161         return e.startErr
162 }
163 func (e *stubExecutor) Pid() int    { return 1115883 } // matches pid in ../crunchstat/testdata/debian12/proc/
164 func (e *stubExecutor) Stop() error { e.stopped = true; go func() { e.exit <- -1 }(); return e.stopErr }
165 func (e *stubExecutor) Close()      { e.closed = true }
166 func (e *stubExecutor) Wait(context.Context) (int, error) {
167         return <-e.exit, e.waitErr
168 }
169 func (e *stubExecutor) InjectCommand(ctx context.Context, _, _ string, _ bool, _ []string) (*exec.Cmd, error) {
170         return nil, errors.New("unimplemented")
171 }
172 func (e *stubExecutor) IPAddress() (string, error) { return "", errors.New("unimplemented") }
173
174 const fakeInputCollectionPDH = "ffffffffaaaaaaaa88888888eeeeeeee+1234"
175
176 var hwManifest = ". 82ab40c24fc8df01798e57ba66795bb1+841216+Aa124ac75e5168396c73c0a18eda641a4f41791c0@569fa8c3 0:841216:9c31ee32b3d15268a0754e8edc74d4f815ee014b693bc5109058e431dd5caea7.tar\n"
177 var hwPDH = "a45557269dcb65a6b78f9ac061c0850b+120"
178 var hwImageID = "9c31ee32b3d15268a0754e8edc74d4f815ee014b693bc5109058e431dd5caea7"
179
180 var otherManifest = ". 68a84f561b1d1708c6baff5e019a9ab3+46+Ae5d0af96944a3690becb1decdf60cc1c937f556d@5693216f 0:46:md5sum.txt\n"
181 var otherPDH = "a3e8f74c6f101eae01fa08bfb4e49b3a+54"
182
183 var normalizedManifestWithSubdirs = `. 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396c73c0abcdefgh11234567890@569fa8c3 0:9:file1_in_main.txt 9:18:file2_in_main.txt 0:27:zzzzz-8i9sb-bcdefghijkdhvnk.log.txt
184 ./subdir1 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396cabcdefghij6419876543234@569fa8c4 0:9:file1_in_subdir1.txt 9:18:file2_in_subdir1.txt
185 ./subdir1/subdir2 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396c73c0bcdefghijk544332211@569fa8c5 0:9:file1_in_subdir2.txt 9:18:file2_in_subdir2.txt
186 `
187
188 var normalizedWithSubdirsPDH = "a0def87f80dd594d4675809e83bd4f15+367"
189
190 var denormalizedManifestWithSubdirs = ". 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396c73c0abcdefgh11234567890@569fa8c3 0:9:file1_in_main.txt 9:18:file2_in_main.txt 0:27:zzzzz-8i9sb-bcdefghijkdhvnk.log.txt 0:10:subdir1/file1_in_subdir1.txt 10:17:subdir1/file2_in_subdir1.txt\n"
191 var denormalizedWithSubdirsPDH = "b0def87f80dd594d4675809e83bd4f15+367"
192
193 var fakeAuthUUID = "zzzzz-gj3su-55pqoyepgi2glem"
194 var fakeAuthToken = "a3ltuwzqcu2u4sc0q7yhpc2w7s00fdcqecg5d6e0u3pfohmbjt"
195
196 func (client *ArvTestClient) Create(resourceType string,
197         parameters arvadosclient.Dict,
198         output interface{}) error {
199
200         client.Mutex.Lock()
201         defer client.Mutex.Unlock()
202
203         client.Calls++
204         client.Content = append(client.Content, parameters)
205
206         if resourceType == "logs" {
207                 panic("logs.create called")
208         }
209
210         if resourceType == "collections" && output != nil {
211                 mt := parameters["collection"].(arvadosclient.Dict)["manifest_text"].(string)
212                 md5sum := md5.Sum([]byte(mt))
213                 outmap := output.(*arvados.Collection)
214                 outmap.PortableDataHash = fmt.Sprintf("%x+%d", md5sum, len(mt))
215                 outmap.UUID = fmt.Sprintf("zzzzz-4zz18-%015x", md5sum[:7])
216         }
217
218         return nil
219 }
220
221 func (client *ArvTestClient) Call(method, resourceType, uuid, action string, parameters arvadosclient.Dict, output interface{}) error {
222         switch {
223         case method == "GET" && resourceType == "containers" && action == "auth":
224                 return json.Unmarshal([]byte(`{
225                         "kind": "arvados#api_client_authorization",
226                         "uuid": "`+fakeAuthUUID+`",
227                         "api_token": "`+fakeAuthToken+`"
228                         }`), output)
229         case method == "GET" && resourceType == "containers" && action == "secret_mounts":
230                 if client.secretMounts != nil {
231                         return json.Unmarshal(client.secretMounts, output)
232                 }
233                 return json.Unmarshal([]byte(`{"secret_mounts":{}}`), output)
234         default:
235                 return fmt.Errorf("Not found")
236         }
237 }
238
239 func (client *ArvTestClient) CallRaw(method, resourceType, uuid, action string,
240         parameters arvadosclient.Dict) (reader io.ReadCloser, err error) {
241         var j []byte
242         if method == "GET" && resourceType == "nodes" && uuid == "" && action == "" {
243                 j = []byte(`{
244                         "kind": "arvados#nodeList",
245                         "items": [{
246                                 "uuid": "zzzzz-7ekkf-2z3mc76g2q73aio",
247                                 "hostname": "compute2",
248                                 "properties": {"total_cpu_cores": 16}
249                         }]}`)
250         } else if method == "GET" && resourceType == "containers" && action == "" && !client.callraw {
251                 if uuid == "" {
252                         j, err = json.Marshal(map[string]interface{}{
253                                 "items": []interface{}{client.Container},
254                                 "kind":  "arvados#nodeList",
255                         })
256                 } else {
257                         j, err = json.Marshal(client.Container)
258                 }
259         } else {
260                 j = []byte(`{
261                         "command": ["sleep", "1"],
262                         "container_image": "` + arvadostest.DockerImage112PDH + `",
263                         "cwd": ".",
264                         "environment": {},
265                         "mounts": {"/tmp": {"kind": "tmp"}, "/json": {"kind": "json", "content": {"number": 123456789123456789}}},
266                         "output_path": "/tmp",
267                         "priority": 1,
268                         "runtime_constraints": {}
269                 }`)
270         }
271         return ioutil.NopCloser(bytes.NewReader(j)), err
272 }
273
274 func (client *ArvTestClient) Get(resourceType string, uuid string, parameters arvadosclient.Dict, output interface{}) error {
275         if resourceType == "collections" {
276                 if uuid == hwPDH {
277                         output.(*arvados.Collection).ManifestText = hwManifest
278                 } else if uuid == otherPDH {
279                         output.(*arvados.Collection).ManifestText = otherManifest
280                 } else if uuid == normalizedWithSubdirsPDH {
281                         output.(*arvados.Collection).ManifestText = normalizedManifestWithSubdirs
282                 } else if uuid == denormalizedWithSubdirsPDH {
283                         output.(*arvados.Collection).ManifestText = denormalizedManifestWithSubdirs
284                 }
285         }
286         if resourceType == "containers" {
287                 (*output.(*arvados.Container)) = client.Container
288         }
289         return nil
290 }
291
292 func (client *ArvTestClient) Update(resourceType string, uuid string, parameters arvadosclient.Dict, output interface{}) (err error) {
293         client.Mutex.Lock()
294         defer client.Mutex.Unlock()
295         client.Calls++
296         client.Content = append(client.Content, parameters)
297         if resourceType == "containers" {
298                 if parameters["container"].(arvadosclient.Dict)["state"] == "Running" {
299                         client.WasSetRunning = true
300                 }
301         } else if resourceType == "collections" && output != nil {
302                 mt := parameters["collection"].(arvadosclient.Dict)["manifest_text"].(string)
303                 output.(*arvados.Collection).UUID = uuid
304                 output.(*arvados.Collection).PortableDataHash = arvados.PortableDataHash(mt)
305         }
306         return nil
307 }
308
309 var discoveryMap = map[string]interface{}{
310         "crunchLogUpdateSize":   float64(crunchLogUpdateSize),
311         "crunchLogUpdatePeriod": float64(crunchLogUpdatePeriod.Seconds()),
312 }
313
314 func (client *ArvTestClient) Discovery(key string) (interface{}, error) {
315         return discoveryMap[key], nil
316 }
317
318 // CalledWith returns the parameters from the first API call whose
319 // parameters match jpath/string. E.g., CalledWith(c, "foo.bar",
320 // "baz") returns parameters with parameters["foo"]["bar"]=="baz". If
321 // no call matches, it returns nil.
322 func (client *ArvTestClient) CalledWith(jpath string, expect interface{}) arvadosclient.Dict {
323 call:
324         for _, content := range client.Content {
325                 var v interface{} = content
326                 for _, k := range strings.Split(jpath, ".") {
327                         if dict, ok := v.(arvadosclient.Dict); !ok {
328                                 continue call
329                         } else {
330                                 v = dict[k]
331                         }
332                 }
333                 if v == expect {
334                         return content
335                 }
336         }
337         return nil
338 }
339
340 func (client *KeepTestClient) LocalLocator(locator string) (string, error) {
341         return locator, nil
342 }
343
344 func (client *KeepTestClient) BlockWrite(_ context.Context, opts arvados.BlockWriteOptions) (arvados.BlockWriteResponse, error) {
345         locator := fmt.Sprintf("%x+%d", md5.Sum(opts.Data), len(opts.Data))
346         client.blocks.Store(locator, append([]byte(nil), opts.Data...))
347         return arvados.BlockWriteResponse{
348                 Locator: locator,
349         }, nil
350 }
351
352 func (client *KeepTestClient) ReadAt(locator string, dst []byte, offset int) (int, error) {
353         loaded, ok := client.blocks.Load(locator)
354         if !ok {
355                 return 0, os.ErrNotExist
356         }
357         data := loaded.([]byte)
358         if offset >= len(data) {
359                 return 0, io.EOF
360         }
361         return copy(dst, data[offset:]), nil
362 }
363
364 func (client *KeepTestClient) Close() {
365         client.blocks.Range(func(locator, value interface{}) bool {
366                 client.blocks.Delete(locator)
367                 return true
368         })
369 }
370
371 func (client *KeepTestClient) SetStorageClasses(sc []string) {
372         client.StorageClasses = sc
373 }
374
375 type FileWrapper struct {
376         io.ReadCloser
377         len int64
378 }
379
380 func (fw FileWrapper) Readdir(n int) ([]os.FileInfo, error) {
381         return nil, errors.New("not implemented")
382 }
383
384 func (fw FileWrapper) Seek(int64, int) (int64, error) {
385         return 0, errors.New("not implemented")
386 }
387
388 func (fw FileWrapper) Size() int64 {
389         return fw.len
390 }
391
392 func (fw FileWrapper) Stat() (os.FileInfo, error) {
393         return nil, errors.New("not implemented")
394 }
395
396 func (fw FileWrapper) Truncate(int64) error {
397         return errors.New("not implemented")
398 }
399
400 func (fw FileWrapper) Write([]byte) (int, error) {
401         return 0, errors.New("not implemented")
402 }
403
404 func (fw FileWrapper) Sync() error {
405         return errors.New("not implemented")
406 }
407
408 func (fw FileWrapper) Snapshot() (*arvados.Subtree, error) {
409         return nil, errors.New("not implemented")
410 }
411
412 func (fw FileWrapper) Splice(*arvados.Subtree) error {
413         return errors.New("not implemented")
414 }
415
416 type apiStubServer struct {
417         server    *httptest.Server
418         proxy     *httputil.ReverseProxy
419         intercept func(http.ResponseWriter, *http.Request) bool
420
421         container arvados.Container
422 }
423
424 func apiStub() (*arvados.Client, *apiStubServer) {
425         client := arvados.NewClientFromEnv()
426         apistub := &apiStubServer{}
427         apistub.server = httptest.NewTLSServer(apistub)
428         apistub.proxy = httputil.NewSingleHostReverseProxy(&url.URL{Scheme: "https", Host: client.APIHost})
429         if client.Insecure {
430                 apistub.proxy.Transport = arvados.InsecureHTTPClient.Transport
431         }
432         client.APIHost = apistub.server.Listener.Addr().String()
433         return client, apistub
434 }
435
436 func (apistub *apiStubServer) ServeHTTP(w http.ResponseWriter, r *http.Request) {
437         if apistub.intercept != nil && apistub.intercept(w, r) {
438                 return
439         }
440         if r.Method == "GET" && r.URL.Path == "/arvados/v1/collections/"+hwPDH {
441                 json.NewEncoder(w).Encode(arvados.Collection{ManifestText: hwManifest})
442                 return
443         }
444         if r.Method == "GET" && r.URL.Path == "/arvados/v1/collections/"+otherPDH {
445                 json.NewEncoder(w).Encode(arvados.Collection{ManifestText: otherManifest})
446                 return
447         }
448         if r.Method == "GET" && r.URL.Path == "/arvados/v1/collections/"+normalizedWithSubdirsPDH {
449                 json.NewEncoder(w).Encode(arvados.Collection{ManifestText: normalizedManifestWithSubdirs})
450                 return
451         }
452         if r.Method == "GET" && r.URL.Path == "/arvados/v1/collections/"+denormalizedWithSubdirsPDH {
453                 json.NewEncoder(w).Encode(arvados.Collection{ManifestText: denormalizedManifestWithSubdirs})
454                 return
455         }
456         if r.Method == "GET" && r.URL.Path == "/arvados/v1/containers/"+apistub.container.UUID {
457                 json.NewEncoder(w).Encode(apistub.container)
458                 return
459         }
460         apistub.proxy.ServeHTTP(w, r)
461 }
462
463 func (s *TestSuite) TestLoadImage(c *C) {
464         s.runner.Container.ContainerImage = arvadostest.DockerImage112PDH
465         s.runner.Container.Mounts = map[string]arvados.Mount{
466                 "/out": {Kind: "tmp", Writable: true},
467         }
468         s.runner.Container.OutputPath = "/out"
469
470         _, err := s.runner.SetupMounts()
471         c.Assert(err, IsNil)
472
473         imageID, err := s.runner.LoadImage()
474         c.Check(err, IsNil)
475         c.Check(s.executor.loaded, Matches, ".*"+regexp.QuoteMeta(arvadostest.DockerImage112Filename))
476         c.Check(imageID, Equals, strings.TrimSuffix(arvadostest.DockerImage112Filename, ".tar"))
477
478         s.runner.Container.ContainerImage = arvadostest.DockerImage112PDH
479         s.executor.imageLoaded = false
480         s.executor.loaded = ""
481         s.executor.loadErr = errors.New("bork")
482         imageID, err = s.runner.LoadImage()
483         c.Check(err, ErrorMatches, ".*bork")
484         c.Check(s.executor.loaded, Matches, ".*"+regexp.QuoteMeta(arvadostest.DockerImage112Filename))
485
486         s.runner.Container.ContainerImage = fakeInputCollectionPDH
487         s.executor.imageLoaded = false
488         s.executor.loaded = ""
489         s.executor.loadErr = nil
490         imageID, err = s.runner.LoadImage()
491         c.Check(err, ErrorMatches, "image collection does not include a \\.tar image file")
492         c.Check(s.executor.loaded, Equals, "")
493 }
494
495 type ArvErrorTestClient struct{}
496
497 func (ArvErrorTestClient) Create(resourceType string,
498         parameters arvadosclient.Dict,
499         output interface{}) error {
500         return nil
501 }
502
503 func (ArvErrorTestClient) Call(method, resourceType, uuid, action string, parameters arvadosclient.Dict, output interface{}) error {
504         if method == "GET" && resourceType == "containers" && action == "auth" {
505                 return nil
506         }
507         return errors.New("ArvError")
508 }
509
510 func (ArvErrorTestClient) CallRaw(method, resourceType, uuid, action string,
511         parameters arvadosclient.Dict) (reader io.ReadCloser, err error) {
512         return nil, errors.New("ArvError")
513 }
514
515 func (ArvErrorTestClient) Get(resourceType string, uuid string, parameters arvadosclient.Dict, output interface{}) error {
516         return errors.New("ArvError")
517 }
518
519 func (ArvErrorTestClient) Update(resourceType string, uuid string, parameters arvadosclient.Dict, output interface{}) (err error) {
520         return nil
521 }
522
523 func (ArvErrorTestClient) Discovery(key string) (interface{}, error) {
524         return discoveryMap[key], nil
525 }
526
527 type KeepErrorTestClient struct {
528         KeepTestClient
529 }
530
531 func (*KeepErrorTestClient) BlockWrite(context.Context, arvados.BlockWriteOptions) (arvados.BlockWriteResponse, error) {
532         return arvados.BlockWriteResponse{}, errors.New("KeepError")
533 }
534
535 func (*KeepErrorTestClient) LocalLocator(string) (string, error) {
536         return "", errors.New("KeepError")
537 }
538
539 type KeepReadErrorTestClient struct {
540         KeepTestClient
541 }
542
543 func (*KeepReadErrorTestClient) ReadAt(string, []byte, int) (int, error) {
544         return 0, errors.New("KeepError")
545 }
546
547 func dockerLog(fd byte, msg string) []byte {
548         by := []byte(msg)
549         header := make([]byte, 8+len(by))
550         header[0] = fd
551         header[7] = byte(len(by))
552         copy(header[8:], by)
553         return header
554 }
555
556 func (s *TestSuite) TestRunContainer(c *C) {
557         s.executor.runFunc = func() int {
558                 fmt.Fprintf(s.executor.created.Stdout, "Hello world\n")
559                 return 0
560         }
561
562         s.runner.Container.ContainerImage = arvadostest.DockerImage112PDH
563         s.runner.Container.Command = []string{"./hw"}
564         s.runner.Container.OutputStorageClasses = []string{"default"}
565
566         imageID, err := s.runner.LoadImage()
567         c.Assert(err, IsNil)
568
569         err = s.runner.CreateContainer(imageID, nil)
570         c.Assert(err, IsNil)
571
572         err = s.runner.StartContainer()
573         c.Assert(err, IsNil)
574
575         err = s.runner.WaitFinish()
576         c.Assert(err, IsNil)
577
578         c.Check(logFileContent(c, s.runner, "stdout.txt"), Matches, `2\S+Z Hello world\n`)
579         c.Check(logFileContent(c, s.runner, "stderr.txt"), Matches, ``)
580 }
581
582 func (s *TestSuite) TestCommitLogs(c *C) {
583         api := &ArvTestClient{}
584         kc := &KeepTestClient{}
585         defer kc.Close()
586         cr, err := NewContainerRunner(s.client, api, kc, "zzzzz-zzzzz-zzzzzzzzzzzzzzz")
587         c.Assert(err, IsNil)
588         f, err := cr.openLogFile("crunch-run")
589         c.Assert(err, IsNil)
590         cr.CrunchLog = newLogWriter(newTestTimestamper(f))
591
592         cr.CrunchLog.Print("Hello world!")
593         cr.CrunchLog.Print("Goodbye")
594         cr.finalState = "Complete"
595
596         err = cr.CommitLogs()
597         c.Check(err, IsNil)
598
599         c.Check(api.Calls, Equals, 1)
600         c.Check(api.Content[0]["ensure_unique_name"], Equals, true)
601         c.Check(api.Content[0]["collection"].(arvadosclient.Dict)["name"], Equals, "logs for zzzzz-zzzzz-zzzzzzzzzzzzzzz")
602         c.Check(api.Content[0]["collection"].(arvadosclient.Dict)["manifest_text"], Equals, ". 744b2e4553123b02fa7b452ec5c18993+123 0:123:crunch-run.txt\n")
603         c.Check(*cr.LogsPDH, Equals, "63da7bdacf08c40f604daad80c261e9a+60")
604 }
605
606 func (s *TestSuite) TestUpdateContainerRunning(c *C) {
607         api := &ArvTestClient{}
608         kc := &KeepTestClient{}
609         defer kc.Close()
610         cr, err := NewContainerRunner(s.client, api, kc, "zzzzz-zzzzz-zzzzzzzzzzzzzzz")
611         c.Assert(err, IsNil)
612
613         err = cr.UpdateContainerRunning("")
614         c.Check(err, IsNil)
615
616         c.Check(api.Content[0]["container"].(arvadosclient.Dict)["state"], Equals, "Running")
617 }
618
619 func (s *TestSuite) TestUpdateContainerComplete(c *C) {
620         api := &ArvTestClient{}
621         kc := &KeepTestClient{}
622         defer kc.Close()
623         cr, err := NewContainerRunner(s.client, api, kc, "zzzzz-zzzzz-zzzzzzzzzzzzzzz")
624         c.Assert(err, IsNil)
625
626         cr.LogsPDH = new(string)
627         *cr.LogsPDH = "d3a229d2fe3690c2c3e75a71a153c6a3+60"
628
629         cr.ExitCode = new(int)
630         *cr.ExitCode = 42
631         cr.finalState = "Complete"
632
633         err = cr.UpdateContainerFinal()
634         c.Check(err, IsNil)
635
636         c.Check(api.Content[0]["container"].(arvadosclient.Dict)["log"], Equals, *cr.LogsPDH)
637         c.Check(api.Content[0]["container"].(arvadosclient.Dict)["exit_code"], Equals, *cr.ExitCode)
638         c.Check(api.Content[0]["container"].(arvadosclient.Dict)["state"], Equals, "Complete")
639 }
640
641 func (s *TestSuite) TestUpdateContainerCancelled(c *C) {
642         api := &ArvTestClient{}
643         kc := &KeepTestClient{}
644         defer kc.Close()
645         cr, err := NewContainerRunner(s.client, api, kc, "zzzzz-zzzzz-zzzzzzzzzzzzzzz")
646         c.Assert(err, IsNil)
647         cr.cCancelled = true
648         cr.finalState = "Cancelled"
649
650         err = cr.UpdateContainerFinal()
651         c.Check(err, IsNil)
652
653         c.Check(api.Content[0]["container"].(arvadosclient.Dict)["log"], IsNil)
654         c.Check(api.Content[0]["container"].(arvadosclient.Dict)["exit_code"], IsNil)
655         c.Check(api.Content[0]["container"].(arvadosclient.Dict)["state"], Equals, "Cancelled")
656 }
657
658 // Used by the TestFullRun*() test below to DRY up boilerplate setup to do full
659 // dress rehearsal of the Run() function, starting from a JSON container record.
660 func (s *TestSuite) fullRunHelper(c *C, record string, extraMounts []string, fn func() int) (*ArvTestClient, *ContainerRunner, string) {
661         err := json.Unmarshal([]byte(record), &s.api.Container)
662         c.Assert(err, IsNil)
663         initialState := s.api.Container.State
664
665         var sm struct {
666                 SecretMounts map[string]arvados.Mount `json:"secret_mounts"`
667         }
668         err = json.Unmarshal([]byte(record), &sm)
669         c.Check(err, IsNil)
670         secretMounts, err := json.Marshal(sm)
671         c.Assert(err, IsNil)
672         c.Logf("SecretMounts decoded %v json %q", sm, secretMounts)
673
674         s.executor.runFunc = fn
675
676         s.runner.statInterval = 100 * time.Millisecond
677         s.runner.containerWatchdogInterval = time.Second
678
679         realTemp := c.MkDir()
680         tempcount := 0
681         s.runner.MkTempDir = func(_, prefix string) (string, error) {
682                 tempcount++
683                 d := fmt.Sprintf("%s/%s%d", realTemp, prefix, tempcount)
684                 err := os.Mkdir(d, os.ModePerm)
685                 if err != nil && strings.Contains(err.Error(), ": file exists") {
686                         // Test case must have pre-populated the tempdir
687                         err = nil
688                 }
689                 return d, err
690         }
691         client, _ := apiStub()
692         s.runner.MkArvClient = func(token string) (IArvadosClient, IKeepClient, *arvados.Client, error) {
693                 return &ArvTestClient{secretMounts: secretMounts}, &s.testContainerKeepClient, client, nil
694         }
695
696         if extraMounts != nil && len(extraMounts) > 0 {
697                 err := s.runner.SetupArvMountPoint("keep")
698                 c.Check(err, IsNil)
699
700                 for _, m := range extraMounts {
701                         os.MkdirAll(s.runner.ArvMountPoint+"/by_id/"+m, os.ModePerm)
702                 }
703         }
704
705         err = s.runner.Run()
706         if s.api.CalledWith("container.state", "Complete") != nil {
707                 c.Check(err, IsNil)
708         }
709         if s.executor.loadErr == nil && s.executor.createErr == nil && initialState != "Running" {
710                 c.Check(s.api.WasSetRunning, Equals, true)
711                 var lastupdate arvadosclient.Dict
712                 for _, content := range s.api.Content {
713                         if content["container"] != nil {
714                                 lastupdate = content["container"].(arvadosclient.Dict)
715                         }
716                 }
717                 if lastupdate["log"] == nil {
718                         c.Errorf("no container update with non-nil log -- updates were: %v", s.api.Content)
719                 }
720         }
721
722         if err != nil {
723                 dumpAllLogFiles(c, s.runner)
724         }
725
726         return s.api, s.runner, realTemp
727 }
728
729 func (s *TestSuite) TestFullRunHello(c *C) {
730         s.runner.enableMemoryLimit = true
731         s.runner.networkMode = "default"
732         s.fullRunHelper(c, `{
733     "command": ["echo", "hello world"],
734     "container_image": "`+arvadostest.DockerImage112PDH+`",
735     "cwd": ".",
736     "environment": {"foo":"bar","baz":"waz"},
737     "mounts": {"/tmp": {"kind": "tmp"} },
738     "output_path": "/tmp",
739     "priority": 1,
740     "runtime_constraints": {"vcpus":1,"ram":1000000},
741     "state": "Locked",
742     "output_storage_classes": ["default"]
743 }`, nil, func() int {
744                 c.Check(s.executor.created.Command, DeepEquals, []string{"echo", "hello world"})
745                 c.Check(s.executor.created.Image, Equals, "sha256:d8309758b8fe2c81034ffc8a10c36460b77db7bc5e7b448c4e5b684f9d95a678")
746                 c.Check(s.executor.created.Env, DeepEquals, map[string]string{"foo": "bar", "baz": "waz"})
747                 c.Check(s.executor.created.VCPUs, Equals, 1)
748                 c.Check(s.executor.created.RAM, Equals, int64(1000000))
749                 c.Check(s.executor.created.NetworkMode, Equals, "default")
750                 c.Check(s.executor.created.EnableNetwork, Equals, false)
751                 c.Check(s.executor.created.CUDADeviceCount, Equals, 0)
752                 fmt.Fprintln(s.executor.created.Stdout, "hello world")
753                 return 0
754         })
755
756         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
757         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
758         c.Check(logFileContent(c, s.runner, "stdout.txt"), Matches, `2\S+Z hello world\n`)
759         c.Check(s.testDispatcherKeepClient.StorageClasses, DeepEquals, []string{"default"})
760         c.Check(s.testContainerKeepClient.StorageClasses, DeepEquals, []string{"default"})
761 }
762
763 func (s *TestSuite) TestRunAlreadyRunning(c *C) {
764         var ran bool
765         s.fullRunHelper(c, `{
766     "command": ["sleep", "3"],
767     "container_image": "`+arvadostest.DockerImage112PDH+`",
768     "cwd": ".",
769     "environment": {},
770     "mounts": {"/tmp": {"kind": "tmp"} },
771     "output_path": "/tmp",
772     "priority": 1,
773     "runtime_constraints": {},
774     "scheduling_parameters":{"max_run_time": 1},
775     "state": "Running"
776 }`, nil, func() int {
777                 ran = true
778                 return 2
779         })
780         c.Check(s.api.CalledWith("container.state", "Cancelled"), IsNil)
781         c.Check(s.api.CalledWith("container.state", "Complete"), IsNil)
782         c.Check(ran, Equals, false)
783 }
784
785 func ec2MetadataServerStub(c *C, token *string, failureRate float64, stoptime *atomic.Value) *httptest.Server {
786         failedOnce := false
787         return httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
788                 if !failedOnce || rand.Float64() < failureRate {
789                         w.WriteHeader(http.StatusServiceUnavailable)
790                         failedOnce = true
791                         return
792                 }
793                 switch r.URL.Path {
794                 case "/latest/api/token":
795                         fmt.Fprintln(w, *token)
796                 case "/latest/meta-data/spot/instance-action":
797                         if r.Header.Get("X-aws-ec2-metadata-token") != *token {
798                                 w.WriteHeader(http.StatusUnauthorized)
799                         } else if t, _ := stoptime.Load().(time.Time); t.IsZero() {
800                                 w.WriteHeader(http.StatusNotFound)
801                         } else {
802                                 fmt.Fprintf(w, `{"action":"stop","time":"%s"}`, t.Format(time.RFC3339))
803                         }
804                 default:
805                         w.WriteHeader(http.StatusNotFound)
806                 }
807         }))
808 }
809
810 func (s *TestSuite) TestSpotInterruptionNotice(c *C) {
811         s.testSpotInterruptionNotice(c, 0.1)
812 }
813
814 func (s *TestSuite) TestSpotInterruptionNoticeNotAvailable(c *C) {
815         s.testSpotInterruptionNotice(c, 1)
816 }
817
818 func (s *TestSuite) testSpotInterruptionNotice(c *C, failureRate float64) {
819         var stoptime atomic.Value
820         token := "fake-ec2-metadata-token"
821         stub := ec2MetadataServerStub(c, &token, failureRate, &stoptime)
822         defer stub.Close()
823
824         defer func(i time.Duration, u string) {
825                 spotInterruptionCheckInterval = i
826                 ec2MetadataBaseURL = u
827         }(spotInterruptionCheckInterval, ec2MetadataBaseURL)
828         spotInterruptionCheckInterval = time.Second / 8
829         ec2MetadataBaseURL = stub.URL
830
831         checkedLogs := false
832         checkLogs := func() {
833                 checkedLogs = true
834                 c.Check(logFileContent(c, s.runner, "crunch-run.txt"), Matches, `(?ms).*Checking for spot interruptions every 125ms using instance metadata at http://.*`)
835                 c.Check(logFileContent(c, s.runner, "crunch-run.txt"), Matches, `(?ms).*Error checking spot interruptions: 503 Service Unavailable.*`)
836                 if failureRate == 1 {
837                         c.Check(logFileContent(c, s.runner, "crunch-run.txt"), Matches, `(?ms).*Giving up on checking spot interruptions after too many consecutive failures.*`)
838                         return
839                 }
840                 text := `Cloud provider scheduled instance stop at ` + stoptime.Load().(time.Time).Format(time.RFC3339)
841                 c.Check(logFileContent(c, s.runner, "crunch-run.txt"), Matches, `(?ms).*`+text+`.*`)
842                 c.Check(s.api.CalledWith("container.runtime_status.warning", "preemption notice"), NotNil)
843                 c.Check(s.api.CalledWith("container.runtime_status.warningDetail", text), NotNil)
844                 c.Check(s.api.CalledWith("container.runtime_status.preemptionNotice", text), NotNil)
845
846                 // Check that the log collection was saved, and the
847                 // container record updated with the new PDH,
848                 // immediately after the preemption notice was
849                 // received -- i.e., while the container is still
850                 // running.
851                 lastpdh := ""
852                 saved := make(map[string]string) // pdh => manifest_text
853                 for _, call := range s.api.Content {
854                         if ctr, ok := call["container"].(arvadosclient.Dict); ok {
855                                 if pdh, ok := ctr["log"].(string); ok {
856                                         lastpdh = pdh
857                                 }
858                         }
859                         if coll, ok := call["collection"].(arvadosclient.Dict); ok {
860                                 mt, _ := coll["manifest_text"].(string)
861                                 if strings.Contains(mt, ":crunch-run.txt") {
862                                         saved[arvados.PortableDataHash(mt)] = mt
863                                 }
864                         }
865                 }
866                 logfs, err := (&arvados.Collection{ManifestText: saved[lastpdh]}).FileSystem(s.runner.dispatcherClient, s.runner.DispatcherKeepClient)
867                 c.Assert(err, IsNil)
868                 log, err := fs.ReadFile(arvados.FS(logfs), "crunch-run.txt")
869                 c.Check(err, IsNil)
870                 c.Check(string(log), Matches, `(?ms).*\Q`+text+`\E.*`)
871         }
872
873         go s.runner.checkSpotInterruptionNotices()
874         s.fullRunHelper(c, `{
875     "command": ["sleep", "3"],
876     "container_image": "`+arvadostest.DockerImage112PDH+`",
877     "cwd": ".",
878     "environment": {},
879     "mounts": {"/tmp": {"kind": "tmp"} },
880     "output_path": "/tmp",
881     "priority": 1,
882     "runtime_constraints": {},
883     "state": "Locked"
884 }`, nil, func() int {
885                 time.Sleep(time.Second)
886                 stoptime.Store(time.Now().Add(time.Minute).UTC())
887                 token = "different-fake-ec2-metadata-token"
888                 time.Sleep(time.Second)
889                 checkLogs()
890                 return 0
891         })
892         c.Check(checkedLogs, Equals, true)
893 }
894
895 func (s *TestSuite) TestRunTimeExceeded(c *C) {
896         s.fullRunHelper(c, `{
897     "command": ["sleep", "3"],
898     "container_image": "`+arvadostest.DockerImage112PDH+`",
899     "cwd": ".",
900     "environment": {},
901     "mounts": {"/tmp": {"kind": "tmp"} },
902     "output_path": "/tmp",
903     "priority": 1,
904     "runtime_constraints": {},
905     "scheduling_parameters":{"max_run_time": 1},
906     "state": "Locked"
907 }`, nil, func() int {
908                 time.Sleep(3 * time.Second)
909                 return 0
910         })
911
912         c.Check(s.api.CalledWith("container.state", "Cancelled"), NotNil)
913         c.Check(logFileContent(c, s.runner, "crunch-run.txt"), Matches, "(?ms).*maximum run time exceeded.*")
914 }
915
916 func (s *TestSuite) TestContainerWaitFails(c *C) {
917         s.fullRunHelper(c, `{
918     "command": ["sleep", "3"],
919     "container_image": "`+arvadostest.DockerImage112PDH+`",
920     "cwd": ".",
921     "mounts": {"/tmp": {"kind": "tmp"} },
922     "output_path": "/tmp",
923     "priority": 1,
924     "state": "Locked"
925 }`, nil, func() int {
926                 s.executor.waitErr = errors.New("Container is not running")
927                 return 0
928         })
929
930         c.Check(s.api.CalledWith("container.state", "Cancelled"), NotNil)
931         c.Check(logFileContent(c, s.runner, "crunch-run.txt"), Matches, "(?ms).*Container is not running.*")
932 }
933
934 func (s *TestSuite) TestCrunchstat(c *C) {
935         s.runner.crunchstatFakeFS = os.DirFS("../crunchstat/testdata/debian12")
936         s.fullRunHelper(c, `{
937                 "command": ["sleep", "1"],
938                 "container_image": "`+arvadostest.DockerImage112PDH+`",
939                 "cwd": ".",
940                 "environment": {},
941                 "mounts": {"/tmp": {"kind": "tmp"} },
942                 "output_path": "/tmp",
943                 "priority": 1,
944                 "runtime_constraints": {},
945                 "state": "Locked"
946         }`, nil, func() int {
947                 time.Sleep(time.Second)
948                 return 0
949         })
950
951         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
952         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
953
954         c.Check(logFileContent(c, s.runner, "crunchstat.txt"), Matches, `(?ms).*mem \d+ swap \d+ pgmajfault \d+ rss.*`)
955
956         // Check that we called (*crunchstat.Reporter)Stop().
957         c.Check(logFileContent(c, s.runner, "crunch-run.txt"), Matches, `(?ms).*Maximum crunch-run memory rss usage was \d+ bytes\n.*`)
958 }
959
960 func (s *TestSuite) TestNodeInfoLog(c *C) {
961         s.fullRunHelper(c, `{
962                 "command": ["true"],
963                 "container_image": "`+arvadostest.DockerImage112PDH+`",
964                 "cwd": ".",
965                 "environment": {},
966                 "mounts": {"/tmp": {"kind": "tmp"} },
967                 "output_path": "/tmp",
968                 "priority": 1,
969                 "runtime_constraints": {},
970                 "state": "Locked"
971         }`, nil, func() int {
972                 return 0
973         })
974
975         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
976         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
977
978         json := logFileContent(c, s.runner, "node.json")
979         c.Check(json, Matches, `(?ms).*"ProviderType": *"a1\.2xlarge".*`)
980         c.Check(json, Matches, `(?ms).*"Price": *1\.2.*`)
981
982         nodeinfo := logFileContent(c, s.runner, "node-info.txt")
983         c.Check(nodeinfo, Matches, `(?ms).*Host Information.*`)
984         c.Check(nodeinfo, Matches, `(?ms).*CPU Information.*`)
985         c.Check(nodeinfo, Matches, `(?ms).*Memory Information.*`)
986         c.Check(nodeinfo, Matches, `(?ms).*Disk Space.*`)
987         c.Check(nodeinfo, Matches, `(?ms).*Disk INodes.*`)
988 }
989
990 func (s *TestSuite) TestLogVersionAndRuntime(c *C) {
991         s.fullRunHelper(c, `{
992                 "command": ["sleep", "1"],
993                 "container_image": "`+arvadostest.DockerImage112PDH+`",
994                 "cwd": ".",
995                 "environment": {},
996                 "mounts": {"/tmp": {"kind": "tmp"} },
997                 "output_path": "/tmp",
998                 "priority": 1,
999                 "runtime_constraints": {},
1000                 "state": "Locked"
1001         }`, nil, func() int {
1002                 return 0
1003         })
1004
1005         c.Check(logFileContent(c, s.runner, "crunch-run.txt"), Matches, `(?ms).*crunch-run \S+ \(go\S+\) start.*`)
1006         c.Check(logFileContent(c, s.runner, "crunch-run.txt"), Matches, `(?ms).*crunch-run process has uid=\d+\(.+\) gid=\d+\(.+\) groups=\d+\(.+\)(,\d+\(.+\))*\n.*`)
1007         c.Check(logFileContent(c, s.runner, "crunch-run.txt"), Matches, `(?ms).*Executing container: zzzzz-zzzzz-zzzzzzzzzzzzzzz.*`)
1008         c.Check(logFileContent(c, s.runner, "crunch-run.txt"), Matches, `(?ms).*Using container runtime: stub.*`)
1009 }
1010
1011 func (s *TestSuite) testLogRSSThresholds(c *C, ram int64, expected []int, notExpected int) {
1012         s.runner.crunchstatFakeFS = os.DirFS("../crunchstat/testdata/debian12")
1013         s.fullRunHelper(c, `{
1014                 "command": ["true"],
1015                 "container_image": "`+arvadostest.DockerImage112PDH+`",
1016                 "cwd": ".",
1017                 "environment": {},
1018                 "mounts": {"/tmp": {"kind": "tmp"} },
1019                 "output_path": "/tmp",
1020                 "priority": 1,
1021                 "runtime_constraints": {"ram": `+fmt.Sprintf("%d", ram)+`},
1022                 "state": "Locked"
1023         }`, nil, func() int { return 0 })
1024         logs := logFileContent(c, s.runner, "crunch-run.txt")
1025         c.Log("=== crunchstat logs")
1026         c.Log(logs)
1027         pattern := logLineStart + `Container using over %d%% of memory \(rss %d/%d bytes\)`
1028         var threshold int
1029         for _, threshold = range expected {
1030                 c.Check(logs, Matches, fmt.Sprintf(pattern, threshold, s.debian12MemoryCurrent, ram))
1031         }
1032         if notExpected > threshold {
1033                 c.Check(logs, Not(Matches), fmt.Sprintf(pattern, notExpected, s.debian12MemoryCurrent, ram))
1034         }
1035 }
1036
1037 func (s *TestSuite) TestLogNoRSSThresholds(c *C) {
1038         s.testLogRSSThresholds(c, s.debian12MemoryCurrent*10, []int{}, 90)
1039 }
1040
1041 func (s *TestSuite) TestLogSomeRSSThresholds(c *C) {
1042         onePercentRSS := s.debian12MemoryCurrent / 100
1043         s.testLogRSSThresholds(c, 102*onePercentRSS, []int{90, 95}, 99)
1044 }
1045
1046 func (s *TestSuite) TestLogAllRSSThresholds(c *C) {
1047         s.testLogRSSThresholds(c, s.debian12MemoryCurrent, []int{90, 95, 99}, 0)
1048 }
1049
1050 func (s *TestSuite) TestLogMaximaAfterRun(c *C) {
1051         s.runner.crunchstatFakeFS = os.DirFS("../crunchstat/testdata/debian12")
1052         s.runner.parentTemp = c.MkDir()
1053         s.fullRunHelper(c, `{
1054         "command": ["true"],
1055         "container_image": "`+arvadostest.DockerImage112PDH+`",
1056         "cwd": ".",
1057         "environment": {},
1058         "mounts": {"/tmp": {"kind": "tmp"} },
1059         "output_path": "/tmp",
1060         "priority": 1,
1061         "runtime_constraints": {"ram": `+fmt.Sprintf("%d", s.debian12MemoryCurrent*10)+`},
1062         "state": "Locked"
1063     }`, nil, func() int { return 0 })
1064         logs := logFileContent(c, s.runner, "crunch-run.txt")
1065         for _, expected := range []string{
1066                 `Maximum disk usage was \d+%, \d+/\d+ bytes`,
1067                 fmt.Sprintf(`Maximum container memory swap usage was %d bytes`, s.debian12SwapCurrent),
1068                 `Maximum container memory pgmajfault usage was \d+ faults`,
1069                 fmt.Sprintf(`Maximum container memory rss usage was 10%%, %d/%d bytes`, s.debian12MemoryCurrent, s.debian12MemoryCurrent*10),
1070                 `Maximum crunch-run memory rss usage was \d+ bytes`,
1071         } {
1072                 c.Check(logs, Matches, logLineStart+expected)
1073         }
1074 }
1075
1076 func (s *TestSuite) TestCommitNodeInfoBeforeStart(c *C) {
1077         var collection_create, container_update arvadosclient.Dict
1078         s.fullRunHelper(c, `{
1079                 "command": ["true"],
1080                 "container_image": "`+arvadostest.DockerImage112PDH+`",
1081                 "cwd": ".",
1082                 "environment": {},
1083                 "mounts": {"/tmp": {"kind": "tmp"} },
1084                 "output_path": "/tmp",
1085                 "priority": 1,
1086                 "runtime_constraints": {},
1087                 "state": "Locked",
1088                 "uuid": "zzzzz-dz642-202301121543210"
1089         }`, nil, func() int {
1090                 collection_create = s.api.CalledWith("ensure_unique_name", true)
1091                 container_update = s.api.CalledWith("container.state", "Running")
1092                 return 0
1093         })
1094
1095         c.Assert(collection_create, NotNil)
1096         log_collection := collection_create["collection"].(arvadosclient.Dict)
1097         c.Check(log_collection["name"], Equals, "logs for zzzzz-dz642-202301121543210")
1098         manifest_text := log_collection["manifest_text"].(string)
1099         // We check that the file size is at least two digits as an easy way to
1100         // check the file isn't empty.
1101         c.Check(manifest_text, Matches, `\. .+ \d+:\d{2,}:node-info\.txt( .+)?\n`)
1102         c.Check(manifest_text, Matches, `\. .+ \d+:\d{2,}:node\.json( .+)?\n`)
1103
1104         c.Assert(container_update, NotNil)
1105         // As of Arvados 2.5.0, the container update must specify its log in PDH
1106         // format for the API server to propagate it to container requests, which
1107         // is what we care about for this test.
1108         expect_pdh := fmt.Sprintf("%x+%d", md5.Sum([]byte(manifest_text)), len(manifest_text))
1109         c.Check(container_update["container"].(arvadosclient.Dict)["log"], Equals, expect_pdh)
1110 }
1111
1112 func (s *TestSuite) TestContainerRecordLog(c *C) {
1113         s.fullRunHelper(c, `{
1114                 "command": ["sleep", "1"],
1115                 "container_image": "`+arvadostest.DockerImage112PDH+`",
1116                 "cwd": ".",
1117                 "environment": {},
1118                 "mounts": {"/tmp": {"kind": "tmp"} },
1119                 "output_path": "/tmp",
1120                 "priority": 1,
1121                 "runtime_constraints": {},
1122                 "state": "Locked"
1123         }`, nil,
1124                 func() int {
1125                         time.Sleep(time.Second)
1126                         return 0
1127                 })
1128
1129         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
1130         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
1131
1132         c.Check(logFileContent(c, s.runner, "container.json"), Matches, `(?ms).*container_image.*`)
1133 }
1134
1135 func (s *TestSuite) TestFullRunStderr(c *C) {
1136         s.fullRunHelper(c, `{
1137     "command": ["/bin/sh", "-c", "echo hello ; echo world 1>&2 ; exit 1"],
1138     "container_image": "`+arvadostest.DockerImage112PDH+`",
1139     "cwd": ".",
1140     "environment": {},
1141     "mounts": {"/tmp": {"kind": "tmp"} },
1142     "output_path": "/tmp",
1143     "priority": 1,
1144     "runtime_constraints": {},
1145     "state": "Locked"
1146 }`, nil, func() int {
1147                 fmt.Fprintln(s.executor.created.Stdout, "hello")
1148                 fmt.Fprintln(s.executor.created.Stderr, "world")
1149                 return 1
1150         })
1151
1152         final := s.api.CalledWith("container.state", "Complete")
1153         c.Assert(final, NotNil)
1154         c.Check(final["container"].(arvadosclient.Dict)["exit_code"], Equals, 1)
1155         c.Check(final["container"].(arvadosclient.Dict)["log"], NotNil)
1156
1157         c.Check(logFileContent(c, s.runner, "stdout.txt"), Matches, ".*hello\n")
1158         c.Check(logFileContent(c, s.runner, "stderr.txt"), Matches, ".*world\n")
1159 }
1160
1161 func (s *TestSuite) TestFullRunDefaultCwd(c *C) {
1162         s.fullRunHelper(c, `{
1163     "command": ["pwd"],
1164     "container_image": "`+arvadostest.DockerImage112PDH+`",
1165     "cwd": ".",
1166     "environment": {},
1167     "mounts": {"/tmp": {"kind": "tmp"} },
1168     "output_path": "/tmp",
1169     "priority": 1,
1170     "runtime_constraints": {},
1171     "state": "Locked"
1172 }`, nil, func() int {
1173                 fmt.Fprintf(s.executor.created.Stdout, "workdir=%q", s.executor.created.WorkingDir)
1174                 return 0
1175         })
1176
1177         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
1178         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
1179         c.Check(logFileContent(c, s.runner, "stdout.txt"), Matches, `.*workdir=""`)
1180 }
1181
1182 func (s *TestSuite) TestFullRunSetCwd(c *C) {
1183         s.fullRunHelper(c, `{
1184     "command": ["pwd"],
1185     "container_image": "`+arvadostest.DockerImage112PDH+`",
1186     "cwd": "/bin",
1187     "environment": {},
1188     "mounts": {"/tmp": {"kind": "tmp"} },
1189     "output_path": "/tmp",
1190     "priority": 1,
1191     "runtime_constraints": {},
1192     "state": "Locked"
1193 }`, nil, func() int {
1194                 fmt.Fprintln(s.executor.created.Stdout, s.executor.created.WorkingDir)
1195                 return 0
1196         })
1197
1198         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
1199         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
1200         c.Check(logFileContent(c, s.runner, "stdout.txt"), Matches, ".*/bin\n")
1201 }
1202
1203 func (s *TestSuite) TestFullRunSetOutputStorageClasses(c *C) {
1204         s.fullRunHelper(c, `{
1205     "command": ["pwd"],
1206     "container_image": "`+arvadostest.DockerImage112PDH+`",
1207     "cwd": "/bin",
1208     "environment": {},
1209     "mounts": {"/tmp": {"kind": "tmp"} },
1210     "output_path": "/tmp",
1211     "priority": 1,
1212     "runtime_constraints": {},
1213     "state": "Locked",
1214     "output_storage_classes": ["foo", "bar"]
1215 }`, nil, func() int {
1216                 fmt.Fprintln(s.executor.created.Stdout, s.executor.created.WorkingDir)
1217                 return 0
1218         })
1219
1220         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
1221         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
1222         c.Check(logFileContent(c, s.runner, "stdout.txt"), Matches, ".*/bin\n")
1223         c.Check(s.testDispatcherKeepClient.StorageClasses, DeepEquals, []string{"foo", "bar"})
1224         c.Check(s.testContainerKeepClient.StorageClasses, DeepEquals, []string{"foo", "bar"})
1225 }
1226
1227 func (s *TestSuite) TestEnableCUDADeviceCount(c *C) {
1228         s.fullRunHelper(c, `{
1229     "command": ["pwd"],
1230     "container_image": "`+arvadostest.DockerImage112PDH+`",
1231     "cwd": "/bin",
1232     "environment": {},
1233     "mounts": {"/tmp": {"kind": "tmp"} },
1234     "output_path": "/tmp",
1235     "priority": 1,
1236     "runtime_constraints": {"cuda": {"device_count": 2}},
1237     "state": "Locked",
1238     "output_storage_classes": ["foo", "bar"]
1239 }`, nil, func() int {
1240                 fmt.Fprintln(s.executor.created.Stdout, "ok")
1241                 return 0
1242         })
1243         c.Check(s.executor.created.CUDADeviceCount, Equals, 2)
1244 }
1245
1246 func (s *TestSuite) TestEnableCUDAHardwareCapability(c *C) {
1247         s.fullRunHelper(c, `{
1248     "command": ["pwd"],
1249     "container_image": "`+arvadostest.DockerImage112PDH+`",
1250     "cwd": "/bin",
1251     "environment": {},
1252     "mounts": {"/tmp": {"kind": "tmp"} },
1253     "output_path": "/tmp",
1254     "priority": 1,
1255     "runtime_constraints": {"cuda": {"hardware_capability": "foo"}},
1256     "state": "Locked",
1257     "output_storage_classes": ["foo", "bar"]
1258 }`, nil, func() int {
1259                 fmt.Fprintln(s.executor.created.Stdout, "ok")
1260                 return 0
1261         })
1262         c.Check(s.executor.created.CUDADeviceCount, Equals, 0)
1263 }
1264
1265 func (s *TestSuite) TestStopOnSignal(c *C) {
1266         s.executor.runFunc = func() int {
1267                 s.executor.created.Stdout.Write([]byte("foo\n"))
1268                 s.runner.SigChan <- syscall.SIGINT
1269                 time.Sleep(10 * time.Second)
1270                 return 0
1271         }
1272         s.testStopContainer(c)
1273 }
1274
1275 func (s *TestSuite) TestStopOnArvMountDeath(c *C) {
1276         s.executor.runFunc = func() int {
1277                 s.executor.created.Stdout.Write([]byte("foo\n"))
1278                 s.runner.ArvMountExit <- nil
1279                 close(s.runner.ArvMountExit)
1280                 time.Sleep(10 * time.Second)
1281                 return 0
1282         }
1283         s.runner.ArvMountExit = make(chan error)
1284         s.testStopContainer(c)
1285 }
1286
1287 func (s *TestSuite) testStopContainer(c *C) {
1288         record := `{
1289     "command": ["/bin/sh", "-c", "echo foo && sleep 30 && echo bar"],
1290     "container_image": "` + arvadostest.DockerImage112PDH + `",
1291     "cwd": ".",
1292     "environment": {},
1293     "mounts": {"/tmp": {"kind": "tmp"} },
1294     "output_path": "/tmp",
1295     "priority": 1,
1296     "runtime_constraints": {},
1297     "state": "Locked"
1298 }`
1299
1300         err := json.Unmarshal([]byte(record), &s.api.Container)
1301         c.Assert(err, IsNil)
1302
1303         s.runner.RunArvMount = func([]string, string) (*exec.Cmd, error) { return nil, nil }
1304         s.runner.MkArvClient = func(token string) (IArvadosClient, IKeepClient, *arvados.Client, error) {
1305                 return &ArvTestClient{}, &KeepTestClient{}, nil, nil
1306         }
1307
1308         done := make(chan error)
1309         go func() {
1310                 done <- s.runner.Run()
1311         }()
1312         select {
1313         case <-time.After(20 * time.Second):
1314                 pprof.Lookup("goroutine").WriteTo(os.Stderr, 1)
1315                 c.Fatal("timed out")
1316         case err = <-done:
1317                 c.Check(err, IsNil)
1318         }
1319         dumpAllLogFiles(c, s.runner)
1320
1321         c.Check(s.api.CalledWith("container.log", nil), NotNil)
1322         c.Check(s.api.CalledWith("container.state", "Cancelled"), NotNil)
1323         c.Check(logFileContent(c, s.runner, "stdout.txt"), Matches, "(?ms).*foo\n$")
1324 }
1325
1326 func (s *TestSuite) TestFullRunSetEnv(c *C) {
1327         s.fullRunHelper(c, `{
1328     "command": ["/bin/sh", "-c", "echo $FROBIZ"],
1329     "container_image": "`+arvadostest.DockerImage112PDH+`",
1330     "cwd": "/bin",
1331     "environment": {"FROBIZ": "bilbo"},
1332     "mounts": {"/tmp": {"kind": "tmp"} },
1333     "output_path": "/tmp",
1334     "priority": 1,
1335     "runtime_constraints": {},
1336     "state": "Locked"
1337 }`, nil, func() int {
1338                 fmt.Fprintf(s.executor.created.Stdout, "%v", s.executor.created.Env)
1339                 return 0
1340         })
1341
1342         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
1343         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
1344         c.Check(logFileContent(c, s.runner, "stdout.txt"), Matches, `.*map\[FROBIZ:bilbo\]`)
1345 }
1346
1347 type ArvMountCmdLine struct {
1348         Cmd   []string
1349         token string
1350 }
1351
1352 func (am *ArvMountCmdLine) ArvMountTest(c []string, token string) (*exec.Cmd, error) {
1353         am.Cmd = c
1354         am.token = token
1355         return nil, nil
1356 }
1357
1358 func stubCert(c *C, temp string) string {
1359         path := temp + "/ca-certificates.crt"
1360         err := os.WriteFile(path, []byte{}, 0666)
1361         c.Assert(err, IsNil)
1362         os.Setenv("SSL_CERT_FILE", path)
1363         return path
1364 }
1365
1366 func (s *TestSuite) TestSetupMounts(c *C) {
1367         cr := s.runner
1368         am := &ArvMountCmdLine{}
1369         cr.RunArvMount = am.ArvMountTest
1370         cr.containerClient, _ = apiStub()
1371         cr.ContainerArvClient = &ArvTestClient{}
1372         cr.ContainerKeepClient = &KeepTestClient{}
1373         cr.Container.OutputStorageClasses = []string{"default"}
1374
1375         realTemp := c.MkDir()
1376         certTemp := c.MkDir()
1377         stubCertPath := stubCert(c, certTemp)
1378         cr.parentTemp = realTemp
1379
1380         i := 0
1381         cr.MkTempDir = func(_ string, prefix string) (string, error) {
1382                 i++
1383                 d := fmt.Sprintf("%s/%s%d", realTemp, prefix, i)
1384                 err := os.Mkdir(d, os.ModePerm)
1385                 if err != nil && strings.Contains(err.Error(), ": file exists") {
1386                         // Test case must have pre-populated the tempdir
1387                         err = nil
1388                 }
1389                 return d, err
1390         }
1391
1392         checkEmpty := func() {
1393                 // Should be deleted.
1394                 _, err := os.Stat(realTemp)
1395                 c.Assert(os.IsNotExist(err), Equals, true)
1396
1397                 // Now recreate it for the next test.
1398                 c.Assert(os.Mkdir(realTemp, 0777), IsNil)
1399         }
1400
1401         {
1402                 i = 0
1403                 cr.ArvMountPoint = ""
1404                 cr.Container.Mounts = make(map[string]arvados.Mount)
1405                 cr.Container.Mounts["/tmp"] = arvados.Mount{Kind: "tmp"}
1406                 cr.Container.OutputPath = "/tmp"
1407                 cr.statInterval = 5 * time.Second
1408                 bindmounts, err := cr.SetupMounts()
1409                 c.Check(err, IsNil)
1410                 c.Check(am.Cmd, DeepEquals, []string{"arv-mount", "--foreground",
1411                         "--read-write", "--storage-classes", "default", "--crunchstat-interval=5",
1412                         "--mount-by-pdh", "by_id", "--disable-event-listening", "--mount-by-id", "by_uuid", realTemp + "/keep1"})
1413                 c.Check(bindmounts, DeepEquals, map[string]bindmount{"/tmp": {realTemp + "/tmp2", false}})
1414                 os.RemoveAll(cr.ArvMountPoint)
1415                 cr.CleanupDirs()
1416                 checkEmpty()
1417         }
1418
1419         {
1420                 i = 0
1421                 cr.ArvMountPoint = ""
1422                 cr.Container.Mounts = make(map[string]arvados.Mount)
1423                 cr.Container.Mounts["/out"] = arvados.Mount{Kind: "tmp"}
1424                 cr.Container.Mounts["/tmp"] = arvados.Mount{Kind: "tmp"}
1425                 cr.Container.OutputPath = "/out"
1426                 cr.Container.OutputStorageClasses = []string{"foo", "bar"}
1427
1428                 bindmounts, err := cr.SetupMounts()
1429                 c.Check(err, IsNil)
1430                 c.Check(am.Cmd, DeepEquals, []string{"arv-mount", "--foreground",
1431                         "--read-write", "--storage-classes", "foo,bar", "--crunchstat-interval=5",
1432                         "--mount-by-pdh", "by_id", "--disable-event-listening", "--mount-by-id", "by_uuid", realTemp + "/keep1"})
1433                 c.Check(bindmounts, DeepEquals, map[string]bindmount{"/out": {realTemp + "/tmp2", false}, "/tmp": {realTemp + "/tmp3", false}})
1434                 os.RemoveAll(cr.ArvMountPoint)
1435                 cr.CleanupDirs()
1436                 checkEmpty()
1437         }
1438
1439         {
1440                 i = 0
1441                 cr.ArvMountPoint = ""
1442                 cr.Container.Mounts = make(map[string]arvados.Mount)
1443                 cr.Container.Mounts["/tmp"] = arvados.Mount{Kind: "tmp"}
1444                 cr.Container.OutputPath = "/tmp"
1445                 cr.Container.RuntimeConstraints.API = true
1446                 cr.Container.OutputStorageClasses = []string{"default"}
1447
1448                 bindmounts, err := cr.SetupMounts()
1449                 c.Check(err, IsNil)
1450                 c.Check(am.Cmd, DeepEquals, []string{"arv-mount", "--foreground",
1451                         "--read-write", "--storage-classes", "default", "--crunchstat-interval=5",
1452                         "--mount-by-pdh", "by_id", "--disable-event-listening", "--mount-by-id", "by_uuid", realTemp + "/keep1"})
1453                 c.Check(bindmounts, DeepEquals, map[string]bindmount{"/tmp": {realTemp + "/tmp2", false}, "/etc/arvados/ca-certificates.crt": {stubCertPath, true}})
1454                 os.RemoveAll(cr.ArvMountPoint)
1455                 cr.CleanupDirs()
1456                 checkEmpty()
1457
1458                 cr.Container.RuntimeConstraints.API = false
1459         }
1460
1461         {
1462                 i = 0
1463                 cr.ArvMountPoint = ""
1464                 cr.Container.Mounts = map[string]arvados.Mount{
1465                         "/keeptmp": {Kind: "collection", Writable: true},
1466                 }
1467                 cr.Container.OutputPath = "/keeptmp"
1468
1469                 os.MkdirAll(realTemp+"/keep1/tmp0", os.ModePerm)
1470
1471                 bindmounts, err := cr.SetupMounts()
1472                 c.Check(err, IsNil)
1473                 c.Check(am.Cmd, DeepEquals, []string{"arv-mount", "--foreground",
1474                         "--read-write", "--storage-classes", "default", "--crunchstat-interval=5",
1475                         "--mount-tmp", "tmp0", "--mount-by-pdh", "by_id", "--disable-event-listening", "--mount-by-id", "by_uuid", realTemp + "/keep1"})
1476                 c.Check(bindmounts, DeepEquals, map[string]bindmount{"/keeptmp": {realTemp + "/keep1/tmp0", false}})
1477                 os.RemoveAll(cr.ArvMountPoint)
1478                 cr.CleanupDirs()
1479                 checkEmpty()
1480         }
1481
1482         {
1483                 i = 0
1484                 cr.ArvMountPoint = ""
1485                 cr.Container.Mounts = map[string]arvados.Mount{
1486                         "/keepinp": {Kind: "collection", PortableDataHash: "59389a8f9ee9d399be35462a0f92541c+53"},
1487                         "/keepout": {Kind: "collection", Writable: true},
1488                 }
1489                 cr.Container.OutputPath = "/keepout"
1490
1491                 os.MkdirAll(realTemp+"/keep1/by_id/59389a8f9ee9d399be35462a0f92541c+53", os.ModePerm)
1492                 os.MkdirAll(realTemp+"/keep1/tmp0", os.ModePerm)
1493
1494                 bindmounts, err := cr.SetupMounts()
1495                 c.Check(err, IsNil)
1496                 c.Check(am.Cmd, DeepEquals, []string{"arv-mount", "--foreground",
1497                         "--read-write", "--storage-classes", "default", "--crunchstat-interval=5",
1498                         "--mount-tmp", "tmp0", "--mount-by-pdh", "by_id", "--disable-event-listening", "--mount-by-id", "by_uuid", realTemp + "/keep1"})
1499                 c.Check(bindmounts, DeepEquals, map[string]bindmount{
1500                         "/keepinp": {realTemp + "/keep1/by_id/59389a8f9ee9d399be35462a0f92541c+53", true},
1501                         "/keepout": {realTemp + "/keep1/tmp0", false},
1502                 })
1503                 os.RemoveAll(cr.ArvMountPoint)
1504                 cr.CleanupDirs()
1505                 checkEmpty()
1506         }
1507
1508         {
1509                 i = 0
1510                 cr.ArvMountPoint = ""
1511                 cr.Container.RuntimeConstraints.KeepCacheRAM = 512
1512                 cr.Container.Mounts = map[string]arvados.Mount{
1513                         "/keepinp": {Kind: "collection", PortableDataHash: "59389a8f9ee9d399be35462a0f92541c+53"},
1514                         "/keepout": {Kind: "collection", Writable: true},
1515                 }
1516                 cr.Container.OutputPath = "/keepout"
1517
1518                 os.MkdirAll(realTemp+"/keep1/by_id/59389a8f9ee9d399be35462a0f92541c+53", os.ModePerm)
1519                 os.MkdirAll(realTemp+"/keep1/tmp0", os.ModePerm)
1520
1521                 bindmounts, err := cr.SetupMounts()
1522                 c.Check(err, IsNil)
1523                 c.Check(am.Cmd, DeepEquals, []string{"arv-mount", "--foreground",
1524                         "--read-write", "--storage-classes", "default", "--crunchstat-interval=5", "--ram-cache",
1525                         "--file-cache", "512", "--mount-tmp", "tmp0", "--mount-by-pdh", "by_id", "--disable-event-listening", "--mount-by-id", "by_uuid", realTemp + "/keep1"})
1526                 c.Check(bindmounts, DeepEquals, map[string]bindmount{
1527                         "/keepinp": {realTemp + "/keep1/by_id/59389a8f9ee9d399be35462a0f92541c+53", true},
1528                         "/keepout": {realTemp + "/keep1/tmp0", false},
1529                 })
1530                 os.RemoveAll(cr.ArvMountPoint)
1531                 cr.CleanupDirs()
1532                 checkEmpty()
1533         }
1534
1535         for _, test := range []struct {
1536                 in  interface{}
1537                 out string
1538         }{
1539                 {in: "foo", out: `"foo"`},
1540                 {in: nil, out: `null`},
1541                 {in: map[string]int64{"foo": 123456789123456789}, out: `{"foo":123456789123456789}`},
1542         } {
1543                 i = 0
1544                 cr.ArvMountPoint = ""
1545                 cr.Container.Mounts = map[string]arvados.Mount{
1546                         "/mnt/test.json": {Kind: "json", Content: test.in},
1547                 }
1548                 bindmounts, err := cr.SetupMounts()
1549                 c.Check(err, IsNil)
1550                 c.Check(bindmounts, DeepEquals, map[string]bindmount{
1551                         "/mnt/test.json": {realTemp + "/json2/mountdata.json", true},
1552                 })
1553                 content, err := ioutil.ReadFile(realTemp + "/json2/mountdata.json")
1554                 c.Check(err, IsNil)
1555                 c.Check(content, DeepEquals, []byte(test.out))
1556                 os.RemoveAll(cr.ArvMountPoint)
1557                 cr.CleanupDirs()
1558                 checkEmpty()
1559         }
1560
1561         for _, test := range []struct {
1562                 in  interface{}
1563                 out string
1564         }{
1565                 {in: "foo", out: `foo`},
1566                 {in: nil, out: "error"},
1567                 {in: map[string]int64{"foo": 123456789123456789}, out: "error"},
1568         } {
1569                 i = 0
1570                 cr.ArvMountPoint = ""
1571                 cr.Container.Mounts = map[string]arvados.Mount{
1572                         "/mnt/test.txt": {Kind: "text", Content: test.in},
1573                 }
1574                 bindmounts, err := cr.SetupMounts()
1575                 if test.out == "error" {
1576                         c.Check(err.Error(), Equals, "content for mount \"/mnt/test.txt\" must be a string")
1577                 } else {
1578                         c.Check(err, IsNil)
1579                         c.Check(bindmounts, DeepEquals, map[string]bindmount{
1580                                 "/mnt/test.txt": {realTemp + "/text2/mountdata.text", true},
1581                         })
1582                         content, err := ioutil.ReadFile(realTemp + "/text2/mountdata.text")
1583                         c.Check(err, IsNil)
1584                         c.Check(content, DeepEquals, []byte(test.out))
1585                 }
1586                 os.RemoveAll(cr.ArvMountPoint)
1587                 cr.CleanupDirs()
1588                 checkEmpty()
1589         }
1590
1591         // Read-only mount points are allowed underneath output_dir mount point
1592         {
1593                 i = 0
1594                 cr.ArvMountPoint = ""
1595                 cr.Container.Mounts = make(map[string]arvados.Mount)
1596                 cr.Container.Mounts = map[string]arvados.Mount{
1597                         "/tmp":     {Kind: "tmp"},
1598                         "/tmp/foo": {Kind: "collection"},
1599                 }
1600                 cr.Container.OutputPath = "/tmp"
1601
1602                 os.MkdirAll(realTemp+"/keep1/tmp0", os.ModePerm)
1603
1604                 bindmounts, err := cr.SetupMounts()
1605                 c.Check(err, IsNil)
1606                 c.Check(am.Cmd, DeepEquals, []string{"arv-mount", "--foreground",
1607                         "--read-write", "--storage-classes", "default", "--crunchstat-interval=5", "--ram-cache",
1608                         "--file-cache", "512", "--mount-tmp", "tmp0", "--mount-by-pdh", "by_id", "--disable-event-listening", "--mount-by-id", "by_uuid", realTemp + "/keep1"})
1609                 c.Check(bindmounts, DeepEquals, map[string]bindmount{
1610                         "/tmp":     {realTemp + "/tmp2", false},
1611                         "/tmp/foo": {realTemp + "/keep1/tmp0", true},
1612                 })
1613                 os.RemoveAll(cr.ArvMountPoint)
1614                 cr.CleanupDirs()
1615                 checkEmpty()
1616         }
1617
1618         // Writable mount points copied to output_dir mount point
1619         {
1620                 i = 0
1621                 cr.ArvMountPoint = ""
1622                 cr.Container.Mounts = make(map[string]arvados.Mount)
1623                 cr.Container.Mounts = map[string]arvados.Mount{
1624                         "/tmp": {Kind: "tmp"},
1625                         "/tmp/foo": {Kind: "collection",
1626                                 PortableDataHash: "59389a8f9ee9d399be35462a0f92541c+53",
1627                                 Writable:         true},
1628                         "/tmp/bar": {Kind: "collection",
1629                                 PortableDataHash: "59389a8f9ee9d399be35462a0f92541d+53",
1630                                 Path:             "baz",
1631                                 Writable:         true},
1632                 }
1633                 cr.Container.OutputPath = "/tmp"
1634
1635                 os.MkdirAll(realTemp+"/keep1/by_id/59389a8f9ee9d399be35462a0f92541c+53", os.ModePerm)
1636                 os.MkdirAll(realTemp+"/keep1/by_id/59389a8f9ee9d399be35462a0f92541d+53/baz", os.ModePerm)
1637
1638                 rf, _ := os.Create(realTemp + "/keep1/by_id/59389a8f9ee9d399be35462a0f92541d+53/baz/quux")
1639                 rf.Write([]byte("bar"))
1640                 rf.Close()
1641
1642                 _, err := cr.SetupMounts()
1643                 c.Check(err, IsNil)
1644                 _, err = os.Stat(cr.HostOutputDir + "/foo")
1645                 c.Check(err, IsNil)
1646                 _, err = os.Stat(cr.HostOutputDir + "/bar/quux")
1647                 c.Check(err, IsNil)
1648                 os.RemoveAll(cr.ArvMountPoint)
1649                 cr.CleanupDirs()
1650                 checkEmpty()
1651         }
1652
1653         // Only mount points of kind 'collection' are allowed underneath output_dir mount point
1654         {
1655                 i = 0
1656                 cr.ArvMountPoint = ""
1657                 cr.Container.Mounts = make(map[string]arvados.Mount)
1658                 cr.Container.Mounts = map[string]arvados.Mount{
1659                         "/tmp":     {Kind: "tmp"},
1660                         "/tmp/foo": {Kind: "tmp"},
1661                 }
1662                 cr.Container.OutputPath = "/tmp"
1663
1664                 _, err := cr.SetupMounts()
1665                 c.Check(err, NotNil)
1666                 c.Check(err, ErrorMatches, `only mount points of kind 'collection', 'text' or 'json' are supported underneath the output_path.*`)
1667                 os.RemoveAll(cr.ArvMountPoint)
1668                 cr.CleanupDirs()
1669                 checkEmpty()
1670         }
1671
1672         // Only mount point of kind 'collection' is allowed for stdin
1673         {
1674                 i = 0
1675                 cr.ArvMountPoint = ""
1676                 cr.Container.Mounts = make(map[string]arvados.Mount)
1677                 cr.Container.Mounts = map[string]arvados.Mount{
1678                         "stdin": {Kind: "tmp"},
1679                 }
1680
1681                 _, err := cr.SetupMounts()
1682                 c.Check(err, NotNil)
1683                 c.Check(err, ErrorMatches, `unsupported mount kind 'tmp' for stdin.*`)
1684                 os.RemoveAll(cr.ArvMountPoint)
1685                 cr.CleanupDirs()
1686                 checkEmpty()
1687         }
1688 }
1689
1690 func (s *TestSuite) TestStdout(c *C) {
1691         helperRecord := `{
1692                 "command": ["/bin/sh", "-c", "echo $FROBIZ"],
1693                 "container_image": "` + arvadostest.DockerImage112PDH + `",
1694                 "cwd": "/bin",
1695                 "environment": {"FROBIZ": "bilbo"},
1696                 "mounts": {"/tmp": {"kind": "tmp"}, "stdout": {"kind": "file", "path": "/tmp/a/b/c.out"} },
1697                 "output_path": "/tmp",
1698                 "priority": 1,
1699                 "runtime_constraints": {},
1700                 "state": "Locked"
1701         }`
1702
1703         s.fullRunHelper(c, helperRecord, nil, func() int {
1704                 fmt.Fprintln(s.executor.created.Stdout, s.executor.created.Env["FROBIZ"])
1705                 return 0
1706         })
1707
1708         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
1709         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
1710         c.Check(s.runner.ContainerArvClient.(*ArvTestClient).CalledWith("collection.manifest_text", "./a/b 307372fa8fd5c146b22ae7a45b49bc31+6 0:6:c.out\n"), NotNil)
1711 }
1712
1713 // Used by the TestStdoutWithWrongPath*()
1714 func (s *TestSuite) stdoutErrorRunHelper(c *C, record string, fn func() int) (*ArvTestClient, *ContainerRunner, error) {
1715         err := json.Unmarshal([]byte(record), &s.api.Container)
1716         c.Assert(err, IsNil)
1717         s.executor.runFunc = fn
1718         s.runner.RunArvMount = (&ArvMountCmdLine{}).ArvMountTest
1719         s.runner.MkArvClient = func(token string) (IArvadosClient, IKeepClient, *arvados.Client, error) {
1720                 return s.api, &KeepTestClient{}, nil, nil
1721         }
1722         return s.api, s.runner, s.runner.Run()
1723 }
1724
1725 func (s *TestSuite) TestStdoutWithWrongPath(c *C) {
1726         _, _, err := s.stdoutErrorRunHelper(c, `{
1727     "mounts": {"/tmp": {"kind": "tmp"}, "stdout": {"kind": "file", "path":"/tmpa.out"} },
1728     "output_path": "/tmp",
1729     "state": "Locked"
1730 }`, func() int { return 0 })
1731         c.Check(err, ErrorMatches, ".*Stdout path does not start with OutputPath.*")
1732 }
1733
1734 func (s *TestSuite) TestStdoutWithWrongKindTmp(c *C) {
1735         _, _, err := s.stdoutErrorRunHelper(c, `{
1736     "mounts": {"/tmp": {"kind": "tmp"}, "stdout": {"kind": "tmp", "path":"/tmp/a.out"} },
1737     "output_path": "/tmp",
1738     "state": "Locked"
1739 }`, func() int { return 0 })
1740         c.Check(err, ErrorMatches, ".*unsupported mount kind 'tmp' for stdout.*")
1741 }
1742
1743 func (s *TestSuite) TestStdoutWithWrongKindCollection(c *C) {
1744         _, _, err := s.stdoutErrorRunHelper(c, `{
1745     "mounts": {"/tmp": {"kind": "tmp"}, "stdout": {"kind": "collection", "path":"/tmp/a.out"} },
1746     "output_path": "/tmp",
1747     "state": "Locked"
1748 }`, func() int { return 0 })
1749         c.Check(err, ErrorMatches, ".*unsupported mount kind 'collection' for stdout.*")
1750 }
1751
1752 func (s *TestSuite) TestFullRunWithAPI(c *C) {
1753         s.fullRunHelper(c, `{
1754     "command": ["/bin/sh", "-c", "true $ARVADOS_API_HOST"],
1755     "container_image": "`+arvadostest.DockerImage112PDH+`",
1756     "cwd": "/bin",
1757     "environment": {},
1758     "mounts": {"/tmp": {"kind": "tmp"} },
1759     "output_path": "/tmp",
1760     "priority": 1,
1761     "runtime_constraints": {"API": true},
1762     "state": "Locked"
1763 }`, nil, func() int {
1764                 c.Check(s.executor.created.Env["ARVADOS_API_HOST"], Equals, os.Getenv("ARVADOS_API_HOST"))
1765                 return 3
1766         })
1767         c.Check(s.api.CalledWith("container.exit_code", 3), NotNil)
1768         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
1769         c.Check(logFileContent(c, s.runner, "crunch-run.txt"), Matches, `(?ms).*status code 3\n.*`)
1770 }
1771
1772 func (s *TestSuite) TestFullRunSetOutput(c *C) {
1773         defer os.Setenv("ARVADOS_API_HOST", os.Getenv("ARVADOS_API_HOST"))
1774         os.Setenv("ARVADOS_API_HOST", "test.arvados.org")
1775         s.fullRunHelper(c, `{
1776     "command": ["/bin/sh", "-c", "echo $ARVADOS_API_HOST"],
1777     "container_image": "`+arvadostest.DockerImage112PDH+`",
1778     "cwd": "/bin",
1779     "environment": {},
1780     "mounts": {"/tmp": {"kind": "tmp"} },
1781     "output_path": "/tmp",
1782     "priority": 1,
1783     "runtime_constraints": {"API": true},
1784     "state": "Locked"
1785 }`, nil, func() int {
1786                 s.api.Container.Output = arvadostest.DockerImage112PDH
1787                 return 0
1788         })
1789
1790         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
1791         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
1792         c.Check(s.api.CalledWith("container.output", arvadostest.DockerImage112PDH), NotNil)
1793 }
1794
1795 func (s *TestSuite) TestArvMountRuntimeStatusWarning(c *C) {
1796         s.runner.RunArvMount = func([]string, string) (*exec.Cmd, error) {
1797                 os.Mkdir(s.runner.ArvMountPoint+"/by_id", 0666)
1798                 ioutil.WriteFile(s.runner.ArvMountPoint+"/by_id/README", nil, 0666)
1799                 return s.runner.ArvMountCmd([]string{"bash", "-c", "echo >&2 Test: Keep write error: I am a teapot; sleep 3"}, "")
1800         }
1801         s.executor.runFunc = func() int {
1802                 time.Sleep(time.Second)
1803                 return 137
1804         }
1805         record := `{
1806     "command": ["sleep", "1"],
1807     "container_image": "` + arvadostest.DockerImage112PDH + `",
1808     "cwd": "/bin",
1809     "environment": {},
1810     "mounts": {"/tmp": {"kind": "tmp"} },
1811     "output_path": "/tmp",
1812     "priority": 1,
1813     "runtime_constraints": {"API": true},
1814     "state": "Locked"
1815 }`
1816         err := json.Unmarshal([]byte(record), &s.api.Container)
1817         c.Assert(err, IsNil)
1818         err = s.runner.Run()
1819         c.Assert(err, IsNil)
1820         c.Check(s.api.CalledWith("container.exit_code", 137), NotNil)
1821         c.Check(s.api.CalledWith("container.runtime_status.warning", "arv-mount: Keep write error"), NotNil)
1822         c.Check(s.api.CalledWith("container.runtime_status.warningDetail", "Test: Keep write error: I am a teapot"), NotNil)
1823         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
1824         c.Check(logFileContent(c, s.runner, "crunch-run.txt"), Matches, `(?ms).*Container exited with status code 137 \(signal 9, SIGKILL\).*`)
1825 }
1826
1827 func (s *TestSuite) TestStdoutWithExcludeFromOutputMountPointUnderOutputDir(c *C) {
1828         helperRecord := `{
1829                 "command": ["/bin/sh", "-c", "echo $FROBIZ"],
1830                 "container_image": "` + arvadostest.DockerImage112PDH + `",
1831                 "cwd": "/bin",
1832                 "environment": {"FROBIZ": "bilbo"},
1833                 "mounts": {
1834         "/tmp": {"kind": "tmp"},
1835         "/tmp/foo": {"kind": "collection",
1836                      "portable_data_hash": "a3e8f74c6f101eae01fa08bfb4e49b3a+54",
1837                      "exclude_from_output": true
1838         },
1839         "stdout": {"kind": "file", "path": "/tmp/a/b/c.out"}
1840     },
1841                 "output_path": "/tmp",
1842                 "priority": 1,
1843                 "runtime_constraints": {},
1844                 "state": "Locked"
1845         }`
1846
1847         extraMounts := []string{"a3e8f74c6f101eae01fa08bfb4e49b3a+54"}
1848
1849         s.fullRunHelper(c, helperRecord, extraMounts, func() int {
1850                 fmt.Fprintln(s.executor.created.Stdout, s.executor.created.Env["FROBIZ"])
1851                 return 0
1852         })
1853
1854         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
1855         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
1856         c.Check(s.runner.ContainerArvClient.(*ArvTestClient).CalledWith("collection.manifest_text", "./a/b 307372fa8fd5c146b22ae7a45b49bc31+6 0:6:c.out\n"), NotNil)
1857 }
1858
1859 func (s *TestSuite) TestStdoutWithMultipleMountPointsUnderOutputDir(c *C) {
1860         helperRecord := `{
1861                 "command": ["/bin/sh", "-c", "echo $FROBIZ"],
1862                 "container_image": "` + arvadostest.DockerImage112PDH + `",
1863                 "cwd": "/bin",
1864                 "environment": {"FROBIZ": "bilbo"},
1865                 "mounts": {
1866         "/tmp": {"kind": "tmp"},
1867         "/tmp/foo/bar": {"kind": "collection", "portable_data_hash": "a0def87f80dd594d4675809e83bd4f15+367", "path":"/file2_in_main.txt"},
1868         "/tmp/foo/sub1": {"kind": "collection", "portable_data_hash": "a0def87f80dd594d4675809e83bd4f15+367", "path":"/subdir1"},
1869         "/tmp/foo/sub1file2": {"kind": "collection", "portable_data_hash": "a0def87f80dd594d4675809e83bd4f15+367", "path":"/subdir1/file2_in_subdir1.txt"},
1870         "/tmp/foo/baz/sub2file2": {"kind": "collection", "portable_data_hash": "a0def87f80dd594d4675809e83bd4f15+367", "path":"/subdir1/subdir2/file2_in_subdir2.txt"},
1871         "stdout": {"kind": "file", "path": "/tmp/a/b/c.out"}
1872     },
1873                 "output_path": "/tmp",
1874                 "priority": 1,
1875                 "runtime_constraints": {},
1876                 "state": "Locked",
1877                 "uuid": "zzzzz-dz642-202301130848001"
1878         }`
1879
1880         extraMounts := []string{
1881                 "a0def87f80dd594d4675809e83bd4f15+367/file2_in_main.txt",
1882                 "a0def87f80dd594d4675809e83bd4f15+367/subdir1/file2_in_subdir1.txt",
1883                 "a0def87f80dd594d4675809e83bd4f15+367/subdir1/subdir2/file2_in_subdir2.txt",
1884         }
1885
1886         api, _, realtemp := s.fullRunHelper(c, helperRecord, extraMounts, func() int {
1887                 fmt.Fprintln(s.executor.created.Stdout, s.executor.created.Env["FROBIZ"])
1888                 return 0
1889         })
1890
1891         c.Check(s.executor.created.BindMounts, DeepEquals, map[string]bindmount{
1892                 "/tmp":                   {realtemp + "/tmp1", false},
1893                 "/tmp/foo/bar":           {s.keepmount + "/by_id/a0def87f80dd594d4675809e83bd4f15+367/file2_in_main.txt", true},
1894                 "/tmp/foo/baz/sub2file2": {s.keepmount + "/by_id/a0def87f80dd594d4675809e83bd4f15+367/subdir1/subdir2/file2_in_subdir2.txt", true},
1895                 "/tmp/foo/sub1":          {s.keepmount + "/by_id/a0def87f80dd594d4675809e83bd4f15+367/subdir1", true},
1896                 "/tmp/foo/sub1file2":     {s.keepmount + "/by_id/a0def87f80dd594d4675809e83bd4f15+367/subdir1/file2_in_subdir1.txt", true},
1897         })
1898
1899         c.Check(api.CalledWith("container.exit_code", 0), NotNil)
1900         c.Check(api.CalledWith("container.state", "Complete"), NotNil)
1901         output_count := uint(0)
1902         for _, v := range s.runner.ContainerArvClient.(*ArvTestClient).Content {
1903                 if v["collection"] == nil {
1904                         continue
1905                 }
1906                 collection := v["collection"].(arvadosclient.Dict)
1907                 if collection["name"].(string) != "output for zzzzz-dz642-202301130848001" {
1908                         continue
1909                 }
1910                 c.Check(v["ensure_unique_name"], Equals, true)
1911                 c.Check(collection["manifest_text"].(string), Equals, `./a/b 307372fa8fd5c146b22ae7a45b49bc31+6 0:6:c.out
1912 ./foo 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396c73c0abcdefgh11234567890@569fa8c3 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396cabcdefghij6419876543234@569fa8c4 9:18:bar 36:18:sub1file2
1913 ./foo/baz 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396c73c0bcdefghijk544332211@569fa8c5 9:18:sub2file2
1914 ./foo/sub1 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396cabcdefghij6419876543234@569fa8c4 0:9:file1_in_subdir1.txt 9:18:file2_in_subdir1.txt
1915 ./foo/sub1/subdir2 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396c73c0bcdefghijk544332211@569fa8c5 0:9:file1_in_subdir2.txt 9:18:file2_in_subdir2.txt
1916 `)
1917                 output_count++
1918         }
1919         c.Check(output_count, Not(Equals), uint(0))
1920 }
1921
1922 func (s *TestSuite) TestStdoutWithMountPointsUnderOutputDirDenormalizedManifest(c *C) {
1923         helperRecord := `{
1924                 "command": ["/bin/sh", "-c", "echo $FROBIZ"],
1925                 "container_image": "` + arvadostest.DockerImage112PDH + `",
1926                 "cwd": "/bin",
1927                 "environment": {"FROBIZ": "bilbo"},
1928                 "mounts": {
1929         "/tmp": {"kind": "tmp"},
1930         "/tmp/foo/bar": {"kind": "collection", "portable_data_hash": "b0def87f80dd594d4675809e83bd4f15+367", "path": "/subdir1/file2_in_subdir1.txt"},
1931         "stdout": {"kind": "file", "path": "/tmp/a/b/c.out"}
1932     },
1933                 "output_path": "/tmp",
1934                 "priority": 1,
1935                 "runtime_constraints": {},
1936                 "state": "Locked",
1937                 "uuid": "zzzzz-dz642-202301130848002"
1938         }`
1939
1940         extraMounts := []string{
1941                 "b0def87f80dd594d4675809e83bd4f15+367/subdir1/file2_in_subdir1.txt",
1942         }
1943
1944         s.fullRunHelper(c, helperRecord, extraMounts, func() int {
1945                 fmt.Fprintln(s.executor.created.Stdout, s.executor.created.Env["FROBIZ"])
1946                 return 0
1947         })
1948
1949         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
1950         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
1951         output_count := uint(0)
1952         for _, v := range s.runner.ContainerArvClient.(*ArvTestClient).Content {
1953                 if v["collection"] == nil {
1954                         continue
1955                 }
1956                 collection := v["collection"].(arvadosclient.Dict)
1957                 if collection["name"].(string) != "output for zzzzz-dz642-202301130848002" {
1958                         continue
1959                 }
1960                 c.Check(collection["manifest_text"].(string), Equals, `./a/b 307372fa8fd5c146b22ae7a45b49bc31+6 0:6:c.out
1961 ./foo 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396c73c0abcdefgh11234567890@569fa8c3 10:17:bar
1962 `)
1963                 output_count++
1964         }
1965         c.Check(output_count, Not(Equals), uint(0))
1966 }
1967
1968 func (s *TestSuite) TestOutputError(c *C) {
1969         helperRecord := `{
1970                 "command": ["/bin/sh", "-c", "echo $FROBIZ"],
1971                 "container_image": "` + arvadostest.DockerImage112PDH + `",
1972                 "cwd": "/bin",
1973                 "environment": {"FROBIZ": "bilbo"},
1974                 "mounts": {
1975                         "/tmp": {"kind": "tmp"}
1976                 },
1977                 "output_path": "/tmp",
1978                 "priority": 1,
1979                 "runtime_constraints": {},
1980                 "state": "Locked"
1981         }`
1982         s.fullRunHelper(c, helperRecord, nil, func() int {
1983                 os.Symlink("/etc/hosts", s.runner.HostOutputDir+"/baz")
1984                 return 0
1985         })
1986
1987         c.Check(s.api.CalledWith("container.state", "Cancelled"), NotNil)
1988 }
1989
1990 func (s *TestSuite) TestStdinCollectionMountPoint(c *C) {
1991         helperRecord := `{
1992                 "command": ["/bin/sh", "-c", "echo $FROBIZ"],
1993                 "container_image": "` + arvadostest.DockerImage112PDH + `",
1994                 "cwd": "/bin",
1995                 "environment": {"FROBIZ": "bilbo"},
1996                 "mounts": {
1997         "/tmp": {"kind": "tmp"},
1998         "stdin": {"kind": "collection", "portable_data_hash": "b0def87f80dd594d4675809e83bd4f15+367", "path": "/file1_in_main.txt"},
1999         "stdout": {"kind": "file", "path": "/tmp/a/b/c.out"}
2000     },
2001                 "output_path": "/tmp",
2002                 "priority": 1,
2003                 "runtime_constraints": {},
2004                 "state": "Locked"
2005         }`
2006
2007         extraMounts := []string{
2008                 "b0def87f80dd594d4675809e83bd4f15+367/file1_in_main.txt",
2009         }
2010
2011         api, _, _ := s.fullRunHelper(c, helperRecord, extraMounts, func() int {
2012                 fmt.Fprintln(s.executor.created.Stdout, s.executor.created.Env["FROBIZ"])
2013                 return 0
2014         })
2015
2016         c.Check(api.CalledWith("container.exit_code", 0), NotNil)
2017         c.Check(api.CalledWith("container.state", "Complete"), NotNil)
2018         for _, v := range api.Content {
2019                 if v["collection"] != nil {
2020                         collection := v["collection"].(arvadosclient.Dict)
2021                         if strings.Index(collection["name"].(string), "output") == 0 {
2022                                 manifest := collection["manifest_text"].(string)
2023                                 c.Check(manifest, Equals, `./a/b 307372fa8fd5c146b22ae7a45b49bc31+6 0:6:c.out
2024 `)
2025                         }
2026                 }
2027         }
2028 }
2029
2030 func (s *TestSuite) TestStdinJsonMountPoint(c *C) {
2031         helperRecord := `{
2032                 "command": ["/bin/sh", "-c", "echo $FROBIZ"],
2033                 "container_image": "` + arvadostest.DockerImage112PDH + `",
2034                 "cwd": "/bin",
2035                 "environment": {"FROBIZ": "bilbo"},
2036                 "mounts": {
2037         "/tmp": {"kind": "tmp"},
2038         "stdin": {"kind": "json", "content": "foo"},
2039         "stdout": {"kind": "file", "path": "/tmp/a/b/c.out"}
2040     },
2041                 "output_path": "/tmp",
2042                 "priority": 1,
2043                 "runtime_constraints": {},
2044                 "state": "Locked"
2045         }`
2046
2047         api, _, _ := s.fullRunHelper(c, helperRecord, nil, func() int {
2048                 fmt.Fprintln(s.executor.created.Stdout, s.executor.created.Env["FROBIZ"])
2049                 return 0
2050         })
2051
2052         c.Check(api.CalledWith("container.exit_code", 0), NotNil)
2053         c.Check(api.CalledWith("container.state", "Complete"), NotNil)
2054         for _, v := range api.Content {
2055                 if v["collection"] != nil {
2056                         collection := v["collection"].(arvadosclient.Dict)
2057                         if strings.Index(collection["name"].(string), "output") == 0 {
2058                                 manifest := collection["manifest_text"].(string)
2059                                 c.Check(manifest, Equals, `./a/b 307372fa8fd5c146b22ae7a45b49bc31+6 0:6:c.out
2060 `)
2061                         }
2062                 }
2063         }
2064 }
2065
2066 func (s *TestSuite) TestStderrMount(c *C) {
2067         api, cr, _ := s.fullRunHelper(c, `{
2068     "command": ["/bin/sh", "-c", "echo hello;exit 1"],
2069     "container_image": "`+arvadostest.DockerImage112PDH+`",
2070     "cwd": ".",
2071     "environment": {},
2072     "mounts": {"/tmp": {"kind": "tmp"},
2073                "stdout": {"kind": "file", "path": "/tmp/a/out.txt"},
2074                "stderr": {"kind": "file", "path": "/tmp/b/err.txt"}},
2075     "output_path": "/tmp",
2076     "priority": 1,
2077     "runtime_constraints": {},
2078     "state": "Locked"
2079 }`, nil, func() int {
2080                 fmt.Fprintln(s.executor.created.Stdout, "hello")
2081                 fmt.Fprintln(s.executor.created.Stderr, "oops")
2082                 return 1
2083         })
2084
2085         final := api.CalledWith("container.state", "Complete")
2086         c.Assert(final, NotNil)
2087         c.Check(final["container"].(arvadosclient.Dict)["exit_code"], Equals, 1)
2088         c.Check(final["container"].(arvadosclient.Dict)["log"], NotNil)
2089
2090         c.Check(cr.ContainerArvClient.(*ArvTestClient).CalledWith("collection.manifest_text", "./a b1946ac92492d2347c6235b4d2611184+6 0:6:out.txt\n./b 38af5c54926b620264ab1501150cf189+5 0:5:err.txt\n"), NotNil)
2091 }
2092
2093 func (s *TestSuite) TestNumberRoundTrip(c *C) {
2094         s.api.callraw = true
2095         err := s.runner.fetchContainerRecord()
2096         c.Assert(err, IsNil)
2097         jsondata, err := json.Marshal(s.runner.Container.Mounts["/json"].Content)
2098         c.Logf("%#v", s.runner.Container)
2099         c.Check(err, IsNil)
2100         c.Check(string(jsondata), Equals, `{"number":123456789123456789}`)
2101 }
2102
2103 func (s *TestSuite) TestFullBrokenDocker(c *C) {
2104         nextState := ""
2105         for _, setup := range []func(){
2106                 func() {
2107                         c.Log("// waitErr = ocl runtime error")
2108                         s.executor.waitErr = errors.New(`Error response from daemon: oci runtime error: container_linux.go:247: starting container process caused "process_linux.go:359: container init caused \"rootfs_linux.go:54: mounting \\\"/tmp/keep453790790/by_id/99999999999999999999999999999999+99999/myGenome\\\" to rootfs \\\"/tmp/docker/overlay2/9999999999999999999999999999999999999999999999999999999999999999/merged\\\" at \\\"/tmp/docker/overlay2/9999999999999999999999999999999999999999999999999999999999999999/merged/keep/99999999999999999999999999999999+99999/myGenome\\\" caused \\\"no such file or directory\\\"\""`)
2109                         nextState = "Cancelled"
2110                 },
2111                 func() {
2112                         c.Log("// loadErr = cannot connect")
2113                         s.executor.loadErr = errors.New("Cannot connect to the Docker daemon at unix:///var/run/docker.sock. Is the docker daemon running?")
2114                         s.runner.brokenNodeHook = c.MkDir() + "/broken-node-hook"
2115                         err := ioutil.WriteFile(s.runner.brokenNodeHook, []byte("#!/bin/sh\nexec echo killme\n"), 0700)
2116                         c.Assert(err, IsNil)
2117                         nextState = "Queued"
2118                 },
2119         } {
2120                 s.SetUpTest(c)
2121                 setup()
2122                 s.fullRunHelper(c, `{
2123     "command": ["echo", "hello world"],
2124     "container_image": "`+arvadostest.DockerImage112PDH+`",
2125     "cwd": ".",
2126     "environment": {},
2127     "mounts": {"/tmp": {"kind": "tmp"} },
2128     "output_path": "/tmp",
2129     "priority": 1,
2130     "runtime_constraints": {},
2131     "state": "Locked"
2132 }`, nil, func() int { return 0 })
2133                 c.Check(s.api.CalledWith("container.state", nextState), NotNil)
2134                 logs := logFileContent(c, s.runner, "crunch-run.txt")
2135                 c.Check(logs, Matches, "(?ms).*unable to run containers.*")
2136                 if s.runner.brokenNodeHook != "" {
2137                         c.Check(logs, Matches, "(?ms).*Running broken node hook.*")
2138                         c.Check(logs, Matches, "(?ms).*killme.*")
2139                         c.Check(logs, Not(Matches), "(?ms).*Writing /var/lock/crunch-run-broken to mark node as broken.*")
2140                 } else {
2141                         c.Check(logs, Matches, "(?ms).*Writing /var/lock/crunch-run-broken to mark node as broken.*")
2142                 }
2143         }
2144 }
2145
2146 func (s *TestSuite) TestBadCommand(c *C) {
2147         for _, startError := range []string{
2148                 `panic: standard_init_linux.go:175: exec user process caused "no such file or directory"`,
2149                 `Error response from daemon: Cannot start container 41f26cbc43bcc1280f4323efb1830a394ba8660c9d1c2b564ba42bf7f7694845: [8] System error: no such file or directory`,
2150                 `Error response from daemon: Cannot start container 58099cd76c834f3dc2a4fb76c8028f049ae6d4fdf0ec373e1f2cfea030670c2d: [8] System error: exec: "foobar": executable file not found in $PATH`,
2151         } {
2152                 s.SetUpTest(c)
2153                 s.executor.startErr = errors.New(startError)
2154                 s.fullRunHelper(c, `{
2155     "command": ["echo", "hello world"],
2156     "container_image": "`+arvadostest.DockerImage112PDH+`",
2157     "cwd": ".",
2158     "environment": {},
2159     "mounts": {"/tmp": {"kind": "tmp"} },
2160     "output_path": "/tmp",
2161     "priority": 1,
2162     "runtime_constraints": {},
2163     "state": "Locked"
2164 }`, nil, func() int { return 0 })
2165                 c.Check(s.api.CalledWith("container.state", "Cancelled"), NotNil)
2166                 c.Check(logFileContent(c, s.runner, "crunch-run.txt"), Matches, "(?ms).*Possible causes:.*is missing.*")
2167         }
2168 }
2169
2170 func (s *TestSuite) TestSecretTextMountPoint(c *C) {
2171         helperRecord := `{
2172                 "command": ["true"],
2173                 "container_image": "` + arvadostest.DockerImage112PDH + `",
2174                 "cwd": "/bin",
2175                 "mounts": {
2176                     "/tmp": {"kind": "tmp"},
2177                     "/tmp/secret.conf": {"kind": "text", "content": "mypassword"}
2178                 },
2179                 "secret_mounts": {
2180                 },
2181                 "output_path": "/tmp",
2182                 "priority": 1,
2183                 "runtime_constraints": {},
2184                 "state": "Locked"
2185         }`
2186
2187         s.fullRunHelper(c, helperRecord, nil, func() int {
2188                 content, err := ioutil.ReadFile(s.runner.HostOutputDir + "/secret.conf")
2189                 c.Check(err, IsNil)
2190                 c.Check(string(content), Equals, "mypassword")
2191                 return 0
2192         })
2193
2194         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
2195         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
2196         c.Check(s.runner.ContainerArvClient.(*ArvTestClient).CalledWith("collection.manifest_text", ". 34819d7beeabb9260a5c854bc85b3e44+10 0:10:secret.conf\n"), NotNil)
2197         c.Check(s.runner.ContainerArvClient.(*ArvTestClient).CalledWith("collection.manifest_text", ""), IsNil)
2198
2199         // under secret mounts, not captured in output
2200         helperRecord = `{
2201                 "command": ["true"],
2202                 "container_image": "` + arvadostest.DockerImage112PDH + `",
2203                 "cwd": "/bin",
2204                 "mounts": {
2205                     "/tmp": {"kind": "tmp"}
2206                 },
2207                 "secret_mounts": {
2208                     "/tmp/secret.conf": {"kind": "text", "content": "mypassword"}
2209                 },
2210                 "output_path": "/tmp",
2211                 "priority": 1,
2212                 "runtime_constraints": {},
2213                 "state": "Locked"
2214         }`
2215
2216         s.SetUpTest(c)
2217         s.fullRunHelper(c, helperRecord, nil, func() int {
2218                 content, err := ioutil.ReadFile(s.runner.HostOutputDir + "/secret.conf")
2219                 c.Check(err, IsNil)
2220                 c.Check(string(content), Equals, "mypassword")
2221                 return 0
2222         })
2223
2224         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
2225         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
2226         c.Check(s.runner.ContainerArvClient.(*ArvTestClient).CalledWith("collection.manifest_text", ". 34819d7beeabb9260a5c854bc85b3e44+10 0:10:secret.conf\n"), IsNil)
2227         c.Check(s.runner.ContainerArvClient.(*ArvTestClient).CalledWith("collection.manifest_text", ""), NotNil)
2228
2229         // under secret mounts, output dir is a collection, not captured in output
2230         helperRecord = `{
2231                 "command": ["true"],
2232                 "container_image": "` + arvadostest.DockerImage112PDH + `",
2233                 "cwd": "/bin",
2234                 "mounts": {
2235                     "/tmp": {"kind": "collection", "writable": true}
2236                 },
2237                 "secret_mounts": {
2238                     "/tmp/secret.conf": {"kind": "text", "content": "mypassword"}
2239                 },
2240                 "output_path": "/tmp",
2241                 "priority": 1,
2242                 "runtime_constraints": {},
2243                 "state": "Locked"
2244         }`
2245
2246         s.SetUpTest(c)
2247         _, _, realtemp := s.fullRunHelper(c, helperRecord, nil, func() int {
2248                 // secret.conf should be provisioned as a separate
2249                 // bind mount, i.e., it should not appear in the
2250                 // (fake) fuse filesystem as viewed from the host.
2251                 content, err := ioutil.ReadFile(s.runner.HostOutputDir + "/secret.conf")
2252                 if !c.Check(errors.Is(err, os.ErrNotExist), Equals, true) {
2253                         c.Logf("secret.conf: content %q, err %#v", content, err)
2254                 }
2255                 err = ioutil.WriteFile(s.runner.HostOutputDir+"/.arvados#collection", []byte(`{"manifest_text":". acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:foo.txt\n"}`), 0700)
2256                 c.Check(err, IsNil)
2257                 return 0
2258         })
2259
2260         content, err := ioutil.ReadFile(realtemp + "/text1/mountdata.text")
2261         c.Check(err, IsNil)
2262         c.Check(string(content), Equals, "mypassword")
2263         c.Check(s.executor.created.BindMounts["/tmp/secret.conf"], DeepEquals, bindmount{realtemp + "/text1/mountdata.text", true})
2264         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
2265         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
2266         c.Check(s.runner.ContainerArvClient.(*ArvTestClient).CalledWith("collection.manifest_text", ". acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:foo.txt\n"), NotNil)
2267 }
2268
2269 func (s *TestSuite) TestCalculateCost(c *C) {
2270         defer func(s string) { lockdir = s }(lockdir)
2271         lockdir = c.MkDir()
2272         now := time.Now()
2273         cr := s.runner
2274         cr.costStartTime = now.Add(-time.Hour)
2275         var logbuf bytes.Buffer
2276         cr.CrunchLog = newLogWriter(&logbuf)
2277
2278         // if there's no InstanceType env var, cost is calculated as 0
2279         os.Unsetenv("InstanceType")
2280         cost := cr.calculateCost(now)
2281         c.Check(cost, Equals, 0.0)
2282
2283         // with InstanceType env var and loadPrices() hasn't run (or
2284         // hasn't found any data), cost is calculated based on
2285         // InstanceType env var
2286         os.Setenv("InstanceType", `{"Price":1.2}`)
2287         cost = cr.calculateCost(now)
2288         c.Check(cost, Equals, 1.2)
2289
2290         // first update tells us the spot price was $1/h until 30
2291         // minutes ago when it increased to $2/h
2292         j, err := json.Marshal([]cloud.InstancePrice{
2293                 {StartTime: now.Add(-4 * time.Hour), Price: 1.0},
2294                 {StartTime: now.Add(-time.Hour / 2), Price: 2.0},
2295         })
2296         c.Assert(err, IsNil)
2297         os.WriteFile(lockdir+"/"+pricesfile, j, 0777)
2298         cr.loadPrices()
2299         cost = cr.calculateCost(now)
2300         c.Check(cost, Equals, 1.5)
2301
2302         // next update (via --list + SIGUSR2) tells us the spot price
2303         // increased to $3/h 15 minutes ago
2304         j, err = json.Marshal([]cloud.InstancePrice{
2305                 {StartTime: now.Add(-time.Hour / 3), Price: 2.0}, // dup of -time.Hour/2 price
2306                 {StartTime: now.Add(-time.Hour / 4), Price: 3.0},
2307         })
2308         c.Assert(err, IsNil)
2309         os.WriteFile(lockdir+"/"+pricesfile, j, 0777)
2310         cr.loadPrices()
2311         cost = cr.calculateCost(now)
2312         c.Check(cost, Equals, 1.0/2+2.0/4+3.0/4)
2313
2314         cost = cr.calculateCost(now.Add(-time.Hour / 2))
2315         c.Check(cost, Equals, 0.5)
2316
2317         c.Logf("%s", logbuf.String())
2318         c.Check(logbuf.String(), Matches, `(?ms).*Instance price changed to 1\.00 at 20.* changed to 2\.00 .* changed to 3\.00 .*`)
2319         c.Check(logbuf.String(), Not(Matches), `(?ms).*changed to 2\.00 .* changed to 2\.00 .*`)
2320 }
2321
2322 func (s *TestSuite) TestSIGUSR2CostUpdate(c *C) {
2323         pid := os.Getpid()
2324         now := time.Now()
2325         pricesJSON, err := json.Marshal([]cloud.InstancePrice{
2326                 {StartTime: now.Add(-4 * time.Hour), Price: 2.4},
2327                 {StartTime: now.Add(-2 * time.Hour), Price: 2.6},
2328         })
2329         c.Assert(err, IsNil)
2330
2331         os.Setenv("InstanceType", `{"Price":2.2}`)
2332         defer func(s string) { lockdir = s }(lockdir)
2333         lockdir = c.MkDir()
2334
2335         // We can't use s.api.CalledWith because timing differences will yield
2336         // different cost values across runs. getCostUpdate iterates over API
2337         // calls until it finds one that sets the cost, then writes that value
2338         // to the next index of costUpdates.
2339         deadline := now.Add(time.Second)
2340         costUpdates := make([]float64, 2)
2341         costIndex := 0
2342         apiIndex := 0
2343         getCostUpdate := func() {
2344                 for ; time.Now().Before(deadline); time.Sleep(time.Second / 10) {
2345                         for apiIndex < len(s.api.Content) {
2346                                 update := s.api.Content[apiIndex]
2347                                 apiIndex++
2348                                 var ok bool
2349                                 var cost float64
2350                                 if update, ok = update["container"].(arvadosclient.Dict); !ok {
2351                                         continue
2352                                 }
2353                                 if cost, ok = update["cost"].(float64); !ok {
2354                                         continue
2355                                 }
2356                                 c.Logf("API call #%d updates cost to %v", apiIndex-1, cost)
2357                                 costUpdates[costIndex] = cost
2358                                 costIndex++
2359                                 return
2360                         }
2361                 }
2362         }
2363
2364         s.fullRunHelper(c, `{
2365                 "command": ["true"],
2366                 "container_image": "`+arvadostest.DockerImage112PDH+`",
2367                 "cwd": ".",
2368                 "environment": {},
2369                 "mounts": {"/tmp": {"kind": "tmp"} },
2370                 "output_path": "/tmp",
2371                 "priority": 1,
2372                 "runtime_constraints": {},
2373                 "state": "Locked",
2374                 "uuid": "zzzzz-dz642-20230320101530a"
2375         }`, nil, func() int {
2376                 s.runner.costStartTime = now.Add(-3 * time.Hour)
2377                 err := syscall.Kill(pid, syscall.SIGUSR2)
2378                 c.Check(err, IsNil, Commentf("error sending first SIGUSR2 to runner"))
2379                 getCostUpdate()
2380
2381                 err = os.WriteFile(path.Join(lockdir, pricesfile), pricesJSON, 0o700)
2382                 c.Check(err, IsNil, Commentf("error writing JSON prices file"))
2383                 err = syscall.Kill(pid, syscall.SIGUSR2)
2384                 c.Check(err, IsNil, Commentf("error sending second SIGUSR2 to runner"))
2385                 getCostUpdate()
2386
2387                 return 0
2388         })
2389         // Comparing with format strings makes it easy to ignore minor variations
2390         // in cost across runs while keeping diagnostics pretty.
2391         c.Check(fmt.Sprintf("%.3f", costUpdates[0]), Equals, "6.600")
2392         c.Check(fmt.Sprintf("%.3f", costUpdates[1]), Equals, "7.600")
2393 }
2394
2395 type FakeProcess struct {
2396         cmdLine []string
2397 }
2398
2399 func (fp FakeProcess) CmdlineSlice() ([]string, error) {
2400         return fp.cmdLine, nil
2401 }
2402
2403 func logFileContent(c *C, cr *ContainerRunner, fnm string) string {
2404         buf, err := fs.ReadFile(arvados.FS(cr.LogCollection), fnm)
2405         c.Assert(err, IsNil)
2406         return string(buf)
2407 }
2408
2409 func dumpAllLogFiles(c *C, cr *ContainerRunner) {
2410         d, err := cr.LogCollection.OpenFile("/", os.O_RDONLY, 0)
2411         c.Assert(err, IsNil)
2412         fis, err := d.Readdir(-1)
2413         c.Assert(err, IsNil)
2414         for _, fi := range fis {
2415                 c.Logf("=== %s", fi.Name())
2416                 c.Log(logFileContent(c, cr, fi.Name()))
2417         }
2418 }