Merge branch '21944-work-launch-description' refs #21944
[arvados.git] / lib / crunchrun / crunchrun_test.go
1 // Copyright (C) The Arvados Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: AGPL-3.0
4
5 package crunchrun
6
7 import (
8         "bytes"
9         "context"
10         "crypto/md5"
11         "encoding/json"
12         "errors"
13         "fmt"
14         "io"
15         "io/fs"
16         "io/ioutil"
17         "math/rand"
18         "net/http"
19         "net/http/httptest"
20         "net/http/httputil"
21         "net/url"
22         "os"
23         "os/exec"
24         "path"
25         "regexp"
26         "runtime/pprof"
27         "strings"
28         "sync"
29         "sync/atomic"
30         "syscall"
31         "testing"
32         "time"
33
34         "git.arvados.org/arvados.git/lib/cloud"
35         "git.arvados.org/arvados.git/lib/cmd"
36         "git.arvados.org/arvados.git/sdk/go/arvados"
37         "git.arvados.org/arvados.git/sdk/go/arvadosclient"
38         "git.arvados.org/arvados.git/sdk/go/arvadostest"
39
40         . "gopkg.in/check.v1"
41 )
42
43 // Gocheck boilerplate
44 func TestCrunchExec(t *testing.T) {
45         TestingT(t)
46 }
47
48 const logLineStart = `(?m)(.*\n)*\d{4}-\d\d-\d\dT\d\d:\d\d:\d\d\.\d+Z `
49
50 var _ = Suite(&TestSuite{})
51
52 type TestSuite struct {
53         client                   *arvados.Client
54         api                      *ArvTestClient
55         runner                   *ContainerRunner
56         executor                 *stubExecutor
57         keepmount                string
58         keepmountTmp             []string
59         testDispatcherKeepClient KeepTestClient
60         testContainerKeepClient  KeepTestClient
61         debian12MemoryCurrent    int64
62         debian12SwapCurrent      int64
63 }
64
65 func (s *TestSuite) SetUpSuite(c *C) {
66         buf, err := os.ReadFile("../crunchstat/testdata/debian12/sys/fs/cgroup/user.slice/user-1000.slice/session-4.scope/memory.current")
67         c.Assert(err, IsNil)
68         _, err = fmt.Sscanf(string(buf), "%d", &s.debian12MemoryCurrent)
69         c.Assert(err, IsNil)
70
71         buf, err = os.ReadFile("../crunchstat/testdata/debian12/sys/fs/cgroup/user.slice/user-1000.slice/session-4.scope/memory.swap.current")
72         c.Assert(err, IsNil)
73         _, err = fmt.Sscanf(string(buf), "%d", &s.debian12SwapCurrent)
74         c.Assert(err, IsNil)
75 }
76
77 func (s *TestSuite) SetUpTest(c *C) {
78         s.client = arvados.NewClientFromEnv()
79         s.executor = &stubExecutor{}
80         var err error
81         s.api = &ArvTestClient{}
82         s.runner, err = NewContainerRunner(s.client, s.api, &s.testDispatcherKeepClient, "zzzzz-zzzzz-zzzzzzzzzzzzzzz")
83         c.Assert(err, IsNil)
84         s.runner.executor = s.executor
85         s.runner.MkArvClient = func(token string) (IArvadosClient, IKeepClient, *arvados.Client, error) {
86                 return s.api, &s.testContainerKeepClient, s.client, nil
87         }
88         s.runner.RunArvMount = func(cmd []string, tok string) (*exec.Cmd, error) {
89                 s.runner.ArvMountPoint = s.keepmount
90                 for i, opt := range cmd {
91                         if opt == "--mount-tmp" {
92                                 err := os.Mkdir(s.keepmount+"/"+cmd[i+1], 0700)
93                                 if err != nil {
94                                         return nil, err
95                                 }
96                                 s.keepmountTmp = append(s.keepmountTmp, cmd[i+1])
97                         }
98                 }
99                 return nil, nil
100         }
101         s.keepmount = c.MkDir()
102         err = os.Mkdir(s.keepmount+"/by_id", 0755)
103         s.keepmountTmp = nil
104         c.Assert(err, IsNil)
105         err = os.Mkdir(s.keepmount+"/by_id/"+arvadostest.DockerImage112PDH, 0755)
106         c.Assert(err, IsNil)
107         err = ioutil.WriteFile(s.keepmount+"/by_id/"+arvadostest.DockerImage112PDH+"/"+arvadostest.DockerImage112Filename, []byte("#notarealtarball"), 0644)
108         err = os.Mkdir(s.keepmount+"/by_id/"+fakeInputCollectionPDH, 0755)
109         c.Assert(err, IsNil)
110         err = ioutil.WriteFile(s.keepmount+"/by_id/"+fakeInputCollectionPDH+"/input.json", []byte(`{"input":true}`), 0644)
111         c.Assert(err, IsNil)
112         s.runner.ArvMountPoint = s.keepmount
113         os.Setenv("InstanceType", `{"ProviderType":"a1.2xlarge","Price":1.2}`)
114 }
115
116 type ArvTestClient struct {
117         Total   int64
118         Calls   int
119         Content []arvadosclient.Dict
120         arvados.Container
121         secretMounts []byte
122         sync.Mutex
123         WasSetRunning bool
124         callraw       bool
125 }
126
127 type KeepTestClient struct {
128         Called         bool
129         StorageClasses []string
130         blocks         sync.Map
131 }
132
133 type stubExecutor struct {
134         imageLoaded bool
135         loaded      string
136         loadErr     error
137         exitCode    int
138         createErr   error
139         created     containerSpec
140         startErr    error
141         waitSleep   time.Duration
142         waitErr     error
143         stopErr     error
144         stopped     bool
145         closed      bool
146         runFunc     func() int
147         exit        chan int
148 }
149
150 func (e *stubExecutor) LoadImage(imageId string, tarball string, container arvados.Container, keepMount string,
151         containerClient *arvados.Client) error {
152         e.loaded = tarball
153         return e.loadErr
154 }
155 func (e *stubExecutor) Runtime() string                 { return "stub" }
156 func (e *stubExecutor) Version() string                 { return "stub " + cmd.Version.String() }
157 func (e *stubExecutor) Create(spec containerSpec) error { e.created = spec; return e.createErr }
158 func (e *stubExecutor) Start() error {
159         e.exit = make(chan int, 1)
160         go func() { e.exit <- e.runFunc() }()
161         return e.startErr
162 }
163 func (e *stubExecutor) Pid() int    { return 1115883 } // matches pid in ../crunchstat/testdata/debian12/proc/
164 func (e *stubExecutor) Stop() error { e.stopped = true; go func() { e.exit <- -1 }(); return e.stopErr }
165 func (e *stubExecutor) Close()      { e.closed = true }
166 func (e *stubExecutor) Wait(context.Context) (int, error) {
167         return <-e.exit, e.waitErr
168 }
169 func (e *stubExecutor) InjectCommand(ctx context.Context, _, _ string, _ bool, _ []string) (*exec.Cmd, error) {
170         return nil, errors.New("unimplemented")
171 }
172 func (e *stubExecutor) IPAddress() (string, error) { return "", errors.New("unimplemented") }
173
174 const fakeInputCollectionPDH = "ffffffffaaaaaaaa88888888eeeeeeee+1234"
175
176 var hwManifest = ". 82ab40c24fc8df01798e57ba66795bb1+841216+Aa124ac75e5168396c73c0a18eda641a4f41791c0@569fa8c3 0:841216:9c31ee32b3d15268a0754e8edc74d4f815ee014b693bc5109058e431dd5caea7.tar\n"
177 var hwPDH = "a45557269dcb65a6b78f9ac061c0850b+120"
178 var hwImageID = "9c31ee32b3d15268a0754e8edc74d4f815ee014b693bc5109058e431dd5caea7"
179
180 var otherManifest = ". 68a84f561b1d1708c6baff5e019a9ab3+46+Ae5d0af96944a3690becb1decdf60cc1c937f556d@5693216f 0:46:md5sum.txt\n"
181 var otherPDH = "a3e8f74c6f101eae01fa08bfb4e49b3a+54"
182
183 var normalizedManifestWithSubdirs = `. 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396c73c0abcdefgh11234567890@569fa8c3 0:9:file1_in_main.txt 9:18:file2_in_main.txt 0:27:zzzzz-8i9sb-bcdefghijkdhvnk.log.txt
184 ./subdir1 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396cabcdefghij6419876543234@569fa8c4 0:9:file1_in_subdir1.txt 9:18:file2_in_subdir1.txt
185 ./subdir1/subdir2 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396c73c0bcdefghijk544332211@569fa8c5 0:9:file1_in_subdir2.txt 9:18:file2_in_subdir2.txt
186 `
187
188 var normalizedWithSubdirsPDH = "a0def87f80dd594d4675809e83bd4f15+367"
189
190 var denormalizedManifestWithSubdirs = ". 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396c73c0abcdefgh11234567890@569fa8c3 0:9:file1_in_main.txt 9:18:file2_in_main.txt 0:27:zzzzz-8i9sb-bcdefghijkdhvnk.log.txt 0:10:subdir1/file1_in_subdir1.txt 10:17:subdir1/file2_in_subdir1.txt\n"
191 var denormalizedWithSubdirsPDH = "b0def87f80dd594d4675809e83bd4f15+367"
192
193 var fakeAuthUUID = "zzzzz-gj3su-55pqoyepgi2glem"
194 var fakeAuthToken = "a3ltuwzqcu2u4sc0q7yhpc2w7s00fdcqecg5d6e0u3pfohmbjt"
195
196 func (client *ArvTestClient) Create(resourceType string,
197         parameters arvadosclient.Dict,
198         output interface{}) error {
199
200         client.Mutex.Lock()
201         defer client.Mutex.Unlock()
202
203         client.Calls++
204         client.Content = append(client.Content, parameters)
205
206         if resourceType == "logs" {
207                 panic("logs.create called")
208         }
209
210         if resourceType == "collections" && output != nil {
211                 mt := parameters["collection"].(arvadosclient.Dict)["manifest_text"].(string)
212                 md5sum := md5.Sum([]byte(mt))
213                 outmap := output.(*arvados.Collection)
214                 outmap.PortableDataHash = fmt.Sprintf("%x+%d", md5sum, len(mt))
215                 outmap.UUID = fmt.Sprintf("zzzzz-4zz18-%015x", md5sum[:7])
216         }
217
218         return nil
219 }
220
221 func (client *ArvTestClient) Call(method, resourceType, uuid, action string, parameters arvadosclient.Dict, output interface{}) error {
222         switch {
223         case method == "GET" && resourceType == "containers" && action == "auth":
224                 return json.Unmarshal([]byte(`{
225                         "kind": "arvados#api_client_authorization",
226                         "uuid": "`+fakeAuthUUID+`",
227                         "api_token": "`+fakeAuthToken+`"
228                         }`), output)
229         case method == "GET" && resourceType == "containers" && action == "secret_mounts":
230                 if client.secretMounts != nil {
231                         return json.Unmarshal(client.secretMounts, output)
232                 }
233                 return json.Unmarshal([]byte(`{"secret_mounts":{}}`), output)
234         default:
235                 return fmt.Errorf("Not found")
236         }
237 }
238
239 func (client *ArvTestClient) CallRaw(method, resourceType, uuid, action string,
240         parameters arvadosclient.Dict) (reader io.ReadCloser, err error) {
241         var j []byte
242         if method == "GET" && resourceType == "nodes" && uuid == "" && action == "" {
243                 j = []byte(`{
244                         "kind": "arvados#nodeList",
245                         "items": [{
246                                 "uuid": "zzzzz-7ekkf-2z3mc76g2q73aio",
247                                 "hostname": "compute2",
248                                 "properties": {"total_cpu_cores": 16}
249                         }]}`)
250         } else if method == "GET" && resourceType == "containers" && action == "" && !client.callraw {
251                 if uuid == "" {
252                         j, err = json.Marshal(map[string]interface{}{
253                                 "items": []interface{}{client.Container},
254                                 "kind":  "arvados#nodeList",
255                         })
256                 } else {
257                         j, err = json.Marshal(client.Container)
258                 }
259         } else {
260                 j = []byte(`{
261                         "command": ["sleep", "1"],
262                         "container_image": "` + arvadostest.DockerImage112PDH + `",
263                         "cwd": ".",
264                         "environment": {},
265                         "mounts": {"/tmp": {"kind": "tmp"}, "/json": {"kind": "json", "content": {"number": 123456789123456789}}},
266                         "output_path": "/tmp",
267                         "priority": 1,
268                         "runtime_constraints": {}
269                 }`)
270         }
271         return ioutil.NopCloser(bytes.NewReader(j)), err
272 }
273
274 func (client *ArvTestClient) Get(resourceType string, uuid string, parameters arvadosclient.Dict, output interface{}) error {
275         if resourceType == "collections" {
276                 if uuid == hwPDH {
277                         output.(*arvados.Collection).ManifestText = hwManifest
278                 } else if uuid == otherPDH {
279                         output.(*arvados.Collection).ManifestText = otherManifest
280                 } else if uuid == normalizedWithSubdirsPDH {
281                         output.(*arvados.Collection).ManifestText = normalizedManifestWithSubdirs
282                 } else if uuid == denormalizedWithSubdirsPDH {
283                         output.(*arvados.Collection).ManifestText = denormalizedManifestWithSubdirs
284                 }
285         }
286         if resourceType == "containers" {
287                 (*output.(*arvados.Container)) = client.Container
288         }
289         return nil
290 }
291
292 func (client *ArvTestClient) Update(resourceType string, uuid string, parameters arvadosclient.Dict, output interface{}) (err error) {
293         client.Mutex.Lock()
294         defer client.Mutex.Unlock()
295         client.Calls++
296         client.Content = append(client.Content, parameters)
297         if resourceType == "containers" {
298                 if parameters["container"].(arvadosclient.Dict)["state"] == "Running" {
299                         client.WasSetRunning = true
300                 }
301         } else if resourceType == "collections" && output != nil {
302                 mt := parameters["collection"].(arvadosclient.Dict)["manifest_text"].(string)
303                 output.(*arvados.Collection).UUID = uuid
304                 output.(*arvados.Collection).PortableDataHash = arvados.PortableDataHash(mt)
305         }
306         return nil
307 }
308
309 var discoveryMap = map[string]interface{}{
310         "crunchLogUpdateSize":   float64(crunchLogUpdateSize),
311         "crunchLogUpdatePeriod": float64(crunchLogUpdatePeriod.Seconds()),
312 }
313
314 func (client *ArvTestClient) Discovery(key string) (interface{}, error) {
315         return discoveryMap[key], nil
316 }
317
318 // CalledWith returns the parameters from the first API call whose
319 // parameters match jpath/string. E.g., CalledWith(c, "foo.bar",
320 // "baz") returns parameters with parameters["foo"]["bar"]=="baz". If
321 // no call matches, it returns nil.
322 func (client *ArvTestClient) CalledWith(jpath string, expect interface{}) arvadosclient.Dict {
323 call:
324         for _, content := range client.Content {
325                 var v interface{} = content
326                 for _, k := range strings.Split(jpath, ".") {
327                         if dict, ok := v.(arvadosclient.Dict); !ok {
328                                 continue call
329                         } else {
330                                 v = dict[k]
331                         }
332                 }
333                 if v == expect {
334                         return content
335                 }
336         }
337         return nil
338 }
339
340 func (client *KeepTestClient) LocalLocator(locator string) (string, error) {
341         return locator, nil
342 }
343
344 func (client *KeepTestClient) BlockWrite(_ context.Context, opts arvados.BlockWriteOptions) (arvados.BlockWriteResponse, error) {
345         locator := fmt.Sprintf("%x+%d", md5.Sum(opts.Data), len(opts.Data))
346         client.blocks.Store(locator, append([]byte(nil), opts.Data...))
347         return arvados.BlockWriteResponse{
348                 Locator: locator,
349         }, nil
350 }
351
352 func (client *KeepTestClient) ReadAt(locator string, dst []byte, offset int) (int, error) {
353         loaded, ok := client.blocks.Load(locator)
354         if !ok {
355                 return 0, os.ErrNotExist
356         }
357         data := loaded.([]byte)
358         if offset >= len(data) {
359                 return 0, io.EOF
360         }
361         return copy(dst, data[offset:]), nil
362 }
363
364 func (client *KeepTestClient) Close() {
365         client.blocks.Range(func(locator, value interface{}) bool {
366                 client.blocks.Delete(locator)
367                 return true
368         })
369 }
370
371 func (client *KeepTestClient) SetStorageClasses(sc []string) {
372         client.StorageClasses = sc
373 }
374
375 type FileWrapper struct {
376         io.ReadCloser
377         len int64
378 }
379
380 func (fw FileWrapper) Readdir(n int) ([]os.FileInfo, error) {
381         return nil, errors.New("not implemented")
382 }
383
384 func (fw FileWrapper) Seek(int64, int) (int64, error) {
385         return 0, errors.New("not implemented")
386 }
387
388 func (fw FileWrapper) Size() int64 {
389         return fw.len
390 }
391
392 func (fw FileWrapper) Stat() (os.FileInfo, error) {
393         return nil, errors.New("not implemented")
394 }
395
396 func (fw FileWrapper) Truncate(int64) error {
397         return errors.New("not implemented")
398 }
399
400 func (fw FileWrapper) Write([]byte) (int, error) {
401         return 0, errors.New("not implemented")
402 }
403
404 func (fw FileWrapper) Sync() error {
405         return errors.New("not implemented")
406 }
407
408 func (fw FileWrapper) Snapshot() (*arvados.Subtree, error) {
409         return nil, errors.New("not implemented")
410 }
411
412 func (fw FileWrapper) Splice(*arvados.Subtree) error {
413         return errors.New("not implemented")
414 }
415
416 type apiStubServer struct {
417         server    *httptest.Server
418         proxy     *httputil.ReverseProxy
419         intercept func(http.ResponseWriter, *http.Request) bool
420
421         container arvados.Container
422 }
423
424 func apiStub() (*arvados.Client, *apiStubServer) {
425         client := arvados.NewClientFromEnv()
426         apistub := &apiStubServer{}
427         apistub.server = httptest.NewTLSServer(apistub)
428         apistub.proxy = httputil.NewSingleHostReverseProxy(&url.URL{Scheme: "https", Host: client.APIHost})
429         if client.Insecure {
430                 apistub.proxy.Transport = arvados.InsecureHTTPClient.Transport
431         }
432         client.APIHost = apistub.server.Listener.Addr().String()
433         return client, apistub
434 }
435
436 func (apistub *apiStubServer) ServeHTTP(w http.ResponseWriter, r *http.Request) {
437         if apistub.intercept != nil && apistub.intercept(w, r) {
438                 return
439         }
440         if r.Method == "GET" && r.URL.Path == "/arvados/v1/collections/"+hwPDH {
441                 json.NewEncoder(w).Encode(arvados.Collection{ManifestText: hwManifest})
442                 return
443         }
444         if r.Method == "GET" && r.URL.Path == "/arvados/v1/collections/"+otherPDH {
445                 json.NewEncoder(w).Encode(arvados.Collection{ManifestText: otherManifest})
446                 return
447         }
448         if r.Method == "GET" && r.URL.Path == "/arvados/v1/collections/"+normalizedWithSubdirsPDH {
449                 json.NewEncoder(w).Encode(arvados.Collection{ManifestText: normalizedManifestWithSubdirs})
450                 return
451         }
452         if r.Method == "GET" && r.URL.Path == "/arvados/v1/collections/"+denormalizedWithSubdirsPDH {
453                 json.NewEncoder(w).Encode(arvados.Collection{ManifestText: denormalizedManifestWithSubdirs})
454                 return
455         }
456         if r.Method == "GET" && r.URL.Path == "/arvados/v1/containers/"+apistub.container.UUID {
457                 json.NewEncoder(w).Encode(apistub.container)
458                 return
459         }
460         apistub.proxy.ServeHTTP(w, r)
461 }
462
463 func (s *TestSuite) TestLoadImage(c *C) {
464         s.runner.Container.ContainerImage = arvadostest.DockerImage112PDH
465         s.runner.Container.Mounts = map[string]arvados.Mount{
466                 "/out": {Kind: "tmp", Writable: true},
467         }
468         s.runner.Container.OutputPath = "/out"
469
470         _, err := s.runner.SetupMounts()
471         c.Assert(err, IsNil)
472
473         imageID, err := s.runner.LoadImage()
474         c.Check(err, IsNil)
475         c.Check(s.executor.loaded, Matches, ".*"+regexp.QuoteMeta(arvadostest.DockerImage112Filename))
476         c.Check(imageID, Equals, strings.TrimSuffix(arvadostest.DockerImage112Filename, ".tar"))
477
478         s.runner.Container.ContainerImage = arvadostest.DockerImage112PDH
479         s.executor.imageLoaded = false
480         s.executor.loaded = ""
481         s.executor.loadErr = errors.New("bork")
482         imageID, err = s.runner.LoadImage()
483         c.Check(err, ErrorMatches, ".*bork")
484         c.Check(s.executor.loaded, Matches, ".*"+regexp.QuoteMeta(arvadostest.DockerImage112Filename))
485
486         s.runner.Container.ContainerImage = fakeInputCollectionPDH
487         s.executor.imageLoaded = false
488         s.executor.loaded = ""
489         s.executor.loadErr = nil
490         imageID, err = s.runner.LoadImage()
491         c.Check(err, ErrorMatches, "image collection does not include a \\.tar image file")
492         c.Check(s.executor.loaded, Equals, "")
493 }
494
495 type ArvErrorTestClient struct{}
496
497 func (ArvErrorTestClient) Create(resourceType string,
498         parameters arvadosclient.Dict,
499         output interface{}) error {
500         return nil
501 }
502
503 func (ArvErrorTestClient) Call(method, resourceType, uuid, action string, parameters arvadosclient.Dict, output interface{}) error {
504         if method == "GET" && resourceType == "containers" && action == "auth" {
505                 return nil
506         }
507         return errors.New("ArvError")
508 }
509
510 func (ArvErrorTestClient) CallRaw(method, resourceType, uuid, action string,
511         parameters arvadosclient.Dict) (reader io.ReadCloser, err error) {
512         return nil, errors.New("ArvError")
513 }
514
515 func (ArvErrorTestClient) Get(resourceType string, uuid string, parameters arvadosclient.Dict, output interface{}) error {
516         return errors.New("ArvError")
517 }
518
519 func (ArvErrorTestClient) Update(resourceType string, uuid string, parameters arvadosclient.Dict, output interface{}) (err error) {
520         return nil
521 }
522
523 func (ArvErrorTestClient) Discovery(key string) (interface{}, error) {
524         return discoveryMap[key], nil
525 }
526
527 type KeepErrorTestClient struct {
528         KeepTestClient
529 }
530
531 func (*KeepErrorTestClient) BlockWrite(context.Context, arvados.BlockWriteOptions) (arvados.BlockWriteResponse, error) {
532         return arvados.BlockWriteResponse{}, errors.New("KeepError")
533 }
534
535 func (*KeepErrorTestClient) LocalLocator(string) (string, error) {
536         return "", errors.New("KeepError")
537 }
538
539 type KeepReadErrorTestClient struct {
540         KeepTestClient
541 }
542
543 func (*KeepReadErrorTestClient) ReadAt(string, []byte, int) (int, error) {
544         return 0, errors.New("KeepError")
545 }
546
547 func dockerLog(fd byte, msg string) []byte {
548         by := []byte(msg)
549         header := make([]byte, 8+len(by))
550         header[0] = fd
551         header[7] = byte(len(by))
552         copy(header[8:], by)
553         return header
554 }
555
556 func (s *TestSuite) TestRunContainer(c *C) {
557         s.executor.runFunc = func() int {
558                 fmt.Fprintf(s.executor.created.Stdout, "Hello world\n")
559                 return 0
560         }
561
562         s.runner.Container.ContainerImage = arvadostest.DockerImage112PDH
563         s.runner.Container.Command = []string{"./hw"}
564         s.runner.Container.OutputStorageClasses = []string{"default"}
565
566         imageID, err := s.runner.LoadImage()
567         c.Assert(err, IsNil)
568
569         err = s.runner.CreateContainer(imageID, nil)
570         c.Assert(err, IsNil)
571
572         err = s.runner.StartContainer()
573         c.Assert(err, IsNil)
574
575         err = s.runner.WaitFinish()
576         c.Assert(err, IsNil)
577
578         c.Check(logFileContent(c, s.runner, "stdout.txt"), Matches, `2\S+Z Hello world\n`)
579         c.Check(logFileContent(c, s.runner, "stderr.txt"), Matches, ``)
580 }
581
582 func (s *TestSuite) TestCommitLogs(c *C) {
583         api := &ArvTestClient{}
584         kc := &KeepTestClient{}
585         defer kc.Close()
586         cr, err := NewContainerRunner(s.client, api, kc, "zzzzz-zzzzz-zzzzzzzzzzzzzzz")
587         c.Assert(err, IsNil)
588         f, err := cr.openLogFile("crunch-run")
589         c.Assert(err, IsNil)
590         cr.CrunchLog = newLogWriter(newTestTimestamper(f))
591
592         cr.CrunchLog.Print("Hello world!")
593         cr.CrunchLog.Print("Goodbye")
594         cr.finalState = "Complete"
595
596         err = cr.CommitLogs()
597         c.Check(err, IsNil)
598
599         c.Check(api.Calls, Equals, 1)
600         c.Check(api.Content[0]["ensure_unique_name"], Equals, true)
601         c.Check(api.Content[0]["collection"].(arvadosclient.Dict)["name"], Equals, "logs for zzzzz-zzzzz-zzzzzzzzzzzzzzz")
602         c.Check(api.Content[0]["collection"].(arvadosclient.Dict)["manifest_text"], Equals, ". 744b2e4553123b02fa7b452ec5c18993+123 0:123:crunch-run.txt\n")
603         c.Check(*cr.LogsPDH, Equals, "63da7bdacf08c40f604daad80c261e9a+60")
604 }
605
606 func (s *TestSuite) TestUpdateContainerRunning(c *C) {
607         api := &ArvTestClient{}
608         kc := &KeepTestClient{}
609         defer kc.Close()
610         cr, err := NewContainerRunner(s.client, api, kc, "zzzzz-zzzzz-zzzzzzzzzzzzzzz")
611         c.Assert(err, IsNil)
612
613         err = cr.UpdateContainerRunning("")
614         c.Check(err, IsNil)
615
616         c.Check(api.Content[0]["container"].(arvadosclient.Dict)["state"], Equals, "Running")
617 }
618
619 func (s *TestSuite) TestUpdateContainerComplete(c *C) {
620         api := &ArvTestClient{}
621         kc := &KeepTestClient{}
622         defer kc.Close()
623         cr, err := NewContainerRunner(s.client, api, kc, "zzzzz-zzzzz-zzzzzzzzzzzzzzz")
624         c.Assert(err, IsNil)
625
626         cr.LogsPDH = new(string)
627         *cr.LogsPDH = "d3a229d2fe3690c2c3e75a71a153c6a3+60"
628
629         cr.ExitCode = new(int)
630         *cr.ExitCode = 42
631         cr.finalState = "Complete"
632
633         err = cr.UpdateContainerFinal()
634         c.Check(err, IsNil)
635
636         c.Check(api.Content[0]["container"].(arvadosclient.Dict)["log"], Equals, *cr.LogsPDH)
637         c.Check(api.Content[0]["container"].(arvadosclient.Dict)["exit_code"], Equals, *cr.ExitCode)
638         c.Check(api.Content[0]["container"].(arvadosclient.Dict)["state"], Equals, "Complete")
639 }
640
641 func (s *TestSuite) TestUpdateContainerCancelled(c *C) {
642         api := &ArvTestClient{}
643         kc := &KeepTestClient{}
644         defer kc.Close()
645         cr, err := NewContainerRunner(s.client, api, kc, "zzzzz-zzzzz-zzzzzzzzzzzzzzz")
646         c.Assert(err, IsNil)
647         cr.cCancelled = true
648         cr.finalState = "Cancelled"
649
650         err = cr.UpdateContainerFinal()
651         c.Check(err, IsNil)
652
653         c.Check(api.Content[0]["container"].(arvadosclient.Dict)["log"], IsNil)
654         c.Check(api.Content[0]["container"].(arvadosclient.Dict)["exit_code"], IsNil)
655         c.Check(api.Content[0]["container"].(arvadosclient.Dict)["state"], Equals, "Cancelled")
656 }
657
658 // Used by the TestFullRun*() test below to DRY up boilerplate setup to do full
659 // dress rehearsal of the Run() function, starting from a JSON container record.
660 func (s *TestSuite) fullRunHelper(c *C, record string, extraMounts []string, fn func() int) (*ArvTestClient, *ContainerRunner, string) {
661         err := json.Unmarshal([]byte(record), &s.api.Container)
662         c.Assert(err, IsNil)
663         initialState := s.api.Container.State
664
665         var sm struct {
666                 SecretMounts map[string]arvados.Mount `json:"secret_mounts"`
667         }
668         err = json.Unmarshal([]byte(record), &sm)
669         c.Check(err, IsNil)
670         secretMounts, err := json.Marshal(sm)
671         c.Assert(err, IsNil)
672         c.Logf("SecretMounts decoded %v json %q", sm, secretMounts)
673
674         s.executor.runFunc = fn
675
676         s.runner.statInterval = 100 * time.Millisecond
677         s.runner.containerWatchdogInterval = time.Second
678
679         realTemp := c.MkDir()
680         tempcount := 0
681         s.runner.MkTempDir = func(_, prefix string) (string, error) {
682                 tempcount++
683                 d := fmt.Sprintf("%s/%s%d", realTemp, prefix, tempcount)
684                 err := os.Mkdir(d, os.ModePerm)
685                 if err != nil && strings.Contains(err.Error(), ": file exists") {
686                         // Test case must have pre-populated the tempdir
687                         err = nil
688                 }
689                 return d, err
690         }
691         client, _ := apiStub()
692         s.runner.MkArvClient = func(token string) (IArvadosClient, IKeepClient, *arvados.Client, error) {
693                 return &ArvTestClient{secretMounts: secretMounts}, &s.testContainerKeepClient, client, nil
694         }
695
696         if extraMounts != nil && len(extraMounts) > 0 {
697                 err := s.runner.SetupArvMountPoint("keep")
698                 c.Check(err, IsNil)
699
700                 for _, m := range extraMounts {
701                         os.MkdirAll(s.runner.ArvMountPoint+"/by_id/"+m, os.ModePerm)
702                 }
703         }
704
705         err = s.runner.Run()
706         if s.api.CalledWith("container.state", "Complete") != nil {
707                 c.Check(err, IsNil)
708         }
709         if s.executor.loadErr == nil && s.executor.createErr == nil && initialState != "Running" {
710                 c.Check(s.api.WasSetRunning, Equals, true)
711                 var lastupdate arvadosclient.Dict
712                 for _, content := range s.api.Content {
713                         if content["container"] != nil {
714                                 lastupdate = content["container"].(arvadosclient.Dict)
715                         }
716                 }
717                 if lastupdate["log"] == nil {
718                         c.Errorf("no container update with non-nil log -- updates were: %v", s.api.Content)
719                 }
720         }
721
722         if err != nil {
723                 dumpAllLogFiles(c, s.runner)
724         }
725
726         return s.api, s.runner, realTemp
727 }
728
729 func (s *TestSuite) TestFullRunHello(c *C) {
730         s.runner.enableMemoryLimit = true
731         s.runner.networkMode = "default"
732         s.fullRunHelper(c, `{
733     "command": ["echo", "hello world"],
734     "container_image": "`+arvadostest.DockerImage112PDH+`",
735     "cwd": ".",
736     "environment": {"foo":"bar","baz":"waz"},
737     "mounts": {"/tmp": {"kind": "tmp"} },
738     "output_path": "/tmp",
739     "priority": 1,
740     "runtime_constraints": {"vcpus":1,"ram":1000000},
741     "state": "Locked",
742     "output_storage_classes": ["default"]
743 }`, nil, func() int {
744                 c.Check(s.executor.created.Command, DeepEquals, []string{"echo", "hello world"})
745                 c.Check(s.executor.created.Image, Equals, "sha256:d8309758b8fe2c81034ffc8a10c36460b77db7bc5e7b448c4e5b684f9d95a678")
746                 c.Check(s.executor.created.Env, DeepEquals, map[string]string{"foo": "bar", "baz": "waz"})
747                 c.Check(s.executor.created.VCPUs, Equals, 1)
748                 c.Check(s.executor.created.RAM, Equals, int64(1000000))
749                 c.Check(s.executor.created.NetworkMode, Equals, "default")
750                 c.Check(s.executor.created.EnableNetwork, Equals, false)
751                 c.Check(s.executor.created.CUDADeviceCount, Equals, 0)
752                 fmt.Fprintln(s.executor.created.Stdout, "hello world")
753                 return 0
754         })
755
756         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
757         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
758         c.Check(logFileContent(c, s.runner, "stdout.txt"), Matches, `2\S+Z hello world\n`)
759         c.Check(s.testDispatcherKeepClient.StorageClasses, DeepEquals, []string{"default"})
760         c.Check(s.testContainerKeepClient.StorageClasses, DeepEquals, []string{"default"})
761 }
762
763 func (s *TestSuite) TestRunAlreadyRunning(c *C) {
764         var ran bool
765         s.fullRunHelper(c, `{
766     "command": ["sleep", "3"],
767     "container_image": "`+arvadostest.DockerImage112PDH+`",
768     "cwd": ".",
769     "environment": {},
770     "mounts": {"/tmp": {"kind": "tmp"} },
771     "output_path": "/tmp",
772     "priority": 1,
773     "runtime_constraints": {},
774     "scheduling_parameters":{"max_run_time": 1},
775     "state": "Running"
776 }`, nil, func() int {
777                 ran = true
778                 return 2
779         })
780         c.Check(s.api.CalledWith("container.state", "Cancelled"), IsNil)
781         c.Check(s.api.CalledWith("container.state", "Complete"), IsNil)
782         c.Check(ran, Equals, false)
783 }
784
785 func ec2MetadataServerStub(c *C, token *string, failureRate float64, stoptime *atomic.Value) *httptest.Server {
786         failedOnce := false
787         return httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
788                 if !failedOnce || rand.Float64() < failureRate {
789                         w.WriteHeader(http.StatusServiceUnavailable)
790                         failedOnce = true
791                         return
792                 }
793                 switch r.URL.Path {
794                 case "/latest/api/token":
795                         fmt.Fprintln(w, *token)
796                 case "/latest/meta-data/spot/instance-action":
797                         if r.Header.Get("X-aws-ec2-metadata-token") != *token {
798                                 w.WriteHeader(http.StatusUnauthorized)
799                         } else if t, _ := stoptime.Load().(time.Time); t.IsZero() {
800                                 w.WriteHeader(http.StatusNotFound)
801                         } else {
802                                 fmt.Fprintf(w, `{"action":"stop","time":"%s"}`, t.Format(time.RFC3339))
803                         }
804                 default:
805                         w.WriteHeader(http.StatusNotFound)
806                 }
807         }))
808 }
809
810 func (s *TestSuite) TestSpotInterruptionNotice(c *C) {
811         s.testSpotInterruptionNotice(c, 0.1)
812 }
813
814 func (s *TestSuite) TestSpotInterruptionNoticeNotAvailable(c *C) {
815         s.testSpotInterruptionNotice(c, 1)
816 }
817
818 func (s *TestSuite) testSpotInterruptionNotice(c *C, failureRate float64) {
819         var stoptime atomic.Value
820         token := "fake-ec2-metadata-token"
821         stub := ec2MetadataServerStub(c, &token, failureRate, &stoptime)
822         defer stub.Close()
823
824         defer func(i time.Duration, u string) {
825                 spotInterruptionCheckInterval = i
826                 ec2MetadataBaseURL = u
827         }(spotInterruptionCheckInterval, ec2MetadataBaseURL)
828         spotInterruptionCheckInterval = time.Second / 8
829         ec2MetadataBaseURL = stub.URL
830
831         checkedLogs := false
832         checkLogs := func() {
833                 checkedLogs = true
834                 c.Check(logFileContent(c, s.runner, "crunch-run.txt"), Matches, `(?ms).*Checking for spot interruptions every 125ms using instance metadata at http://.*`)
835                 c.Check(logFileContent(c, s.runner, "crunch-run.txt"), Matches, `(?ms).*Error checking spot interruptions: 503 Service Unavailable.*`)
836                 if failureRate == 1 {
837                         c.Check(logFileContent(c, s.runner, "crunch-run.txt"), Matches, `(?ms).*Giving up on checking spot interruptions after too many consecutive failures.*`)
838                         return
839                 }
840                 text := `Cloud provider scheduled instance stop at ` + stoptime.Load().(time.Time).Format(time.RFC3339)
841                 c.Check(logFileContent(c, s.runner, "crunch-run.txt"), Matches, `(?ms).*`+text+`.*`)
842                 c.Check(s.api.CalledWith("container.runtime_status.warning", "preemption notice"), NotNil)
843                 c.Check(s.api.CalledWith("container.runtime_status.warningDetail", text), NotNil)
844                 c.Check(s.api.CalledWith("container.runtime_status.preemptionNotice", text), NotNil)
845
846                 // Check that the log collection was saved, and the
847                 // container record updated with the new PDH,
848                 // immediately after the preemption notice was
849                 // received -- i.e., while the container is still
850                 // running.
851                 lastpdh := ""
852                 saved := make(map[string]string) // pdh => manifest_text
853                 for _, call := range s.api.Content {
854                         if ctr, ok := call["container"].(arvadosclient.Dict); ok {
855                                 if pdh, ok := ctr["log"].(string); ok {
856                                         lastpdh = pdh
857                                 }
858                         }
859                         if coll, ok := call["collection"].(arvadosclient.Dict); ok {
860                                 mt, _ := coll["manifest_text"].(string)
861                                 if strings.Contains(mt, ":crunch-run.txt") {
862                                         saved[arvados.PortableDataHash(mt)] = mt
863                                 }
864                         }
865                 }
866                 logfs, err := (&arvados.Collection{ManifestText: saved[lastpdh]}).FileSystem(s.runner.dispatcherClient, s.runner.DispatcherKeepClient)
867                 c.Assert(err, IsNil)
868                 log, err := fs.ReadFile(arvados.FS(logfs), "crunch-run.txt")
869                 c.Check(err, IsNil)
870                 c.Check(string(log), Matches, `(?ms).*\Q`+text+`\E.*`)
871         }
872
873         go s.runner.checkSpotInterruptionNotices()
874         s.fullRunHelper(c, `{
875     "command": ["sleep", "3"],
876     "container_image": "`+arvadostest.DockerImage112PDH+`",
877     "cwd": ".",
878     "environment": {},
879     "mounts": {"/tmp": {"kind": "tmp"} },
880     "output_path": "/tmp",
881     "priority": 1,
882     "runtime_constraints": {},
883     "state": "Locked"
884 }`, nil, func() int {
885                 time.Sleep(time.Second)
886                 stoptime.Store(time.Now().Add(time.Minute).UTC())
887                 token = "different-fake-ec2-metadata-token"
888                 time.Sleep(time.Second)
889                 checkLogs()
890                 return 0
891         })
892         c.Check(checkedLogs, Equals, true)
893 }
894
895 func (s *TestSuite) TestRunTimeExceeded(c *C) {
896         s.fullRunHelper(c, `{
897     "command": ["sleep", "3"],
898     "container_image": "`+arvadostest.DockerImage112PDH+`",
899     "cwd": ".",
900     "environment": {},
901     "mounts": {"/tmp": {"kind": "tmp"} },
902     "output_path": "/tmp",
903     "priority": 1,
904     "runtime_constraints": {},
905     "scheduling_parameters":{"max_run_time": 1},
906     "state": "Locked"
907 }`, nil, func() int {
908                 time.Sleep(3 * time.Second)
909                 return 0
910         })
911
912         c.Check(s.api.CalledWith("container.state", "Cancelled"), NotNil)
913         c.Check(logFileContent(c, s.runner, "crunch-run.txt"), Matches, "(?ms).*maximum run time exceeded.*")
914 }
915
916 func (s *TestSuite) TestContainerWaitFails(c *C) {
917         s.fullRunHelper(c, `{
918     "command": ["sleep", "3"],
919     "container_image": "`+arvadostest.DockerImage112PDH+`",
920     "cwd": ".",
921     "mounts": {"/tmp": {"kind": "tmp"} },
922     "output_path": "/tmp",
923     "priority": 1,
924     "state": "Locked"
925 }`, nil, func() int {
926                 s.executor.waitErr = errors.New("Container is not running")
927                 return 0
928         })
929
930         c.Check(s.api.CalledWith("container.state", "Cancelled"), NotNil)
931         c.Check(logFileContent(c, s.runner, "crunch-run.txt"), Matches, "(?ms).*Container is not running.*")
932 }
933
934 func (s *TestSuite) TestCrunchstat(c *C) {
935         s.runner.crunchstatFakeFS = os.DirFS("../crunchstat/testdata/debian12")
936         s.fullRunHelper(c, `{
937                 "command": ["sleep", "1"],
938                 "container_image": "`+arvadostest.DockerImage112PDH+`",
939                 "cwd": ".",
940                 "environment": {},
941                 "mounts": {"/tmp": {"kind": "tmp"} },
942                 "output_path": "/tmp",
943                 "priority": 1,
944                 "runtime_constraints": {},
945                 "state": "Locked"
946         }`, nil, func() int {
947                 time.Sleep(time.Second)
948                 return 0
949         })
950
951         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
952         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
953
954         c.Check(logFileContent(c, s.runner, "crunchstat.txt"), Matches, `(?ms).*`+reTimestamp+` mem \d+ swap \d+ pgmajfault \d+ rss.*`)
955         c.Check(logFileContent(c, s.runner, "hoststat.txt"), Matches, `(?ms).*`+reTimestamp+` mem \d+ swap \d+ pgmajfault \d+ rss.*`)
956
957         // Check that we called (*crunchstat.Reporter)Stop().
958         c.Check(logFileContent(c, s.runner, "crunch-run.txt"), Matches, `(?ms).*`+reTimestamp+` Maximum crunch-run memory rss usage was \d+ bytes\n.*`)
959 }
960
961 func (s *TestSuite) TestNodeInfoLog(c *C) {
962         s.fullRunHelper(c, `{
963                 "command": ["true"],
964                 "container_image": "`+arvadostest.DockerImage112PDH+`",
965                 "cwd": ".",
966                 "environment": {},
967                 "mounts": {"/tmp": {"kind": "tmp"} },
968                 "output_path": "/tmp",
969                 "priority": 1,
970                 "runtime_constraints": {},
971                 "state": "Locked"
972         }`, nil, func() int {
973                 return 0
974         })
975
976         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
977         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
978
979         json := logFileContent(c, s.runner, "node.json")
980         c.Check(json, Matches, `(?ms).*"ProviderType": *"a1\.2xlarge".*`)
981         c.Check(json, Matches, `(?ms).*"Price": *1\.2.*`)
982
983         nodeinfo := logFileContent(c, s.runner, "node-info.txt")
984         c.Check(nodeinfo, Matches, `(?ms).*Host Information.*`)
985         c.Check(nodeinfo, Matches, `(?ms).*CPU Information.*`)
986         c.Check(nodeinfo, Matches, `(?ms).*Memory Information.*`)
987         c.Check(nodeinfo, Matches, `(?ms).*Disk Space.*`)
988         c.Check(nodeinfo, Matches, `(?ms).*Disk INodes.*`)
989 }
990
991 func (s *TestSuite) TestLogVersionAndRuntime(c *C) {
992         s.fullRunHelper(c, `{
993                 "command": ["sleep", "1"],
994                 "container_image": "`+arvadostest.DockerImage112PDH+`",
995                 "cwd": ".",
996                 "environment": {},
997                 "mounts": {"/tmp": {"kind": "tmp"} },
998                 "output_path": "/tmp",
999                 "priority": 1,
1000                 "runtime_constraints": {},
1001                 "state": "Locked"
1002         }`, nil, func() int {
1003                 return 0
1004         })
1005
1006         c.Check(logFileContent(c, s.runner, "crunch-run.txt"), Matches, `(?ms).*crunch-run \S+ \(go\S+\) start.*`)
1007         c.Check(logFileContent(c, s.runner, "crunch-run.txt"), Matches, `(?ms).*crunch-run process has uid=\d+\(.+\) gid=\d+\(.+\) groups=\d+\(.+\)(,\d+\(.+\))*\n.*`)
1008         c.Check(logFileContent(c, s.runner, "crunch-run.txt"), Matches, `(?ms).*Executing container: zzzzz-zzzzz-zzzzzzzzzzzzzzz.*`)
1009         c.Check(logFileContent(c, s.runner, "crunch-run.txt"), Matches, `(?ms).*Using container runtime: stub.*`)
1010 }
1011
1012 func (s *TestSuite) testLogRSSThresholds(c *C, ram int64, expected []int, notExpected int) {
1013         s.runner.crunchstatFakeFS = os.DirFS("../crunchstat/testdata/debian12")
1014         s.fullRunHelper(c, `{
1015                 "command": ["true"],
1016                 "container_image": "`+arvadostest.DockerImage112PDH+`",
1017                 "cwd": ".",
1018                 "environment": {},
1019                 "mounts": {"/tmp": {"kind": "tmp"} },
1020                 "output_path": "/tmp",
1021                 "priority": 1,
1022                 "runtime_constraints": {"ram": `+fmt.Sprintf("%d", ram)+`},
1023                 "state": "Locked"
1024         }`, nil, func() int { return 0 })
1025         logs := logFileContent(c, s.runner, "crunch-run.txt")
1026         c.Log("=== crunchstat logs")
1027         c.Log(logs)
1028         pattern := logLineStart + `Container using over %d%% of memory \(rss %d/%d bytes\)`
1029         var threshold int
1030         for _, threshold = range expected {
1031                 c.Check(logs, Matches, fmt.Sprintf(pattern, threshold, s.debian12MemoryCurrent, ram))
1032         }
1033         if notExpected > threshold {
1034                 c.Check(logs, Not(Matches), fmt.Sprintf(pattern, notExpected, s.debian12MemoryCurrent, ram))
1035         }
1036 }
1037
1038 func (s *TestSuite) TestLogNoRSSThresholds(c *C) {
1039         s.testLogRSSThresholds(c, s.debian12MemoryCurrent*10, []int{}, 90)
1040 }
1041
1042 func (s *TestSuite) TestLogSomeRSSThresholds(c *C) {
1043         onePercentRSS := s.debian12MemoryCurrent / 100
1044         s.testLogRSSThresholds(c, 102*onePercentRSS, []int{90, 95}, 99)
1045 }
1046
1047 func (s *TestSuite) TestLogAllRSSThresholds(c *C) {
1048         s.testLogRSSThresholds(c, s.debian12MemoryCurrent, []int{90, 95, 99}, 0)
1049 }
1050
1051 func (s *TestSuite) TestLogMaximaAfterRun(c *C) {
1052         s.runner.crunchstatFakeFS = os.DirFS("../crunchstat/testdata/debian12")
1053         s.runner.parentTemp = c.MkDir()
1054         s.fullRunHelper(c, `{
1055         "command": ["true"],
1056         "container_image": "`+arvadostest.DockerImage112PDH+`",
1057         "cwd": ".",
1058         "environment": {},
1059         "mounts": {"/tmp": {"kind": "tmp"} },
1060         "output_path": "/tmp",
1061         "priority": 1,
1062         "runtime_constraints": {"ram": `+fmt.Sprintf("%d", s.debian12MemoryCurrent*10)+`},
1063         "state": "Locked"
1064     }`, nil, func() int { return 0 })
1065         logs := logFileContent(c, s.runner, "crunch-run.txt")
1066         for _, expected := range []string{
1067                 `Maximum disk usage was \d+%, \d+/\d+ bytes`,
1068                 fmt.Sprintf(`Maximum container memory swap usage was %d bytes`, s.debian12SwapCurrent),
1069                 `Maximum container memory pgmajfault usage was \d+ faults`,
1070                 fmt.Sprintf(`Maximum container memory rss usage was 10%%, %d/%d bytes`, s.debian12MemoryCurrent, s.debian12MemoryCurrent*10),
1071                 `Maximum crunch-run memory rss usage was \d+ bytes`,
1072         } {
1073                 c.Check(logs, Matches, logLineStart+expected)
1074         }
1075 }
1076
1077 func (s *TestSuite) TestCommitNodeInfoBeforeStart(c *C) {
1078         var collection_create, container_update arvadosclient.Dict
1079         s.fullRunHelper(c, `{
1080                 "command": ["true"],
1081                 "container_image": "`+arvadostest.DockerImage112PDH+`",
1082                 "cwd": ".",
1083                 "environment": {},
1084                 "mounts": {"/tmp": {"kind": "tmp"} },
1085                 "output_path": "/tmp",
1086                 "priority": 1,
1087                 "runtime_constraints": {},
1088                 "state": "Locked",
1089                 "uuid": "zzzzz-dz642-202301121543210"
1090         }`, nil, func() int {
1091                 collection_create = s.api.CalledWith("ensure_unique_name", true)
1092                 container_update = s.api.CalledWith("container.state", "Running")
1093                 return 0
1094         })
1095
1096         c.Assert(collection_create, NotNil)
1097         log_collection := collection_create["collection"].(arvadosclient.Dict)
1098         c.Check(log_collection["name"], Equals, "logs for zzzzz-dz642-202301121543210")
1099         manifest_text := log_collection["manifest_text"].(string)
1100         // We check that the file size is at least two digits as an easy way to
1101         // check the file isn't empty.
1102         c.Check(manifest_text, Matches, `\. .+ \d+:\d{2,}:node-info\.txt( .+)?\n`)
1103         c.Check(manifest_text, Matches, `\. .+ \d+:\d{2,}:node\.json( .+)?\n`)
1104
1105         c.Assert(container_update, NotNil)
1106         // As of Arvados 2.5.0, the container update must specify its log in PDH
1107         // format for the API server to propagate it to container requests, which
1108         // is what we care about for this test.
1109         expect_pdh := fmt.Sprintf("%x+%d", md5.Sum([]byte(manifest_text)), len(manifest_text))
1110         c.Check(container_update["container"].(arvadosclient.Dict)["log"], Equals, expect_pdh)
1111 }
1112
1113 func (s *TestSuite) TestContainerRecordLog(c *C) {
1114         s.fullRunHelper(c, `{
1115                 "command": ["sleep", "1"],
1116                 "container_image": "`+arvadostest.DockerImage112PDH+`",
1117                 "cwd": ".",
1118                 "environment": {},
1119                 "mounts": {"/tmp": {"kind": "tmp"} },
1120                 "output_path": "/tmp",
1121                 "priority": 1,
1122                 "runtime_constraints": {},
1123                 "state": "Locked"
1124         }`, nil,
1125                 func() int {
1126                         time.Sleep(time.Second)
1127                         return 0
1128                 })
1129
1130         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
1131         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
1132
1133         c.Check(logFileContent(c, s.runner, "container.json"), Matches, `(?ms).*container_image.*`)
1134 }
1135
1136 func (s *TestSuite) TestFullRunStderr(c *C) {
1137         s.fullRunHelper(c, `{
1138     "command": ["/bin/sh", "-c", "echo hello ; echo world 1>&2 ; exit 1"],
1139     "container_image": "`+arvadostest.DockerImage112PDH+`",
1140     "cwd": ".",
1141     "environment": {},
1142     "mounts": {"/tmp": {"kind": "tmp"} },
1143     "output_path": "/tmp",
1144     "priority": 1,
1145     "runtime_constraints": {},
1146     "state": "Locked"
1147 }`, nil, func() int {
1148                 fmt.Fprintln(s.executor.created.Stdout, "hello")
1149                 fmt.Fprintln(s.executor.created.Stderr, "world")
1150                 return 1
1151         })
1152
1153         final := s.api.CalledWith("container.state", "Complete")
1154         c.Assert(final, NotNil)
1155         c.Check(final["container"].(arvadosclient.Dict)["exit_code"], Equals, 1)
1156         c.Check(final["container"].(arvadosclient.Dict)["log"], NotNil)
1157
1158         c.Check(logFileContent(c, s.runner, "stdout.txt"), Matches, ".*hello\n")
1159         c.Check(logFileContent(c, s.runner, "stderr.txt"), Matches, ".*world\n")
1160 }
1161
1162 func (s *TestSuite) TestFullRunDefaultCwd(c *C) {
1163         s.fullRunHelper(c, `{
1164     "command": ["pwd"],
1165     "container_image": "`+arvadostest.DockerImage112PDH+`",
1166     "cwd": ".",
1167     "environment": {},
1168     "mounts": {"/tmp": {"kind": "tmp"} },
1169     "output_path": "/tmp",
1170     "priority": 1,
1171     "runtime_constraints": {},
1172     "state": "Locked"
1173 }`, nil, func() int {
1174                 fmt.Fprintf(s.executor.created.Stdout, "workdir=%q", s.executor.created.WorkingDir)
1175                 return 0
1176         })
1177
1178         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
1179         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
1180         c.Check(logFileContent(c, s.runner, "stdout.txt"), Matches, `.*workdir=""`)
1181 }
1182
1183 func (s *TestSuite) TestFullRunSetCwd(c *C) {
1184         s.fullRunHelper(c, `{
1185     "command": ["pwd"],
1186     "container_image": "`+arvadostest.DockerImage112PDH+`",
1187     "cwd": "/bin",
1188     "environment": {},
1189     "mounts": {"/tmp": {"kind": "tmp"} },
1190     "output_path": "/tmp",
1191     "priority": 1,
1192     "runtime_constraints": {},
1193     "state": "Locked"
1194 }`, nil, func() int {
1195                 fmt.Fprintln(s.executor.created.Stdout, s.executor.created.WorkingDir)
1196                 return 0
1197         })
1198
1199         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
1200         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
1201         c.Check(logFileContent(c, s.runner, "stdout.txt"), Matches, ".*/bin\n")
1202 }
1203
1204 func (s *TestSuite) TestFullRunSetOutputStorageClasses(c *C) {
1205         s.fullRunHelper(c, `{
1206     "command": ["pwd"],
1207     "container_image": "`+arvadostest.DockerImage112PDH+`",
1208     "cwd": "/bin",
1209     "environment": {},
1210     "mounts": {"/tmp": {"kind": "tmp"} },
1211     "output_path": "/tmp",
1212     "priority": 1,
1213     "runtime_constraints": {},
1214     "state": "Locked",
1215     "output_storage_classes": ["foo", "bar"]
1216 }`, nil, func() int {
1217                 fmt.Fprintln(s.executor.created.Stdout, s.executor.created.WorkingDir)
1218                 return 0
1219         })
1220
1221         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
1222         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
1223         c.Check(logFileContent(c, s.runner, "stdout.txt"), Matches, ".*/bin\n")
1224         c.Check(s.testDispatcherKeepClient.StorageClasses, DeepEquals, []string{"foo", "bar"})
1225         c.Check(s.testContainerKeepClient.StorageClasses, DeepEquals, []string{"foo", "bar"})
1226 }
1227
1228 func (s *TestSuite) TestEnableCUDADeviceCount(c *C) {
1229         s.fullRunHelper(c, `{
1230     "command": ["pwd"],
1231     "container_image": "`+arvadostest.DockerImage112PDH+`",
1232     "cwd": "/bin",
1233     "environment": {},
1234     "mounts": {"/tmp": {"kind": "tmp"} },
1235     "output_path": "/tmp",
1236     "priority": 1,
1237     "runtime_constraints": {"cuda": {"device_count": 2}},
1238     "state": "Locked",
1239     "output_storage_classes": ["foo", "bar"]
1240 }`, nil, func() int {
1241                 fmt.Fprintln(s.executor.created.Stdout, "ok")
1242                 return 0
1243         })
1244         c.Check(s.executor.created.CUDADeviceCount, Equals, 2)
1245 }
1246
1247 func (s *TestSuite) TestEnableCUDAHardwareCapability(c *C) {
1248         s.fullRunHelper(c, `{
1249     "command": ["pwd"],
1250     "container_image": "`+arvadostest.DockerImage112PDH+`",
1251     "cwd": "/bin",
1252     "environment": {},
1253     "mounts": {"/tmp": {"kind": "tmp"} },
1254     "output_path": "/tmp",
1255     "priority": 1,
1256     "runtime_constraints": {"cuda": {"hardware_capability": "foo"}},
1257     "state": "Locked",
1258     "output_storage_classes": ["foo", "bar"]
1259 }`, nil, func() int {
1260                 fmt.Fprintln(s.executor.created.Stdout, "ok")
1261                 return 0
1262         })
1263         c.Check(s.executor.created.CUDADeviceCount, Equals, 0)
1264 }
1265
1266 func (s *TestSuite) TestStopOnSignal(c *C) {
1267         s.executor.runFunc = func() int {
1268                 s.executor.created.Stdout.Write([]byte("foo\n"))
1269                 s.runner.SigChan <- syscall.SIGINT
1270                 time.Sleep(10 * time.Second)
1271                 return 0
1272         }
1273         s.testStopContainer(c)
1274 }
1275
1276 func (s *TestSuite) TestStopOnArvMountDeath(c *C) {
1277         s.executor.runFunc = func() int {
1278                 s.executor.created.Stdout.Write([]byte("foo\n"))
1279                 s.runner.ArvMountExit <- nil
1280                 close(s.runner.ArvMountExit)
1281                 time.Sleep(10 * time.Second)
1282                 return 0
1283         }
1284         s.runner.ArvMountExit = make(chan error)
1285         s.testStopContainer(c)
1286 }
1287
1288 func (s *TestSuite) testStopContainer(c *C) {
1289         record := `{
1290     "command": ["/bin/sh", "-c", "echo foo && sleep 30 && echo bar"],
1291     "container_image": "` + arvadostest.DockerImage112PDH + `",
1292     "cwd": ".",
1293     "environment": {},
1294     "mounts": {"/tmp": {"kind": "tmp"} },
1295     "output_path": "/tmp",
1296     "priority": 1,
1297     "runtime_constraints": {},
1298     "state": "Locked"
1299 }`
1300
1301         err := json.Unmarshal([]byte(record), &s.api.Container)
1302         c.Assert(err, IsNil)
1303
1304         s.runner.RunArvMount = func([]string, string) (*exec.Cmd, error) { return nil, nil }
1305         s.runner.MkArvClient = func(token string) (IArvadosClient, IKeepClient, *arvados.Client, error) {
1306                 return &ArvTestClient{}, &KeepTestClient{}, nil, nil
1307         }
1308
1309         done := make(chan error)
1310         go func() {
1311                 done <- s.runner.Run()
1312         }()
1313         select {
1314         case <-time.After(20 * time.Second):
1315                 pprof.Lookup("goroutine").WriteTo(os.Stderr, 1)
1316                 c.Fatal("timed out")
1317         case err = <-done:
1318                 c.Check(err, IsNil)
1319         }
1320         dumpAllLogFiles(c, s.runner)
1321
1322         c.Check(s.api.CalledWith("container.log", nil), NotNil)
1323         c.Check(s.api.CalledWith("container.state", "Cancelled"), NotNil)
1324         c.Check(logFileContent(c, s.runner, "stdout.txt"), Matches, "(?ms).*foo\n$")
1325 }
1326
1327 func (s *TestSuite) TestFullRunSetEnv(c *C) {
1328         s.fullRunHelper(c, `{
1329     "command": ["/bin/sh", "-c", "echo $FROBIZ"],
1330     "container_image": "`+arvadostest.DockerImage112PDH+`",
1331     "cwd": "/bin",
1332     "environment": {"FROBIZ": "bilbo"},
1333     "mounts": {"/tmp": {"kind": "tmp"} },
1334     "output_path": "/tmp",
1335     "priority": 1,
1336     "runtime_constraints": {},
1337     "state": "Locked"
1338 }`, nil, func() int {
1339                 fmt.Fprintf(s.executor.created.Stdout, "%v", s.executor.created.Env)
1340                 return 0
1341         })
1342
1343         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
1344         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
1345         c.Check(logFileContent(c, s.runner, "stdout.txt"), Matches, `.*map\[FROBIZ:bilbo\]`)
1346 }
1347
1348 type ArvMountCmdLine struct {
1349         Cmd   []string
1350         token string
1351 }
1352
1353 func (am *ArvMountCmdLine) ArvMountTest(c []string, token string) (*exec.Cmd, error) {
1354         am.Cmd = c
1355         am.token = token
1356         return nil, nil
1357 }
1358
1359 func stubCert(c *C, temp string) string {
1360         path := temp + "/ca-certificates.crt"
1361         err := os.WriteFile(path, []byte{}, 0666)
1362         c.Assert(err, IsNil)
1363         os.Setenv("SSL_CERT_FILE", path)
1364         return path
1365 }
1366
1367 func (s *TestSuite) TestSetupMounts(c *C) {
1368         cr := s.runner
1369         am := &ArvMountCmdLine{}
1370         cr.RunArvMount = am.ArvMountTest
1371         cr.containerClient, _ = apiStub()
1372         cr.ContainerArvClient = &ArvTestClient{}
1373         cr.ContainerKeepClient = &KeepTestClient{}
1374         cr.Container.OutputStorageClasses = []string{"default"}
1375
1376         realTemp := c.MkDir()
1377         certTemp := c.MkDir()
1378         stubCertPath := stubCert(c, certTemp)
1379         cr.parentTemp = realTemp
1380
1381         i := 0
1382         cr.MkTempDir = func(_ string, prefix string) (string, error) {
1383                 i++
1384                 d := fmt.Sprintf("%s/%s%d", realTemp, prefix, i)
1385                 err := os.Mkdir(d, os.ModePerm)
1386                 if err != nil && strings.Contains(err.Error(), ": file exists") {
1387                         // Test case must have pre-populated the tempdir
1388                         err = nil
1389                 }
1390                 return d, err
1391         }
1392
1393         checkEmpty := func() {
1394                 // Should be deleted.
1395                 _, err := os.Stat(realTemp)
1396                 c.Assert(os.IsNotExist(err), Equals, true)
1397
1398                 // Now recreate it for the next test.
1399                 c.Assert(os.Mkdir(realTemp, 0777), IsNil)
1400         }
1401
1402         {
1403                 i = 0
1404                 cr.ArvMountPoint = ""
1405                 cr.Container.Mounts = make(map[string]arvados.Mount)
1406                 cr.Container.Mounts["/tmp"] = arvados.Mount{Kind: "tmp"}
1407                 cr.Container.OutputPath = "/tmp"
1408                 cr.statInterval = 5 * time.Second
1409                 bindmounts, err := cr.SetupMounts()
1410                 c.Check(err, IsNil)
1411                 c.Check(am.Cmd, DeepEquals, []string{"arv-mount", "--foreground",
1412                         "--read-write", "--storage-classes", "default", "--crunchstat-interval=5",
1413                         "--mount-by-pdh", "by_id", "--disable-event-listening", "--mount-by-id", "by_uuid", realTemp + "/keep1"})
1414                 c.Check(bindmounts, DeepEquals, map[string]bindmount{"/tmp": {realTemp + "/tmp2", false}})
1415                 os.RemoveAll(cr.ArvMountPoint)
1416                 cr.CleanupDirs()
1417                 checkEmpty()
1418         }
1419
1420         {
1421                 i = 0
1422                 cr.ArvMountPoint = ""
1423                 cr.Container.Mounts = make(map[string]arvados.Mount)
1424                 cr.Container.Mounts["/out"] = arvados.Mount{Kind: "tmp"}
1425                 cr.Container.Mounts["/tmp"] = arvados.Mount{Kind: "tmp"}
1426                 cr.Container.OutputPath = "/out"
1427                 cr.Container.OutputStorageClasses = []string{"foo", "bar"}
1428
1429                 bindmounts, err := cr.SetupMounts()
1430                 c.Check(err, IsNil)
1431                 c.Check(am.Cmd, DeepEquals, []string{"arv-mount", "--foreground",
1432                         "--read-write", "--storage-classes", "foo,bar", "--crunchstat-interval=5",
1433                         "--mount-by-pdh", "by_id", "--disable-event-listening", "--mount-by-id", "by_uuid", realTemp + "/keep1"})
1434                 c.Check(bindmounts, DeepEquals, map[string]bindmount{"/out": {realTemp + "/tmp2", false}, "/tmp": {realTemp + "/tmp3", false}})
1435                 os.RemoveAll(cr.ArvMountPoint)
1436                 cr.CleanupDirs()
1437                 checkEmpty()
1438         }
1439
1440         {
1441                 i = 0
1442                 cr.ArvMountPoint = ""
1443                 cr.Container.Mounts = make(map[string]arvados.Mount)
1444                 cr.Container.Mounts["/tmp"] = arvados.Mount{Kind: "tmp"}
1445                 cr.Container.OutputPath = "/tmp"
1446                 cr.Container.RuntimeConstraints.API = true
1447                 cr.Container.OutputStorageClasses = []string{"default"}
1448
1449                 bindmounts, err := cr.SetupMounts()
1450                 c.Check(err, IsNil)
1451                 c.Check(am.Cmd, DeepEquals, []string{"arv-mount", "--foreground",
1452                         "--read-write", "--storage-classes", "default", "--crunchstat-interval=5",
1453                         "--mount-by-pdh", "by_id", "--disable-event-listening", "--mount-by-id", "by_uuid", realTemp + "/keep1"})
1454                 c.Check(bindmounts, DeepEquals, map[string]bindmount{"/tmp": {realTemp + "/tmp2", false}, "/etc/arvados/ca-certificates.crt": {stubCertPath, true}})
1455                 os.RemoveAll(cr.ArvMountPoint)
1456                 cr.CleanupDirs()
1457                 checkEmpty()
1458
1459                 cr.Container.RuntimeConstraints.API = false
1460         }
1461
1462         {
1463                 i = 0
1464                 cr.ArvMountPoint = ""
1465                 cr.Container.Mounts = map[string]arvados.Mount{
1466                         "/keeptmp": {Kind: "collection", Writable: true},
1467                 }
1468                 cr.Container.OutputPath = "/keeptmp"
1469
1470                 os.MkdirAll(realTemp+"/keep1/tmp0", os.ModePerm)
1471
1472                 bindmounts, err := cr.SetupMounts()
1473                 c.Check(err, IsNil)
1474                 c.Check(am.Cmd, DeepEquals, []string{"arv-mount", "--foreground",
1475                         "--read-write", "--storage-classes", "default", "--crunchstat-interval=5",
1476                         "--mount-tmp", "tmp0", "--mount-by-pdh", "by_id", "--disable-event-listening", "--mount-by-id", "by_uuid", realTemp + "/keep1"})
1477                 c.Check(bindmounts, DeepEquals, map[string]bindmount{"/keeptmp": {realTemp + "/keep1/tmp0", false}})
1478                 os.RemoveAll(cr.ArvMountPoint)
1479                 cr.CleanupDirs()
1480                 checkEmpty()
1481         }
1482
1483         {
1484                 i = 0
1485                 cr.ArvMountPoint = ""
1486                 cr.Container.Mounts = map[string]arvados.Mount{
1487                         "/keepinp": {Kind: "collection", PortableDataHash: "59389a8f9ee9d399be35462a0f92541c+53"},
1488                         "/keepout": {Kind: "collection", Writable: true},
1489                 }
1490                 cr.Container.OutputPath = "/keepout"
1491
1492                 os.MkdirAll(realTemp+"/keep1/by_id/59389a8f9ee9d399be35462a0f92541c+53", os.ModePerm)
1493                 os.MkdirAll(realTemp+"/keep1/tmp0", os.ModePerm)
1494
1495                 bindmounts, err := cr.SetupMounts()
1496                 c.Check(err, IsNil)
1497                 c.Check(am.Cmd, DeepEquals, []string{"arv-mount", "--foreground",
1498                         "--read-write", "--storage-classes", "default", "--crunchstat-interval=5",
1499                         "--mount-tmp", "tmp0", "--mount-by-pdh", "by_id", "--disable-event-listening", "--mount-by-id", "by_uuid", realTemp + "/keep1"})
1500                 c.Check(bindmounts, DeepEquals, map[string]bindmount{
1501                         "/keepinp": {realTemp + "/keep1/by_id/59389a8f9ee9d399be35462a0f92541c+53", true},
1502                         "/keepout": {realTemp + "/keep1/tmp0", false},
1503                 })
1504                 os.RemoveAll(cr.ArvMountPoint)
1505                 cr.CleanupDirs()
1506                 checkEmpty()
1507         }
1508
1509         {
1510                 i = 0
1511                 cr.ArvMountPoint = ""
1512                 cr.Container.RuntimeConstraints.KeepCacheRAM = 512
1513                 cr.Container.Mounts = map[string]arvados.Mount{
1514                         "/keepinp": {Kind: "collection", PortableDataHash: "59389a8f9ee9d399be35462a0f92541c+53"},
1515                         "/keepout": {Kind: "collection", Writable: true},
1516                 }
1517                 cr.Container.OutputPath = "/keepout"
1518
1519                 os.MkdirAll(realTemp+"/keep1/by_id/59389a8f9ee9d399be35462a0f92541c+53", os.ModePerm)
1520                 os.MkdirAll(realTemp+"/keep1/tmp0", os.ModePerm)
1521
1522                 bindmounts, err := cr.SetupMounts()
1523                 c.Check(err, IsNil)
1524                 c.Check(am.Cmd, DeepEquals, []string{"arv-mount", "--foreground",
1525                         "--read-write", "--storage-classes", "default", "--crunchstat-interval=5", "--ram-cache",
1526                         "--file-cache", "512", "--mount-tmp", "tmp0", "--mount-by-pdh", "by_id", "--disable-event-listening", "--mount-by-id", "by_uuid", realTemp + "/keep1"})
1527                 c.Check(bindmounts, DeepEquals, map[string]bindmount{
1528                         "/keepinp": {realTemp + "/keep1/by_id/59389a8f9ee9d399be35462a0f92541c+53", true},
1529                         "/keepout": {realTemp + "/keep1/tmp0", false},
1530                 })
1531                 os.RemoveAll(cr.ArvMountPoint)
1532                 cr.CleanupDirs()
1533                 checkEmpty()
1534         }
1535
1536         for _, test := range []struct {
1537                 in  interface{}
1538                 out string
1539         }{
1540                 {in: "foo", out: `"foo"`},
1541                 {in: nil, out: `null`},
1542                 {in: map[string]int64{"foo": 123456789123456789}, out: `{"foo":123456789123456789}`},
1543         } {
1544                 i = 0
1545                 cr.ArvMountPoint = ""
1546                 cr.Container.Mounts = map[string]arvados.Mount{
1547                         "/mnt/test.json": {Kind: "json", Content: test.in},
1548                 }
1549                 bindmounts, err := cr.SetupMounts()
1550                 c.Check(err, IsNil)
1551                 c.Check(bindmounts, DeepEquals, map[string]bindmount{
1552                         "/mnt/test.json": {realTemp + "/json2/mountdata.json", true},
1553                 })
1554                 content, err := ioutil.ReadFile(realTemp + "/json2/mountdata.json")
1555                 c.Check(err, IsNil)
1556                 c.Check(content, DeepEquals, []byte(test.out))
1557                 os.RemoveAll(cr.ArvMountPoint)
1558                 cr.CleanupDirs()
1559                 checkEmpty()
1560         }
1561
1562         for _, test := range []struct {
1563                 in  interface{}
1564                 out string
1565         }{
1566                 {in: "foo", out: `foo`},
1567                 {in: nil, out: "error"},
1568                 {in: map[string]int64{"foo": 123456789123456789}, out: "error"},
1569         } {
1570                 i = 0
1571                 cr.ArvMountPoint = ""
1572                 cr.Container.Mounts = map[string]arvados.Mount{
1573                         "/mnt/test.txt": {Kind: "text", Content: test.in},
1574                 }
1575                 bindmounts, err := cr.SetupMounts()
1576                 if test.out == "error" {
1577                         c.Check(err.Error(), Equals, "content for mount \"/mnt/test.txt\" must be a string")
1578                 } else {
1579                         c.Check(err, IsNil)
1580                         c.Check(bindmounts, DeepEquals, map[string]bindmount{
1581                                 "/mnt/test.txt": {realTemp + "/text2/mountdata.text", true},
1582                         })
1583                         content, err := ioutil.ReadFile(realTemp + "/text2/mountdata.text")
1584                         c.Check(err, IsNil)
1585                         c.Check(content, DeepEquals, []byte(test.out))
1586                 }
1587                 os.RemoveAll(cr.ArvMountPoint)
1588                 cr.CleanupDirs()
1589                 checkEmpty()
1590         }
1591
1592         // Read-only mount points are allowed underneath output_dir mount point
1593         {
1594                 i = 0
1595                 cr.ArvMountPoint = ""
1596                 cr.Container.Mounts = make(map[string]arvados.Mount)
1597                 cr.Container.Mounts = map[string]arvados.Mount{
1598                         "/tmp":     {Kind: "tmp"},
1599                         "/tmp/foo": {Kind: "collection"},
1600                 }
1601                 cr.Container.OutputPath = "/tmp"
1602
1603                 os.MkdirAll(realTemp+"/keep1/tmp0", os.ModePerm)
1604
1605                 bindmounts, err := cr.SetupMounts()
1606                 c.Check(err, IsNil)
1607                 c.Check(am.Cmd, DeepEquals, []string{"arv-mount", "--foreground",
1608                         "--read-write", "--storage-classes", "default", "--crunchstat-interval=5", "--ram-cache",
1609                         "--file-cache", "512", "--mount-tmp", "tmp0", "--mount-by-pdh", "by_id", "--disable-event-listening", "--mount-by-id", "by_uuid", realTemp + "/keep1"})
1610                 c.Check(bindmounts, DeepEquals, map[string]bindmount{
1611                         "/tmp":     {realTemp + "/tmp2", false},
1612                         "/tmp/foo": {realTemp + "/keep1/tmp0", true},
1613                 })
1614                 os.RemoveAll(cr.ArvMountPoint)
1615                 cr.CleanupDirs()
1616                 checkEmpty()
1617         }
1618
1619         // Writable mount points copied to output_dir mount point
1620         {
1621                 i = 0
1622                 cr.ArvMountPoint = ""
1623                 cr.Container.Mounts = make(map[string]arvados.Mount)
1624                 cr.Container.Mounts = map[string]arvados.Mount{
1625                         "/tmp": {Kind: "tmp"},
1626                         "/tmp/foo": {Kind: "collection",
1627                                 PortableDataHash: "59389a8f9ee9d399be35462a0f92541c+53",
1628                                 Writable:         true},
1629                         "/tmp/bar": {Kind: "collection",
1630                                 PortableDataHash: "59389a8f9ee9d399be35462a0f92541d+53",
1631                                 Path:             "baz",
1632                                 Writable:         true},
1633                 }
1634                 cr.Container.OutputPath = "/tmp"
1635
1636                 os.MkdirAll(realTemp+"/keep1/by_id/59389a8f9ee9d399be35462a0f92541c+53", os.ModePerm)
1637                 os.MkdirAll(realTemp+"/keep1/by_id/59389a8f9ee9d399be35462a0f92541d+53/baz", os.ModePerm)
1638
1639                 rf, _ := os.Create(realTemp + "/keep1/by_id/59389a8f9ee9d399be35462a0f92541d+53/baz/quux")
1640                 rf.Write([]byte("bar"))
1641                 rf.Close()
1642
1643                 _, err := cr.SetupMounts()
1644                 c.Check(err, IsNil)
1645                 _, err = os.Stat(cr.HostOutputDir + "/foo")
1646                 c.Check(err, IsNil)
1647                 _, err = os.Stat(cr.HostOutputDir + "/bar/quux")
1648                 c.Check(err, IsNil)
1649                 os.RemoveAll(cr.ArvMountPoint)
1650                 cr.CleanupDirs()
1651                 checkEmpty()
1652         }
1653
1654         // Only mount points of kind 'collection' are allowed underneath output_dir mount point
1655         {
1656                 i = 0
1657                 cr.ArvMountPoint = ""
1658                 cr.Container.Mounts = make(map[string]arvados.Mount)
1659                 cr.Container.Mounts = map[string]arvados.Mount{
1660                         "/tmp":     {Kind: "tmp"},
1661                         "/tmp/foo": {Kind: "tmp"},
1662                 }
1663                 cr.Container.OutputPath = "/tmp"
1664
1665                 _, err := cr.SetupMounts()
1666                 c.Check(err, NotNil)
1667                 c.Check(err, ErrorMatches, `only mount points of kind 'collection', 'text' or 'json' are supported underneath the output_path.*`)
1668                 os.RemoveAll(cr.ArvMountPoint)
1669                 cr.CleanupDirs()
1670                 checkEmpty()
1671         }
1672
1673         // Only mount point of kind 'collection' is allowed for stdin
1674         {
1675                 i = 0
1676                 cr.ArvMountPoint = ""
1677                 cr.Container.Mounts = make(map[string]arvados.Mount)
1678                 cr.Container.Mounts = map[string]arvados.Mount{
1679                         "stdin": {Kind: "tmp"},
1680                 }
1681
1682                 _, err := cr.SetupMounts()
1683                 c.Check(err, NotNil)
1684                 c.Check(err, ErrorMatches, `unsupported mount kind 'tmp' for stdin.*`)
1685                 os.RemoveAll(cr.ArvMountPoint)
1686                 cr.CleanupDirs()
1687                 checkEmpty()
1688         }
1689 }
1690
1691 func (s *TestSuite) TestStdout(c *C) {
1692         helperRecord := `{
1693                 "command": ["/bin/sh", "-c", "echo $FROBIZ"],
1694                 "container_image": "` + arvadostest.DockerImage112PDH + `",
1695                 "cwd": "/bin",
1696                 "environment": {"FROBIZ": "bilbo"},
1697                 "mounts": {"/tmp": {"kind": "tmp"}, "stdout": {"kind": "file", "path": "/tmp/a/b/c.out"} },
1698                 "output_path": "/tmp",
1699                 "priority": 1,
1700                 "runtime_constraints": {},
1701                 "state": "Locked"
1702         }`
1703
1704         s.fullRunHelper(c, helperRecord, nil, func() int {
1705                 fmt.Fprintln(s.executor.created.Stdout, s.executor.created.Env["FROBIZ"])
1706                 return 0
1707         })
1708
1709         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
1710         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
1711         c.Check(s.runner.ContainerArvClient.(*ArvTestClient).CalledWith("collection.manifest_text", "./a/b 307372fa8fd5c146b22ae7a45b49bc31+6 0:6:c.out\n"), NotNil)
1712 }
1713
1714 // Used by the TestStdoutWithWrongPath*()
1715 func (s *TestSuite) stdoutErrorRunHelper(c *C, record string, fn func() int) (*ArvTestClient, *ContainerRunner, error) {
1716         err := json.Unmarshal([]byte(record), &s.api.Container)
1717         c.Assert(err, IsNil)
1718         s.executor.runFunc = fn
1719         s.runner.RunArvMount = (&ArvMountCmdLine{}).ArvMountTest
1720         s.runner.MkArvClient = func(token string) (IArvadosClient, IKeepClient, *arvados.Client, error) {
1721                 return s.api, &KeepTestClient{}, nil, nil
1722         }
1723         return s.api, s.runner, s.runner.Run()
1724 }
1725
1726 func (s *TestSuite) TestStdoutWithWrongPath(c *C) {
1727         _, _, err := s.stdoutErrorRunHelper(c, `{
1728     "mounts": {"/tmp": {"kind": "tmp"}, "stdout": {"kind": "file", "path":"/tmpa.out"} },
1729     "output_path": "/tmp",
1730     "state": "Locked"
1731 }`, func() int { return 0 })
1732         c.Check(err, ErrorMatches, ".*Stdout path does not start with OutputPath.*")
1733 }
1734
1735 func (s *TestSuite) TestStdoutWithWrongKindTmp(c *C) {
1736         _, _, err := s.stdoutErrorRunHelper(c, `{
1737     "mounts": {"/tmp": {"kind": "tmp"}, "stdout": {"kind": "tmp", "path":"/tmp/a.out"} },
1738     "output_path": "/tmp",
1739     "state": "Locked"
1740 }`, func() int { return 0 })
1741         c.Check(err, ErrorMatches, ".*unsupported mount kind 'tmp' for stdout.*")
1742 }
1743
1744 func (s *TestSuite) TestStdoutWithWrongKindCollection(c *C) {
1745         _, _, err := s.stdoutErrorRunHelper(c, `{
1746     "mounts": {"/tmp": {"kind": "tmp"}, "stdout": {"kind": "collection", "path":"/tmp/a.out"} },
1747     "output_path": "/tmp",
1748     "state": "Locked"
1749 }`, func() int { return 0 })
1750         c.Check(err, ErrorMatches, ".*unsupported mount kind 'collection' for stdout.*")
1751 }
1752
1753 func (s *TestSuite) TestFullRunWithAPI(c *C) {
1754         s.fullRunHelper(c, `{
1755     "command": ["/bin/sh", "-c", "true $ARVADOS_API_HOST"],
1756     "container_image": "`+arvadostest.DockerImage112PDH+`",
1757     "cwd": "/bin",
1758     "environment": {},
1759     "mounts": {"/tmp": {"kind": "tmp"} },
1760     "output_path": "/tmp",
1761     "priority": 1,
1762     "runtime_constraints": {"API": true},
1763     "state": "Locked"
1764 }`, nil, func() int {
1765                 c.Check(s.executor.created.Env["ARVADOS_API_HOST"], Equals, os.Getenv("ARVADOS_API_HOST"))
1766                 return 3
1767         })
1768         c.Check(s.api.CalledWith("container.exit_code", 3), NotNil)
1769         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
1770         c.Check(logFileContent(c, s.runner, "crunch-run.txt"), Matches, `(?ms).*`+reTimestamp+` Container exited with status code 3\n.*`)
1771 }
1772
1773 func (s *TestSuite) TestFullRunSetOutput(c *C) {
1774         defer os.Setenv("ARVADOS_API_HOST", os.Getenv("ARVADOS_API_HOST"))
1775         os.Setenv("ARVADOS_API_HOST", "test.arvados.org")
1776         s.fullRunHelper(c, `{
1777     "command": ["/bin/sh", "-c", "echo $ARVADOS_API_HOST"],
1778     "container_image": "`+arvadostest.DockerImage112PDH+`",
1779     "cwd": "/bin",
1780     "environment": {},
1781     "mounts": {"/tmp": {"kind": "tmp"} },
1782     "output_path": "/tmp",
1783     "priority": 1,
1784     "runtime_constraints": {"API": true},
1785     "state": "Locked"
1786 }`, nil, func() int {
1787                 s.api.Container.Output = arvadostest.DockerImage112PDH
1788                 return 0
1789         })
1790
1791         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
1792         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
1793         c.Check(s.api.CalledWith("container.output", arvadostest.DockerImage112PDH), NotNil)
1794 }
1795
1796 func (s *TestSuite) TestArvMountRuntimeStatusWarning(c *C) {
1797         s.runner.RunArvMount = func([]string, string) (*exec.Cmd, error) {
1798                 os.Mkdir(s.runner.ArvMountPoint+"/by_id", 0666)
1799                 ioutil.WriteFile(s.runner.ArvMountPoint+"/by_id/README", nil, 0666)
1800                 return s.runner.ArvMountCmd([]string{"bash", "-c", "echo >&2 Test: Keep write error: I am a teapot; sleep 3"}, "")
1801         }
1802         s.executor.runFunc = func() int {
1803                 time.Sleep(time.Second)
1804                 return 137
1805         }
1806         record := `{
1807     "command": ["sleep", "1"],
1808     "container_image": "` + arvadostest.DockerImage112PDH + `",
1809     "cwd": "/bin",
1810     "environment": {},
1811     "mounts": {"/tmp": {"kind": "tmp"} },
1812     "output_path": "/tmp",
1813     "priority": 1,
1814     "runtime_constraints": {"API": true},
1815     "state": "Locked"
1816 }`
1817         err := json.Unmarshal([]byte(record), &s.api.Container)
1818         c.Assert(err, IsNil)
1819         err = s.runner.Run()
1820         c.Assert(err, IsNil)
1821         c.Check(s.api.CalledWith("container.exit_code", 137), NotNil)
1822         c.Check(s.api.CalledWith("container.runtime_status.warning", "arv-mount: Keep write error"), NotNil)
1823         c.Check(s.api.CalledWith("container.runtime_status.warningDetail", "Test: Keep write error: I am a teapot"), NotNil)
1824         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
1825         c.Check(logFileContent(c, s.runner, "crunch-run.txt"), Matches, `(?ms).*`+reTimestamp+` Container exited with status code 137 \(signal 9, SIGKILL\).*`)
1826         c.Check(logFileContent(c, s.runner, "arv-mount.txt"), Matches, reTimestamp+` Test: Keep write error: I am a teapot\n`)
1827 }
1828
1829 func (s *TestSuite) TestStdoutWithExcludeFromOutputMountPointUnderOutputDir(c *C) {
1830         helperRecord := `{
1831                 "command": ["/bin/sh", "-c", "echo $FROBIZ"],
1832                 "container_image": "` + arvadostest.DockerImage112PDH + `",
1833                 "cwd": "/bin",
1834                 "environment": {"FROBIZ": "bilbo"},
1835                 "mounts": {
1836         "/tmp": {"kind": "tmp"},
1837         "/tmp/foo": {"kind": "collection",
1838                      "portable_data_hash": "a3e8f74c6f101eae01fa08bfb4e49b3a+54",
1839                      "exclude_from_output": true
1840         },
1841         "stdout": {"kind": "file", "path": "/tmp/a/b/c.out"}
1842     },
1843                 "output_path": "/tmp",
1844                 "priority": 1,
1845                 "runtime_constraints": {},
1846                 "state": "Locked"
1847         }`
1848
1849         extraMounts := []string{"a3e8f74c6f101eae01fa08bfb4e49b3a+54"}
1850
1851         s.fullRunHelper(c, helperRecord, extraMounts, func() int {
1852                 fmt.Fprintln(s.executor.created.Stdout, s.executor.created.Env["FROBIZ"])
1853                 return 0
1854         })
1855
1856         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
1857         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
1858         c.Check(s.runner.ContainerArvClient.(*ArvTestClient).CalledWith("collection.manifest_text", "./a/b 307372fa8fd5c146b22ae7a45b49bc31+6 0:6:c.out\n"), NotNil)
1859 }
1860
1861 func (s *TestSuite) TestStdoutWithMultipleMountPointsUnderOutputDir(c *C) {
1862         helperRecord := `{
1863                 "command": ["/bin/sh", "-c", "echo $FROBIZ"],
1864                 "container_image": "` + arvadostest.DockerImage112PDH + `",
1865                 "cwd": "/bin",
1866                 "environment": {"FROBIZ": "bilbo"},
1867                 "mounts": {
1868         "/tmp": {"kind": "tmp"},
1869         "/tmp/foo/bar": {"kind": "collection", "portable_data_hash": "a0def87f80dd594d4675809e83bd4f15+367", "path":"/file2_in_main.txt"},
1870         "/tmp/foo/sub1": {"kind": "collection", "portable_data_hash": "a0def87f80dd594d4675809e83bd4f15+367", "path":"/subdir1"},
1871         "/tmp/foo/sub1file2": {"kind": "collection", "portable_data_hash": "a0def87f80dd594d4675809e83bd4f15+367", "path":"/subdir1/file2_in_subdir1.txt"},
1872         "/tmp/foo/baz/sub2file2": {"kind": "collection", "portable_data_hash": "a0def87f80dd594d4675809e83bd4f15+367", "path":"/subdir1/subdir2/file2_in_subdir2.txt"},
1873         "stdout": {"kind": "file", "path": "/tmp/a/b/c.out"}
1874     },
1875                 "output_path": "/tmp",
1876                 "priority": 1,
1877                 "runtime_constraints": {},
1878                 "state": "Locked",
1879                 "uuid": "zzzzz-dz642-202301130848001"
1880         }`
1881
1882         extraMounts := []string{
1883                 "a0def87f80dd594d4675809e83bd4f15+367/file2_in_main.txt",
1884                 "a0def87f80dd594d4675809e83bd4f15+367/subdir1/file2_in_subdir1.txt",
1885                 "a0def87f80dd594d4675809e83bd4f15+367/subdir1/subdir2/file2_in_subdir2.txt",
1886         }
1887
1888         api, _, realtemp := s.fullRunHelper(c, helperRecord, extraMounts, func() int {
1889                 fmt.Fprintln(s.executor.created.Stdout, s.executor.created.Env["FROBIZ"])
1890                 return 0
1891         })
1892
1893         c.Check(s.executor.created.BindMounts, DeepEquals, map[string]bindmount{
1894                 "/tmp":                   {realtemp + "/tmp1", false},
1895                 "/tmp/foo/bar":           {s.keepmount + "/by_id/a0def87f80dd594d4675809e83bd4f15+367/file2_in_main.txt", true},
1896                 "/tmp/foo/baz/sub2file2": {s.keepmount + "/by_id/a0def87f80dd594d4675809e83bd4f15+367/subdir1/subdir2/file2_in_subdir2.txt", true},
1897                 "/tmp/foo/sub1":          {s.keepmount + "/by_id/a0def87f80dd594d4675809e83bd4f15+367/subdir1", true},
1898                 "/tmp/foo/sub1file2":     {s.keepmount + "/by_id/a0def87f80dd594d4675809e83bd4f15+367/subdir1/file2_in_subdir1.txt", true},
1899         })
1900
1901         c.Check(api.CalledWith("container.exit_code", 0), NotNil)
1902         c.Check(api.CalledWith("container.state", "Complete"), NotNil)
1903         output_count := uint(0)
1904         for _, v := range s.runner.ContainerArvClient.(*ArvTestClient).Content {
1905                 if v["collection"] == nil {
1906                         continue
1907                 }
1908                 collection := v["collection"].(arvadosclient.Dict)
1909                 if collection["name"].(string) != "output for zzzzz-dz642-202301130848001" {
1910                         continue
1911                 }
1912                 c.Check(v["ensure_unique_name"], Equals, true)
1913                 c.Check(collection["manifest_text"].(string), Equals, `./a/b 307372fa8fd5c146b22ae7a45b49bc31+6 0:6:c.out
1914 ./foo 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396c73c0abcdefgh11234567890@569fa8c3 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396cabcdefghij6419876543234@569fa8c4 9:18:bar 36:18:sub1file2
1915 ./foo/baz 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396c73c0bcdefghijk544332211@569fa8c5 9:18:sub2file2
1916 ./foo/sub1 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396cabcdefghij6419876543234@569fa8c4 0:9:file1_in_subdir1.txt 9:18:file2_in_subdir1.txt
1917 ./foo/sub1/subdir2 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396c73c0bcdefghijk544332211@569fa8c5 0:9:file1_in_subdir2.txt 9:18:file2_in_subdir2.txt
1918 `)
1919                 output_count++
1920         }
1921         c.Check(output_count, Not(Equals), uint(0))
1922 }
1923
1924 func (s *TestSuite) TestStdoutWithMountPointsUnderOutputDirDenormalizedManifest(c *C) {
1925         helperRecord := `{
1926                 "command": ["/bin/sh", "-c", "echo $FROBIZ"],
1927                 "container_image": "` + arvadostest.DockerImage112PDH + `",
1928                 "cwd": "/bin",
1929                 "environment": {"FROBIZ": "bilbo"},
1930                 "mounts": {
1931         "/tmp": {"kind": "tmp"},
1932         "/tmp/foo/bar": {"kind": "collection", "portable_data_hash": "b0def87f80dd594d4675809e83bd4f15+367", "path": "/subdir1/file2_in_subdir1.txt"},
1933         "stdout": {"kind": "file", "path": "/tmp/a/b/c.out"}
1934     },
1935                 "output_path": "/tmp",
1936                 "priority": 1,
1937                 "runtime_constraints": {},
1938                 "state": "Locked",
1939                 "uuid": "zzzzz-dz642-202301130848002"
1940         }`
1941
1942         extraMounts := []string{
1943                 "b0def87f80dd594d4675809e83bd4f15+367/subdir1/file2_in_subdir1.txt",
1944         }
1945
1946         s.fullRunHelper(c, helperRecord, extraMounts, func() int {
1947                 fmt.Fprintln(s.executor.created.Stdout, s.executor.created.Env["FROBIZ"])
1948                 return 0
1949         })
1950
1951         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
1952         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
1953         output_count := uint(0)
1954         for _, v := range s.runner.ContainerArvClient.(*ArvTestClient).Content {
1955                 if v["collection"] == nil {
1956                         continue
1957                 }
1958                 collection := v["collection"].(arvadosclient.Dict)
1959                 if collection["name"].(string) != "output for zzzzz-dz642-202301130848002" {
1960                         continue
1961                 }
1962                 c.Check(collection["manifest_text"].(string), Equals, `./a/b 307372fa8fd5c146b22ae7a45b49bc31+6 0:6:c.out
1963 ./foo 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396c73c0abcdefgh11234567890@569fa8c3 10:17:bar
1964 `)
1965                 output_count++
1966         }
1967         c.Check(output_count, Not(Equals), uint(0))
1968 }
1969
1970 func (s *TestSuite) TestOutputError(c *C) {
1971         helperRecord := `{
1972                 "command": ["/bin/sh", "-c", "echo $FROBIZ"],
1973                 "container_image": "` + arvadostest.DockerImage112PDH + `",
1974                 "cwd": "/bin",
1975                 "environment": {"FROBIZ": "bilbo"},
1976                 "mounts": {
1977                         "/tmp": {"kind": "tmp"}
1978                 },
1979                 "output_path": "/tmp",
1980                 "priority": 1,
1981                 "runtime_constraints": {},
1982                 "state": "Locked"
1983         }`
1984         s.fullRunHelper(c, helperRecord, nil, func() int {
1985                 os.Symlink("/etc/hosts", s.runner.HostOutputDir+"/baz")
1986                 return 0
1987         })
1988
1989         c.Check(s.api.CalledWith("container.state", "Cancelled"), NotNil)
1990 }
1991
1992 func (s *TestSuite) TestStdinCollectionMountPoint(c *C) {
1993         helperRecord := `{
1994                 "command": ["/bin/sh", "-c", "echo $FROBIZ"],
1995                 "container_image": "` + arvadostest.DockerImage112PDH + `",
1996                 "cwd": "/bin",
1997                 "environment": {"FROBIZ": "bilbo"},
1998                 "mounts": {
1999         "/tmp": {"kind": "tmp"},
2000         "stdin": {"kind": "collection", "portable_data_hash": "b0def87f80dd594d4675809e83bd4f15+367", "path": "/file1_in_main.txt"},
2001         "stdout": {"kind": "file", "path": "/tmp/a/b/c.out"}
2002     },
2003                 "output_path": "/tmp",
2004                 "priority": 1,
2005                 "runtime_constraints": {},
2006                 "state": "Locked"
2007         }`
2008
2009         extraMounts := []string{
2010                 "b0def87f80dd594d4675809e83bd4f15+367/file1_in_main.txt",
2011         }
2012
2013         api, _, _ := s.fullRunHelper(c, helperRecord, extraMounts, func() int {
2014                 fmt.Fprintln(s.executor.created.Stdout, s.executor.created.Env["FROBIZ"])
2015                 return 0
2016         })
2017
2018         c.Check(api.CalledWith("container.exit_code", 0), NotNil)
2019         c.Check(api.CalledWith("container.state", "Complete"), NotNil)
2020         for _, v := range api.Content {
2021                 if v["collection"] != nil {
2022                         collection := v["collection"].(arvadosclient.Dict)
2023                         if strings.Index(collection["name"].(string), "output") == 0 {
2024                                 manifest := collection["manifest_text"].(string)
2025                                 c.Check(manifest, Equals, `./a/b 307372fa8fd5c146b22ae7a45b49bc31+6 0:6:c.out
2026 `)
2027                         }
2028                 }
2029         }
2030 }
2031
2032 func (s *TestSuite) TestStdinJsonMountPoint(c *C) {
2033         helperRecord := `{
2034                 "command": ["/bin/sh", "-c", "echo $FROBIZ"],
2035                 "container_image": "` + arvadostest.DockerImage112PDH + `",
2036                 "cwd": "/bin",
2037                 "environment": {"FROBIZ": "bilbo"},
2038                 "mounts": {
2039         "/tmp": {"kind": "tmp"},
2040         "stdin": {"kind": "json", "content": "foo"},
2041         "stdout": {"kind": "file", "path": "/tmp/a/b/c.out"}
2042     },
2043                 "output_path": "/tmp",
2044                 "priority": 1,
2045                 "runtime_constraints": {},
2046                 "state": "Locked"
2047         }`
2048
2049         api, _, _ := s.fullRunHelper(c, helperRecord, nil, func() int {
2050                 fmt.Fprintln(s.executor.created.Stdout, s.executor.created.Env["FROBIZ"])
2051                 return 0
2052         })
2053
2054         c.Check(api.CalledWith("container.exit_code", 0), NotNil)
2055         c.Check(api.CalledWith("container.state", "Complete"), NotNil)
2056         for _, v := range api.Content {
2057                 if v["collection"] != nil {
2058                         collection := v["collection"].(arvadosclient.Dict)
2059                         if strings.Index(collection["name"].(string), "output") == 0 {
2060                                 manifest := collection["manifest_text"].(string)
2061                                 c.Check(manifest, Equals, `./a/b 307372fa8fd5c146b22ae7a45b49bc31+6 0:6:c.out
2062 `)
2063                         }
2064                 }
2065         }
2066 }
2067
2068 func (s *TestSuite) TestStderrMount(c *C) {
2069         api, cr, _ := s.fullRunHelper(c, `{
2070     "command": ["/bin/sh", "-c", "echo hello;exit 1"],
2071     "container_image": "`+arvadostest.DockerImage112PDH+`",
2072     "cwd": ".",
2073     "environment": {},
2074     "mounts": {"/tmp": {"kind": "tmp"},
2075                "stdout": {"kind": "file", "path": "/tmp/a/out.txt"},
2076                "stderr": {"kind": "file", "path": "/tmp/b/err.txt"}},
2077     "output_path": "/tmp",
2078     "priority": 1,
2079     "runtime_constraints": {},
2080     "state": "Locked"
2081 }`, nil, func() int {
2082                 fmt.Fprintln(s.executor.created.Stdout, "hello")
2083                 fmt.Fprintln(s.executor.created.Stderr, "oops")
2084                 return 1
2085         })
2086
2087         final := api.CalledWith("container.state", "Complete")
2088         c.Assert(final, NotNil)
2089         c.Check(final["container"].(arvadosclient.Dict)["exit_code"], Equals, 1)
2090         c.Check(final["container"].(arvadosclient.Dict)["log"], NotNil)
2091
2092         c.Check(cr.ContainerArvClient.(*ArvTestClient).CalledWith("collection.manifest_text", "./a b1946ac92492d2347c6235b4d2611184+6 0:6:out.txt\n./b 38af5c54926b620264ab1501150cf189+5 0:5:err.txt\n"), NotNil)
2093 }
2094
2095 func (s *TestSuite) TestNumberRoundTrip(c *C) {
2096         s.api.callraw = true
2097         err := s.runner.fetchContainerRecord()
2098         c.Assert(err, IsNil)
2099         jsondata, err := json.Marshal(s.runner.Container.Mounts["/json"].Content)
2100         c.Logf("%#v", s.runner.Container)
2101         c.Check(err, IsNil)
2102         c.Check(string(jsondata), Equals, `{"number":123456789123456789}`)
2103 }
2104
2105 func (s *TestSuite) TestFullBrokenDocker(c *C) {
2106         nextState := ""
2107         for _, setup := range []func(){
2108                 func() {
2109                         c.Log("// waitErr = ocl runtime error")
2110                         s.executor.waitErr = errors.New(`Error response from daemon: oci runtime error: container_linux.go:247: starting container process caused "process_linux.go:359: container init caused \"rootfs_linux.go:54: mounting \\\"/tmp/keep453790790/by_id/99999999999999999999999999999999+99999/myGenome\\\" to rootfs \\\"/tmp/docker/overlay2/9999999999999999999999999999999999999999999999999999999999999999/merged\\\" at \\\"/tmp/docker/overlay2/9999999999999999999999999999999999999999999999999999999999999999/merged/keep/99999999999999999999999999999999+99999/myGenome\\\" caused \\\"no such file or directory\\\"\""`)
2111                         nextState = "Cancelled"
2112                 },
2113                 func() {
2114                         c.Log("// loadErr = cannot connect")
2115                         s.executor.loadErr = errors.New("Cannot connect to the Docker daemon at unix:///var/run/docker.sock. Is the docker daemon running?")
2116                         s.runner.brokenNodeHook = c.MkDir() + "/broken-node-hook"
2117                         err := ioutil.WriteFile(s.runner.brokenNodeHook, []byte("#!/bin/sh\nexec echo killme\n"), 0700)
2118                         c.Assert(err, IsNil)
2119                         nextState = "Queued"
2120                 },
2121         } {
2122                 s.SetUpTest(c)
2123                 setup()
2124                 s.fullRunHelper(c, `{
2125     "command": ["echo", "hello world"],
2126     "container_image": "`+arvadostest.DockerImage112PDH+`",
2127     "cwd": ".",
2128     "environment": {},
2129     "mounts": {"/tmp": {"kind": "tmp"} },
2130     "output_path": "/tmp",
2131     "priority": 1,
2132     "runtime_constraints": {},
2133     "state": "Locked"
2134 }`, nil, func() int { return 0 })
2135                 c.Check(s.api.CalledWith("container.state", nextState), NotNil)
2136                 logs := logFileContent(c, s.runner, "crunch-run.txt")
2137                 c.Check(logs, Matches, "(?ms).*unable to run containers.*")
2138                 if s.runner.brokenNodeHook != "" {
2139                         c.Check(logs, Matches, "(?ms).*Running broken node hook.*")
2140                         c.Check(logs, Matches, "(?ms).*killme.*")
2141                         c.Check(logs, Not(Matches), "(?ms).*Writing /var/lock/crunch-run-broken to mark node as broken.*")
2142                 } else {
2143                         c.Check(logs, Matches, "(?ms).*Writing /var/lock/crunch-run-broken to mark node as broken.*")
2144                 }
2145         }
2146 }
2147
2148 func (s *TestSuite) TestBadCommand(c *C) {
2149         for _, startError := range []string{
2150                 `panic: standard_init_linux.go:175: exec user process caused "no such file or directory"`,
2151                 `Error response from daemon: Cannot start container 41f26cbc43bcc1280f4323efb1830a394ba8660c9d1c2b564ba42bf7f7694845: [8] System error: no such file or directory`,
2152                 `Error response from daemon: Cannot start container 58099cd76c834f3dc2a4fb76c8028f049ae6d4fdf0ec373e1f2cfea030670c2d: [8] System error: exec: "foobar": executable file not found in $PATH`,
2153         } {
2154                 s.SetUpTest(c)
2155                 s.executor.startErr = errors.New(startError)
2156                 s.fullRunHelper(c, `{
2157     "command": ["echo", "hello world"],
2158     "container_image": "`+arvadostest.DockerImage112PDH+`",
2159     "cwd": ".",
2160     "environment": {},
2161     "mounts": {"/tmp": {"kind": "tmp"} },
2162     "output_path": "/tmp",
2163     "priority": 1,
2164     "runtime_constraints": {},
2165     "state": "Locked"
2166 }`, nil, func() int { return 0 })
2167                 c.Check(s.api.CalledWith("container.state", "Cancelled"), NotNil)
2168                 c.Check(logFileContent(c, s.runner, "crunch-run.txt"), Matches, "(?ms).*Possible causes:.*is missing.*")
2169         }
2170 }
2171
2172 func (s *TestSuite) TestSecretTextMountPoint(c *C) {
2173         helperRecord := `{
2174                 "command": ["true"],
2175                 "container_image": "` + arvadostest.DockerImage112PDH + `",
2176                 "cwd": "/bin",
2177                 "mounts": {
2178                     "/tmp": {"kind": "tmp"},
2179                     "/tmp/secret.conf": {"kind": "text", "content": "mypassword"}
2180                 },
2181                 "secret_mounts": {
2182                 },
2183                 "output_path": "/tmp",
2184                 "priority": 1,
2185                 "runtime_constraints": {},
2186                 "state": "Locked"
2187         }`
2188
2189         s.fullRunHelper(c, helperRecord, nil, func() int {
2190                 content, err := ioutil.ReadFile(s.runner.HostOutputDir + "/secret.conf")
2191                 c.Check(err, IsNil)
2192                 c.Check(string(content), Equals, "mypassword")
2193                 return 0
2194         })
2195
2196         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
2197         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
2198         c.Check(s.runner.ContainerArvClient.(*ArvTestClient).CalledWith("collection.manifest_text", ". 34819d7beeabb9260a5c854bc85b3e44+10 0:10:secret.conf\n"), NotNil)
2199         c.Check(s.runner.ContainerArvClient.(*ArvTestClient).CalledWith("collection.manifest_text", ""), IsNil)
2200
2201         // under secret mounts, not captured in output
2202         helperRecord = `{
2203                 "command": ["true"],
2204                 "container_image": "` + arvadostest.DockerImage112PDH + `",
2205                 "cwd": "/bin",
2206                 "mounts": {
2207                     "/tmp": {"kind": "tmp"}
2208                 },
2209                 "secret_mounts": {
2210                     "/tmp/secret.conf": {"kind": "text", "content": "mypassword"}
2211                 },
2212                 "output_path": "/tmp",
2213                 "priority": 1,
2214                 "runtime_constraints": {},
2215                 "state": "Locked"
2216         }`
2217
2218         s.SetUpTest(c)
2219         s.fullRunHelper(c, helperRecord, nil, func() int {
2220                 content, err := ioutil.ReadFile(s.runner.HostOutputDir + "/secret.conf")
2221                 c.Check(err, IsNil)
2222                 c.Check(string(content), Equals, "mypassword")
2223                 return 0
2224         })
2225
2226         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
2227         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
2228         c.Check(s.runner.ContainerArvClient.(*ArvTestClient).CalledWith("collection.manifest_text", ". 34819d7beeabb9260a5c854bc85b3e44+10 0:10:secret.conf\n"), IsNil)
2229         c.Check(s.runner.ContainerArvClient.(*ArvTestClient).CalledWith("collection.manifest_text", ""), NotNil)
2230
2231         // under secret mounts, output dir is a collection, not captured in output
2232         helperRecord = `{
2233                 "command": ["true"],
2234                 "container_image": "` + arvadostest.DockerImage112PDH + `",
2235                 "cwd": "/bin",
2236                 "mounts": {
2237                     "/tmp": {"kind": "collection", "writable": true}
2238                 },
2239                 "secret_mounts": {
2240                     "/tmp/secret.conf": {"kind": "text", "content": "mypassword"}
2241                 },
2242                 "output_path": "/tmp",
2243                 "priority": 1,
2244                 "runtime_constraints": {},
2245                 "state": "Locked"
2246         }`
2247
2248         s.SetUpTest(c)
2249         _, _, realtemp := s.fullRunHelper(c, helperRecord, nil, func() int {
2250                 // secret.conf should be provisioned as a separate
2251                 // bind mount, i.e., it should not appear in the
2252                 // (fake) fuse filesystem as viewed from the host.
2253                 content, err := ioutil.ReadFile(s.runner.HostOutputDir + "/secret.conf")
2254                 if !c.Check(errors.Is(err, os.ErrNotExist), Equals, true) {
2255                         c.Logf("secret.conf: content %q, err %#v", content, err)
2256                 }
2257                 err = ioutil.WriteFile(s.runner.HostOutputDir+"/.arvados#collection", []byte(`{"manifest_text":". acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:foo.txt\n"}`), 0700)
2258                 c.Check(err, IsNil)
2259                 return 0
2260         })
2261
2262         content, err := ioutil.ReadFile(realtemp + "/text1/mountdata.text")
2263         c.Check(err, IsNil)
2264         c.Check(string(content), Equals, "mypassword")
2265         c.Check(s.executor.created.BindMounts["/tmp/secret.conf"], DeepEquals, bindmount{realtemp + "/text1/mountdata.text", true})
2266         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
2267         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
2268         c.Check(s.runner.ContainerArvClient.(*ArvTestClient).CalledWith("collection.manifest_text", ". acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:foo.txt\n"), NotNil)
2269 }
2270
2271 func (s *TestSuite) TestCalculateCost(c *C) {
2272         defer func(s string) { lockdir = s }(lockdir)
2273         lockdir = c.MkDir()
2274         now := time.Now()
2275         cr := s.runner
2276         cr.costStartTime = now.Add(-time.Hour)
2277         var logbuf bytes.Buffer
2278         cr.CrunchLog = newLogWriter(&logbuf)
2279
2280         // if there's no InstanceType env var, cost is calculated as 0
2281         os.Unsetenv("InstanceType")
2282         cost := cr.calculateCost(now)
2283         c.Check(cost, Equals, 0.0)
2284
2285         // with InstanceType env var and loadPrices() hasn't run (or
2286         // hasn't found any data), cost is calculated based on
2287         // InstanceType env var
2288         os.Setenv("InstanceType", `{"Price":1.2}`)
2289         cost = cr.calculateCost(now)
2290         c.Check(cost, Equals, 1.2)
2291
2292         // first update tells us the spot price was $1/h until 30
2293         // minutes ago when it increased to $2/h
2294         j, err := json.Marshal([]cloud.InstancePrice{
2295                 {StartTime: now.Add(-4 * time.Hour), Price: 1.0},
2296                 {StartTime: now.Add(-time.Hour / 2), Price: 2.0},
2297         })
2298         c.Assert(err, IsNil)
2299         os.WriteFile(lockdir+"/"+pricesfile, j, 0777)
2300         cr.loadPrices()
2301         cost = cr.calculateCost(now)
2302         c.Check(cost, Equals, 1.5)
2303
2304         // next update (via --list + SIGUSR2) tells us the spot price
2305         // increased to $3/h 15 minutes ago
2306         j, err = json.Marshal([]cloud.InstancePrice{
2307                 {StartTime: now.Add(-time.Hour / 3), Price: 2.0}, // dup of -time.Hour/2 price
2308                 {StartTime: now.Add(-time.Hour / 4), Price: 3.0},
2309         })
2310         c.Assert(err, IsNil)
2311         os.WriteFile(lockdir+"/"+pricesfile, j, 0777)
2312         cr.loadPrices()
2313         cost = cr.calculateCost(now)
2314         c.Check(cost, Equals, 1.0/2+2.0/4+3.0/4)
2315
2316         cost = cr.calculateCost(now.Add(-time.Hour / 2))
2317         c.Check(cost, Equals, 0.5)
2318
2319         c.Logf("%s", logbuf.String())
2320         c.Check(logbuf.String(), Matches, `(?ms).*Instance price changed to 1\.00 at 20.* changed to 2\.00 .* changed to 3\.00 .*`)
2321         c.Check(logbuf.String(), Not(Matches), `(?ms).*changed to 2\.00 .* changed to 2\.00 .*`)
2322 }
2323
2324 func (s *TestSuite) TestSIGUSR2CostUpdate(c *C) {
2325         pid := os.Getpid()
2326         now := time.Now()
2327         pricesJSON, err := json.Marshal([]cloud.InstancePrice{
2328                 {StartTime: now.Add(-4 * time.Hour), Price: 2.4},
2329                 {StartTime: now.Add(-2 * time.Hour), Price: 2.6},
2330         })
2331         c.Assert(err, IsNil)
2332
2333         os.Setenv("InstanceType", `{"Price":2.2}`)
2334         defer func(s string) { lockdir = s }(lockdir)
2335         lockdir = c.MkDir()
2336
2337         // We can't use s.api.CalledWith because timing differences will yield
2338         // different cost values across runs. getCostUpdate iterates over API
2339         // calls until it finds one that sets the cost, then writes that value
2340         // to the next index of costUpdates.
2341         deadline := now.Add(time.Second)
2342         costUpdates := make([]float64, 2)
2343         costIndex := 0
2344         apiIndex := 0
2345         getCostUpdate := func() {
2346                 for ; time.Now().Before(deadline); time.Sleep(time.Second / 10) {
2347                         for apiIndex < len(s.api.Content) {
2348                                 update := s.api.Content[apiIndex]
2349                                 apiIndex++
2350                                 var ok bool
2351                                 var cost float64
2352                                 if update, ok = update["container"].(arvadosclient.Dict); !ok {
2353                                         continue
2354                                 }
2355                                 if cost, ok = update["cost"].(float64); !ok {
2356                                         continue
2357                                 }
2358                                 c.Logf("API call #%d updates cost to %v", apiIndex-1, cost)
2359                                 costUpdates[costIndex] = cost
2360                                 costIndex++
2361                                 return
2362                         }
2363                 }
2364         }
2365
2366         s.fullRunHelper(c, `{
2367                 "command": ["true"],
2368                 "container_image": "`+arvadostest.DockerImage112PDH+`",
2369                 "cwd": ".",
2370                 "environment": {},
2371                 "mounts": {"/tmp": {"kind": "tmp"} },
2372                 "output_path": "/tmp",
2373                 "priority": 1,
2374                 "runtime_constraints": {},
2375                 "state": "Locked",
2376                 "uuid": "zzzzz-dz642-20230320101530a"
2377         }`, nil, func() int {
2378                 s.runner.costStartTime = now.Add(-3 * time.Hour)
2379                 err := syscall.Kill(pid, syscall.SIGUSR2)
2380                 c.Check(err, IsNil, Commentf("error sending first SIGUSR2 to runner"))
2381                 getCostUpdate()
2382
2383                 err = os.WriteFile(path.Join(lockdir, pricesfile), pricesJSON, 0o700)
2384                 c.Check(err, IsNil, Commentf("error writing JSON prices file"))
2385                 err = syscall.Kill(pid, syscall.SIGUSR2)
2386                 c.Check(err, IsNil, Commentf("error sending second SIGUSR2 to runner"))
2387                 getCostUpdate()
2388
2389                 return 0
2390         })
2391         // Comparing with format strings makes it easy to ignore minor variations
2392         // in cost across runs while keeping diagnostics pretty.
2393         c.Check(fmt.Sprintf("%.3f", costUpdates[0]), Equals, "6.600")
2394         c.Check(fmt.Sprintf("%.3f", costUpdates[1]), Equals, "7.600")
2395 }
2396
2397 type FakeProcess struct {
2398         cmdLine []string
2399 }
2400
2401 func (fp FakeProcess) CmdlineSlice() ([]string, error) {
2402         return fp.cmdLine, nil
2403 }
2404
2405 func logFileContent(c *C, cr *ContainerRunner, fnm string) string {
2406         buf, err := fs.ReadFile(arvados.FS(cr.LogCollection), fnm)
2407         c.Assert(err, IsNil)
2408         return string(buf)
2409 }
2410
2411 func dumpAllLogFiles(c *C, cr *ContainerRunner) {
2412         d, err := cr.LogCollection.OpenFile("/", os.O_RDONLY, 0)
2413         c.Assert(err, IsNil)
2414         fis, err := d.Readdir(-1)
2415         c.Assert(err, IsNil)
2416         for _, fi := range fis {
2417                 c.Logf("=== %s", fi.Name())
2418                 c.Log(logFileContent(c, cr, fi.Name()))
2419         }
2420 }