]> git.arvados.org - arvados.git/blob - lib/crunchrun/crunchrun_test.go
Merge branch 'main' into 21926-rocm refs #21926
[arvados.git] / lib / crunchrun / crunchrun_test.go
1 // Copyright (C) The Arvados Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: AGPL-3.0
4
5 package crunchrun
6
7 import (
8         "bytes"
9         "context"
10         "crypto/md5"
11         "encoding/json"
12         "errors"
13         "fmt"
14         "io"
15         "io/fs"
16         "io/ioutil"
17         "math/rand"
18         "net/http"
19         "net/http/httptest"
20         "net/http/httputil"
21         "net/url"
22         "os"
23         "os/exec"
24         "path"
25         "regexp"
26         "runtime/pprof"
27         "strings"
28         "sync"
29         "sync/atomic"
30         "syscall"
31         "testing"
32         "time"
33
34         "git.arvados.org/arvados.git/lib/cloud"
35         "git.arvados.org/arvados.git/lib/cmd"
36         "git.arvados.org/arvados.git/sdk/go/arvados"
37         "git.arvados.org/arvados.git/sdk/go/arvadosclient"
38         "git.arvados.org/arvados.git/sdk/go/arvadostest"
39
40         . "gopkg.in/check.v1"
41 )
42
43 // Gocheck boilerplate
44 func TestCrunchExec(t *testing.T) {
45         TestingT(t)
46 }
47
48 const logLineStart = `(?m)(.*\n)*\d{4}-\d\d-\d\dT\d\d:\d\d:\d\d\.\d+Z `
49
50 var _ = Suite(&TestSuite{})
51
52 type TestSuite struct {
53         client                   *arvados.Client
54         api                      *ArvTestClient
55         runner                   *ContainerRunner
56         executor                 *stubExecutor
57         keepmount                string
58         keepmountTmp             []string
59         testDispatcherKeepClient KeepTestClient
60         testContainerKeepClient  KeepTestClient
61         debian12MemoryCurrent    int64
62         debian12SwapCurrent      int64
63 }
64
65 func (s *TestSuite) SetUpSuite(c *C) {
66         buf, err := os.ReadFile("../crunchstat/testdata/debian12/sys/fs/cgroup/user.slice/user-1000.slice/session-4.scope/memory.current")
67         c.Assert(err, IsNil)
68         _, err = fmt.Sscanf(string(buf), "%d", &s.debian12MemoryCurrent)
69         c.Assert(err, IsNil)
70
71         buf, err = os.ReadFile("../crunchstat/testdata/debian12/sys/fs/cgroup/user.slice/user-1000.slice/session-4.scope/memory.swap.current")
72         c.Assert(err, IsNil)
73         _, err = fmt.Sscanf(string(buf), "%d", &s.debian12SwapCurrent)
74         c.Assert(err, IsNil)
75 }
76
77 func (s *TestSuite) SetUpTest(c *C) {
78         s.client = arvados.NewClientFromEnv()
79         s.executor = &stubExecutor{}
80         var err error
81         s.api = &ArvTestClient{}
82         s.runner, err = NewContainerRunner(s.client, s.api, &s.testDispatcherKeepClient, "zzzzz-zzzzz-zzzzzzzzzzzzzzz")
83         c.Assert(err, IsNil)
84         s.runner.executor = s.executor
85         s.runner.MkArvClient = func(token string) (IArvadosClient, IKeepClient, *arvados.Client, error) {
86                 return s.api, &s.testContainerKeepClient, s.client, nil
87         }
88         s.runner.RunArvMount = func(cmd []string, tok string) (*exec.Cmd, error) {
89                 s.runner.ArvMountPoint = s.keepmount
90                 for i, opt := range cmd {
91                         if opt == "--mount-tmp" {
92                                 err := os.Mkdir(s.keepmount+"/"+cmd[i+1], 0700)
93                                 if err != nil {
94                                         return nil, err
95                                 }
96                                 s.keepmountTmp = append(s.keepmountTmp, cmd[i+1])
97                         }
98                 }
99                 return nil, nil
100         }
101         s.keepmount = c.MkDir()
102         err = os.Mkdir(s.keepmount+"/by_id", 0755)
103         s.keepmountTmp = nil
104         c.Assert(err, IsNil)
105         err = os.Mkdir(s.keepmount+"/by_id/"+arvadostest.DockerImage112PDH, 0755)
106         c.Assert(err, IsNil)
107         err = ioutil.WriteFile(s.keepmount+"/by_id/"+arvadostest.DockerImage112PDH+"/"+arvadostest.DockerImage112Filename, []byte("#notarealtarball"), 0644)
108         err = os.Mkdir(s.keepmount+"/by_id/"+fakeInputCollectionPDH, 0755)
109         c.Assert(err, IsNil)
110         err = ioutil.WriteFile(s.keepmount+"/by_id/"+fakeInputCollectionPDH+"/input.json", []byte(`{"input":true}`), 0644)
111         c.Assert(err, IsNil)
112         s.runner.ArvMountPoint = s.keepmount
113         os.Setenv("InstanceType", `{"ProviderType":"a1.2xlarge","Price":1.2}`)
114 }
115
116 type ArvTestClient struct {
117         Total   int64
118         Calls   int
119         Content []arvadosclient.Dict
120         arvados.Container
121         secretMounts []byte
122         sync.Mutex
123         WasSetRunning bool
124         callraw       bool
125 }
126
127 type KeepTestClient struct {
128         Called         bool
129         StorageClasses []string
130         blocks         sync.Map
131 }
132
133 type stubExecutor struct {
134         imageLoaded bool
135         loaded      string
136         loadErr     error
137         exitCode    int
138         createErr   error
139         created     containerSpec
140         startErr    error
141         waitSleep   time.Duration
142         waitErr     error
143         stopErr     error
144         stopped     bool
145         closed      bool
146         runFunc     func() int
147         exit        chan int
148 }
149
150 func (e *stubExecutor) LoadImage(imageId string, tarball string, container arvados.Container, keepMount string,
151         containerClient *arvados.Client) error {
152         e.loaded = tarball
153         return e.loadErr
154 }
155 func (e *stubExecutor) Runtime() string                 { return "stub" }
156 func (e *stubExecutor) Version() string                 { return "stub " + cmd.Version.String() }
157 func (e *stubExecutor) Create(spec containerSpec) error { e.created = spec; return e.createErr }
158 func (e *stubExecutor) Start() error {
159         e.exit = make(chan int, 1)
160         go func() { e.exit <- e.runFunc() }()
161         return e.startErr
162 }
163 func (e *stubExecutor) Pid() int    { return 1115883 } // matches pid in ../crunchstat/testdata/debian12/proc/
164 func (e *stubExecutor) Stop() error { e.stopped = true; go func() { e.exit <- -1 }(); return e.stopErr }
165 func (e *stubExecutor) Close()      { e.closed = true }
166 func (e *stubExecutor) Wait(context.Context) (int, error) {
167         return <-e.exit, e.waitErr
168 }
169 func (e *stubExecutor) InjectCommand(ctx context.Context, _, _ string, _ bool, _ []string) (*exec.Cmd, error) {
170         return nil, errors.New("unimplemented")
171 }
172 func (e *stubExecutor) IPAddress() (string, error) { return "", errors.New("unimplemented") }
173
174 const fakeInputCollectionPDH = "ffffffffaaaaaaaa88888888eeeeeeee+1234"
175
176 var hwManifest = ". 82ab40c24fc8df01798e57ba66795bb1+841216+Aa124ac75e5168396c73c0a18eda641a4f41791c0@569fa8c3 0:841216:9c31ee32b3d15268a0754e8edc74d4f815ee014b693bc5109058e431dd5caea7.tar\n"
177 var hwPDH = "a45557269dcb65a6b78f9ac061c0850b+120"
178 var hwImageID = "9c31ee32b3d15268a0754e8edc74d4f815ee014b693bc5109058e431dd5caea7"
179
180 var otherManifest = ". 68a84f561b1d1708c6baff5e019a9ab3+46+Ae5d0af96944a3690becb1decdf60cc1c937f556d@5693216f 0:46:md5sum.txt\n"
181 var otherPDH = "a3e8f74c6f101eae01fa08bfb4e49b3a+54"
182
183 var normalizedManifestWithSubdirs = `. 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396c73c0abcdefgh11234567890@569fa8c3 0:9:file1_in_main.txt 9:18:file2_in_main.txt 0:27:zzzzz-8i9sb-bcdefghijkdhvnk.log.txt
184 ./subdir1 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396cabcdefghij6419876543234@569fa8c4 0:9:file1_in_subdir1.txt 9:18:file2_in_subdir1.txt
185 ./subdir1/subdir2 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396c73c0bcdefghijk544332211@569fa8c5 0:9:file1_in_subdir2.txt 9:18:file2_in_subdir2.txt
186 `
187
188 var normalizedWithSubdirsPDH = "a0def87f80dd594d4675809e83bd4f15+367"
189
190 var denormalizedManifestWithSubdirs = ". 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396c73c0abcdefgh11234567890@569fa8c3 0:9:file1_in_main.txt 9:18:file2_in_main.txt 0:27:zzzzz-8i9sb-bcdefghijkdhvnk.log.txt 0:10:subdir1/file1_in_subdir1.txt 10:17:subdir1/file2_in_subdir1.txt\n"
191 var denormalizedWithSubdirsPDH = "b0def87f80dd594d4675809e83bd4f15+367"
192
193 var fakeAuthUUID = "zzzzz-gj3su-55pqoyepgi2glem"
194 var fakeAuthToken = "a3ltuwzqcu2u4sc0q7yhpc2w7s00fdcqecg5d6e0u3pfohmbjt"
195
196 func (client *ArvTestClient) Create(resourceType string,
197         parameters arvadosclient.Dict,
198         output interface{}) error {
199
200         client.Mutex.Lock()
201         defer client.Mutex.Unlock()
202
203         client.Calls++
204         client.Content = append(client.Content, parameters)
205
206         if resourceType == "logs" {
207                 panic("logs.create called")
208         }
209
210         if resourceType == "collections" && output != nil {
211                 mt := parameters["collection"].(arvadosclient.Dict)["manifest_text"].(string)
212                 md5sum := md5.Sum([]byte(mt))
213                 outmap := output.(*arvados.Collection)
214                 outmap.PortableDataHash = fmt.Sprintf("%x+%d", md5sum, len(mt))
215                 outmap.UUID = fmt.Sprintf("zzzzz-4zz18-%015x", md5sum[:7])
216         }
217
218         return nil
219 }
220
221 func (client *ArvTestClient) Call(method, resourceType, uuid, action string, parameters arvadosclient.Dict, output interface{}) error {
222         switch {
223         case method == "GET" && resourceType == "containers" && action == "auth":
224                 return json.Unmarshal([]byte(`{
225                         "kind": "arvados#api_client_authorization",
226                         "uuid": "`+fakeAuthUUID+`",
227                         "api_token": "`+fakeAuthToken+`"
228                         }`), output)
229         case method == "GET" && resourceType == "containers" && action == "secret_mounts":
230                 if client.secretMounts != nil {
231                         return json.Unmarshal(client.secretMounts, output)
232                 }
233                 return json.Unmarshal([]byte(`{"secret_mounts":{}}`), output)
234         default:
235                 return fmt.Errorf("Not found")
236         }
237 }
238
239 func (client *ArvTestClient) CallRaw(method, resourceType, uuid, action string,
240         parameters arvadosclient.Dict) (reader io.ReadCloser, err error) {
241         var j []byte
242         if method == "GET" && resourceType == "nodes" && uuid == "" && action == "" {
243                 j = []byte(`{
244                         "kind": "arvados#nodeList",
245                         "items": [{
246                                 "uuid": "zzzzz-7ekkf-2z3mc76g2q73aio",
247                                 "hostname": "compute2",
248                                 "properties": {"total_cpu_cores": 16}
249                         }]}`)
250         } else if method == "GET" && resourceType == "containers" && action == "" && !client.callraw {
251                 if uuid == "" {
252                         j, err = json.Marshal(map[string]interface{}{
253                                 "items": []interface{}{client.Container},
254                                 "kind":  "arvados#nodeList",
255                         })
256                 } else {
257                         j, err = json.Marshal(client.Container)
258                 }
259         } else {
260                 j = []byte(`{
261                         "command": ["sleep", "1"],
262                         "container_image": "` + arvadostest.DockerImage112PDH + `",
263                         "cwd": ".",
264                         "environment": {},
265                         "mounts": {"/tmp": {"kind": "tmp"}, "/json": {"kind": "json", "content": {"number": 123456789123456789}}},
266                         "output_path": "/tmp",
267                         "priority": 1,
268                         "runtime_constraints": {}
269                 }`)
270         }
271         return ioutil.NopCloser(bytes.NewReader(j)), err
272 }
273
274 func (client *ArvTestClient) Get(resourceType string, uuid string, parameters arvadosclient.Dict, output interface{}) error {
275         if resourceType == "collections" {
276                 if uuid == hwPDH {
277                         output.(*arvados.Collection).ManifestText = hwManifest
278                 } else if uuid == otherPDH {
279                         output.(*arvados.Collection).ManifestText = otherManifest
280                 } else if uuid == normalizedWithSubdirsPDH {
281                         output.(*arvados.Collection).ManifestText = normalizedManifestWithSubdirs
282                 } else if uuid == denormalizedWithSubdirsPDH {
283                         output.(*arvados.Collection).ManifestText = denormalizedManifestWithSubdirs
284                 }
285         }
286         if resourceType == "containers" {
287                 (*output.(*arvados.Container)) = client.Container
288         }
289         return nil
290 }
291
292 func (client *ArvTestClient) Update(resourceType string, uuid string, parameters arvadosclient.Dict, output interface{}) (err error) {
293         client.Mutex.Lock()
294         defer client.Mutex.Unlock()
295         client.Calls++
296         client.Content = append(client.Content, parameters)
297         if resourceType == "containers" {
298                 if parameters["container"].(arvadosclient.Dict)["state"] == "Running" {
299                         client.WasSetRunning = true
300                 }
301         } else if resourceType == "collections" && output != nil {
302                 mt := parameters["collection"].(arvadosclient.Dict)["manifest_text"].(string)
303                 output.(*arvados.Collection).UUID = uuid
304                 output.(*arvados.Collection).PortableDataHash = arvados.PortableDataHash(mt)
305         }
306         return nil
307 }
308
309 var discoveryMap = map[string]interface{}{
310         "crunchLogUpdateSize":   float64(crunchLogUpdateSize),
311         "crunchLogUpdatePeriod": float64(crunchLogUpdatePeriod.Seconds()),
312 }
313
314 func (client *ArvTestClient) Discovery(key string) (interface{}, error) {
315         return discoveryMap[key], nil
316 }
317
318 // CalledWith returns the parameters from the first API call whose
319 // parameters match jpath/string. E.g., CalledWith(c, "foo.bar",
320 // "baz") returns parameters with parameters["foo"]["bar"]=="baz". If
321 // no call matches, it returns nil.
322 func (client *ArvTestClient) CalledWith(jpath string, expect interface{}) arvadosclient.Dict {
323 call:
324         for _, content := range client.Content {
325                 var v interface{} = content
326                 for _, k := range strings.Split(jpath, ".") {
327                         if dict, ok := v.(arvadosclient.Dict); !ok {
328                                 continue call
329                         } else {
330                                 v = dict[k]
331                         }
332                 }
333                 if v == expect {
334                         return content
335                 }
336         }
337         return nil
338 }
339
340 func (client *KeepTestClient) LocalLocator(locator string) (string, error) {
341         return locator, nil
342 }
343
344 func (client *KeepTestClient) BlockWrite(_ context.Context, opts arvados.BlockWriteOptions) (arvados.BlockWriteResponse, error) {
345         locator := fmt.Sprintf("%x+%d", md5.Sum(opts.Data), len(opts.Data))
346         client.blocks.Store(locator, append([]byte(nil), opts.Data...))
347         return arvados.BlockWriteResponse{
348                 Locator: locator,
349         }, nil
350 }
351
352 func (client *KeepTestClient) ReadAt(locator string, dst []byte, offset int) (int, error) {
353         loaded, ok := client.blocks.Load(locator)
354         if !ok {
355                 return 0, os.ErrNotExist
356         }
357         data := loaded.([]byte)
358         if offset >= len(data) {
359                 return 0, io.EOF
360         }
361         return copy(dst, data[offset:]), nil
362 }
363
364 func (client *KeepTestClient) Close() {
365         client.blocks.Range(func(locator, value interface{}) bool {
366                 client.blocks.Delete(locator)
367                 return true
368         })
369 }
370
371 func (client *KeepTestClient) SetStorageClasses(sc []string) {
372         client.StorageClasses = sc
373 }
374
375 type FileWrapper struct {
376         io.ReadCloser
377         len int64
378 }
379
380 func (fw FileWrapper) Readdir(n int) ([]os.FileInfo, error) {
381         return nil, errors.New("not implemented")
382 }
383
384 func (fw FileWrapper) Seek(int64, int) (int64, error) {
385         return 0, errors.New("not implemented")
386 }
387
388 func (fw FileWrapper) Size() int64 {
389         return fw.len
390 }
391
392 func (fw FileWrapper) Stat() (os.FileInfo, error) {
393         return nil, errors.New("not implemented")
394 }
395
396 func (fw FileWrapper) Truncate(int64) error {
397         return errors.New("not implemented")
398 }
399
400 func (fw FileWrapper) Write([]byte) (int, error) {
401         return 0, errors.New("not implemented")
402 }
403
404 func (fw FileWrapper) Sync() error {
405         return errors.New("not implemented")
406 }
407
408 func (fw FileWrapper) Snapshot() (*arvados.Subtree, error) {
409         return nil, errors.New("not implemented")
410 }
411
412 func (fw FileWrapper) Splice(*arvados.Subtree) error {
413         return errors.New("not implemented")
414 }
415
416 type apiStubServer struct {
417         server    *httptest.Server
418         proxy     *httputil.ReverseProxy
419         intercept func(http.ResponseWriter, *http.Request) bool
420
421         container arvados.Container
422 }
423
424 func apiStub() (*arvados.Client, *apiStubServer) {
425         client := arvados.NewClientFromEnv()
426         apistub := &apiStubServer{}
427         apistub.server = httptest.NewTLSServer(apistub)
428         apistub.proxy = httputil.NewSingleHostReverseProxy(&url.URL{Scheme: "https", Host: client.APIHost})
429         if client.Insecure {
430                 apistub.proxy.Transport = arvados.InsecureHTTPClient.Transport
431         }
432         client.APIHost = apistub.server.Listener.Addr().String()
433         return client, apistub
434 }
435
436 func (apistub *apiStubServer) ServeHTTP(w http.ResponseWriter, r *http.Request) {
437         if apistub.intercept != nil && apistub.intercept(w, r) {
438                 return
439         }
440         if r.Method == "GET" && r.URL.Path == "/arvados/v1/collections/"+hwPDH {
441                 json.NewEncoder(w).Encode(arvados.Collection{ManifestText: hwManifest})
442                 return
443         }
444         if r.Method == "GET" && r.URL.Path == "/arvados/v1/collections/"+otherPDH {
445                 json.NewEncoder(w).Encode(arvados.Collection{ManifestText: otherManifest})
446                 return
447         }
448         if r.Method == "GET" && r.URL.Path == "/arvados/v1/collections/"+normalizedWithSubdirsPDH {
449                 json.NewEncoder(w).Encode(arvados.Collection{ManifestText: normalizedManifestWithSubdirs})
450                 return
451         }
452         if r.Method == "GET" && r.URL.Path == "/arvados/v1/collections/"+denormalizedWithSubdirsPDH {
453                 json.NewEncoder(w).Encode(arvados.Collection{ManifestText: denormalizedManifestWithSubdirs})
454                 return
455         }
456         if r.Method == "GET" && r.URL.Path == "/arvados/v1/containers/"+apistub.container.UUID {
457                 json.NewEncoder(w).Encode(apistub.container)
458                 return
459         }
460         apistub.proxy.ServeHTTP(w, r)
461 }
462
463 func (s *TestSuite) TestLoadImage(c *C) {
464         s.runner.Container.ContainerImage = arvadostest.DockerImage112PDH
465         s.runner.Container.Mounts = map[string]arvados.Mount{
466                 "/out": {Kind: "tmp", Writable: true},
467         }
468         s.runner.Container.OutputPath = "/out"
469
470         _, err := s.runner.SetupMounts()
471         c.Assert(err, IsNil)
472
473         imageID, err := s.runner.LoadImage()
474         c.Check(err, IsNil)
475         c.Check(s.executor.loaded, Matches, ".*"+regexp.QuoteMeta(arvadostest.DockerImage112Filename))
476         c.Check(imageID, Equals, strings.TrimSuffix(arvadostest.DockerImage112Filename, ".tar"))
477
478         s.runner.Container.ContainerImage = arvadostest.DockerImage112PDH
479         s.executor.imageLoaded = false
480         s.executor.loaded = ""
481         s.executor.loadErr = errors.New("bork")
482         imageID, err = s.runner.LoadImage()
483         c.Check(err, ErrorMatches, ".*bork")
484         c.Check(s.executor.loaded, Matches, ".*"+regexp.QuoteMeta(arvadostest.DockerImage112Filename))
485
486         s.runner.Container.ContainerImage = fakeInputCollectionPDH
487         s.executor.imageLoaded = false
488         s.executor.loaded = ""
489         s.executor.loadErr = nil
490         imageID, err = s.runner.LoadImage()
491         c.Check(err, ErrorMatches, "image collection does not include a \\.tar image file")
492         c.Check(s.executor.loaded, Equals, "")
493 }
494
495 type ArvErrorTestClient struct{}
496
497 func (ArvErrorTestClient) Create(resourceType string,
498         parameters arvadosclient.Dict,
499         output interface{}) error {
500         return nil
501 }
502
503 func (ArvErrorTestClient) Call(method, resourceType, uuid, action string, parameters arvadosclient.Dict, output interface{}) error {
504         if method == "GET" && resourceType == "containers" && action == "auth" {
505                 return nil
506         }
507         return errors.New("ArvError")
508 }
509
510 func (ArvErrorTestClient) CallRaw(method, resourceType, uuid, action string,
511         parameters arvadosclient.Dict) (reader io.ReadCloser, err error) {
512         return nil, errors.New("ArvError")
513 }
514
515 func (ArvErrorTestClient) Get(resourceType string, uuid string, parameters arvadosclient.Dict, output interface{}) error {
516         return errors.New("ArvError")
517 }
518
519 func (ArvErrorTestClient) Update(resourceType string, uuid string, parameters arvadosclient.Dict, output interface{}) (err error) {
520         return nil
521 }
522
523 func (ArvErrorTestClient) Discovery(key string) (interface{}, error) {
524         return discoveryMap[key], nil
525 }
526
527 type KeepErrorTestClient struct {
528         KeepTestClient
529 }
530
531 func (*KeepErrorTestClient) BlockWrite(context.Context, arvados.BlockWriteOptions) (arvados.BlockWriteResponse, error) {
532         return arvados.BlockWriteResponse{}, errors.New("KeepError")
533 }
534
535 func (*KeepErrorTestClient) LocalLocator(string) (string, error) {
536         return "", errors.New("KeepError")
537 }
538
539 type KeepReadErrorTestClient struct {
540         KeepTestClient
541 }
542
543 func (*KeepReadErrorTestClient) ReadAt(string, []byte, int) (int, error) {
544         return 0, errors.New("KeepError")
545 }
546
547 func dockerLog(fd byte, msg string) []byte {
548         by := []byte(msg)
549         header := make([]byte, 8+len(by))
550         header[0] = fd
551         header[7] = byte(len(by))
552         copy(header[8:], by)
553         return header
554 }
555
556 func (s *TestSuite) TestRunContainer(c *C) {
557         s.executor.runFunc = func() int {
558                 fmt.Fprintf(s.executor.created.Stdout, "Hello world\n")
559                 return 0
560         }
561
562         s.runner.Container.ContainerImage = arvadostest.DockerImage112PDH
563         s.runner.Container.Command = []string{"./hw"}
564         s.runner.Container.OutputStorageClasses = []string{"default"}
565
566         imageID, err := s.runner.LoadImage()
567         c.Assert(err, IsNil)
568
569         err = s.runner.CreateContainer(imageID, nil)
570         c.Assert(err, IsNil)
571
572         err = s.runner.StartContainer()
573         c.Assert(err, IsNil)
574
575         err = s.runner.WaitFinish()
576         c.Assert(err, IsNil)
577
578         c.Check(logFileContent(c, s.runner, "stdout.txt"), Matches, `2\S+Z Hello world\n`)
579         c.Check(logFileContent(c, s.runner, "stderr.txt"), Matches, ``)
580 }
581
582 func (s *TestSuite) TestCommitLogs(c *C) {
583         api := &ArvTestClient{}
584         kc := &KeepTestClient{}
585         defer kc.Close()
586         cr, err := NewContainerRunner(s.client, api, kc, "zzzzz-zzzzz-zzzzzzzzzzzzzzz")
587         c.Assert(err, IsNil)
588         f, err := cr.openLogFile("crunch-run")
589         c.Assert(err, IsNil)
590         cr.CrunchLog = newLogWriter(newTestTimestamper(f))
591
592         cr.CrunchLog.Print("Hello world!")
593         cr.CrunchLog.Print("Goodbye")
594         cr.finalState = "Complete"
595
596         err = cr.CommitLogs()
597         c.Check(err, IsNil)
598
599         c.Check(api.Calls, Equals, 1)
600         c.Check(api.Content[0]["ensure_unique_name"], Equals, true)
601         c.Check(api.Content[0]["collection"].(arvadosclient.Dict)["name"], Equals, "logs for zzzzz-zzzzz-zzzzzzzzzzzzzzz")
602         c.Check(api.Content[0]["collection"].(arvadosclient.Dict)["manifest_text"], Equals, ". 744b2e4553123b02fa7b452ec5c18993+123 0:123:crunch-run.txt\n")
603         c.Check(*cr.LogsPDH, Equals, "63da7bdacf08c40f604daad80c261e9a+60")
604 }
605
606 func (s *TestSuite) TestUpdateContainerRunning(c *C) {
607         api := &ArvTestClient{}
608         kc := &KeepTestClient{}
609         defer kc.Close()
610         cr, err := NewContainerRunner(s.client, api, kc, "zzzzz-zzzzz-zzzzzzzzzzzzzzz")
611         c.Assert(err, IsNil)
612
613         err = cr.UpdateContainerRunning("")
614         c.Check(err, IsNil)
615
616         c.Check(api.Content[0]["container"].(arvadosclient.Dict)["state"], Equals, "Running")
617 }
618
619 func (s *TestSuite) TestUpdateContainerComplete(c *C) {
620         api := &ArvTestClient{}
621         kc := &KeepTestClient{}
622         defer kc.Close()
623         cr, err := NewContainerRunner(s.client, api, kc, "zzzzz-zzzzz-zzzzzzzzzzzzzzz")
624         c.Assert(err, IsNil)
625
626         cr.LogsPDH = new(string)
627         *cr.LogsPDH = "d3a229d2fe3690c2c3e75a71a153c6a3+60"
628
629         cr.ExitCode = new(int)
630         *cr.ExitCode = 42
631         cr.finalState = "Complete"
632
633         err = cr.UpdateContainerFinal()
634         c.Check(err, IsNil)
635
636         c.Check(api.Content[0]["container"].(arvadosclient.Dict)["log"], Equals, *cr.LogsPDH)
637         c.Check(api.Content[0]["container"].(arvadosclient.Dict)["exit_code"], Equals, *cr.ExitCode)
638         c.Check(api.Content[0]["container"].(arvadosclient.Dict)["state"], Equals, "Complete")
639 }
640
641 func (s *TestSuite) TestUpdateContainerCancelled(c *C) {
642         api := &ArvTestClient{}
643         kc := &KeepTestClient{}
644         defer kc.Close()
645         cr, err := NewContainerRunner(s.client, api, kc, "zzzzz-zzzzz-zzzzzzzzzzzzzzz")
646         c.Assert(err, IsNil)
647         cr.cCancelled = true
648         cr.finalState = "Cancelled"
649
650         err = cr.UpdateContainerFinal()
651         c.Check(err, IsNil)
652
653         c.Check(api.Content[0]["container"].(arvadosclient.Dict)["log"], IsNil)
654         c.Check(api.Content[0]["container"].(arvadosclient.Dict)["exit_code"], IsNil)
655         c.Check(api.Content[0]["container"].(arvadosclient.Dict)["state"], Equals, "Cancelled")
656 }
657
658 // Used by the TestFullRun*() test below to DRY up boilerplate setup to do full
659 // dress rehearsal of the Run() function, starting from a JSON container record.
660 func (s *TestSuite) fullRunHelper(c *C, record string, extraMounts []string, fn func() int) (*ArvTestClient, *ContainerRunner, string) {
661         err := json.Unmarshal([]byte(record), &s.api.Container)
662         c.Assert(err, IsNil)
663         initialState := s.api.Container.State
664
665         var sm struct {
666                 SecretMounts map[string]arvados.Mount `json:"secret_mounts"`
667         }
668         err = json.Unmarshal([]byte(record), &sm)
669         c.Check(err, IsNil)
670         secretMounts, err := json.Marshal(sm)
671         c.Assert(err, IsNil)
672         c.Logf("SecretMounts decoded %v json %q", sm, secretMounts)
673
674         s.executor.runFunc = fn
675
676         s.runner.statInterval = 100 * time.Millisecond
677         s.runner.containerWatchdogInterval = time.Second
678
679         realTemp := c.MkDir()
680         tempcount := 0
681         s.runner.MkTempDir = func(_, prefix string) (string, error) {
682                 tempcount++
683                 d := fmt.Sprintf("%s/%s%d", realTemp, prefix, tempcount)
684                 err := os.Mkdir(d, os.ModePerm)
685                 if err != nil && strings.Contains(err.Error(), ": file exists") {
686                         // Test case must have pre-populated the tempdir
687                         err = nil
688                 }
689                 return d, err
690         }
691         client, _ := apiStub()
692         s.runner.MkArvClient = func(token string) (IArvadosClient, IKeepClient, *arvados.Client, error) {
693                 return &ArvTestClient{secretMounts: secretMounts}, &s.testContainerKeepClient, client, nil
694         }
695
696         if extraMounts != nil && len(extraMounts) > 0 {
697                 err := s.runner.SetupArvMountPoint("keep")
698                 c.Check(err, IsNil)
699
700                 for _, m := range extraMounts {
701                         os.MkdirAll(s.runner.ArvMountPoint+"/by_id/"+m, os.ModePerm)
702                 }
703         }
704
705         err = s.runner.Run()
706         if s.api.CalledWith("container.state", "Complete") != nil {
707                 c.Check(err, IsNil)
708         }
709         if s.executor.loadErr == nil && s.executor.createErr == nil && initialState != "Running" {
710                 c.Check(s.api.WasSetRunning, Equals, true)
711                 var lastupdate arvadosclient.Dict
712                 for _, content := range s.api.Content {
713                         if content["container"] != nil {
714                                 lastupdate = content["container"].(arvadosclient.Dict)
715                         }
716                 }
717                 if lastupdate["log"] == nil {
718                         c.Errorf("no container update with non-nil log -- updates were: %v", s.api.Content)
719                 }
720         }
721
722         if err != nil {
723                 dumpAllLogFiles(c, s.runner)
724         }
725
726         return s.api, s.runner, realTemp
727 }
728
729 func (s *TestSuite) TestFullRunHello(c *C) {
730         s.runner.enableMemoryLimit = true
731         s.runner.networkMode = "default"
732         s.fullRunHelper(c, `{
733     "command": ["echo", "hello world"],
734     "container_image": "`+arvadostest.DockerImage112PDH+`",
735     "cwd": ".",
736     "environment": {"foo":"bar","baz":"waz"},
737     "mounts": {"/tmp": {"kind": "tmp"} },
738     "output_path": "/tmp",
739     "priority": 1,
740     "runtime_constraints": {"vcpus":1,"ram":1000000},
741     "state": "Locked",
742     "output_storage_classes": ["default"]
743 }`, nil, func() int {
744                 c.Check(s.executor.created.Command, DeepEquals, []string{"echo", "hello world"})
745                 c.Check(s.executor.created.Image, Equals, "sha256:d8309758b8fe2c81034ffc8a10c36460b77db7bc5e7b448c4e5b684f9d95a678")
746                 c.Check(s.executor.created.Env, DeepEquals, map[string]string{"foo": "bar", "baz": "waz"})
747                 c.Check(s.executor.created.VCPUs, Equals, 1)
748                 c.Check(s.executor.created.RAM, Equals, int64(1000000))
749                 c.Check(s.executor.created.NetworkMode, Equals, "default")
750                 c.Check(s.executor.created.EnableNetwork, Equals, false)
751                 c.Check(s.executor.created.GPUDeviceCount, Equals, 0)
752                 fmt.Fprintln(s.executor.created.Stdout, "hello world")
753                 return 0
754         })
755
756         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
757         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
758         c.Check(logFileContent(c, s.runner, "stdout.txt"), Matches, `2\S+Z hello world\n`)
759         c.Check(s.testDispatcherKeepClient.StorageClasses, DeepEquals, []string{"default"})
760         c.Check(s.testContainerKeepClient.StorageClasses, DeepEquals, []string{"default"})
761 }
762
763 func (s *TestSuite) TestRunAlreadyRunning(c *C) {
764         var ran bool
765         s.fullRunHelper(c, `{
766     "command": ["sleep", "3"],
767     "container_image": "`+arvadostest.DockerImage112PDH+`",
768     "cwd": ".",
769     "environment": {},
770     "mounts": {"/tmp": {"kind": "tmp"} },
771     "output_path": "/tmp",
772     "priority": 1,
773     "runtime_constraints": {},
774     "scheduling_parameters":{"max_run_time": 1},
775     "state": "Running"
776 }`, nil, func() int {
777                 ran = true
778                 return 2
779         })
780         c.Check(s.api.CalledWith("container.state", "Cancelled"), IsNil)
781         c.Check(s.api.CalledWith("container.state", "Complete"), IsNil)
782         c.Check(ran, Equals, false)
783 }
784
785 func ec2MetadataServerStub(c *C, token *string, failureRate float64, stoptime *atomic.Value) *httptest.Server {
786         failedOnce := false
787         return httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
788                 if !failedOnce || rand.Float64() < failureRate {
789                         w.WriteHeader(http.StatusServiceUnavailable)
790                         failedOnce = true
791                         return
792                 }
793                 switch r.URL.Path {
794                 case "/latest/api/token":
795                         fmt.Fprintln(w, *token)
796                 case "/latest/meta-data/spot/instance-action":
797                         if r.Header.Get("X-aws-ec2-metadata-token") != *token {
798                                 w.WriteHeader(http.StatusUnauthorized)
799                         } else if t, _ := stoptime.Load().(time.Time); t.IsZero() {
800                                 w.WriteHeader(http.StatusNotFound)
801                         } else {
802                                 fmt.Fprintf(w, `{"action":"stop","time":"%s"}`, t.Format(time.RFC3339))
803                         }
804                 default:
805                         w.WriteHeader(http.StatusNotFound)
806                 }
807         }))
808 }
809
810 func (s *TestSuite) TestSpotInterruptionNotice(c *C) {
811         s.testSpotInterruptionNotice(c, 0.1)
812 }
813
814 func (s *TestSuite) TestSpotInterruptionNoticeNotAvailable(c *C) {
815         s.testSpotInterruptionNotice(c, 1)
816 }
817
818 func (s *TestSuite) testSpotInterruptionNotice(c *C, failureRate float64) {
819         var stoptime atomic.Value
820         token := "fake-ec2-metadata-token"
821         stub := ec2MetadataServerStub(c, &token, failureRate, &stoptime)
822         defer stub.Close()
823
824         defer func(i time.Duration, u string) {
825                 spotInterruptionCheckInterval = i
826                 ec2MetadataBaseURL = u
827         }(spotInterruptionCheckInterval, ec2MetadataBaseURL)
828         spotInterruptionCheckInterval = time.Second / 8
829         ec2MetadataBaseURL = stub.URL
830
831         checkedLogs := false
832         checkLogs := func() {
833                 checkedLogs = true
834                 c.Check(logFileContent(c, s.runner, "crunch-run.txt"), Matches, `(?ms).*Checking for spot instance interruptions every 125ms using instance metadata at http://.*`)
835                 c.Check(logFileContent(c, s.runner, "crunch-run.txt"), Matches, `(?ms).*Spot instance interruption check was inconclusive: 503 Service Unavailable -- will retry in 125ms.*`)
836                 if failureRate == 1 {
837                         c.Check(logFileContent(c, s.runner, "crunch-run.txt"), Matches, `(?ms).*Spot instance interruption check was inconclusive: 503 Service Unavailable -- now giving up after too many consecutive errors.*`)
838                         return
839                 }
840                 text := `Cloud provider scheduled instance stop at ` + stoptime.Load().(time.Time).Format(time.RFC3339)
841                 c.Check(logFileContent(c, s.runner, "crunch-run.txt"), Matches, `(?ms).*`+text+`.*`)
842                 c.Check(s.api.CalledWith("container.runtime_status.warning", "preemption notice"), NotNil)
843                 c.Check(s.api.CalledWith("container.runtime_status.warningDetail", text), NotNil)
844                 c.Check(s.api.CalledWith("container.runtime_status.preemptionNotice", text), NotNil)
845
846                 // Check that the log collection was saved, and the
847                 // container record updated with the new PDH,
848                 // immediately after the preemption notice was
849                 // received -- i.e., while the container is still
850                 // running.
851                 lastpdh := ""
852                 saved := make(map[string]string) // pdh => manifest_text
853                 for _, call := range s.api.Content {
854                         if ctr, ok := call["container"].(arvadosclient.Dict); ok {
855                                 if pdh, ok := ctr["log"].(string); ok {
856                                         lastpdh = pdh
857                                 }
858                         }
859                         if coll, ok := call["collection"].(arvadosclient.Dict); ok {
860                                 mt, _ := coll["manifest_text"].(string)
861                                 if strings.Contains(mt, ":crunch-run.txt") {
862                                         saved[arvados.PortableDataHash(mt)] = mt
863                                 }
864                         }
865                 }
866                 logfs, err := (&arvados.Collection{ManifestText: saved[lastpdh]}).FileSystem(s.runner.dispatcherClient, s.runner.DispatcherKeepClient)
867                 c.Assert(err, IsNil)
868                 log, err := fs.ReadFile(arvados.FS(logfs), "crunch-run.txt")
869                 c.Check(err, IsNil)
870                 c.Check(string(log), Matches, `(?ms).*\Q`+text+`\E.*`)
871         }
872
873         go s.runner.checkSpotInterruptionNotices()
874         s.fullRunHelper(c, `{
875     "command": ["sleep", "3"],
876     "container_image": "`+arvadostest.DockerImage112PDH+`",
877     "cwd": ".",
878     "environment": {},
879     "mounts": {"/tmp": {"kind": "tmp"} },
880     "output_path": "/tmp",
881     "priority": 1,
882     "runtime_constraints": {},
883     "state": "Locked"
884 }`, nil, func() int {
885                 time.Sleep(time.Second)
886                 stoptime.Store(time.Now().Add(time.Minute).UTC())
887                 token = "different-fake-ec2-metadata-token"
888                 time.Sleep(time.Second)
889                 checkLogs()
890                 return 0
891         })
892         c.Check(checkedLogs, Equals, true)
893 }
894
895 func (s *TestSuite) TestRunTimeExceeded(c *C) {
896         s.fullRunHelper(c, `{
897     "command": ["sleep", "3"],
898     "container_image": "`+arvadostest.DockerImage112PDH+`",
899     "cwd": ".",
900     "environment": {},
901     "mounts": {"/tmp": {"kind": "tmp"} },
902     "output_path": "/tmp",
903     "priority": 1,
904     "runtime_constraints": {},
905     "scheduling_parameters":{"max_run_time": 1},
906     "state": "Locked"
907 }`, nil, func() int {
908                 time.Sleep(3 * time.Second)
909                 return 0
910         })
911
912         c.Check(s.api.CalledWith("container.state", "Cancelled"), NotNil)
913         c.Check(logFileContent(c, s.runner, "crunch-run.txt"), Matches, "(?ms).*maximum run time exceeded.*")
914 }
915
916 func (s *TestSuite) TestContainerWaitFails(c *C) {
917         s.fullRunHelper(c, `{
918     "command": ["sleep", "3"],
919     "container_image": "`+arvadostest.DockerImage112PDH+`",
920     "cwd": ".",
921     "mounts": {"/tmp": {"kind": "tmp"} },
922     "output_path": "/tmp",
923     "priority": 1,
924     "state": "Locked"
925 }`, nil, func() int {
926                 s.executor.waitErr = errors.New("Container is not running")
927                 return 0
928         })
929
930         c.Check(s.api.CalledWith("container.state", "Cancelled"), NotNil)
931         c.Check(logFileContent(c, s.runner, "crunch-run.txt"), Matches, "(?ms).*Container is not running.*")
932 }
933
934 func (s *TestSuite) TestCrunchstat(c *C) {
935         s.runner.crunchstatFakeFS = os.DirFS("../crunchstat/testdata/debian12")
936         s.fullRunHelper(c, `{
937                 "command": ["sleep", "1"],
938                 "container_image": "`+arvadostest.DockerImage112PDH+`",
939                 "cwd": ".",
940                 "environment": {},
941                 "mounts": {"/tmp": {"kind": "tmp"} },
942                 "output_path": "/tmp",
943                 "priority": 1,
944                 "runtime_constraints": {},
945                 "state": "Locked"
946         }`, nil, func() int {
947                 time.Sleep(time.Second)
948                 return 0
949         })
950
951         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
952         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
953
954         c.Check(logFileContent(c, s.runner, "crunchstat.txt"), Matches, `(?ms).*`+reTimestamp+` mem \d+ swap \d+ pgmajfault \d+ rss.*`)
955         c.Check(logFileContent(c, s.runner, "hoststat.txt"), Matches, `(?ms).*`+reTimestamp+` mem \d+ swap \d+ pgmajfault \d+ rss.*`)
956
957         // Check that we called (*crunchstat.Reporter)Stop().
958         c.Check(logFileContent(c, s.runner, "crunch-run.txt"), Matches, `(?ms).*`+reTimestamp+` Maximum crunch-run memory rss usage was \d+ bytes\n.*`)
959 }
960
961 func (s *TestSuite) TestNodeInfoLog(c *C) {
962         s.fullRunHelper(c, `{
963                 "command": ["true"],
964                 "container_image": "`+arvadostest.DockerImage112PDH+`",
965                 "cwd": ".",
966                 "environment": {},
967                 "mounts": {"/tmp": {"kind": "tmp"} },
968                 "output_path": "/tmp",
969                 "priority": 1,
970                 "runtime_constraints": {},
971                 "state": "Locked"
972         }`, nil, func() int {
973                 return 0
974         })
975
976         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
977         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
978
979         json := logFileContent(c, s.runner, "node.json")
980         c.Check(json, Matches, `(?ms).*"ProviderType": *"a1\.2xlarge".*`)
981         c.Check(json, Matches, `(?ms).*"Price": *1\.2.*`)
982
983         nodeinfo := logFileContent(c, s.runner, "node-info.txt")
984         c.Check(nodeinfo, Matches, `(?ms).*Host Information.*`)
985         c.Check(nodeinfo, Matches, `(?ms).*CPU Information.*`)
986         c.Check(nodeinfo, Matches, `(?ms).*Memory Information.*`)
987         c.Check(nodeinfo, Matches, `(?ms).*Disk Space.*`)
988         c.Check(nodeinfo, Matches, `(?ms).*Disk INodes.*`)
989 }
990
991 func (s *TestSuite) TestLogVersionAndRuntime(c *C) {
992         s.fullRunHelper(c, `{
993                 "command": ["sleep", "1"],
994                 "container_image": "`+arvadostest.DockerImage112PDH+`",
995                 "cwd": ".",
996                 "environment": {},
997                 "mounts": {"/tmp": {"kind": "tmp"} },
998                 "output_path": "/tmp",
999                 "priority": 1,
1000                 "runtime_constraints": {},
1001                 "state": "Locked"
1002         }`, nil, func() int {
1003                 return 0
1004         })
1005
1006         c.Check(logFileContent(c, s.runner, "crunch-run.txt"), Matches, `(?ms).*crunch-run \S+ \(go\S+\) start.*`)
1007         c.Check(logFileContent(c, s.runner, "crunch-run.txt"), Matches, `(?ms).*crunch-run process has uid=\d+\(.+\) gid=\d+\(.+\) groups=\d+\(.+\)(,\d+\(.+\))*\n.*`)
1008         c.Check(logFileContent(c, s.runner, "crunch-run.txt"), Matches, `(?ms).*Executing container: zzzzz-zzzzz-zzzzzzzzzzzzzzz.*`)
1009         c.Check(logFileContent(c, s.runner, "crunch-run.txt"), Matches, `(?ms).*Using container runtime: stub.*`)
1010 }
1011
1012 func (s *TestSuite) testLogRSSThresholds(c *C, ram int64, expected []int, notExpected int) {
1013         s.runner.crunchstatFakeFS = os.DirFS("../crunchstat/testdata/debian12")
1014         s.fullRunHelper(c, `{
1015                 "command": ["true"],
1016                 "container_image": "`+arvadostest.DockerImage112PDH+`",
1017                 "cwd": ".",
1018                 "environment": {},
1019                 "mounts": {"/tmp": {"kind": "tmp"} },
1020                 "output_path": "/tmp",
1021                 "priority": 1,
1022                 "runtime_constraints": {"ram": `+fmt.Sprintf("%d", ram)+`},
1023                 "state": "Locked"
1024         }`, nil, func() int { return 0 })
1025         logs := logFileContent(c, s.runner, "crunch-run.txt")
1026         c.Log("=== crunchstat logs")
1027         c.Log(logs)
1028         pattern := logLineStart + `Container using over %d%% of memory \(rss %d/%d bytes\)`
1029         var threshold int
1030         for _, threshold = range expected {
1031                 c.Check(logs, Matches, fmt.Sprintf(pattern, threshold, s.debian12MemoryCurrent, ram))
1032         }
1033         if notExpected > threshold {
1034                 c.Check(logs, Not(Matches), fmt.Sprintf(pattern, notExpected, s.debian12MemoryCurrent, ram))
1035         }
1036 }
1037
1038 func (s *TestSuite) TestLogNoRSSThresholds(c *C) {
1039         s.testLogRSSThresholds(c, s.debian12MemoryCurrent*10, []int{}, 90)
1040 }
1041
1042 func (s *TestSuite) TestLogSomeRSSThresholds(c *C) {
1043         onePercentRSS := s.debian12MemoryCurrent / 100
1044         s.testLogRSSThresholds(c, 102*onePercentRSS, []int{90, 95}, 99)
1045 }
1046
1047 func (s *TestSuite) TestLogAllRSSThresholds(c *C) {
1048         s.testLogRSSThresholds(c, s.debian12MemoryCurrent, []int{90, 95, 99}, 0)
1049 }
1050
1051 func (s *TestSuite) TestLogMaximaAfterRun(c *C) {
1052         s.runner.crunchstatFakeFS = os.DirFS("../crunchstat/testdata/debian12")
1053         s.runner.parentTemp = c.MkDir()
1054         s.fullRunHelper(c, `{
1055         "command": ["true"],
1056         "container_image": "`+arvadostest.DockerImage112PDH+`",
1057         "cwd": ".",
1058         "environment": {},
1059         "mounts": {"/tmp": {"kind": "tmp"} },
1060         "output_path": "/tmp",
1061         "priority": 1,
1062         "runtime_constraints": {"ram": `+fmt.Sprintf("%d", s.debian12MemoryCurrent*10)+`},
1063         "state": "Locked"
1064     }`, nil, func() int { return 0 })
1065         logs := logFileContent(c, s.runner, "crunch-run.txt")
1066         for _, expected := range []string{
1067                 `Maximum disk usage was \d+%, \d+/\d+ bytes`,
1068                 fmt.Sprintf(`Maximum container memory swap usage was %d bytes`, s.debian12SwapCurrent),
1069                 `Maximum container memory pgmajfault usage was \d+ faults`,
1070                 fmt.Sprintf(`Maximum container memory rss usage was 10%%, %d/%d bytes`, s.debian12MemoryCurrent, s.debian12MemoryCurrent*10),
1071                 `Maximum crunch-run memory rss usage was \d+ bytes`,
1072         } {
1073                 c.Check(logs, Matches, logLineStart+expected)
1074         }
1075 }
1076
1077 func (s *TestSuite) TestCommitNodeInfoBeforeStart(c *C) {
1078         var collection_create, container_update arvadosclient.Dict
1079         s.fullRunHelper(c, `{
1080                 "command": ["true"],
1081                 "container_image": "`+arvadostest.DockerImage112PDH+`",
1082                 "cwd": ".",
1083                 "environment": {},
1084                 "mounts": {"/tmp": {"kind": "tmp"} },
1085                 "output_path": "/tmp",
1086                 "priority": 1,
1087                 "runtime_constraints": {},
1088                 "state": "Locked",
1089                 "uuid": "zzzzz-dz642-202301121543210"
1090         }`, nil, func() int {
1091                 collection_create = s.api.CalledWith("ensure_unique_name", true)
1092                 container_update = s.api.CalledWith("container.state", "Running")
1093                 return 0
1094         })
1095
1096         c.Assert(collection_create, NotNil)
1097         log_collection := collection_create["collection"].(arvadosclient.Dict)
1098         c.Check(log_collection["name"], Equals, "logs for zzzzz-dz642-202301121543210")
1099         manifest_text := log_collection["manifest_text"].(string)
1100         // We check that the file size is at least two digits as an easy way to
1101         // check the file isn't empty.
1102         c.Check(manifest_text, Matches, `\. .+ \d+:\d{2,}:node-info\.txt( .+)?\n`)
1103         c.Check(manifest_text, Matches, `\. .+ \d+:\d{2,}:node\.json( .+)?\n`)
1104
1105         c.Assert(container_update, NotNil)
1106         // As of Arvados 2.5.0, the container update must specify its log in PDH
1107         // format for the API server to propagate it to container requests, which
1108         // is what we care about for this test.
1109         expect_pdh := fmt.Sprintf("%x+%d", md5.Sum([]byte(manifest_text)), len(manifest_text))
1110         c.Check(container_update["container"].(arvadosclient.Dict)["log"], Equals, expect_pdh)
1111 }
1112
1113 func (s *TestSuite) TestContainerRecordLog(c *C) {
1114         s.fullRunHelper(c, `{
1115                 "command": ["sleep", "1"],
1116                 "container_image": "`+arvadostest.DockerImage112PDH+`",
1117                 "cwd": ".",
1118                 "environment": {},
1119                 "mounts": {"/tmp": {"kind": "tmp"} },
1120                 "output_path": "/tmp",
1121                 "priority": 1,
1122                 "runtime_constraints": {},
1123                 "state": "Locked"
1124         }`, nil,
1125                 func() int {
1126                         time.Sleep(time.Second)
1127                         return 0
1128                 })
1129
1130         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
1131         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
1132
1133         c.Check(logFileContent(c, s.runner, "container.json"), Matches, `(?ms).*container_image.*`)
1134 }
1135
1136 func (s *TestSuite) TestFullRunStderr(c *C) {
1137         s.fullRunHelper(c, `{
1138     "command": ["/bin/sh", "-c", "echo hello ; echo world 1>&2 ; exit 1"],
1139     "container_image": "`+arvadostest.DockerImage112PDH+`",
1140     "cwd": ".",
1141     "environment": {},
1142     "mounts": {"/tmp": {"kind": "tmp"} },
1143     "output_path": "/tmp",
1144     "priority": 1,
1145     "runtime_constraints": {},
1146     "state": "Locked"
1147 }`, nil, func() int {
1148                 fmt.Fprintln(s.executor.created.Stdout, "hello")
1149                 fmt.Fprintln(s.executor.created.Stderr, "world")
1150                 return 1
1151         })
1152
1153         final := s.api.CalledWith("container.state", "Complete")
1154         c.Assert(final, NotNil)
1155         c.Check(final["container"].(arvadosclient.Dict)["exit_code"], Equals, 1)
1156         c.Check(final["container"].(arvadosclient.Dict)["log"], NotNil)
1157
1158         c.Check(logFileContent(c, s.runner, "stdout.txt"), Matches, ".*hello\n")
1159         c.Check(logFileContent(c, s.runner, "stderr.txt"), Matches, ".*world\n")
1160 }
1161
1162 func (s *TestSuite) TestFullRunDefaultCwd(c *C) {
1163         s.fullRunHelper(c, `{
1164     "command": ["pwd"],
1165     "container_image": "`+arvadostest.DockerImage112PDH+`",
1166     "cwd": ".",
1167     "environment": {},
1168     "mounts": {"/tmp": {"kind": "tmp"} },
1169     "output_path": "/tmp",
1170     "priority": 1,
1171     "runtime_constraints": {},
1172     "state": "Locked"
1173 }`, nil, func() int {
1174                 fmt.Fprintf(s.executor.created.Stdout, "workdir=%q", s.executor.created.WorkingDir)
1175                 return 0
1176         })
1177
1178         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
1179         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
1180         c.Check(logFileContent(c, s.runner, "stdout.txt"), Matches, `.*workdir=""`)
1181 }
1182
1183 func (s *TestSuite) TestFullRunSetCwd(c *C) {
1184         s.fullRunHelper(c, `{
1185     "command": ["pwd"],
1186     "container_image": "`+arvadostest.DockerImage112PDH+`",
1187     "cwd": "/bin",
1188     "environment": {},
1189     "mounts": {"/tmp": {"kind": "tmp"} },
1190     "output_path": "/tmp",
1191     "priority": 1,
1192     "runtime_constraints": {},
1193     "state": "Locked"
1194 }`, nil, func() int {
1195                 fmt.Fprintln(s.executor.created.Stdout, s.executor.created.WorkingDir)
1196                 return 0
1197         })
1198
1199         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
1200         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
1201         c.Check(logFileContent(c, s.runner, "stdout.txt"), Matches, ".*/bin\n")
1202 }
1203
1204 func (s *TestSuite) TestFullRunSetOutputStorageClasses(c *C) {
1205         s.fullRunHelper(c, `{
1206     "command": ["pwd"],
1207     "container_image": "`+arvadostest.DockerImage112PDH+`",
1208     "cwd": "/bin",
1209     "environment": {},
1210     "mounts": {"/tmp": {"kind": "tmp"} },
1211     "output_path": "/tmp",
1212     "priority": 1,
1213     "runtime_constraints": {},
1214     "state": "Locked",
1215     "output_storage_classes": ["foo", "bar"]
1216 }`, nil, func() int {
1217                 fmt.Fprintln(s.executor.created.Stdout, s.executor.created.WorkingDir)
1218                 return 0
1219         })
1220
1221         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
1222         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
1223         c.Check(logFileContent(c, s.runner, "stdout.txt"), Matches, ".*/bin\n")
1224         c.Check(s.testDispatcherKeepClient.StorageClasses, DeepEquals, []string{"foo", "bar"})
1225         c.Check(s.testContainerKeepClient.StorageClasses, DeepEquals, []string{"foo", "bar"})
1226 }
1227
1228 func (s *TestSuite) TestEnableCUDADeviceCount(c *C) {
1229         s.fullRunHelper(c, `{
1230     "command": ["pwd"],
1231     "container_image": "`+arvadostest.DockerImage112PDH+`",
1232     "cwd": "/bin",
1233     "environment": {},
1234     "mounts": {"/tmp": {"kind": "tmp"} },
1235     "output_path": "/tmp",
1236     "priority": 1,
1237     "runtime_constraints": {"gpu": {"device_count": 2, "stack": "cuda", "hardware_target": ["9.0"], "driver_version": "11.0", vram: 8000000000}},
1238     "state": "Locked",
1239     "output_storage_classes": ["foo", "bar"]
1240 }`, nil, func() int {
1241                 fmt.Fprintln(s.executor.created.Stdout, "ok")
1242                 return 0
1243         })
1244         c.Check(s.executor.created.GPUDeviceCount, Equals, 2)
1245         c.Check(s.executor.created.GPUStack, Equals, "cuda")
1246 }
1247
1248 func (s *TestSuite) TestStopOnSignal(c *C) {
1249         s.executor.runFunc = func() int {
1250                 s.executor.created.Stdout.Write([]byte("foo\n"))
1251                 s.runner.SigChan <- syscall.SIGINT
1252                 time.Sleep(10 * time.Second)
1253                 return 0
1254         }
1255         s.testStopContainer(c)
1256 }
1257
1258 func (s *TestSuite) TestStopOnArvMountDeath(c *C) {
1259         s.executor.runFunc = func() int {
1260                 s.executor.created.Stdout.Write([]byte("foo\n"))
1261                 s.runner.ArvMountExit <- nil
1262                 close(s.runner.ArvMountExit)
1263                 time.Sleep(10 * time.Second)
1264                 return 0
1265         }
1266         s.runner.ArvMountExit = make(chan error)
1267         s.testStopContainer(c)
1268 }
1269
1270 func (s *TestSuite) testStopContainer(c *C) {
1271         record := `{
1272     "command": ["/bin/sh", "-c", "echo foo && sleep 30 && echo bar"],
1273     "container_image": "` + arvadostest.DockerImage112PDH + `",
1274     "cwd": ".",
1275     "environment": {},
1276     "mounts": {"/tmp": {"kind": "tmp"} },
1277     "output_path": "/tmp",
1278     "priority": 1,
1279     "runtime_constraints": {},
1280     "state": "Locked"
1281 }`
1282
1283         err := json.Unmarshal([]byte(record), &s.api.Container)
1284         c.Assert(err, IsNil)
1285
1286         s.runner.RunArvMount = func([]string, string) (*exec.Cmd, error) { return nil, nil }
1287         s.runner.MkArvClient = func(token string) (IArvadosClient, IKeepClient, *arvados.Client, error) {
1288                 return &ArvTestClient{}, &KeepTestClient{}, nil, nil
1289         }
1290
1291         done := make(chan error)
1292         go func() {
1293                 done <- s.runner.Run()
1294         }()
1295         select {
1296         case <-time.After(20 * time.Second):
1297                 pprof.Lookup("goroutine").WriteTo(os.Stderr, 1)
1298                 c.Fatal("timed out")
1299         case err = <-done:
1300                 c.Check(err, IsNil)
1301         }
1302         dumpAllLogFiles(c, s.runner)
1303
1304         c.Check(s.api.CalledWith("container.log", nil), NotNil)
1305         c.Check(s.api.CalledWith("container.state", "Cancelled"), NotNil)
1306         c.Check(logFileContent(c, s.runner, "stdout.txt"), Matches, "(?ms).*foo\n$")
1307 }
1308
1309 func (s *TestSuite) TestFullRunSetEnv(c *C) {
1310         s.fullRunHelper(c, `{
1311     "command": ["/bin/sh", "-c", "echo $FROBIZ"],
1312     "container_image": "`+arvadostest.DockerImage112PDH+`",
1313     "cwd": "/bin",
1314     "environment": {"FROBIZ": "bilbo"},
1315     "mounts": {"/tmp": {"kind": "tmp"} },
1316     "output_path": "/tmp",
1317     "priority": 1,
1318     "runtime_constraints": {},
1319     "state": "Locked"
1320 }`, nil, func() int {
1321                 fmt.Fprintf(s.executor.created.Stdout, "%v", s.executor.created.Env)
1322                 return 0
1323         })
1324
1325         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
1326         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
1327         c.Check(logFileContent(c, s.runner, "stdout.txt"), Matches, `.*map\[FROBIZ:bilbo\]`)
1328 }
1329
1330 type ArvMountCmdLine struct {
1331         Cmd   []string
1332         token string
1333 }
1334
1335 func (am *ArvMountCmdLine) ArvMountTest(c []string, token string) (*exec.Cmd, error) {
1336         am.Cmd = c
1337         am.token = token
1338         return nil, nil
1339 }
1340
1341 func stubCert(c *C, temp string) string {
1342         path := temp + "/ca-certificates.crt"
1343         err := os.WriteFile(path, []byte{}, 0666)
1344         c.Assert(err, IsNil)
1345         os.Setenv("SSL_CERT_FILE", path)
1346         return path
1347 }
1348
1349 func (s *TestSuite) TestSetupMounts(c *C) {
1350         cr := s.runner
1351         am := &ArvMountCmdLine{}
1352         cr.RunArvMount = am.ArvMountTest
1353         cr.containerClient, _ = apiStub()
1354         cr.ContainerArvClient = &ArvTestClient{}
1355         cr.ContainerKeepClient = &KeepTestClient{}
1356         cr.Container.OutputStorageClasses = []string{"default"}
1357
1358         realTemp := c.MkDir()
1359         certTemp := c.MkDir()
1360         stubCertPath := stubCert(c, certTemp)
1361         cr.parentTemp = realTemp
1362
1363         i := 0
1364         cr.MkTempDir = func(_ string, prefix string) (string, error) {
1365                 i++
1366                 d := fmt.Sprintf("%s/%s%d", realTemp, prefix, i)
1367                 err := os.Mkdir(d, os.ModePerm)
1368                 if err != nil && strings.Contains(err.Error(), ": file exists") {
1369                         // Test case must have pre-populated the tempdir
1370                         err = nil
1371                 }
1372                 return d, err
1373         }
1374
1375         checkEmpty := func() {
1376                 // Should be deleted.
1377                 _, err := os.Stat(realTemp)
1378                 c.Assert(os.IsNotExist(err), Equals, true)
1379
1380                 // Now recreate it for the next test.
1381                 c.Assert(os.Mkdir(realTemp, 0777), IsNil)
1382         }
1383
1384         {
1385                 i = 0
1386                 cr.ArvMountPoint = ""
1387                 cr.Container.Mounts = make(map[string]arvados.Mount)
1388                 cr.Container.Mounts["/tmp"] = arvados.Mount{Kind: "tmp"}
1389                 cr.Container.OutputPath = "/tmp"
1390                 cr.statInterval = 5 * time.Second
1391                 bindmounts, err := cr.SetupMounts()
1392                 c.Check(err, IsNil)
1393                 c.Check(am.Cmd, DeepEquals, []string{"arv-mount", "--foreground",
1394                         "--read-write", "--storage-classes", "default", "--crunchstat-interval=5",
1395                         "--mount-by-pdh", "by_id", "--disable-event-listening", "--mount-by-id", "by_uuid", realTemp + "/keep1"})
1396                 c.Check(bindmounts, DeepEquals, map[string]bindmount{"/tmp": {realTemp + "/tmp2", false}})
1397                 os.RemoveAll(cr.ArvMountPoint)
1398                 cr.CleanupDirs()
1399                 checkEmpty()
1400         }
1401
1402         {
1403                 i = 0
1404                 cr.ArvMountPoint = ""
1405                 cr.Container.Mounts = make(map[string]arvados.Mount)
1406                 cr.Container.Mounts["/out"] = arvados.Mount{Kind: "tmp"}
1407                 cr.Container.Mounts["/tmp"] = arvados.Mount{Kind: "tmp"}
1408                 cr.Container.OutputPath = "/out"
1409                 cr.Container.OutputStorageClasses = []string{"foo", "bar"}
1410
1411                 bindmounts, err := cr.SetupMounts()
1412                 c.Check(err, IsNil)
1413                 c.Check(am.Cmd, DeepEquals, []string{"arv-mount", "--foreground",
1414                         "--read-write", "--storage-classes", "foo,bar", "--crunchstat-interval=5",
1415                         "--mount-by-pdh", "by_id", "--disable-event-listening", "--mount-by-id", "by_uuid", realTemp + "/keep1"})
1416                 c.Check(bindmounts, DeepEquals, map[string]bindmount{"/out": {realTemp + "/tmp2", false}, "/tmp": {realTemp + "/tmp3", false}})
1417                 os.RemoveAll(cr.ArvMountPoint)
1418                 cr.CleanupDirs()
1419                 checkEmpty()
1420         }
1421
1422         {
1423                 i = 0
1424                 cr.ArvMountPoint = ""
1425                 cr.Container.Mounts = make(map[string]arvados.Mount)
1426                 cr.Container.Mounts["/tmp"] = arvados.Mount{Kind: "tmp"}
1427                 cr.Container.OutputPath = "/tmp"
1428                 cr.Container.RuntimeConstraints.API = true
1429                 cr.Container.OutputStorageClasses = []string{"default"}
1430
1431                 bindmounts, err := cr.SetupMounts()
1432                 c.Check(err, IsNil)
1433                 c.Check(am.Cmd, DeepEquals, []string{"arv-mount", "--foreground",
1434                         "--read-write", "--storage-classes", "default", "--crunchstat-interval=5",
1435                         "--mount-by-pdh", "by_id", "--disable-event-listening", "--mount-by-id", "by_uuid", realTemp + "/keep1"})
1436                 c.Check(bindmounts, DeepEquals, map[string]bindmount{"/tmp": {realTemp + "/tmp2", false}, "/etc/arvados/ca-certificates.crt": {stubCertPath, true}})
1437                 os.RemoveAll(cr.ArvMountPoint)
1438                 cr.CleanupDirs()
1439                 checkEmpty()
1440
1441                 cr.Container.RuntimeConstraints.API = false
1442         }
1443
1444         {
1445                 i = 0
1446                 cr.ArvMountPoint = ""
1447                 cr.Container.Mounts = map[string]arvados.Mount{
1448                         "/keeptmp": {Kind: "collection", Writable: true},
1449                 }
1450                 cr.Container.OutputPath = "/keeptmp"
1451
1452                 os.MkdirAll(realTemp+"/keep1/tmp0", os.ModePerm)
1453
1454                 bindmounts, err := cr.SetupMounts()
1455                 c.Check(err, IsNil)
1456                 c.Check(am.Cmd, DeepEquals, []string{"arv-mount", "--foreground",
1457                         "--read-write", "--storage-classes", "default", "--crunchstat-interval=5",
1458                         "--mount-tmp", "tmp0", "--mount-by-pdh", "by_id", "--disable-event-listening", "--mount-by-id", "by_uuid", realTemp + "/keep1"})
1459                 c.Check(bindmounts, DeepEquals, map[string]bindmount{"/keeptmp": {realTemp + "/keep1/tmp0", false}})
1460                 os.RemoveAll(cr.ArvMountPoint)
1461                 cr.CleanupDirs()
1462                 checkEmpty()
1463         }
1464
1465         {
1466                 i = 0
1467                 cr.ArvMountPoint = ""
1468                 cr.Container.Mounts = map[string]arvados.Mount{
1469                         "/keepinp": {Kind: "collection", PortableDataHash: "59389a8f9ee9d399be35462a0f92541c+53"},
1470                         "/keepout": {Kind: "collection", Writable: true},
1471                 }
1472                 cr.Container.OutputPath = "/keepout"
1473
1474                 os.MkdirAll(realTemp+"/keep1/by_id/59389a8f9ee9d399be35462a0f92541c+53", os.ModePerm)
1475                 os.MkdirAll(realTemp+"/keep1/tmp0", os.ModePerm)
1476
1477                 bindmounts, err := cr.SetupMounts()
1478                 c.Check(err, IsNil)
1479                 c.Check(am.Cmd, DeepEquals, []string{"arv-mount", "--foreground",
1480                         "--read-write", "--storage-classes", "default", "--crunchstat-interval=5",
1481                         "--mount-tmp", "tmp0", "--mount-by-pdh", "by_id", "--disable-event-listening", "--mount-by-id", "by_uuid", realTemp + "/keep1"})
1482                 c.Check(bindmounts, DeepEquals, map[string]bindmount{
1483                         "/keepinp": {realTemp + "/keep1/by_id/59389a8f9ee9d399be35462a0f92541c+53", true},
1484                         "/keepout": {realTemp + "/keep1/tmp0", false},
1485                 })
1486                 os.RemoveAll(cr.ArvMountPoint)
1487                 cr.CleanupDirs()
1488                 checkEmpty()
1489         }
1490
1491         {
1492                 i = 0
1493                 cr.ArvMountPoint = ""
1494                 cr.Container.RuntimeConstraints.KeepCacheRAM = 512
1495                 cr.Container.Mounts = map[string]arvados.Mount{
1496                         "/keepinp": {Kind: "collection", PortableDataHash: "59389a8f9ee9d399be35462a0f92541c+53"},
1497                         "/keepout": {Kind: "collection", Writable: true},
1498                 }
1499                 cr.Container.OutputPath = "/keepout"
1500
1501                 os.MkdirAll(realTemp+"/keep1/by_id/59389a8f9ee9d399be35462a0f92541c+53", os.ModePerm)
1502                 os.MkdirAll(realTemp+"/keep1/tmp0", os.ModePerm)
1503
1504                 bindmounts, err := cr.SetupMounts()
1505                 c.Check(err, IsNil)
1506                 c.Check(am.Cmd, DeepEquals, []string{"arv-mount", "--foreground",
1507                         "--read-write", "--storage-classes", "default", "--crunchstat-interval=5", "--ram-cache",
1508                         "--file-cache", "512", "--mount-tmp", "tmp0", "--mount-by-pdh", "by_id", "--disable-event-listening", "--mount-by-id", "by_uuid", realTemp + "/keep1"})
1509                 c.Check(bindmounts, DeepEquals, map[string]bindmount{
1510                         "/keepinp": {realTemp + "/keep1/by_id/59389a8f9ee9d399be35462a0f92541c+53", true},
1511                         "/keepout": {realTemp + "/keep1/tmp0", false},
1512                 })
1513                 os.RemoveAll(cr.ArvMountPoint)
1514                 cr.CleanupDirs()
1515                 checkEmpty()
1516         }
1517
1518         for _, test := range []struct {
1519                 in  interface{}
1520                 out string
1521         }{
1522                 {in: "foo", out: `"foo"`},
1523                 {in: nil, out: `null`},
1524                 {in: map[string]int64{"foo": 123456789123456789}, out: `{"foo":123456789123456789}`},
1525         } {
1526                 i = 0
1527                 cr.ArvMountPoint = ""
1528                 cr.Container.Mounts = map[string]arvados.Mount{
1529                         "/mnt/test.json": {Kind: "json", Content: test.in},
1530                 }
1531                 bindmounts, err := cr.SetupMounts()
1532                 c.Check(err, IsNil)
1533                 c.Check(bindmounts, DeepEquals, map[string]bindmount{
1534                         "/mnt/test.json": {realTemp + "/json2/mountdata.json", true},
1535                 })
1536                 content, err := ioutil.ReadFile(realTemp + "/json2/mountdata.json")
1537                 c.Check(err, IsNil)
1538                 c.Check(content, DeepEquals, []byte(test.out))
1539                 os.RemoveAll(cr.ArvMountPoint)
1540                 cr.CleanupDirs()
1541                 checkEmpty()
1542         }
1543
1544         for _, test := range []struct {
1545                 in  interface{}
1546                 out string
1547         }{
1548                 {in: "foo", out: `foo`},
1549                 {in: nil, out: "error"},
1550                 {in: map[string]int64{"foo": 123456789123456789}, out: "error"},
1551         } {
1552                 i = 0
1553                 cr.ArvMountPoint = ""
1554                 cr.Container.Mounts = map[string]arvados.Mount{
1555                         "/mnt/test.txt": {Kind: "text", Content: test.in},
1556                 }
1557                 bindmounts, err := cr.SetupMounts()
1558                 if test.out == "error" {
1559                         c.Check(err.Error(), Equals, "content for mount \"/mnt/test.txt\" must be a string")
1560                 } else {
1561                         c.Check(err, IsNil)
1562                         c.Check(bindmounts, DeepEquals, map[string]bindmount{
1563                                 "/mnt/test.txt": {realTemp + "/text2/mountdata.text", true},
1564                         })
1565                         content, err := ioutil.ReadFile(realTemp + "/text2/mountdata.text")
1566                         c.Check(err, IsNil)
1567                         c.Check(content, DeepEquals, []byte(test.out))
1568                 }
1569                 os.RemoveAll(cr.ArvMountPoint)
1570                 cr.CleanupDirs()
1571                 checkEmpty()
1572         }
1573
1574         // Read-only mount points are allowed underneath output_dir mount point
1575         {
1576                 i = 0
1577                 cr.ArvMountPoint = ""
1578                 cr.Container.Mounts = make(map[string]arvados.Mount)
1579                 cr.Container.Mounts = map[string]arvados.Mount{
1580                         "/tmp":     {Kind: "tmp"},
1581                         "/tmp/foo": {Kind: "collection"},
1582                 }
1583                 cr.Container.OutputPath = "/tmp"
1584
1585                 os.MkdirAll(realTemp+"/keep1/tmp0", os.ModePerm)
1586
1587                 bindmounts, err := cr.SetupMounts()
1588                 c.Check(err, IsNil)
1589                 c.Check(am.Cmd, DeepEquals, []string{"arv-mount", "--foreground",
1590                         "--read-write", "--storage-classes", "default", "--crunchstat-interval=5", "--ram-cache",
1591                         "--file-cache", "512", "--mount-tmp", "tmp0", "--mount-by-pdh", "by_id", "--disable-event-listening", "--mount-by-id", "by_uuid", realTemp + "/keep1"})
1592                 c.Check(bindmounts, DeepEquals, map[string]bindmount{
1593                         "/tmp":     {realTemp + "/tmp2", false},
1594                         "/tmp/foo": {realTemp + "/keep1/tmp0", true},
1595                 })
1596                 os.RemoveAll(cr.ArvMountPoint)
1597                 cr.CleanupDirs()
1598                 checkEmpty()
1599         }
1600
1601         // Writable mount points copied to output_dir mount point
1602         {
1603                 i = 0
1604                 cr.ArvMountPoint = ""
1605                 cr.Container.Mounts = make(map[string]arvados.Mount)
1606                 cr.Container.Mounts = map[string]arvados.Mount{
1607                         "/tmp": {Kind: "tmp"},
1608                         "/tmp/foo": {Kind: "collection",
1609                                 PortableDataHash: "59389a8f9ee9d399be35462a0f92541c+53",
1610                                 Writable:         true},
1611                         "/tmp/bar": {Kind: "collection",
1612                                 PortableDataHash: "59389a8f9ee9d399be35462a0f92541d+53",
1613                                 Path:             "baz",
1614                                 Writable:         true},
1615                 }
1616                 cr.Container.OutputPath = "/tmp"
1617
1618                 os.MkdirAll(realTemp+"/keep1/by_id/59389a8f9ee9d399be35462a0f92541c+53", os.ModePerm)
1619                 os.MkdirAll(realTemp+"/keep1/by_id/59389a8f9ee9d399be35462a0f92541d+53/baz", os.ModePerm)
1620
1621                 rf, _ := os.Create(realTemp + "/keep1/by_id/59389a8f9ee9d399be35462a0f92541d+53/baz/quux")
1622                 rf.Write([]byte("bar"))
1623                 rf.Close()
1624
1625                 _, err := cr.SetupMounts()
1626                 c.Check(err, IsNil)
1627                 _, err = os.Stat(cr.HostOutputDir + "/foo")
1628                 c.Check(err, IsNil)
1629                 _, err = os.Stat(cr.HostOutputDir + "/bar/quux")
1630                 c.Check(err, IsNil)
1631                 os.RemoveAll(cr.ArvMountPoint)
1632                 cr.CleanupDirs()
1633                 checkEmpty()
1634         }
1635
1636         // Only mount points of kind 'collection' are allowed underneath output_dir mount point
1637         {
1638                 i = 0
1639                 cr.ArvMountPoint = ""
1640                 cr.Container.Mounts = make(map[string]arvados.Mount)
1641                 cr.Container.Mounts = map[string]arvados.Mount{
1642                         "/tmp":     {Kind: "tmp"},
1643                         "/tmp/foo": {Kind: "tmp"},
1644                 }
1645                 cr.Container.OutputPath = "/tmp"
1646
1647                 _, err := cr.SetupMounts()
1648                 c.Check(err, NotNil)
1649                 c.Check(err, ErrorMatches, `only mount points of kind 'collection', 'text' or 'json' are supported underneath the output_path.*`)
1650                 os.RemoveAll(cr.ArvMountPoint)
1651                 cr.CleanupDirs()
1652                 checkEmpty()
1653         }
1654
1655         // Only mount point of kind 'collection' is allowed for stdin
1656         {
1657                 i = 0
1658                 cr.ArvMountPoint = ""
1659                 cr.Container.Mounts = make(map[string]arvados.Mount)
1660                 cr.Container.Mounts = map[string]arvados.Mount{
1661                         "stdin": {Kind: "tmp"},
1662                 }
1663
1664                 _, err := cr.SetupMounts()
1665                 c.Check(err, NotNil)
1666                 c.Check(err, ErrorMatches, `unsupported mount kind 'tmp' for stdin.*`)
1667                 os.RemoveAll(cr.ArvMountPoint)
1668                 cr.CleanupDirs()
1669                 checkEmpty()
1670         }
1671 }
1672
1673 func (s *TestSuite) TestStdout(c *C) {
1674         helperRecord := `{
1675                 "command": ["/bin/sh", "-c", "echo $FROBIZ"],
1676                 "container_image": "` + arvadostest.DockerImage112PDH + `",
1677                 "cwd": "/bin",
1678                 "environment": {"FROBIZ": "bilbo"},
1679                 "mounts": {"/tmp": {"kind": "tmp"}, "stdout": {"kind": "file", "path": "/tmp/a/b/c.out"} },
1680                 "output_path": "/tmp",
1681                 "priority": 1,
1682                 "runtime_constraints": {},
1683                 "state": "Locked"
1684         }`
1685
1686         s.fullRunHelper(c, helperRecord, nil, func() int {
1687                 fmt.Fprintln(s.executor.created.Stdout, s.executor.created.Env["FROBIZ"])
1688                 return 0
1689         })
1690
1691         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
1692         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
1693         c.Check(s.runner.ContainerArvClient.(*ArvTestClient).CalledWith("collection.manifest_text", "./a/b 307372fa8fd5c146b22ae7a45b49bc31+6 0:6:c.out\n"), NotNil)
1694 }
1695
1696 // Used by the TestStdoutWithWrongPath*()
1697 func (s *TestSuite) stdoutErrorRunHelper(c *C, record string, fn func() int) (*ArvTestClient, *ContainerRunner, error) {
1698         err := json.Unmarshal([]byte(record), &s.api.Container)
1699         c.Assert(err, IsNil)
1700         s.executor.runFunc = fn
1701         s.runner.RunArvMount = (&ArvMountCmdLine{}).ArvMountTest
1702         s.runner.MkArvClient = func(token string) (IArvadosClient, IKeepClient, *arvados.Client, error) {
1703                 return s.api, &KeepTestClient{}, nil, nil
1704         }
1705         return s.api, s.runner, s.runner.Run()
1706 }
1707
1708 func (s *TestSuite) TestStdoutWithWrongPath(c *C) {
1709         _, _, err := s.stdoutErrorRunHelper(c, `{
1710     "mounts": {"/tmp": {"kind": "tmp"}, "stdout": {"kind": "file", "path":"/tmpa.out"} },
1711     "output_path": "/tmp",
1712     "state": "Locked"
1713 }`, func() int { return 0 })
1714         c.Check(err, ErrorMatches, ".*Stdout path does not start with OutputPath.*")
1715 }
1716
1717 func (s *TestSuite) TestStdoutWithWrongKindTmp(c *C) {
1718         _, _, err := s.stdoutErrorRunHelper(c, `{
1719     "mounts": {"/tmp": {"kind": "tmp"}, "stdout": {"kind": "tmp", "path":"/tmp/a.out"} },
1720     "output_path": "/tmp",
1721     "state": "Locked"
1722 }`, func() int { return 0 })
1723         c.Check(err, ErrorMatches, ".*unsupported mount kind 'tmp' for stdout.*")
1724 }
1725
1726 func (s *TestSuite) TestStdoutWithWrongKindCollection(c *C) {
1727         _, _, err := s.stdoutErrorRunHelper(c, `{
1728     "mounts": {"/tmp": {"kind": "tmp"}, "stdout": {"kind": "collection", "path":"/tmp/a.out"} },
1729     "output_path": "/tmp",
1730     "state": "Locked"
1731 }`, func() int { return 0 })
1732         c.Check(err, ErrorMatches, ".*unsupported mount kind 'collection' for stdout.*")
1733 }
1734
1735 func (s *TestSuite) TestFullRunWithAPI(c *C) {
1736         s.fullRunHelper(c, `{
1737     "command": ["/bin/sh", "-c", "true $ARVADOS_API_HOST"],
1738     "container_image": "`+arvadostest.DockerImage112PDH+`",
1739     "cwd": "/bin",
1740     "environment": {},
1741     "mounts": {"/tmp": {"kind": "tmp"} },
1742     "output_path": "/tmp",
1743     "priority": 1,
1744     "runtime_constraints": {"API": true},
1745     "state": "Locked"
1746 }`, nil, func() int {
1747                 c.Check(s.executor.created.Env["ARVADOS_API_HOST"], Equals, os.Getenv("ARVADOS_API_HOST"))
1748                 return 3
1749         })
1750         c.Check(s.api.CalledWith("container.exit_code", 3), NotNil)
1751         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
1752         c.Check(logFileContent(c, s.runner, "crunch-run.txt"), Matches, `(?ms).*`+reTimestamp+` Container exited with status code 3\n.*`)
1753 }
1754
1755 func (s *TestSuite) TestFullRunSetOutput(c *C) {
1756         defer os.Setenv("ARVADOS_API_HOST", os.Getenv("ARVADOS_API_HOST"))
1757         os.Setenv("ARVADOS_API_HOST", "test.arvados.org")
1758         s.fullRunHelper(c, `{
1759     "command": ["/bin/sh", "-c", "echo $ARVADOS_API_HOST"],
1760     "container_image": "`+arvadostest.DockerImage112PDH+`",
1761     "cwd": "/bin",
1762     "environment": {},
1763     "mounts": {"/tmp": {"kind": "tmp"} },
1764     "output_path": "/tmp",
1765     "priority": 1,
1766     "runtime_constraints": {"API": true},
1767     "state": "Locked"
1768 }`, nil, func() int {
1769                 s.api.Container.Output = arvadostest.DockerImage112PDH
1770                 return 0
1771         })
1772
1773         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
1774         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
1775         c.Check(s.api.CalledWith("container.output", arvadostest.DockerImage112PDH), NotNil)
1776 }
1777
1778 func (s *TestSuite) TestArvMountRuntimeStatusWarning(c *C) {
1779         s.runner.RunArvMount = func([]string, string) (*exec.Cmd, error) {
1780                 os.Mkdir(s.runner.ArvMountPoint+"/by_id", 0666)
1781                 ioutil.WriteFile(s.runner.ArvMountPoint+"/by_id/README", nil, 0666)
1782                 return s.runner.ArvMountCmd([]string{"bash", "-c", "echo >&2 Test: Keep write error: I am a teapot; sleep 3"}, "")
1783         }
1784         s.executor.runFunc = func() int {
1785                 time.Sleep(time.Second)
1786                 return 137
1787         }
1788         record := `{
1789     "command": ["sleep", "1"],
1790     "container_image": "` + arvadostest.DockerImage112PDH + `",
1791     "cwd": "/bin",
1792     "environment": {},
1793     "mounts": {"/tmp": {"kind": "tmp"} },
1794     "output_path": "/tmp",
1795     "priority": 1,
1796     "runtime_constraints": {"API": true},
1797     "state": "Locked"
1798 }`
1799         err := json.Unmarshal([]byte(record), &s.api.Container)
1800         c.Assert(err, IsNil)
1801         err = s.runner.Run()
1802         c.Assert(err, IsNil)
1803         c.Check(s.api.CalledWith("container.exit_code", 137), NotNil)
1804         c.Check(s.api.CalledWith("container.runtime_status.warning", "arv-mount: Keep write error"), NotNil)
1805         c.Check(s.api.CalledWith("container.runtime_status.warningDetail", "Test: Keep write error: I am a teapot"), NotNil)
1806         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
1807         c.Check(logFileContent(c, s.runner, "crunch-run.txt"), Matches, `(?ms).*`+reTimestamp+` Container exited with status code 137 \(signal 9, SIGKILL\).*`)
1808         c.Check(logFileContent(c, s.runner, "arv-mount.txt"), Matches, reTimestamp+` Test: Keep write error: I am a teapot\n`)
1809 }
1810
1811 func (s *TestSuite) TestStdoutWithExcludeFromOutputMountPointUnderOutputDir(c *C) {
1812         helperRecord := `{
1813                 "command": ["/bin/sh", "-c", "echo $FROBIZ"],
1814                 "container_image": "` + arvadostest.DockerImage112PDH + `",
1815                 "cwd": "/bin",
1816                 "environment": {"FROBIZ": "bilbo"},
1817                 "mounts": {
1818         "/tmp": {"kind": "tmp"},
1819         "/tmp/foo": {"kind": "collection",
1820                      "portable_data_hash": "a3e8f74c6f101eae01fa08bfb4e49b3a+54",
1821                      "exclude_from_output": true
1822         },
1823         "stdout": {"kind": "file", "path": "/tmp/a/b/c.out"}
1824     },
1825                 "output_path": "/tmp",
1826                 "priority": 1,
1827                 "runtime_constraints": {},
1828                 "state": "Locked"
1829         }`
1830
1831         extraMounts := []string{"a3e8f74c6f101eae01fa08bfb4e49b3a+54"}
1832
1833         s.fullRunHelper(c, helperRecord, extraMounts, func() int {
1834                 fmt.Fprintln(s.executor.created.Stdout, s.executor.created.Env["FROBIZ"])
1835                 return 0
1836         })
1837
1838         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
1839         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
1840         c.Check(s.runner.ContainerArvClient.(*ArvTestClient).CalledWith("collection.manifest_text", "./a/b 307372fa8fd5c146b22ae7a45b49bc31+6 0:6:c.out\n"), NotNil)
1841 }
1842
1843 func (s *TestSuite) TestStdoutWithMultipleMountPointsUnderOutputDir(c *C) {
1844         helperRecord := `{
1845                 "command": ["/bin/sh", "-c", "echo $FROBIZ"],
1846                 "container_image": "` + arvadostest.DockerImage112PDH + `",
1847                 "cwd": "/bin",
1848                 "environment": {"FROBIZ": "bilbo"},
1849                 "mounts": {
1850         "/tmp": {"kind": "tmp"},
1851         "/tmp/foo/bar": {"kind": "collection", "portable_data_hash": "a0def87f80dd594d4675809e83bd4f15+367", "path":"/file2_in_main.txt"},
1852         "/tmp/foo/sub1": {"kind": "collection", "portable_data_hash": "a0def87f80dd594d4675809e83bd4f15+367", "path":"/subdir1"},
1853         "/tmp/foo/sub1file2": {"kind": "collection", "portable_data_hash": "a0def87f80dd594d4675809e83bd4f15+367", "path":"/subdir1/file2_in_subdir1.txt"},
1854         "/tmp/foo/baz/sub2file2": {"kind": "collection", "portable_data_hash": "a0def87f80dd594d4675809e83bd4f15+367", "path":"/subdir1/subdir2/file2_in_subdir2.txt"},
1855         "stdout": {"kind": "file", "path": "/tmp/a/b/c.out"}
1856     },
1857                 "output_path": "/tmp",
1858                 "priority": 1,
1859                 "runtime_constraints": {},
1860                 "state": "Locked",
1861                 "uuid": "zzzzz-dz642-202301130848001"
1862         }`
1863
1864         extraMounts := []string{
1865                 "a0def87f80dd594d4675809e83bd4f15+367/file2_in_main.txt",
1866                 "a0def87f80dd594d4675809e83bd4f15+367/subdir1/file2_in_subdir1.txt",
1867                 "a0def87f80dd594d4675809e83bd4f15+367/subdir1/subdir2/file2_in_subdir2.txt",
1868         }
1869
1870         api, _, realtemp := s.fullRunHelper(c, helperRecord, extraMounts, func() int {
1871                 fmt.Fprintln(s.executor.created.Stdout, s.executor.created.Env["FROBIZ"])
1872                 return 0
1873         })
1874
1875         c.Check(s.executor.created.BindMounts, DeepEquals, map[string]bindmount{
1876                 "/tmp":                   {realtemp + "/tmp1", false},
1877                 "/tmp/foo/bar":           {s.keepmount + "/by_id/a0def87f80dd594d4675809e83bd4f15+367/file2_in_main.txt", true},
1878                 "/tmp/foo/baz/sub2file2": {s.keepmount + "/by_id/a0def87f80dd594d4675809e83bd4f15+367/subdir1/subdir2/file2_in_subdir2.txt", true},
1879                 "/tmp/foo/sub1":          {s.keepmount + "/by_id/a0def87f80dd594d4675809e83bd4f15+367/subdir1", true},
1880                 "/tmp/foo/sub1file2":     {s.keepmount + "/by_id/a0def87f80dd594d4675809e83bd4f15+367/subdir1/file2_in_subdir1.txt", true},
1881         })
1882
1883         c.Check(api.CalledWith("container.exit_code", 0), NotNil)
1884         c.Check(api.CalledWith("container.state", "Complete"), NotNil)
1885         output_count := uint(0)
1886         for _, v := range s.runner.ContainerArvClient.(*ArvTestClient).Content {
1887                 if v["collection"] == nil {
1888                         continue
1889                 }
1890                 collection := v["collection"].(arvadosclient.Dict)
1891                 if collection["name"].(string) != "output for zzzzz-dz642-202301130848001" {
1892                         continue
1893                 }
1894                 c.Check(v["ensure_unique_name"], Equals, true)
1895                 c.Check(collection["manifest_text"].(string), Equals, `./a/b 307372fa8fd5c146b22ae7a45b49bc31+6 0:6:c.out
1896 ./foo 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396c73c0abcdefgh11234567890@569fa8c3 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396cabcdefghij6419876543234@569fa8c4 9:18:bar 36:18:sub1file2
1897 ./foo/baz 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396c73c0bcdefghijk544332211@569fa8c5 9:18:sub2file2
1898 ./foo/sub1 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396cabcdefghij6419876543234@569fa8c4 0:9:file1_in_subdir1.txt 9:18:file2_in_subdir1.txt
1899 ./foo/sub1/subdir2 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396c73c0bcdefghijk544332211@569fa8c5 0:9:file1_in_subdir2.txt 9:18:file2_in_subdir2.txt
1900 `)
1901                 output_count++
1902         }
1903         c.Check(output_count, Not(Equals), uint(0))
1904 }
1905
1906 func (s *TestSuite) TestStdoutWithMountPointsUnderOutputDirDenormalizedManifest(c *C) {
1907         helperRecord := `{
1908                 "command": ["/bin/sh", "-c", "echo $FROBIZ"],
1909                 "container_image": "` + arvadostest.DockerImage112PDH + `",
1910                 "cwd": "/bin",
1911                 "environment": {"FROBIZ": "bilbo"},
1912                 "mounts": {
1913         "/tmp": {"kind": "tmp"},
1914         "/tmp/foo/bar": {"kind": "collection", "portable_data_hash": "b0def87f80dd594d4675809e83bd4f15+367", "path": "/subdir1/file2_in_subdir1.txt"},
1915         "stdout": {"kind": "file", "path": "/tmp/a/b/c.out"}
1916     },
1917                 "output_path": "/tmp",
1918                 "priority": 1,
1919                 "runtime_constraints": {},
1920                 "state": "Locked",
1921                 "uuid": "zzzzz-dz642-202301130848002"
1922         }`
1923
1924         extraMounts := []string{
1925                 "b0def87f80dd594d4675809e83bd4f15+367/subdir1/file2_in_subdir1.txt",
1926         }
1927
1928         s.fullRunHelper(c, helperRecord, extraMounts, func() int {
1929                 fmt.Fprintln(s.executor.created.Stdout, s.executor.created.Env["FROBIZ"])
1930                 return 0
1931         })
1932
1933         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
1934         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
1935         output_count := uint(0)
1936         for _, v := range s.runner.ContainerArvClient.(*ArvTestClient).Content {
1937                 if v["collection"] == nil {
1938                         continue
1939                 }
1940                 collection := v["collection"].(arvadosclient.Dict)
1941                 if collection["name"].(string) != "output for zzzzz-dz642-202301130848002" {
1942                         continue
1943                 }
1944                 c.Check(collection["manifest_text"].(string), Equals, `./a/b 307372fa8fd5c146b22ae7a45b49bc31+6 0:6:c.out
1945 ./foo 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396c73c0abcdefgh11234567890@569fa8c3 10:17:bar
1946 `)
1947                 output_count++
1948         }
1949         c.Check(output_count, Not(Equals), uint(0))
1950 }
1951
1952 func (s *TestSuite) TestOutputError(c *C) {
1953         helperRecord := `{
1954                 "command": ["/bin/sh", "-c", "echo $FROBIZ"],
1955                 "container_image": "` + arvadostest.DockerImage112PDH + `",
1956                 "cwd": "/bin",
1957                 "environment": {"FROBIZ": "bilbo"},
1958                 "mounts": {
1959                         "/tmp": {"kind": "tmp"}
1960                 },
1961                 "output_path": "/tmp",
1962                 "priority": 1,
1963                 "runtime_constraints": {},
1964                 "state": "Locked"
1965         }`
1966         s.fullRunHelper(c, helperRecord, nil, func() int {
1967                 os.Symlink("/etc/hosts", s.runner.HostOutputDir+"/baz")
1968                 return 0
1969         })
1970
1971         c.Check(s.api.CalledWith("container.state", "Cancelled"), NotNil)
1972 }
1973
1974 func (s *TestSuite) TestStdinCollectionMountPoint(c *C) {
1975         helperRecord := `{
1976                 "command": ["/bin/sh", "-c", "echo $FROBIZ"],
1977                 "container_image": "` + arvadostest.DockerImage112PDH + `",
1978                 "cwd": "/bin",
1979                 "environment": {"FROBIZ": "bilbo"},
1980                 "mounts": {
1981         "/tmp": {"kind": "tmp"},
1982         "stdin": {"kind": "collection", "portable_data_hash": "b0def87f80dd594d4675809e83bd4f15+367", "path": "/file1_in_main.txt"},
1983         "stdout": {"kind": "file", "path": "/tmp/a/b/c.out"}
1984     },
1985                 "output_path": "/tmp",
1986                 "priority": 1,
1987                 "runtime_constraints": {},
1988                 "state": "Locked"
1989         }`
1990
1991         extraMounts := []string{
1992                 "b0def87f80dd594d4675809e83bd4f15+367/file1_in_main.txt",
1993         }
1994
1995         api, _, _ := s.fullRunHelper(c, helperRecord, extraMounts, func() int {
1996                 fmt.Fprintln(s.executor.created.Stdout, s.executor.created.Env["FROBIZ"])
1997                 return 0
1998         })
1999
2000         c.Check(api.CalledWith("container.exit_code", 0), NotNil)
2001         c.Check(api.CalledWith("container.state", "Complete"), NotNil)
2002         for _, v := range api.Content {
2003                 if v["collection"] != nil {
2004                         collection := v["collection"].(arvadosclient.Dict)
2005                         if strings.Index(collection["name"].(string), "output") == 0 {
2006                                 manifest := collection["manifest_text"].(string)
2007                                 c.Check(manifest, Equals, `./a/b 307372fa8fd5c146b22ae7a45b49bc31+6 0:6:c.out
2008 `)
2009                         }
2010                 }
2011         }
2012 }
2013
2014 func (s *TestSuite) TestStdinJsonMountPoint(c *C) {
2015         helperRecord := `{
2016                 "command": ["/bin/sh", "-c", "echo $FROBIZ"],
2017                 "container_image": "` + arvadostest.DockerImage112PDH + `",
2018                 "cwd": "/bin",
2019                 "environment": {"FROBIZ": "bilbo"},
2020                 "mounts": {
2021         "/tmp": {"kind": "tmp"},
2022         "stdin": {"kind": "json", "content": "foo"},
2023         "stdout": {"kind": "file", "path": "/tmp/a/b/c.out"}
2024     },
2025                 "output_path": "/tmp",
2026                 "priority": 1,
2027                 "runtime_constraints": {},
2028                 "state": "Locked"
2029         }`
2030
2031         api, _, _ := s.fullRunHelper(c, helperRecord, nil, func() int {
2032                 fmt.Fprintln(s.executor.created.Stdout, s.executor.created.Env["FROBIZ"])
2033                 return 0
2034         })
2035
2036         c.Check(api.CalledWith("container.exit_code", 0), NotNil)
2037         c.Check(api.CalledWith("container.state", "Complete"), NotNil)
2038         for _, v := range api.Content {
2039                 if v["collection"] != nil {
2040                         collection := v["collection"].(arvadosclient.Dict)
2041                         if strings.Index(collection["name"].(string), "output") == 0 {
2042                                 manifest := collection["manifest_text"].(string)
2043                                 c.Check(manifest, Equals, `./a/b 307372fa8fd5c146b22ae7a45b49bc31+6 0:6:c.out
2044 `)
2045                         }
2046                 }
2047         }
2048 }
2049
2050 func (s *TestSuite) TestStderrMount(c *C) {
2051         api, cr, _ := s.fullRunHelper(c, `{
2052     "command": ["/bin/sh", "-c", "echo hello;exit 1"],
2053     "container_image": "`+arvadostest.DockerImage112PDH+`",
2054     "cwd": ".",
2055     "environment": {},
2056     "mounts": {"/tmp": {"kind": "tmp"},
2057                "stdout": {"kind": "file", "path": "/tmp/a/out.txt"},
2058                "stderr": {"kind": "file", "path": "/tmp/b/err.txt"}},
2059     "output_path": "/tmp",
2060     "priority": 1,
2061     "runtime_constraints": {},
2062     "state": "Locked"
2063 }`, nil, func() int {
2064                 fmt.Fprintln(s.executor.created.Stdout, "hello")
2065                 fmt.Fprintln(s.executor.created.Stderr, "oops")
2066                 return 1
2067         })
2068
2069         final := api.CalledWith("container.state", "Complete")
2070         c.Assert(final, NotNil)
2071         c.Check(final["container"].(arvadosclient.Dict)["exit_code"], Equals, 1)
2072         c.Check(final["container"].(arvadosclient.Dict)["log"], NotNil)
2073
2074         c.Check(cr.ContainerArvClient.(*ArvTestClient).CalledWith("collection.manifest_text", "./a b1946ac92492d2347c6235b4d2611184+6 0:6:out.txt\n./b 38af5c54926b620264ab1501150cf189+5 0:5:err.txt\n"), NotNil)
2075 }
2076
2077 func (s *TestSuite) TestNumberRoundTrip(c *C) {
2078         s.api.callraw = true
2079         err := s.runner.fetchContainerRecord()
2080         c.Assert(err, IsNil)
2081         jsondata, err := json.Marshal(s.runner.Container.Mounts["/json"].Content)
2082         c.Logf("%#v", s.runner.Container)
2083         c.Check(err, IsNil)
2084         c.Check(string(jsondata), Equals, `{"number":123456789123456789}`)
2085 }
2086
2087 func (s *TestSuite) TestFullBrokenDocker(c *C) {
2088         nextState := ""
2089         for _, setup := range []func(){
2090                 func() {
2091                         c.Log("// waitErr = ocl runtime error")
2092                         s.executor.waitErr = errors.New(`Error response from daemon: oci runtime error: container_linux.go:247: starting container process caused "process_linux.go:359: container init caused \"rootfs_linux.go:54: mounting \\\"/tmp/keep453790790/by_id/99999999999999999999999999999999+99999/myGenome\\\" to rootfs \\\"/tmp/docker/overlay2/9999999999999999999999999999999999999999999999999999999999999999/merged\\\" at \\\"/tmp/docker/overlay2/9999999999999999999999999999999999999999999999999999999999999999/merged/keep/99999999999999999999999999999999+99999/myGenome\\\" caused \\\"no such file or directory\\\"\""`)
2093                         nextState = "Cancelled"
2094                 },
2095                 func() {
2096                         c.Log("// loadErr = cannot connect")
2097                         s.executor.loadErr = errors.New("Cannot connect to the Docker daemon at unix:///var/run/docker.sock. Is the docker daemon running?")
2098                         s.runner.brokenNodeHook = c.MkDir() + "/broken-node-hook"
2099                         err := ioutil.WriteFile(s.runner.brokenNodeHook, []byte("#!/bin/sh\nexec echo killme\n"), 0700)
2100                         c.Assert(err, IsNil)
2101                         nextState = "Queued"
2102                 },
2103         } {
2104                 s.SetUpTest(c)
2105                 setup()
2106                 s.fullRunHelper(c, `{
2107     "command": ["echo", "hello world"],
2108     "container_image": "`+arvadostest.DockerImage112PDH+`",
2109     "cwd": ".",
2110     "environment": {},
2111     "mounts": {"/tmp": {"kind": "tmp"} },
2112     "output_path": "/tmp",
2113     "priority": 1,
2114     "runtime_constraints": {},
2115     "state": "Locked"
2116 }`, nil, func() int { return 0 })
2117                 c.Check(s.api.CalledWith("container.state", nextState), NotNil)
2118                 logs := logFileContent(c, s.runner, "crunch-run.txt")
2119                 c.Check(logs, Matches, "(?ms).*unable to run containers.*")
2120                 if s.runner.brokenNodeHook != "" {
2121                         c.Check(logs, Matches, "(?ms).*Running broken node hook.*")
2122                         c.Check(logs, Matches, "(?ms).*killme.*")
2123                         c.Check(logs, Not(Matches), "(?ms).*Writing /var/lock/crunch-run-broken to mark node as broken.*")
2124                 } else {
2125                         c.Check(logs, Matches, "(?ms).*Writing /var/lock/crunch-run-broken to mark node as broken.*")
2126                 }
2127         }
2128 }
2129
2130 func (s *TestSuite) TestBadCommand(c *C) {
2131         for _, startError := range []string{
2132                 `panic: standard_init_linux.go:175: exec user process caused "no such file or directory"`,
2133                 `Error response from daemon: Cannot start container 41f26cbc43bcc1280f4323efb1830a394ba8660c9d1c2b564ba42bf7f7694845: [8] System error: no such file or directory`,
2134                 `Error response from daemon: Cannot start container 58099cd76c834f3dc2a4fb76c8028f049ae6d4fdf0ec373e1f2cfea030670c2d: [8] System error: exec: "foobar": executable file not found in $PATH`,
2135         } {
2136                 s.SetUpTest(c)
2137                 s.executor.startErr = errors.New(startError)
2138                 s.fullRunHelper(c, `{
2139     "command": ["echo", "hello world"],
2140     "container_image": "`+arvadostest.DockerImage112PDH+`",
2141     "cwd": ".",
2142     "environment": {},
2143     "mounts": {"/tmp": {"kind": "tmp"} },
2144     "output_path": "/tmp",
2145     "priority": 1,
2146     "runtime_constraints": {},
2147     "state": "Locked"
2148 }`, nil, func() int { return 0 })
2149                 c.Check(s.api.CalledWith("container.state", "Cancelled"), NotNil)
2150                 c.Check(logFileContent(c, s.runner, "crunch-run.txt"), Matches, "(?ms).*Possible causes:.*is missing.*")
2151         }
2152 }
2153
2154 func (s *TestSuite) TestSecretTextMountPoint(c *C) {
2155         helperRecord := `{
2156                 "command": ["true"],
2157                 "container_image": "` + arvadostest.DockerImage112PDH + `",
2158                 "cwd": "/bin",
2159                 "mounts": {
2160                     "/tmp": {"kind": "tmp"},
2161                     "/tmp/secret.conf": {"kind": "text", "content": "mypassword"}
2162                 },
2163                 "secret_mounts": {
2164                 },
2165                 "output_path": "/tmp",
2166                 "priority": 1,
2167                 "runtime_constraints": {},
2168                 "state": "Locked"
2169         }`
2170
2171         s.fullRunHelper(c, helperRecord, nil, func() int {
2172                 content, err := ioutil.ReadFile(s.runner.HostOutputDir + "/secret.conf")
2173                 c.Check(err, IsNil)
2174                 c.Check(string(content), Equals, "mypassword")
2175                 return 0
2176         })
2177
2178         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
2179         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
2180         c.Check(s.runner.ContainerArvClient.(*ArvTestClient).CalledWith("collection.manifest_text", ". 34819d7beeabb9260a5c854bc85b3e44+10 0:10:secret.conf\n"), NotNil)
2181         c.Check(s.runner.ContainerArvClient.(*ArvTestClient).CalledWith("collection.manifest_text", ""), IsNil)
2182
2183         // under secret mounts, not captured in output
2184         helperRecord = `{
2185                 "command": ["true"],
2186                 "container_image": "` + arvadostest.DockerImage112PDH + `",
2187                 "cwd": "/bin",
2188                 "mounts": {
2189                     "/tmp": {"kind": "tmp"}
2190                 },
2191                 "secret_mounts": {
2192                     "/tmp/secret.conf": {"kind": "text", "content": "mypassword"}
2193                 },
2194                 "output_path": "/tmp",
2195                 "priority": 1,
2196                 "runtime_constraints": {},
2197                 "state": "Locked"
2198         }`
2199
2200         s.SetUpTest(c)
2201         s.fullRunHelper(c, helperRecord, nil, func() int {
2202                 content, err := ioutil.ReadFile(s.runner.HostOutputDir + "/secret.conf")
2203                 c.Check(err, IsNil)
2204                 c.Check(string(content), Equals, "mypassword")
2205                 return 0
2206         })
2207
2208         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
2209         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
2210         c.Check(s.runner.ContainerArvClient.(*ArvTestClient).CalledWith("collection.manifest_text", ". 34819d7beeabb9260a5c854bc85b3e44+10 0:10:secret.conf\n"), IsNil)
2211         c.Check(s.runner.ContainerArvClient.(*ArvTestClient).CalledWith("collection.manifest_text", ""), NotNil)
2212
2213         // under secret mounts, output dir is a collection, not captured in output
2214         helperRecord = `{
2215                 "command": ["true"],
2216                 "container_image": "` + arvadostest.DockerImage112PDH + `",
2217                 "cwd": "/bin",
2218                 "mounts": {
2219                     "/tmp": {"kind": "collection", "writable": true}
2220                 },
2221                 "secret_mounts": {
2222                     "/tmp/secret.conf": {"kind": "text", "content": "mypassword"}
2223                 },
2224                 "output_path": "/tmp",
2225                 "priority": 1,
2226                 "runtime_constraints": {},
2227                 "state": "Locked"
2228         }`
2229
2230         s.SetUpTest(c)
2231         _, _, realtemp := s.fullRunHelper(c, helperRecord, nil, func() int {
2232                 // secret.conf should be provisioned as a separate
2233                 // bind mount, i.e., it should not appear in the
2234                 // (fake) fuse filesystem as viewed from the host.
2235                 content, err := ioutil.ReadFile(s.runner.HostOutputDir + "/secret.conf")
2236                 if !c.Check(errors.Is(err, os.ErrNotExist), Equals, true) {
2237                         c.Logf("secret.conf: content %q, err %#v", content, err)
2238                 }
2239                 err = ioutil.WriteFile(s.runner.HostOutputDir+"/.arvados#collection", []byte(`{"manifest_text":". acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:foo.txt\n"}`), 0700)
2240                 c.Check(err, IsNil)
2241                 return 0
2242         })
2243
2244         content, err := ioutil.ReadFile(realtemp + "/text1/mountdata.text")
2245         c.Check(err, IsNil)
2246         c.Check(string(content), Equals, "mypassword")
2247         c.Check(s.executor.created.BindMounts["/tmp/secret.conf"], DeepEquals, bindmount{realtemp + "/text1/mountdata.text", true})
2248         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
2249         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
2250         c.Check(s.runner.ContainerArvClient.(*ArvTestClient).CalledWith("collection.manifest_text", ". acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:foo.txt\n"), NotNil)
2251 }
2252
2253 func (s *TestSuite) TestCalculateCost(c *C) {
2254         defer func(s string) { lockdir = s }(lockdir)
2255         lockdir = c.MkDir()
2256         now := time.Now()
2257         cr := s.runner
2258         cr.costStartTime = now.Add(-time.Hour)
2259         var logbuf bytes.Buffer
2260         cr.CrunchLog = newLogWriter(&logbuf)
2261
2262         // if there's no InstanceType env var, cost is calculated as 0
2263         os.Unsetenv("InstanceType")
2264         cost := cr.calculateCost(now)
2265         c.Check(cost, Equals, 0.0)
2266
2267         // with InstanceType env var and loadPrices() hasn't run (or
2268         // hasn't found any data), cost is calculated based on
2269         // InstanceType env var
2270         os.Setenv("InstanceType", `{"Price":1.2}`)
2271         cost = cr.calculateCost(now)
2272         c.Check(cost, Equals, 1.2)
2273
2274         // first update tells us the spot price was $1/h until 30
2275         // minutes ago when it increased to $2/h
2276         j, err := json.Marshal([]cloud.InstancePrice{
2277                 {StartTime: now.Add(-4 * time.Hour), Price: 1.0},
2278                 {StartTime: now.Add(-time.Hour / 2), Price: 2.0},
2279         })
2280         c.Assert(err, IsNil)
2281         os.WriteFile(lockdir+"/"+pricesfile, j, 0777)
2282         cr.loadPrices()
2283         cost = cr.calculateCost(now)
2284         c.Check(cost, Equals, 1.5)
2285
2286         // next update (via --list + SIGUSR2) tells us the spot price
2287         // increased to $3/h 15 minutes ago
2288         j, err = json.Marshal([]cloud.InstancePrice{
2289                 {StartTime: now.Add(-time.Hour / 3), Price: 2.0}, // dup of -time.Hour/2 price
2290                 {StartTime: now.Add(-time.Hour / 4), Price: 3.0},
2291         })
2292         c.Assert(err, IsNil)
2293         os.WriteFile(lockdir+"/"+pricesfile, j, 0777)
2294         cr.loadPrices()
2295         cost = cr.calculateCost(now)
2296         c.Check(cost, Equals, 1.0/2+2.0/4+3.0/4)
2297
2298         cost = cr.calculateCost(now.Add(-time.Hour / 2))
2299         c.Check(cost, Equals, 0.5)
2300
2301         c.Logf("%s", logbuf.String())
2302         c.Check(logbuf.String(), Matches, `(?ms).*Instance price changed to 1\.00 at 20.* changed to 2\.00 .* changed to 3\.00 .*`)
2303         c.Check(logbuf.String(), Not(Matches), `(?ms).*changed to 2\.00 .* changed to 2\.00 .*`)
2304 }
2305
2306 func (s *TestSuite) TestSIGUSR2CostUpdate(c *C) {
2307         pid := os.Getpid()
2308         now := time.Now()
2309         pricesJSON, err := json.Marshal([]cloud.InstancePrice{
2310                 {StartTime: now.Add(-4 * time.Hour), Price: 2.4},
2311                 {StartTime: now.Add(-2 * time.Hour), Price: 2.6},
2312         })
2313         c.Assert(err, IsNil)
2314
2315         os.Setenv("InstanceType", `{"Price":2.2}`)
2316         defer func(s string) { lockdir = s }(lockdir)
2317         lockdir = c.MkDir()
2318
2319         // We can't use s.api.CalledWith because timing differences will yield
2320         // different cost values across runs. getCostUpdate iterates over API
2321         // calls until it finds one that sets the cost, then writes that value
2322         // to the next index of costUpdates.
2323         deadline := now.Add(time.Second)
2324         costUpdates := make([]float64, 2)
2325         costIndex := 0
2326         apiIndex := 0
2327         getCostUpdate := func() {
2328                 for ; time.Now().Before(deadline); time.Sleep(time.Second / 10) {
2329                         for apiIndex < len(s.api.Content) {
2330                                 update := s.api.Content[apiIndex]
2331                                 apiIndex++
2332                                 var ok bool
2333                                 var cost float64
2334                                 if update, ok = update["container"].(arvadosclient.Dict); !ok {
2335                                         continue
2336                                 }
2337                                 if cost, ok = update["cost"].(float64); !ok {
2338                                         continue
2339                                 }
2340                                 c.Logf("API call #%d updates cost to %v", apiIndex-1, cost)
2341                                 costUpdates[costIndex] = cost
2342                                 costIndex++
2343                                 return
2344                         }
2345                 }
2346         }
2347
2348         s.fullRunHelper(c, `{
2349                 "command": ["true"],
2350                 "container_image": "`+arvadostest.DockerImage112PDH+`",
2351                 "cwd": ".",
2352                 "environment": {},
2353                 "mounts": {"/tmp": {"kind": "tmp"} },
2354                 "output_path": "/tmp",
2355                 "priority": 1,
2356                 "runtime_constraints": {},
2357                 "state": "Locked",
2358                 "uuid": "zzzzz-dz642-20230320101530a"
2359         }`, nil, func() int {
2360                 s.runner.costStartTime = now.Add(-3 * time.Hour)
2361                 err := syscall.Kill(pid, syscall.SIGUSR2)
2362                 c.Check(err, IsNil, Commentf("error sending first SIGUSR2 to runner"))
2363                 getCostUpdate()
2364
2365                 err = os.WriteFile(path.Join(lockdir, pricesfile), pricesJSON, 0o700)
2366                 c.Check(err, IsNil, Commentf("error writing JSON prices file"))
2367                 err = syscall.Kill(pid, syscall.SIGUSR2)
2368                 c.Check(err, IsNil, Commentf("error sending second SIGUSR2 to runner"))
2369                 getCostUpdate()
2370
2371                 return 0
2372         })
2373         // Comparing with format strings makes it easy to ignore minor variations
2374         // in cost across runs while keeping diagnostics pretty.
2375         c.Check(fmt.Sprintf("%.3f", costUpdates[0]), Equals, "6.600")
2376         c.Check(fmt.Sprintf("%.3f", costUpdates[1]), Equals, "7.600")
2377 }
2378
2379 type FakeProcess struct {
2380         cmdLine []string
2381 }
2382
2383 func (fp FakeProcess) CmdlineSlice() ([]string, error) {
2384         return fp.cmdLine, nil
2385 }
2386
2387 func logFileContent(c *C, cr *ContainerRunner, fnm string) string {
2388         buf, err := fs.ReadFile(arvados.FS(cr.LogCollection), fnm)
2389         c.Assert(err, IsNil)
2390         return string(buf)
2391 }
2392
2393 func dumpAllLogFiles(c *C, cr *ContainerRunner) {
2394         d, err := cr.LogCollection.OpenFile("/", os.O_RDONLY, 0)
2395         c.Assert(err, IsNil)
2396         fis, err := d.Readdir(-1)
2397         c.Assert(err, IsNil)
2398         for _, fi := range fis {
2399                 c.Logf("=== %s", fi.Name())
2400                 c.Log(logFileContent(c, cr, fi.Name()))
2401         }
2402 }