21891: Move string concatenations out of per-segment loop.
[arvados.git] / lib / crunchrun / crunchrun_test.go
1 // Copyright (C) The Arvados Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: AGPL-3.0
4
5 package crunchrun
6
7 import (
8         "bytes"
9         "context"
10         "crypto/md5"
11         "encoding/json"
12         "errors"
13         "fmt"
14         "io"
15         "io/fs"
16         "io/ioutil"
17         "math/rand"
18         "net/http"
19         "net/http/httptest"
20         "net/http/httputil"
21         "net/url"
22         "os"
23         "os/exec"
24         "path"
25         "regexp"
26         "runtime/pprof"
27         "strings"
28         "sync"
29         "sync/atomic"
30         "syscall"
31         "testing"
32         "time"
33
34         "git.arvados.org/arvados.git/lib/cloud"
35         "git.arvados.org/arvados.git/lib/cmd"
36         "git.arvados.org/arvados.git/sdk/go/arvados"
37         "git.arvados.org/arvados.git/sdk/go/arvadosclient"
38         "git.arvados.org/arvados.git/sdk/go/arvadostest"
39         "git.arvados.org/arvados.git/sdk/go/manifest"
40
41         . "gopkg.in/check.v1"
42 )
43
44 // Gocheck boilerplate
45 func TestCrunchExec(t *testing.T) {
46         TestingT(t)
47 }
48
49 const logLineStart = `(?m)(.*\n)*\d{4}-\d\d-\d\dT\d\d:\d\d:\d\d\.\d+Z `
50
51 var _ = Suite(&TestSuite{})
52
53 type TestSuite struct {
54         client                   *arvados.Client
55         api                      *ArvTestClient
56         runner                   *ContainerRunner
57         executor                 *stubExecutor
58         keepmount                string
59         keepmountTmp             []string
60         testDispatcherKeepClient KeepTestClient
61         testContainerKeepClient  KeepTestClient
62         debian12MemoryCurrent    int64
63         debian12SwapCurrent      int64
64 }
65
66 func (s *TestSuite) SetUpSuite(c *C) {
67         buf, err := os.ReadFile("../crunchstat/testdata/debian12/sys/fs/cgroup/user.slice/user-1000.slice/session-4.scope/memory.current")
68         c.Assert(err, IsNil)
69         _, err = fmt.Sscanf(string(buf), "%d", &s.debian12MemoryCurrent)
70         c.Assert(err, IsNil)
71
72         buf, err = os.ReadFile("../crunchstat/testdata/debian12/sys/fs/cgroup/user.slice/user-1000.slice/session-4.scope/memory.swap.current")
73         c.Assert(err, IsNil)
74         _, err = fmt.Sscanf(string(buf), "%d", &s.debian12SwapCurrent)
75         c.Assert(err, IsNil)
76 }
77
78 func (s *TestSuite) SetUpTest(c *C) {
79         s.client = arvados.NewClientFromEnv()
80         s.executor = &stubExecutor{}
81         var err error
82         s.api = &ArvTestClient{}
83         s.runner, err = NewContainerRunner(s.client, s.api, &s.testDispatcherKeepClient, "zzzzz-zzzzz-zzzzzzzzzzzzzzz")
84         c.Assert(err, IsNil)
85         s.runner.executor = s.executor
86         s.runner.MkArvClient = func(token string) (IArvadosClient, IKeepClient, *arvados.Client, error) {
87                 return s.api, &s.testContainerKeepClient, s.client, nil
88         }
89         s.runner.RunArvMount = func(cmd []string, tok string) (*exec.Cmd, error) {
90                 s.runner.ArvMountPoint = s.keepmount
91                 for i, opt := range cmd {
92                         if opt == "--mount-tmp" {
93                                 err := os.Mkdir(s.keepmount+"/"+cmd[i+1], 0700)
94                                 if err != nil {
95                                         return nil, err
96                                 }
97                                 s.keepmountTmp = append(s.keepmountTmp, cmd[i+1])
98                         }
99                 }
100                 return nil, nil
101         }
102         s.keepmount = c.MkDir()
103         err = os.Mkdir(s.keepmount+"/by_id", 0755)
104         s.keepmountTmp = nil
105         c.Assert(err, IsNil)
106         err = os.Mkdir(s.keepmount+"/by_id/"+arvadostest.DockerImage112PDH, 0755)
107         c.Assert(err, IsNil)
108         err = ioutil.WriteFile(s.keepmount+"/by_id/"+arvadostest.DockerImage112PDH+"/"+arvadostest.DockerImage112Filename, []byte("#notarealtarball"), 0644)
109         err = os.Mkdir(s.keepmount+"/by_id/"+fakeInputCollectionPDH, 0755)
110         c.Assert(err, IsNil)
111         err = ioutil.WriteFile(s.keepmount+"/by_id/"+fakeInputCollectionPDH+"/input.json", []byte(`{"input":true}`), 0644)
112         c.Assert(err, IsNil)
113         s.runner.ArvMountPoint = s.keepmount
114         os.Setenv("InstanceType", `{"ProviderType":"a1.2xlarge","Price":1.2}`)
115 }
116
117 type ArvTestClient struct {
118         Total   int64
119         Calls   int
120         Content []arvadosclient.Dict
121         arvados.Container
122         secretMounts []byte
123         sync.Mutex
124         WasSetRunning bool
125         callraw       bool
126 }
127
128 type KeepTestClient struct {
129         Called         bool
130         StorageClasses []string
131         blocks         sync.Map
132 }
133
134 type stubExecutor struct {
135         imageLoaded bool
136         loaded      string
137         loadErr     error
138         exitCode    int
139         createErr   error
140         created     containerSpec
141         startErr    error
142         waitSleep   time.Duration
143         waitErr     error
144         stopErr     error
145         stopped     bool
146         closed      bool
147         runFunc     func() int
148         exit        chan int
149 }
150
151 func (e *stubExecutor) LoadImage(imageId string, tarball string, container arvados.Container, keepMount string,
152         containerClient *arvados.Client) error {
153         e.loaded = tarball
154         return e.loadErr
155 }
156 func (e *stubExecutor) Runtime() string                 { return "stub" }
157 func (e *stubExecutor) Version() string                 { return "stub " + cmd.Version.String() }
158 func (e *stubExecutor) Create(spec containerSpec) error { e.created = spec; return e.createErr }
159 func (e *stubExecutor) Start() error {
160         e.exit = make(chan int, 1)
161         go func() { e.exit <- e.runFunc() }()
162         return e.startErr
163 }
164 func (e *stubExecutor) Pid() int    { return 1115883 } // matches pid in ../crunchstat/testdata/debian12/proc/
165 func (e *stubExecutor) Stop() error { e.stopped = true; go func() { e.exit <- -1 }(); return e.stopErr }
166 func (e *stubExecutor) Close()      { e.closed = true }
167 func (e *stubExecutor) Wait(context.Context) (int, error) {
168         return <-e.exit, e.waitErr
169 }
170 func (e *stubExecutor) InjectCommand(ctx context.Context, _, _ string, _ bool, _ []string) (*exec.Cmd, error) {
171         return nil, errors.New("unimplemented")
172 }
173 func (e *stubExecutor) IPAddress() (string, error) { return "", errors.New("unimplemented") }
174
175 const fakeInputCollectionPDH = "ffffffffaaaaaaaa88888888eeeeeeee+1234"
176
177 var hwManifest = ". 82ab40c24fc8df01798e57ba66795bb1+841216+Aa124ac75e5168396c73c0a18eda641a4f41791c0@569fa8c3 0:841216:9c31ee32b3d15268a0754e8edc74d4f815ee014b693bc5109058e431dd5caea7.tar\n"
178 var hwPDH = "a45557269dcb65a6b78f9ac061c0850b+120"
179 var hwImageID = "9c31ee32b3d15268a0754e8edc74d4f815ee014b693bc5109058e431dd5caea7"
180
181 var otherManifest = ". 68a84f561b1d1708c6baff5e019a9ab3+46+Ae5d0af96944a3690becb1decdf60cc1c937f556d@5693216f 0:46:md5sum.txt\n"
182 var otherPDH = "a3e8f74c6f101eae01fa08bfb4e49b3a+54"
183
184 var normalizedManifestWithSubdirs = `. 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396c73c0abcdefgh11234567890@569fa8c3 0:9:file1_in_main.txt 9:18:file2_in_main.txt 0:27:zzzzz-8i9sb-bcdefghijkdhvnk.log.txt
185 ./subdir1 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396cabcdefghij6419876543234@569fa8c4 0:9:file1_in_subdir1.txt 9:18:file2_in_subdir1.txt
186 ./subdir1/subdir2 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396c73c0bcdefghijk544332211@569fa8c5 0:9:file1_in_subdir2.txt 9:18:file2_in_subdir2.txt
187 `
188
189 var normalizedWithSubdirsPDH = "a0def87f80dd594d4675809e83bd4f15+367"
190
191 var denormalizedManifestWithSubdirs = ". 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396c73c0abcdefgh11234567890@569fa8c3 0:9:file1_in_main.txt 9:18:file2_in_main.txt 0:27:zzzzz-8i9sb-bcdefghijkdhvnk.log.txt 0:10:subdir1/file1_in_subdir1.txt 10:17:subdir1/file2_in_subdir1.txt\n"
192 var denormalizedWithSubdirsPDH = "b0def87f80dd594d4675809e83bd4f15+367"
193
194 var fakeAuthUUID = "zzzzz-gj3su-55pqoyepgi2glem"
195 var fakeAuthToken = "a3ltuwzqcu2u4sc0q7yhpc2w7s00fdcqecg5d6e0u3pfohmbjt"
196
197 func (client *ArvTestClient) Create(resourceType string,
198         parameters arvadosclient.Dict,
199         output interface{}) error {
200
201         client.Mutex.Lock()
202         defer client.Mutex.Unlock()
203
204         client.Calls++
205         client.Content = append(client.Content, parameters)
206
207         if resourceType == "logs" {
208                 panic("logs.create called")
209         }
210
211         if resourceType == "collections" && output != nil {
212                 mt := parameters["collection"].(arvadosclient.Dict)["manifest_text"].(string)
213                 md5sum := md5.Sum([]byte(mt))
214                 outmap := output.(*arvados.Collection)
215                 outmap.PortableDataHash = fmt.Sprintf("%x+%d", md5sum, len(mt))
216                 outmap.UUID = fmt.Sprintf("zzzzz-4zz18-%015x", md5sum[:7])
217         }
218
219         return nil
220 }
221
222 func (client *ArvTestClient) Call(method, resourceType, uuid, action string, parameters arvadosclient.Dict, output interface{}) error {
223         switch {
224         case method == "GET" && resourceType == "containers" && action == "auth":
225                 return json.Unmarshal([]byte(`{
226                         "kind": "arvados#api_client_authorization",
227                         "uuid": "`+fakeAuthUUID+`",
228                         "api_token": "`+fakeAuthToken+`"
229                         }`), output)
230         case method == "GET" && resourceType == "containers" && action == "secret_mounts":
231                 if client.secretMounts != nil {
232                         return json.Unmarshal(client.secretMounts, output)
233                 }
234                 return json.Unmarshal([]byte(`{"secret_mounts":{}}`), output)
235         default:
236                 return fmt.Errorf("Not found")
237         }
238 }
239
240 func (client *ArvTestClient) CallRaw(method, resourceType, uuid, action string,
241         parameters arvadosclient.Dict) (reader io.ReadCloser, err error) {
242         var j []byte
243         if method == "GET" && resourceType == "nodes" && uuid == "" && action == "" {
244                 j = []byte(`{
245                         "kind": "arvados#nodeList",
246                         "items": [{
247                                 "uuid": "zzzzz-7ekkf-2z3mc76g2q73aio",
248                                 "hostname": "compute2",
249                                 "properties": {"total_cpu_cores": 16}
250                         }]}`)
251         } else if method == "GET" && resourceType == "containers" && action == "" && !client.callraw {
252                 if uuid == "" {
253                         j, err = json.Marshal(map[string]interface{}{
254                                 "items": []interface{}{client.Container},
255                                 "kind":  "arvados#nodeList",
256                         })
257                 } else {
258                         j, err = json.Marshal(client.Container)
259                 }
260         } else {
261                 j = []byte(`{
262                         "command": ["sleep", "1"],
263                         "container_image": "` + arvadostest.DockerImage112PDH + `",
264                         "cwd": ".",
265                         "environment": {},
266                         "mounts": {"/tmp": {"kind": "tmp"}, "/json": {"kind": "json", "content": {"number": 123456789123456789}}},
267                         "output_path": "/tmp",
268                         "priority": 1,
269                         "runtime_constraints": {}
270                 }`)
271         }
272         return ioutil.NopCloser(bytes.NewReader(j)), err
273 }
274
275 func (client *ArvTestClient) Get(resourceType string, uuid string, parameters arvadosclient.Dict, output interface{}) error {
276         if resourceType == "collections" {
277                 if uuid == hwPDH {
278                         output.(*arvados.Collection).ManifestText = hwManifest
279                 } else if uuid == otherPDH {
280                         output.(*arvados.Collection).ManifestText = otherManifest
281                 } else if uuid == normalizedWithSubdirsPDH {
282                         output.(*arvados.Collection).ManifestText = normalizedManifestWithSubdirs
283                 } else if uuid == denormalizedWithSubdirsPDH {
284                         output.(*arvados.Collection).ManifestText = denormalizedManifestWithSubdirs
285                 }
286         }
287         if resourceType == "containers" {
288                 (*output.(*arvados.Container)) = client.Container
289         }
290         return nil
291 }
292
293 func (client *ArvTestClient) Update(resourceType string, uuid string, parameters arvadosclient.Dict, output interface{}) (err error) {
294         client.Mutex.Lock()
295         defer client.Mutex.Unlock()
296         client.Calls++
297         client.Content = append(client.Content, parameters)
298         if resourceType == "containers" {
299                 if parameters["container"].(arvadosclient.Dict)["state"] == "Running" {
300                         client.WasSetRunning = true
301                 }
302         } else if resourceType == "collections" && output != nil {
303                 mt := parameters["collection"].(arvadosclient.Dict)["manifest_text"].(string)
304                 output.(*arvados.Collection).UUID = uuid
305                 output.(*arvados.Collection).PortableDataHash = arvados.PortableDataHash(mt)
306         }
307         return nil
308 }
309
310 var discoveryMap = map[string]interface{}{
311         "crunchLogUpdateSize":   float64(crunchLogUpdateSize),
312         "crunchLogUpdatePeriod": float64(crunchLogUpdatePeriod.Seconds()),
313 }
314
315 func (client *ArvTestClient) Discovery(key string) (interface{}, error) {
316         return discoveryMap[key], nil
317 }
318
319 // CalledWith returns the parameters from the first API call whose
320 // parameters match jpath/string. E.g., CalledWith(c, "foo.bar",
321 // "baz") returns parameters with parameters["foo"]["bar"]=="baz". If
322 // no call matches, it returns nil.
323 func (client *ArvTestClient) CalledWith(jpath string, expect interface{}) arvadosclient.Dict {
324 call:
325         for _, content := range client.Content {
326                 var v interface{} = content
327                 for _, k := range strings.Split(jpath, ".") {
328                         if dict, ok := v.(arvadosclient.Dict); !ok {
329                                 continue call
330                         } else {
331                                 v = dict[k]
332                         }
333                 }
334                 if v == expect {
335                         return content
336                 }
337         }
338         return nil
339 }
340
341 func (client *KeepTestClient) LocalLocator(locator string) (string, error) {
342         return locator, nil
343 }
344
345 func (client *KeepTestClient) BlockWrite(_ context.Context, opts arvados.BlockWriteOptions) (arvados.BlockWriteResponse, error) {
346         locator := fmt.Sprintf("%x+%d", md5.Sum(opts.Data), len(opts.Data))
347         client.blocks.Store(locator, append([]byte(nil), opts.Data...))
348         return arvados.BlockWriteResponse{
349                 Locator: locator,
350         }, nil
351 }
352
353 func (client *KeepTestClient) ReadAt(locator string, dst []byte, offset int) (int, error) {
354         loaded, ok := client.blocks.Load(locator)
355         if !ok {
356                 return 0, os.ErrNotExist
357         }
358         data := loaded.([]byte)
359         if offset >= len(data) {
360                 return 0, io.EOF
361         }
362         return copy(dst, data[offset:]), nil
363 }
364
365 func (client *KeepTestClient) Close() {
366         client.blocks.Range(func(locator, value interface{}) bool {
367                 client.blocks.Delete(locator)
368                 return true
369         })
370 }
371
372 func (client *KeepTestClient) SetStorageClasses(sc []string) {
373         client.StorageClasses = sc
374 }
375
376 type FileWrapper struct {
377         io.ReadCloser
378         len int64
379 }
380
381 func (fw FileWrapper) Readdir(n int) ([]os.FileInfo, error) {
382         return nil, errors.New("not implemented")
383 }
384
385 func (fw FileWrapper) Seek(int64, int) (int64, error) {
386         return 0, errors.New("not implemented")
387 }
388
389 func (fw FileWrapper) Size() int64 {
390         return fw.len
391 }
392
393 func (fw FileWrapper) Stat() (os.FileInfo, error) {
394         return nil, errors.New("not implemented")
395 }
396
397 func (fw FileWrapper) Truncate(int64) error {
398         return errors.New("not implemented")
399 }
400
401 func (fw FileWrapper) Write([]byte) (int, error) {
402         return 0, errors.New("not implemented")
403 }
404
405 func (fw FileWrapper) Sync() error {
406         return errors.New("not implemented")
407 }
408
409 func (fw FileWrapper) Snapshot() (*arvados.Subtree, error) {
410         return nil, errors.New("not implemented")
411 }
412
413 func (fw FileWrapper) Splice(*arvados.Subtree) error {
414         return errors.New("not implemented")
415 }
416
417 func (client *KeepTestClient) ManifestFileReader(m manifest.Manifest, filename string) (arvados.File, error) {
418         if filename == hwImageID+".tar" {
419                 rdr := ioutil.NopCloser(&bytes.Buffer{})
420                 client.Called = true
421                 return FileWrapper{rdr, 1321984}, nil
422         } else if filename == "/file1_in_main.txt" {
423                 rdr := ioutil.NopCloser(strings.NewReader("foo"))
424                 client.Called = true
425                 return FileWrapper{rdr, 3}, nil
426         }
427         return nil, nil
428 }
429
430 type apiStubServer struct {
431         server    *httptest.Server
432         proxy     *httputil.ReverseProxy
433         intercept func(http.ResponseWriter, *http.Request) bool
434
435         container arvados.Container
436         logs      map[string]string
437 }
438
439 func apiStub() (*arvados.Client, *apiStubServer) {
440         client := arvados.NewClientFromEnv()
441         apistub := &apiStubServer{}
442         apistub.server = httptest.NewTLSServer(apistub)
443         apistub.proxy = httputil.NewSingleHostReverseProxy(&url.URL{Scheme: "https", Host: client.APIHost})
444         if client.Insecure {
445                 apistub.proxy.Transport = arvados.InsecureHTTPClient.Transport
446         }
447         client.APIHost = apistub.server.Listener.Addr().String()
448         return client, apistub
449 }
450
451 func (apistub *apiStubServer) ServeHTTP(w http.ResponseWriter, r *http.Request) {
452         if apistub.intercept != nil && apistub.intercept(w, r) {
453                 return
454         }
455         if r.Method == "POST" && r.URL.Path == "/arvados/v1/logs" {
456                 var body struct {
457                         Log struct {
458                                 EventType  string `json:"event_type"`
459                                 Properties struct {
460                                         Text string
461                                 }
462                         }
463                 }
464                 json.NewDecoder(r.Body).Decode(&body)
465                 apistub.logs[body.Log.EventType] += body.Log.Properties.Text
466                 return
467         }
468         if r.Method == "GET" && r.URL.Path == "/arvados/v1/collections/"+hwPDH {
469                 json.NewEncoder(w).Encode(arvados.Collection{ManifestText: hwManifest})
470                 return
471         }
472         if r.Method == "GET" && r.URL.Path == "/arvados/v1/collections/"+otherPDH {
473                 json.NewEncoder(w).Encode(arvados.Collection{ManifestText: otherManifest})
474                 return
475         }
476         if r.Method == "GET" && r.URL.Path == "/arvados/v1/collections/"+normalizedWithSubdirsPDH {
477                 json.NewEncoder(w).Encode(arvados.Collection{ManifestText: normalizedManifestWithSubdirs})
478                 return
479         }
480         if r.Method == "GET" && r.URL.Path == "/arvados/v1/collections/"+denormalizedWithSubdirsPDH {
481                 json.NewEncoder(w).Encode(arvados.Collection{ManifestText: denormalizedManifestWithSubdirs})
482                 return
483         }
484         if r.Method == "GET" && r.URL.Path == "/arvados/v1/containers/"+apistub.container.UUID {
485                 json.NewEncoder(w).Encode(apistub.container)
486                 return
487         }
488         apistub.proxy.ServeHTTP(w, r)
489 }
490
491 func (s *TestSuite) TestLoadImage(c *C) {
492         s.runner.Container.ContainerImage = arvadostest.DockerImage112PDH
493         s.runner.Container.Mounts = map[string]arvados.Mount{
494                 "/out": {Kind: "tmp", Writable: true},
495         }
496         s.runner.Container.OutputPath = "/out"
497
498         _, err := s.runner.SetupMounts()
499         c.Assert(err, IsNil)
500
501         imageID, err := s.runner.LoadImage()
502         c.Check(err, IsNil)
503         c.Check(s.executor.loaded, Matches, ".*"+regexp.QuoteMeta(arvadostest.DockerImage112Filename))
504         c.Check(imageID, Equals, strings.TrimSuffix(arvadostest.DockerImage112Filename, ".tar"))
505
506         s.runner.Container.ContainerImage = arvadostest.DockerImage112PDH
507         s.executor.imageLoaded = false
508         s.executor.loaded = ""
509         s.executor.loadErr = errors.New("bork")
510         imageID, err = s.runner.LoadImage()
511         c.Check(err, ErrorMatches, ".*bork")
512         c.Check(s.executor.loaded, Matches, ".*"+regexp.QuoteMeta(arvadostest.DockerImage112Filename))
513
514         s.runner.Container.ContainerImage = fakeInputCollectionPDH
515         s.executor.imageLoaded = false
516         s.executor.loaded = ""
517         s.executor.loadErr = nil
518         imageID, err = s.runner.LoadImage()
519         c.Check(err, ErrorMatches, "image collection does not include a \\.tar image file")
520         c.Check(s.executor.loaded, Equals, "")
521 }
522
523 type ArvErrorTestClient struct{}
524
525 func (ArvErrorTestClient) Create(resourceType string,
526         parameters arvadosclient.Dict,
527         output interface{}) error {
528         return nil
529 }
530
531 func (ArvErrorTestClient) Call(method, resourceType, uuid, action string, parameters arvadosclient.Dict, output interface{}) error {
532         if method == "GET" && resourceType == "containers" && action == "auth" {
533                 return nil
534         }
535         return errors.New("ArvError")
536 }
537
538 func (ArvErrorTestClient) CallRaw(method, resourceType, uuid, action string,
539         parameters arvadosclient.Dict) (reader io.ReadCloser, err error) {
540         return nil, errors.New("ArvError")
541 }
542
543 func (ArvErrorTestClient) Get(resourceType string, uuid string, parameters arvadosclient.Dict, output interface{}) error {
544         return errors.New("ArvError")
545 }
546
547 func (ArvErrorTestClient) Update(resourceType string, uuid string, parameters arvadosclient.Dict, output interface{}) (err error) {
548         return nil
549 }
550
551 func (ArvErrorTestClient) Discovery(key string) (interface{}, error) {
552         return discoveryMap[key], nil
553 }
554
555 type KeepErrorTestClient struct {
556         KeepTestClient
557 }
558
559 func (*KeepErrorTestClient) ManifestFileReader(manifest.Manifest, string) (arvados.File, error) {
560         return nil, errors.New("KeepError")
561 }
562
563 func (*KeepErrorTestClient) BlockWrite(context.Context, arvados.BlockWriteOptions) (arvados.BlockWriteResponse, error) {
564         return arvados.BlockWriteResponse{}, errors.New("KeepError")
565 }
566
567 func (*KeepErrorTestClient) LocalLocator(string) (string, error) {
568         return "", errors.New("KeepError")
569 }
570
571 type KeepReadErrorTestClient struct {
572         KeepTestClient
573 }
574
575 func (*KeepReadErrorTestClient) ReadAt(string, []byte, int) (int, error) {
576         return 0, errors.New("KeepError")
577 }
578
579 type ErrorReader struct {
580         FileWrapper
581 }
582
583 func (ErrorReader) Read(p []byte) (n int, err error) {
584         return 0, errors.New("ErrorReader")
585 }
586
587 func (ErrorReader) Seek(int64, int) (int64, error) {
588         return 0, errors.New("ErrorReader")
589 }
590
591 func (*KeepReadErrorTestClient) ManifestFileReader(m manifest.Manifest, filename string) (arvados.File, error) {
592         return ErrorReader{}, nil
593 }
594
595 func dockerLog(fd byte, msg string) []byte {
596         by := []byte(msg)
597         header := make([]byte, 8+len(by))
598         header[0] = fd
599         header[7] = byte(len(by))
600         copy(header[8:], by)
601         return header
602 }
603
604 func (s *TestSuite) TestRunContainer(c *C) {
605         s.executor.runFunc = func() int {
606                 fmt.Fprintf(s.executor.created.Stdout, "Hello world\n")
607                 return 0
608         }
609
610         s.runner.Container.ContainerImage = arvadostest.DockerImage112PDH
611         s.runner.Container.Command = []string{"./hw"}
612         s.runner.Container.OutputStorageClasses = []string{"default"}
613
614         imageID, err := s.runner.LoadImage()
615         c.Assert(err, IsNil)
616
617         err = s.runner.CreateContainer(imageID, nil)
618         c.Assert(err, IsNil)
619
620         err = s.runner.StartContainer()
621         c.Assert(err, IsNil)
622
623         err = s.runner.WaitFinish()
624         c.Assert(err, IsNil)
625
626         c.Check(logFileContent(c, s.runner, "stdout.txt"), Matches, `2\S+Z Hello world\n`)
627         c.Check(logFileContent(c, s.runner, "stderr.txt"), Matches, ``)
628 }
629
630 func (s *TestSuite) TestCommitLogs(c *C) {
631         api := &ArvTestClient{}
632         kc := &KeepTestClient{}
633         defer kc.Close()
634         cr, err := NewContainerRunner(s.client, api, kc, "zzzzz-zzzzz-zzzzzzzzzzzzzzz")
635         c.Assert(err, IsNil)
636         f, err := cr.openLogFile("crunch-run")
637         c.Assert(err, IsNil)
638         cr.CrunchLog = newLogWriter(newTestTimestamper(f))
639
640         cr.CrunchLog.Print("Hello world!")
641         cr.CrunchLog.Print("Goodbye")
642         cr.finalState = "Complete"
643
644         err = cr.CommitLogs()
645         c.Check(err, IsNil)
646
647         c.Check(api.Calls, Equals, 1)
648         c.Check(api.Content[0]["ensure_unique_name"], Equals, true)
649         c.Check(api.Content[0]["collection"].(arvadosclient.Dict)["name"], Equals, "logs for zzzzz-zzzzz-zzzzzzzzzzzzzzz")
650         c.Check(api.Content[0]["collection"].(arvadosclient.Dict)["manifest_text"], Equals, ". 744b2e4553123b02fa7b452ec5c18993+123 0:123:crunch-run.txt\n")
651         c.Check(*cr.LogsPDH, Equals, "63da7bdacf08c40f604daad80c261e9a+60")
652 }
653
654 func (s *TestSuite) TestUpdateContainerRunning(c *C) {
655         api := &ArvTestClient{}
656         kc := &KeepTestClient{}
657         defer kc.Close()
658         cr, err := NewContainerRunner(s.client, api, kc, "zzzzz-zzzzz-zzzzzzzzzzzzzzz")
659         c.Assert(err, IsNil)
660
661         err = cr.UpdateContainerRunning("")
662         c.Check(err, IsNil)
663
664         c.Check(api.Content[0]["container"].(arvadosclient.Dict)["state"], Equals, "Running")
665 }
666
667 func (s *TestSuite) TestUpdateContainerComplete(c *C) {
668         api := &ArvTestClient{}
669         kc := &KeepTestClient{}
670         defer kc.Close()
671         cr, err := NewContainerRunner(s.client, api, kc, "zzzzz-zzzzz-zzzzzzzzzzzzzzz")
672         c.Assert(err, IsNil)
673
674         cr.LogsPDH = new(string)
675         *cr.LogsPDH = "d3a229d2fe3690c2c3e75a71a153c6a3+60"
676
677         cr.ExitCode = new(int)
678         *cr.ExitCode = 42
679         cr.finalState = "Complete"
680
681         err = cr.UpdateContainerFinal()
682         c.Check(err, IsNil)
683
684         c.Check(api.Content[0]["container"].(arvadosclient.Dict)["log"], Equals, *cr.LogsPDH)
685         c.Check(api.Content[0]["container"].(arvadosclient.Dict)["exit_code"], Equals, *cr.ExitCode)
686         c.Check(api.Content[0]["container"].(arvadosclient.Dict)["state"], Equals, "Complete")
687 }
688
689 func (s *TestSuite) TestUpdateContainerCancelled(c *C) {
690         api := &ArvTestClient{}
691         kc := &KeepTestClient{}
692         defer kc.Close()
693         cr, err := NewContainerRunner(s.client, api, kc, "zzzzz-zzzzz-zzzzzzzzzzzzzzz")
694         c.Assert(err, IsNil)
695         cr.cCancelled = true
696         cr.finalState = "Cancelled"
697
698         err = cr.UpdateContainerFinal()
699         c.Check(err, IsNil)
700
701         c.Check(api.Content[0]["container"].(arvadosclient.Dict)["log"], IsNil)
702         c.Check(api.Content[0]["container"].(arvadosclient.Dict)["exit_code"], IsNil)
703         c.Check(api.Content[0]["container"].(arvadosclient.Dict)["state"], Equals, "Cancelled")
704 }
705
706 // Used by the TestFullRun*() test below to DRY up boilerplate setup to do full
707 // dress rehearsal of the Run() function, starting from a JSON container record.
708 func (s *TestSuite) fullRunHelper(c *C, record string, extraMounts []string, fn func() int) (*ArvTestClient, *ContainerRunner, string) {
709         err := json.Unmarshal([]byte(record), &s.api.Container)
710         c.Assert(err, IsNil)
711         initialState := s.api.Container.State
712
713         var sm struct {
714                 SecretMounts map[string]arvados.Mount `json:"secret_mounts"`
715         }
716         err = json.Unmarshal([]byte(record), &sm)
717         c.Check(err, IsNil)
718         secretMounts, err := json.Marshal(sm)
719         c.Assert(err, IsNil)
720         c.Logf("SecretMounts decoded %v json %q", sm, secretMounts)
721
722         s.executor.runFunc = fn
723
724         s.runner.statInterval = 100 * time.Millisecond
725         s.runner.containerWatchdogInterval = time.Second
726
727         realTemp := c.MkDir()
728         tempcount := 0
729         s.runner.MkTempDir = func(_, prefix string) (string, error) {
730                 tempcount++
731                 d := fmt.Sprintf("%s/%s%d", realTemp, prefix, tempcount)
732                 err := os.Mkdir(d, os.ModePerm)
733                 if err != nil && strings.Contains(err.Error(), ": file exists") {
734                         // Test case must have pre-populated the tempdir
735                         err = nil
736                 }
737                 return d, err
738         }
739         client, _ := apiStub()
740         s.runner.MkArvClient = func(token string) (IArvadosClient, IKeepClient, *arvados.Client, error) {
741                 return &ArvTestClient{secretMounts: secretMounts}, &s.testContainerKeepClient, client, nil
742         }
743
744         if extraMounts != nil && len(extraMounts) > 0 {
745                 err := s.runner.SetupArvMountPoint("keep")
746                 c.Check(err, IsNil)
747
748                 for _, m := range extraMounts {
749                         os.MkdirAll(s.runner.ArvMountPoint+"/by_id/"+m, os.ModePerm)
750                 }
751         }
752
753         err = s.runner.Run()
754         if s.api.CalledWith("container.state", "Complete") != nil {
755                 c.Check(err, IsNil)
756         }
757         if s.executor.loadErr == nil && s.executor.createErr == nil && initialState != "Running" {
758                 c.Check(s.api.WasSetRunning, Equals, true)
759                 var lastupdate arvadosclient.Dict
760                 for _, content := range s.api.Content {
761                         if content["container"] != nil {
762                                 lastupdate = content["container"].(arvadosclient.Dict)
763                         }
764                 }
765                 if lastupdate["log"] == nil {
766                         c.Errorf("no container update with non-nil log -- updates were: %v", s.api.Content)
767                 }
768         }
769
770         if err != nil {
771                 dumpAllLogFiles(c, s.runner)
772         }
773
774         return s.api, s.runner, realTemp
775 }
776
777 func (s *TestSuite) TestFullRunHello(c *C) {
778         s.runner.enableMemoryLimit = true
779         s.runner.networkMode = "default"
780         s.fullRunHelper(c, `{
781     "command": ["echo", "hello world"],
782     "container_image": "`+arvadostest.DockerImage112PDH+`",
783     "cwd": ".",
784     "environment": {"foo":"bar","baz":"waz"},
785     "mounts": {"/tmp": {"kind": "tmp"} },
786     "output_path": "/tmp",
787     "priority": 1,
788     "runtime_constraints": {"vcpus":1,"ram":1000000},
789     "state": "Locked",
790     "output_storage_classes": ["default"]
791 }`, nil, func() int {
792                 c.Check(s.executor.created.Command, DeepEquals, []string{"echo", "hello world"})
793                 c.Check(s.executor.created.Image, Equals, "sha256:d8309758b8fe2c81034ffc8a10c36460b77db7bc5e7b448c4e5b684f9d95a678")
794                 c.Check(s.executor.created.Env, DeepEquals, map[string]string{"foo": "bar", "baz": "waz"})
795                 c.Check(s.executor.created.VCPUs, Equals, 1)
796                 c.Check(s.executor.created.RAM, Equals, int64(1000000))
797                 c.Check(s.executor.created.NetworkMode, Equals, "default")
798                 c.Check(s.executor.created.EnableNetwork, Equals, false)
799                 c.Check(s.executor.created.CUDADeviceCount, Equals, 0)
800                 fmt.Fprintln(s.executor.created.Stdout, "hello world")
801                 return 0
802         })
803
804         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
805         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
806         c.Check(logFileContent(c, s.runner, "stdout.txt"), Matches, `2\S+Z hello world\n`)
807         c.Check(s.testDispatcherKeepClient.StorageClasses, DeepEquals, []string{"default"})
808         c.Check(s.testContainerKeepClient.StorageClasses, DeepEquals, []string{"default"})
809 }
810
811 func (s *TestSuite) TestRunAlreadyRunning(c *C) {
812         var ran bool
813         s.fullRunHelper(c, `{
814     "command": ["sleep", "3"],
815     "container_image": "`+arvadostest.DockerImage112PDH+`",
816     "cwd": ".",
817     "environment": {},
818     "mounts": {"/tmp": {"kind": "tmp"} },
819     "output_path": "/tmp",
820     "priority": 1,
821     "runtime_constraints": {},
822     "scheduling_parameters":{"max_run_time": 1},
823     "state": "Running"
824 }`, nil, func() int {
825                 ran = true
826                 return 2
827         })
828         c.Check(s.api.CalledWith("container.state", "Cancelled"), IsNil)
829         c.Check(s.api.CalledWith("container.state", "Complete"), IsNil)
830         c.Check(ran, Equals, false)
831 }
832
833 func ec2MetadataServerStub(c *C, token *string, failureRate float64, stoptime *atomic.Value) *httptest.Server {
834         failedOnce := false
835         return httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
836                 if !failedOnce || rand.Float64() < failureRate {
837                         w.WriteHeader(http.StatusServiceUnavailable)
838                         failedOnce = true
839                         return
840                 }
841                 switch r.URL.Path {
842                 case "/latest/api/token":
843                         fmt.Fprintln(w, *token)
844                 case "/latest/meta-data/spot/instance-action":
845                         if r.Header.Get("X-aws-ec2-metadata-token") != *token {
846                                 w.WriteHeader(http.StatusUnauthorized)
847                         } else if t, _ := stoptime.Load().(time.Time); t.IsZero() {
848                                 w.WriteHeader(http.StatusNotFound)
849                         } else {
850                                 fmt.Fprintf(w, `{"action":"stop","time":"%s"}`, t.Format(time.RFC3339))
851                         }
852                 default:
853                         w.WriteHeader(http.StatusNotFound)
854                 }
855         }))
856 }
857
858 func (s *TestSuite) TestSpotInterruptionNotice(c *C) {
859         s.testSpotInterruptionNotice(c, 0.1)
860 }
861
862 func (s *TestSuite) TestSpotInterruptionNoticeNotAvailable(c *C) {
863         s.testSpotInterruptionNotice(c, 1)
864 }
865
866 func (s *TestSuite) testSpotInterruptionNotice(c *C, failureRate float64) {
867         var stoptime atomic.Value
868         token := "fake-ec2-metadata-token"
869         stub := ec2MetadataServerStub(c, &token, failureRate, &stoptime)
870         defer stub.Close()
871
872         defer func(i time.Duration, u string) {
873                 spotInterruptionCheckInterval = i
874                 ec2MetadataBaseURL = u
875         }(spotInterruptionCheckInterval, ec2MetadataBaseURL)
876         spotInterruptionCheckInterval = time.Second / 8
877         ec2MetadataBaseURL = stub.URL
878
879         checkedLogs := false
880         checkLogs := func() {
881                 checkedLogs = true
882                 c.Check(logFileContent(c, s.runner, "crunch-run.txt"), Matches, `(?ms).*Checking for spot interruptions every 125ms using instance metadata at http://.*`)
883                 c.Check(logFileContent(c, s.runner, "crunch-run.txt"), Matches, `(?ms).*Error checking spot interruptions: 503 Service Unavailable.*`)
884                 if failureRate == 1 {
885                         c.Check(logFileContent(c, s.runner, "crunch-run.txt"), Matches, `(?ms).*Giving up on checking spot interruptions after too many consecutive failures.*`)
886                         return
887                 }
888                 text := `Cloud provider scheduled instance stop at ` + stoptime.Load().(time.Time).Format(time.RFC3339)
889                 c.Check(logFileContent(c, s.runner, "crunch-run.txt"), Matches, `(?ms).*`+text+`.*`)
890                 c.Check(s.api.CalledWith("container.runtime_status.warning", "preemption notice"), NotNil)
891                 c.Check(s.api.CalledWith("container.runtime_status.warningDetail", text), NotNil)
892                 c.Check(s.api.CalledWith("container.runtime_status.preemptionNotice", text), NotNil)
893
894                 // Check that the log collection was saved, and the
895                 // container record updated with the new PDH,
896                 // immediately after the preemption notice was
897                 // received -- i.e., while the container is still
898                 // running.
899                 lastpdh := ""
900                 saved := make(map[string]string) // pdh => manifest_text
901                 for _, call := range s.api.Content {
902                         if ctr, ok := call["container"].(arvadosclient.Dict); ok {
903                                 if pdh, ok := ctr["log"].(string); ok {
904                                         lastpdh = pdh
905                                 }
906                         }
907                         if coll, ok := call["collection"].(arvadosclient.Dict); ok {
908                                 mt, _ := coll["manifest_text"].(string)
909                                 if strings.Contains(mt, ":crunch-run.txt") {
910                                         saved[arvados.PortableDataHash(mt)] = mt
911                                 }
912                         }
913                 }
914                 logfs, err := (&arvados.Collection{ManifestText: saved[lastpdh]}).FileSystem(s.runner.dispatcherClient, s.runner.DispatcherKeepClient)
915                 c.Assert(err, IsNil)
916                 log, err := fs.ReadFile(arvados.FS(logfs), "crunch-run.txt")
917                 c.Check(err, IsNil)
918                 c.Check(string(log), Matches, `(?ms).*\Q`+text+`\E.*`)
919         }
920
921         go s.runner.checkSpotInterruptionNotices()
922         s.fullRunHelper(c, `{
923     "command": ["sleep", "3"],
924     "container_image": "`+arvadostest.DockerImage112PDH+`",
925     "cwd": ".",
926     "environment": {},
927     "mounts": {"/tmp": {"kind": "tmp"} },
928     "output_path": "/tmp",
929     "priority": 1,
930     "runtime_constraints": {},
931     "state": "Locked"
932 }`, nil, func() int {
933                 time.Sleep(time.Second)
934                 stoptime.Store(time.Now().Add(time.Minute).UTC())
935                 token = "different-fake-ec2-metadata-token"
936                 time.Sleep(time.Second)
937                 checkLogs()
938                 return 0
939         })
940         c.Check(checkedLogs, Equals, true)
941 }
942
943 func (s *TestSuite) TestRunTimeExceeded(c *C) {
944         s.fullRunHelper(c, `{
945     "command": ["sleep", "3"],
946     "container_image": "`+arvadostest.DockerImage112PDH+`",
947     "cwd": ".",
948     "environment": {},
949     "mounts": {"/tmp": {"kind": "tmp"} },
950     "output_path": "/tmp",
951     "priority": 1,
952     "runtime_constraints": {},
953     "scheduling_parameters":{"max_run_time": 1},
954     "state": "Locked"
955 }`, nil, func() int {
956                 time.Sleep(3 * time.Second)
957                 return 0
958         })
959
960         c.Check(s.api.CalledWith("container.state", "Cancelled"), NotNil)
961         c.Check(logFileContent(c, s.runner, "crunch-run.txt"), Matches, "(?ms).*maximum run time exceeded.*")
962 }
963
964 func (s *TestSuite) TestContainerWaitFails(c *C) {
965         s.fullRunHelper(c, `{
966     "command": ["sleep", "3"],
967     "container_image": "`+arvadostest.DockerImage112PDH+`",
968     "cwd": ".",
969     "mounts": {"/tmp": {"kind": "tmp"} },
970     "output_path": "/tmp",
971     "priority": 1,
972     "state": "Locked"
973 }`, nil, func() int {
974                 s.executor.waitErr = errors.New("Container is not running")
975                 return 0
976         })
977
978         c.Check(s.api.CalledWith("container.state", "Cancelled"), NotNil)
979         c.Check(logFileContent(c, s.runner, "crunch-run.txt"), Matches, "(?ms).*Container is not running.*")
980 }
981
982 func (s *TestSuite) TestCrunchstat(c *C) {
983         s.runner.crunchstatFakeFS = os.DirFS("../crunchstat/testdata/debian12")
984         s.fullRunHelper(c, `{
985                 "command": ["sleep", "1"],
986                 "container_image": "`+arvadostest.DockerImage112PDH+`",
987                 "cwd": ".",
988                 "environment": {},
989                 "mounts": {"/tmp": {"kind": "tmp"} },
990                 "output_path": "/tmp",
991                 "priority": 1,
992                 "runtime_constraints": {},
993                 "state": "Locked"
994         }`, nil, func() int {
995                 time.Sleep(time.Second)
996                 return 0
997         })
998
999         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
1000         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
1001
1002         c.Check(logFileContent(c, s.runner, "crunchstat.txt"), Matches, `(?ms).*mem \d+ swap \d+ pgmajfault \d+ rss.*`)
1003
1004         // Check that we called (*crunchstat.Reporter)Stop().
1005         c.Check(logFileContent(c, s.runner, "crunch-run.txt"), Matches, `(?ms).*Maximum crunch-run memory rss usage was \d+ bytes\n.*`)
1006 }
1007
1008 func (s *TestSuite) TestNodeInfoLog(c *C) {
1009         s.fullRunHelper(c, `{
1010                 "command": ["true"],
1011                 "container_image": "`+arvadostest.DockerImage112PDH+`",
1012                 "cwd": ".",
1013                 "environment": {},
1014                 "mounts": {"/tmp": {"kind": "tmp"} },
1015                 "output_path": "/tmp",
1016                 "priority": 1,
1017                 "runtime_constraints": {},
1018                 "state": "Locked"
1019         }`, nil, func() int {
1020                 return 0
1021         })
1022
1023         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
1024         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
1025
1026         json := logFileContent(c, s.runner, "node.json")
1027         c.Check(json, Matches, `(?ms).*"ProviderType": *"a1\.2xlarge".*`)
1028         c.Check(json, Matches, `(?ms).*"Price": *1\.2.*`)
1029
1030         nodeinfo := logFileContent(c, s.runner, "node-info.txt")
1031         c.Check(nodeinfo, Matches, `(?ms).*Host Information.*`)
1032         c.Check(nodeinfo, Matches, `(?ms).*CPU Information.*`)
1033         c.Check(nodeinfo, Matches, `(?ms).*Memory Information.*`)
1034         c.Check(nodeinfo, Matches, `(?ms).*Disk Space.*`)
1035         c.Check(nodeinfo, Matches, `(?ms).*Disk INodes.*`)
1036 }
1037
1038 func (s *TestSuite) TestLogVersionAndRuntime(c *C) {
1039         s.fullRunHelper(c, `{
1040                 "command": ["sleep", "1"],
1041                 "container_image": "`+arvadostest.DockerImage112PDH+`",
1042                 "cwd": ".",
1043                 "environment": {},
1044                 "mounts": {"/tmp": {"kind": "tmp"} },
1045                 "output_path": "/tmp",
1046                 "priority": 1,
1047                 "runtime_constraints": {},
1048                 "state": "Locked"
1049         }`, nil, func() int {
1050                 return 0
1051         })
1052
1053         c.Check(logFileContent(c, s.runner, "crunch-run.txt"), Matches, `(?ms).*crunch-run \S+ \(go\S+\) start.*`)
1054         c.Check(logFileContent(c, s.runner, "crunch-run.txt"), Matches, `(?ms).*crunch-run process has uid=\d+\(.+\) gid=\d+\(.+\) groups=\d+\(.+\)(,\d+\(.+\))*\n.*`)
1055         c.Check(logFileContent(c, s.runner, "crunch-run.txt"), Matches, `(?ms).*Executing container: zzzzz-zzzzz-zzzzzzzzzzzzzzz.*`)
1056         c.Check(logFileContent(c, s.runner, "crunch-run.txt"), Matches, `(?ms).*Using container runtime: stub.*`)
1057 }
1058
1059 func (s *TestSuite) testLogRSSThresholds(c *C, ram int64, expected []int, notExpected int) {
1060         s.runner.crunchstatFakeFS = os.DirFS("../crunchstat/testdata/debian12")
1061         s.fullRunHelper(c, `{
1062                 "command": ["true"],
1063                 "container_image": "`+arvadostest.DockerImage112PDH+`",
1064                 "cwd": ".",
1065                 "environment": {},
1066                 "mounts": {"/tmp": {"kind": "tmp"} },
1067                 "output_path": "/tmp",
1068                 "priority": 1,
1069                 "runtime_constraints": {"ram": `+fmt.Sprintf("%d", ram)+`},
1070                 "state": "Locked"
1071         }`, nil, func() int { return 0 })
1072         logs := logFileContent(c, s.runner, "crunch-run.txt")
1073         c.Log("=== crunchstat logs")
1074         c.Log(logs)
1075         pattern := logLineStart + `Container using over %d%% of memory \(rss %d/%d bytes\)`
1076         var threshold int
1077         for _, threshold = range expected {
1078                 c.Check(logs, Matches, fmt.Sprintf(pattern, threshold, s.debian12MemoryCurrent, ram))
1079         }
1080         if notExpected > threshold {
1081                 c.Check(logs, Not(Matches), fmt.Sprintf(pattern, notExpected, s.debian12MemoryCurrent, ram))
1082         }
1083 }
1084
1085 func (s *TestSuite) TestLogNoRSSThresholds(c *C) {
1086         s.testLogRSSThresholds(c, s.debian12MemoryCurrent*10, []int{}, 90)
1087 }
1088
1089 func (s *TestSuite) TestLogSomeRSSThresholds(c *C) {
1090         onePercentRSS := s.debian12MemoryCurrent / 100
1091         s.testLogRSSThresholds(c, 102*onePercentRSS, []int{90, 95}, 99)
1092 }
1093
1094 func (s *TestSuite) TestLogAllRSSThresholds(c *C) {
1095         s.testLogRSSThresholds(c, s.debian12MemoryCurrent, []int{90, 95, 99}, 0)
1096 }
1097
1098 func (s *TestSuite) TestLogMaximaAfterRun(c *C) {
1099         s.runner.crunchstatFakeFS = os.DirFS("../crunchstat/testdata/debian12")
1100         s.runner.parentTemp = c.MkDir()
1101         s.fullRunHelper(c, `{
1102         "command": ["true"],
1103         "container_image": "`+arvadostest.DockerImage112PDH+`",
1104         "cwd": ".",
1105         "environment": {},
1106         "mounts": {"/tmp": {"kind": "tmp"} },
1107         "output_path": "/tmp",
1108         "priority": 1,
1109         "runtime_constraints": {"ram": `+fmt.Sprintf("%d", s.debian12MemoryCurrent*10)+`},
1110         "state": "Locked"
1111     }`, nil, func() int { return 0 })
1112         logs := logFileContent(c, s.runner, "crunch-run.txt")
1113         for _, expected := range []string{
1114                 `Maximum disk usage was \d+%, \d+/\d+ bytes`,
1115                 fmt.Sprintf(`Maximum container memory swap usage was %d bytes`, s.debian12SwapCurrent),
1116                 `Maximum container memory pgmajfault usage was \d+ faults`,
1117                 fmt.Sprintf(`Maximum container memory rss usage was 10%%, %d/%d bytes`, s.debian12MemoryCurrent, s.debian12MemoryCurrent*10),
1118                 `Maximum crunch-run memory rss usage was \d+ bytes`,
1119         } {
1120                 c.Check(logs, Matches, logLineStart+expected)
1121         }
1122 }
1123
1124 func (s *TestSuite) TestCommitNodeInfoBeforeStart(c *C) {
1125         var collection_create, container_update arvadosclient.Dict
1126         s.fullRunHelper(c, `{
1127                 "command": ["true"],
1128                 "container_image": "`+arvadostest.DockerImage112PDH+`",
1129                 "cwd": ".",
1130                 "environment": {},
1131                 "mounts": {"/tmp": {"kind": "tmp"} },
1132                 "output_path": "/tmp",
1133                 "priority": 1,
1134                 "runtime_constraints": {},
1135                 "state": "Locked",
1136                 "uuid": "zzzzz-dz642-202301121543210"
1137         }`, nil, func() int {
1138                 collection_create = s.api.CalledWith("ensure_unique_name", true)
1139                 container_update = s.api.CalledWith("container.state", "Running")
1140                 return 0
1141         })
1142
1143         c.Assert(collection_create, NotNil)
1144         log_collection := collection_create["collection"].(arvadosclient.Dict)
1145         c.Check(log_collection["name"], Equals, "logs for zzzzz-dz642-202301121543210")
1146         manifest_text := log_collection["manifest_text"].(string)
1147         // We check that the file size is at least two digits as an easy way to
1148         // check the file isn't empty.
1149         c.Check(manifest_text, Matches, `\. .+ \d+:\d{2,}:node-info\.txt( .+)?\n`)
1150         c.Check(manifest_text, Matches, `\. .+ \d+:\d{2,}:node\.json( .+)?\n`)
1151
1152         c.Assert(container_update, NotNil)
1153         // As of Arvados 2.5.0, the container update must specify its log in PDH
1154         // format for the API server to propagate it to container requests, which
1155         // is what we care about for this test.
1156         expect_pdh := fmt.Sprintf("%x+%d", md5.Sum([]byte(manifest_text)), len(manifest_text))
1157         c.Check(container_update["container"].(arvadosclient.Dict)["log"], Equals, expect_pdh)
1158 }
1159
1160 func (s *TestSuite) TestContainerRecordLog(c *C) {
1161         s.fullRunHelper(c, `{
1162                 "command": ["sleep", "1"],
1163                 "container_image": "`+arvadostest.DockerImage112PDH+`",
1164                 "cwd": ".",
1165                 "environment": {},
1166                 "mounts": {"/tmp": {"kind": "tmp"} },
1167                 "output_path": "/tmp",
1168                 "priority": 1,
1169                 "runtime_constraints": {},
1170                 "state": "Locked"
1171         }`, nil,
1172                 func() int {
1173                         time.Sleep(time.Second)
1174                         return 0
1175                 })
1176
1177         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
1178         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
1179
1180         c.Check(logFileContent(c, s.runner, "container.json"), Matches, `(?ms).*container_image.*`)
1181 }
1182
1183 func (s *TestSuite) TestFullRunStderr(c *C) {
1184         s.fullRunHelper(c, `{
1185     "command": ["/bin/sh", "-c", "echo hello ; echo world 1>&2 ; exit 1"],
1186     "container_image": "`+arvadostest.DockerImage112PDH+`",
1187     "cwd": ".",
1188     "environment": {},
1189     "mounts": {"/tmp": {"kind": "tmp"} },
1190     "output_path": "/tmp",
1191     "priority": 1,
1192     "runtime_constraints": {},
1193     "state": "Locked"
1194 }`, nil, func() int {
1195                 fmt.Fprintln(s.executor.created.Stdout, "hello")
1196                 fmt.Fprintln(s.executor.created.Stderr, "world")
1197                 return 1
1198         })
1199
1200         final := s.api.CalledWith("container.state", "Complete")
1201         c.Assert(final, NotNil)
1202         c.Check(final["container"].(arvadosclient.Dict)["exit_code"], Equals, 1)
1203         c.Check(final["container"].(arvadosclient.Dict)["log"], NotNil)
1204
1205         c.Check(logFileContent(c, s.runner, "stdout.txt"), Matches, ".*hello\n")
1206         c.Check(logFileContent(c, s.runner, "stderr.txt"), Matches, ".*world\n")
1207 }
1208
1209 func (s *TestSuite) TestFullRunDefaultCwd(c *C) {
1210         s.fullRunHelper(c, `{
1211     "command": ["pwd"],
1212     "container_image": "`+arvadostest.DockerImage112PDH+`",
1213     "cwd": ".",
1214     "environment": {},
1215     "mounts": {"/tmp": {"kind": "tmp"} },
1216     "output_path": "/tmp",
1217     "priority": 1,
1218     "runtime_constraints": {},
1219     "state": "Locked"
1220 }`, nil, func() int {
1221                 fmt.Fprintf(s.executor.created.Stdout, "workdir=%q", s.executor.created.WorkingDir)
1222                 return 0
1223         })
1224
1225         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
1226         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
1227         c.Check(logFileContent(c, s.runner, "stdout.txt"), Matches, `.*workdir=""`)
1228 }
1229
1230 func (s *TestSuite) TestFullRunSetCwd(c *C) {
1231         s.fullRunHelper(c, `{
1232     "command": ["pwd"],
1233     "container_image": "`+arvadostest.DockerImage112PDH+`",
1234     "cwd": "/bin",
1235     "environment": {},
1236     "mounts": {"/tmp": {"kind": "tmp"} },
1237     "output_path": "/tmp",
1238     "priority": 1,
1239     "runtime_constraints": {},
1240     "state": "Locked"
1241 }`, nil, func() int {
1242                 fmt.Fprintln(s.executor.created.Stdout, s.executor.created.WorkingDir)
1243                 return 0
1244         })
1245
1246         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
1247         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
1248         c.Check(logFileContent(c, s.runner, "stdout.txt"), Matches, ".*/bin\n")
1249 }
1250
1251 func (s *TestSuite) TestFullRunSetOutputStorageClasses(c *C) {
1252         s.fullRunHelper(c, `{
1253     "command": ["pwd"],
1254     "container_image": "`+arvadostest.DockerImage112PDH+`",
1255     "cwd": "/bin",
1256     "environment": {},
1257     "mounts": {"/tmp": {"kind": "tmp"} },
1258     "output_path": "/tmp",
1259     "priority": 1,
1260     "runtime_constraints": {},
1261     "state": "Locked",
1262     "output_storage_classes": ["foo", "bar"]
1263 }`, nil, func() int {
1264                 fmt.Fprintln(s.executor.created.Stdout, s.executor.created.WorkingDir)
1265                 return 0
1266         })
1267
1268         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
1269         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
1270         c.Check(logFileContent(c, s.runner, "stdout.txt"), Matches, ".*/bin\n")
1271         c.Check(s.testDispatcherKeepClient.StorageClasses, DeepEquals, []string{"foo", "bar"})
1272         c.Check(s.testContainerKeepClient.StorageClasses, DeepEquals, []string{"foo", "bar"})
1273 }
1274
1275 func (s *TestSuite) TestEnableCUDADeviceCount(c *C) {
1276         s.fullRunHelper(c, `{
1277     "command": ["pwd"],
1278     "container_image": "`+arvadostest.DockerImage112PDH+`",
1279     "cwd": "/bin",
1280     "environment": {},
1281     "mounts": {"/tmp": {"kind": "tmp"} },
1282     "output_path": "/tmp",
1283     "priority": 1,
1284     "runtime_constraints": {"cuda": {"device_count": 2}},
1285     "state": "Locked",
1286     "output_storage_classes": ["foo", "bar"]
1287 }`, nil, func() int {
1288                 fmt.Fprintln(s.executor.created.Stdout, "ok")
1289                 return 0
1290         })
1291         c.Check(s.executor.created.CUDADeviceCount, Equals, 2)
1292 }
1293
1294 func (s *TestSuite) TestEnableCUDAHardwareCapability(c *C) {
1295         s.fullRunHelper(c, `{
1296     "command": ["pwd"],
1297     "container_image": "`+arvadostest.DockerImage112PDH+`",
1298     "cwd": "/bin",
1299     "environment": {},
1300     "mounts": {"/tmp": {"kind": "tmp"} },
1301     "output_path": "/tmp",
1302     "priority": 1,
1303     "runtime_constraints": {"cuda": {"hardware_capability": "foo"}},
1304     "state": "Locked",
1305     "output_storage_classes": ["foo", "bar"]
1306 }`, nil, func() int {
1307                 fmt.Fprintln(s.executor.created.Stdout, "ok")
1308                 return 0
1309         })
1310         c.Check(s.executor.created.CUDADeviceCount, Equals, 0)
1311 }
1312
1313 func (s *TestSuite) TestStopOnSignal(c *C) {
1314         s.executor.runFunc = func() int {
1315                 s.executor.created.Stdout.Write([]byte("foo\n"))
1316                 s.runner.SigChan <- syscall.SIGINT
1317                 time.Sleep(10 * time.Second)
1318                 return 0
1319         }
1320         s.testStopContainer(c)
1321 }
1322
1323 func (s *TestSuite) TestStopOnArvMountDeath(c *C) {
1324         s.executor.runFunc = func() int {
1325                 s.executor.created.Stdout.Write([]byte("foo\n"))
1326                 s.runner.ArvMountExit <- nil
1327                 close(s.runner.ArvMountExit)
1328                 time.Sleep(10 * time.Second)
1329                 return 0
1330         }
1331         s.runner.ArvMountExit = make(chan error)
1332         s.testStopContainer(c)
1333 }
1334
1335 func (s *TestSuite) testStopContainer(c *C) {
1336         record := `{
1337     "command": ["/bin/sh", "-c", "echo foo && sleep 30 && echo bar"],
1338     "container_image": "` + arvadostest.DockerImage112PDH + `",
1339     "cwd": ".",
1340     "environment": {},
1341     "mounts": {"/tmp": {"kind": "tmp"} },
1342     "output_path": "/tmp",
1343     "priority": 1,
1344     "runtime_constraints": {},
1345     "state": "Locked"
1346 }`
1347
1348         err := json.Unmarshal([]byte(record), &s.api.Container)
1349         c.Assert(err, IsNil)
1350
1351         s.runner.RunArvMount = func([]string, string) (*exec.Cmd, error) { return nil, nil }
1352         s.runner.MkArvClient = func(token string) (IArvadosClient, IKeepClient, *arvados.Client, error) {
1353                 return &ArvTestClient{}, &KeepTestClient{}, nil, nil
1354         }
1355
1356         done := make(chan error)
1357         go func() {
1358                 done <- s.runner.Run()
1359         }()
1360         select {
1361         case <-time.After(20 * time.Second):
1362                 pprof.Lookup("goroutine").WriteTo(os.Stderr, 1)
1363                 c.Fatal("timed out")
1364         case err = <-done:
1365                 c.Check(err, IsNil)
1366         }
1367         dumpAllLogFiles(c, s.runner)
1368
1369         c.Check(s.api.CalledWith("container.log", nil), NotNil)
1370         c.Check(s.api.CalledWith("container.state", "Cancelled"), NotNil)
1371         c.Check(logFileContent(c, s.runner, "stdout.txt"), Matches, "(?ms).*foo\n$")
1372 }
1373
1374 func (s *TestSuite) TestFullRunSetEnv(c *C) {
1375         s.fullRunHelper(c, `{
1376     "command": ["/bin/sh", "-c", "echo $FROBIZ"],
1377     "container_image": "`+arvadostest.DockerImage112PDH+`",
1378     "cwd": "/bin",
1379     "environment": {"FROBIZ": "bilbo"},
1380     "mounts": {"/tmp": {"kind": "tmp"} },
1381     "output_path": "/tmp",
1382     "priority": 1,
1383     "runtime_constraints": {},
1384     "state": "Locked"
1385 }`, nil, func() int {
1386                 fmt.Fprintf(s.executor.created.Stdout, "%v", s.executor.created.Env)
1387                 return 0
1388         })
1389
1390         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
1391         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
1392         c.Check(logFileContent(c, s.runner, "stdout.txt"), Matches, `.*map\[FROBIZ:bilbo\]`)
1393 }
1394
1395 type ArvMountCmdLine struct {
1396         Cmd   []string
1397         token string
1398 }
1399
1400 func (am *ArvMountCmdLine) ArvMountTest(c []string, token string) (*exec.Cmd, error) {
1401         am.Cmd = c
1402         am.token = token
1403         return nil, nil
1404 }
1405
1406 func stubCert(c *C, temp string) string {
1407         path := temp + "/ca-certificates.crt"
1408         err := os.WriteFile(path, []byte{}, 0666)
1409         c.Assert(err, IsNil)
1410         os.Setenv("SSL_CERT_FILE", path)
1411         return path
1412 }
1413
1414 func (s *TestSuite) TestSetupMounts(c *C) {
1415         cr := s.runner
1416         am := &ArvMountCmdLine{}
1417         cr.RunArvMount = am.ArvMountTest
1418         cr.containerClient, _ = apiStub()
1419         cr.ContainerArvClient = &ArvTestClient{}
1420         cr.ContainerKeepClient = &KeepTestClient{}
1421         cr.Container.OutputStorageClasses = []string{"default"}
1422
1423         realTemp := c.MkDir()
1424         certTemp := c.MkDir()
1425         stubCertPath := stubCert(c, certTemp)
1426         cr.parentTemp = realTemp
1427
1428         i := 0
1429         cr.MkTempDir = func(_ string, prefix string) (string, error) {
1430                 i++
1431                 d := fmt.Sprintf("%s/%s%d", realTemp, prefix, i)
1432                 err := os.Mkdir(d, os.ModePerm)
1433                 if err != nil && strings.Contains(err.Error(), ": file exists") {
1434                         // Test case must have pre-populated the tempdir
1435                         err = nil
1436                 }
1437                 return d, err
1438         }
1439
1440         checkEmpty := func() {
1441                 // Should be deleted.
1442                 _, err := os.Stat(realTemp)
1443                 c.Assert(os.IsNotExist(err), Equals, true)
1444
1445                 // Now recreate it for the next test.
1446                 c.Assert(os.Mkdir(realTemp, 0777), IsNil)
1447         }
1448
1449         {
1450                 i = 0
1451                 cr.ArvMountPoint = ""
1452                 cr.Container.Mounts = make(map[string]arvados.Mount)
1453                 cr.Container.Mounts["/tmp"] = arvados.Mount{Kind: "tmp"}
1454                 cr.Container.OutputPath = "/tmp"
1455                 cr.statInterval = 5 * time.Second
1456                 bindmounts, err := cr.SetupMounts()
1457                 c.Check(err, IsNil)
1458                 c.Check(am.Cmd, DeepEquals, []string{"arv-mount", "--foreground",
1459                         "--read-write", "--storage-classes", "default", "--crunchstat-interval=5",
1460                         "--mount-by-pdh", "by_id", "--disable-event-listening", "--mount-by-id", "by_uuid", realTemp + "/keep1"})
1461                 c.Check(bindmounts, DeepEquals, map[string]bindmount{"/tmp": {realTemp + "/tmp2", false}})
1462                 os.RemoveAll(cr.ArvMountPoint)
1463                 cr.CleanupDirs()
1464                 checkEmpty()
1465         }
1466
1467         {
1468                 i = 0
1469                 cr.ArvMountPoint = ""
1470                 cr.Container.Mounts = make(map[string]arvados.Mount)
1471                 cr.Container.Mounts["/out"] = arvados.Mount{Kind: "tmp"}
1472                 cr.Container.Mounts["/tmp"] = arvados.Mount{Kind: "tmp"}
1473                 cr.Container.OutputPath = "/out"
1474                 cr.Container.OutputStorageClasses = []string{"foo", "bar"}
1475
1476                 bindmounts, err := cr.SetupMounts()
1477                 c.Check(err, IsNil)
1478                 c.Check(am.Cmd, DeepEquals, []string{"arv-mount", "--foreground",
1479                         "--read-write", "--storage-classes", "foo,bar", "--crunchstat-interval=5",
1480                         "--mount-by-pdh", "by_id", "--disable-event-listening", "--mount-by-id", "by_uuid", realTemp + "/keep1"})
1481                 c.Check(bindmounts, DeepEquals, map[string]bindmount{"/out": {realTemp + "/tmp2", false}, "/tmp": {realTemp + "/tmp3", false}})
1482                 os.RemoveAll(cr.ArvMountPoint)
1483                 cr.CleanupDirs()
1484                 checkEmpty()
1485         }
1486
1487         {
1488                 i = 0
1489                 cr.ArvMountPoint = ""
1490                 cr.Container.Mounts = make(map[string]arvados.Mount)
1491                 cr.Container.Mounts["/tmp"] = arvados.Mount{Kind: "tmp"}
1492                 cr.Container.OutputPath = "/tmp"
1493                 cr.Container.RuntimeConstraints.API = true
1494                 cr.Container.OutputStorageClasses = []string{"default"}
1495
1496                 bindmounts, err := cr.SetupMounts()
1497                 c.Check(err, IsNil)
1498                 c.Check(am.Cmd, DeepEquals, []string{"arv-mount", "--foreground",
1499                         "--read-write", "--storage-classes", "default", "--crunchstat-interval=5",
1500                         "--mount-by-pdh", "by_id", "--disable-event-listening", "--mount-by-id", "by_uuid", realTemp + "/keep1"})
1501                 c.Check(bindmounts, DeepEquals, map[string]bindmount{"/tmp": {realTemp + "/tmp2", false}, "/etc/arvados/ca-certificates.crt": {stubCertPath, true}})
1502                 os.RemoveAll(cr.ArvMountPoint)
1503                 cr.CleanupDirs()
1504                 checkEmpty()
1505
1506                 cr.Container.RuntimeConstraints.API = false
1507         }
1508
1509         {
1510                 i = 0
1511                 cr.ArvMountPoint = ""
1512                 cr.Container.Mounts = map[string]arvados.Mount{
1513                         "/keeptmp": {Kind: "collection", Writable: true},
1514                 }
1515                 cr.Container.OutputPath = "/keeptmp"
1516
1517                 os.MkdirAll(realTemp+"/keep1/tmp0", os.ModePerm)
1518
1519                 bindmounts, err := cr.SetupMounts()
1520                 c.Check(err, IsNil)
1521                 c.Check(am.Cmd, DeepEquals, []string{"arv-mount", "--foreground",
1522                         "--read-write", "--storage-classes", "default", "--crunchstat-interval=5",
1523                         "--mount-tmp", "tmp0", "--mount-by-pdh", "by_id", "--disable-event-listening", "--mount-by-id", "by_uuid", realTemp + "/keep1"})
1524                 c.Check(bindmounts, DeepEquals, map[string]bindmount{"/keeptmp": {realTemp + "/keep1/tmp0", false}})
1525                 os.RemoveAll(cr.ArvMountPoint)
1526                 cr.CleanupDirs()
1527                 checkEmpty()
1528         }
1529
1530         {
1531                 i = 0
1532                 cr.ArvMountPoint = ""
1533                 cr.Container.Mounts = map[string]arvados.Mount{
1534                         "/keepinp": {Kind: "collection", PortableDataHash: "59389a8f9ee9d399be35462a0f92541c+53"},
1535                         "/keepout": {Kind: "collection", Writable: true},
1536                 }
1537                 cr.Container.OutputPath = "/keepout"
1538
1539                 os.MkdirAll(realTemp+"/keep1/by_id/59389a8f9ee9d399be35462a0f92541c+53", os.ModePerm)
1540                 os.MkdirAll(realTemp+"/keep1/tmp0", os.ModePerm)
1541
1542                 bindmounts, err := cr.SetupMounts()
1543                 c.Check(err, IsNil)
1544                 c.Check(am.Cmd, DeepEquals, []string{"arv-mount", "--foreground",
1545                         "--read-write", "--storage-classes", "default", "--crunchstat-interval=5",
1546                         "--mount-tmp", "tmp0", "--mount-by-pdh", "by_id", "--disable-event-listening", "--mount-by-id", "by_uuid", realTemp + "/keep1"})
1547                 c.Check(bindmounts, DeepEquals, map[string]bindmount{
1548                         "/keepinp": {realTemp + "/keep1/by_id/59389a8f9ee9d399be35462a0f92541c+53", true},
1549                         "/keepout": {realTemp + "/keep1/tmp0", false},
1550                 })
1551                 os.RemoveAll(cr.ArvMountPoint)
1552                 cr.CleanupDirs()
1553                 checkEmpty()
1554         }
1555
1556         {
1557                 i = 0
1558                 cr.ArvMountPoint = ""
1559                 cr.Container.RuntimeConstraints.KeepCacheRAM = 512
1560                 cr.Container.Mounts = map[string]arvados.Mount{
1561                         "/keepinp": {Kind: "collection", PortableDataHash: "59389a8f9ee9d399be35462a0f92541c+53"},
1562                         "/keepout": {Kind: "collection", Writable: true},
1563                 }
1564                 cr.Container.OutputPath = "/keepout"
1565
1566                 os.MkdirAll(realTemp+"/keep1/by_id/59389a8f9ee9d399be35462a0f92541c+53", os.ModePerm)
1567                 os.MkdirAll(realTemp+"/keep1/tmp0", os.ModePerm)
1568
1569                 bindmounts, err := cr.SetupMounts()
1570                 c.Check(err, IsNil)
1571                 c.Check(am.Cmd, DeepEquals, []string{"arv-mount", "--foreground",
1572                         "--read-write", "--storage-classes", "default", "--crunchstat-interval=5", "--ram-cache",
1573                         "--file-cache", "512", "--mount-tmp", "tmp0", "--mount-by-pdh", "by_id", "--disable-event-listening", "--mount-by-id", "by_uuid", realTemp + "/keep1"})
1574                 c.Check(bindmounts, DeepEquals, map[string]bindmount{
1575                         "/keepinp": {realTemp + "/keep1/by_id/59389a8f9ee9d399be35462a0f92541c+53", true},
1576                         "/keepout": {realTemp + "/keep1/tmp0", false},
1577                 })
1578                 os.RemoveAll(cr.ArvMountPoint)
1579                 cr.CleanupDirs()
1580                 checkEmpty()
1581         }
1582
1583         for _, test := range []struct {
1584                 in  interface{}
1585                 out string
1586         }{
1587                 {in: "foo", out: `"foo"`},
1588                 {in: nil, out: `null`},
1589                 {in: map[string]int64{"foo": 123456789123456789}, out: `{"foo":123456789123456789}`},
1590         } {
1591                 i = 0
1592                 cr.ArvMountPoint = ""
1593                 cr.Container.Mounts = map[string]arvados.Mount{
1594                         "/mnt/test.json": {Kind: "json", Content: test.in},
1595                 }
1596                 bindmounts, err := cr.SetupMounts()
1597                 c.Check(err, IsNil)
1598                 c.Check(bindmounts, DeepEquals, map[string]bindmount{
1599                         "/mnt/test.json": {realTemp + "/json2/mountdata.json", true},
1600                 })
1601                 content, err := ioutil.ReadFile(realTemp + "/json2/mountdata.json")
1602                 c.Check(err, IsNil)
1603                 c.Check(content, DeepEquals, []byte(test.out))
1604                 os.RemoveAll(cr.ArvMountPoint)
1605                 cr.CleanupDirs()
1606                 checkEmpty()
1607         }
1608
1609         for _, test := range []struct {
1610                 in  interface{}
1611                 out string
1612         }{
1613                 {in: "foo", out: `foo`},
1614                 {in: nil, out: "error"},
1615                 {in: map[string]int64{"foo": 123456789123456789}, out: "error"},
1616         } {
1617                 i = 0
1618                 cr.ArvMountPoint = ""
1619                 cr.Container.Mounts = map[string]arvados.Mount{
1620                         "/mnt/test.txt": {Kind: "text", Content: test.in},
1621                 }
1622                 bindmounts, err := cr.SetupMounts()
1623                 if test.out == "error" {
1624                         c.Check(err.Error(), Equals, "content for mount \"/mnt/test.txt\" must be a string")
1625                 } else {
1626                         c.Check(err, IsNil)
1627                         c.Check(bindmounts, DeepEquals, map[string]bindmount{
1628                                 "/mnt/test.txt": {realTemp + "/text2/mountdata.text", true},
1629                         })
1630                         content, err := ioutil.ReadFile(realTemp + "/text2/mountdata.text")
1631                         c.Check(err, IsNil)
1632                         c.Check(content, DeepEquals, []byte(test.out))
1633                 }
1634                 os.RemoveAll(cr.ArvMountPoint)
1635                 cr.CleanupDirs()
1636                 checkEmpty()
1637         }
1638
1639         // Read-only mount points are allowed underneath output_dir mount point
1640         {
1641                 i = 0
1642                 cr.ArvMountPoint = ""
1643                 cr.Container.Mounts = make(map[string]arvados.Mount)
1644                 cr.Container.Mounts = map[string]arvados.Mount{
1645                         "/tmp":     {Kind: "tmp"},
1646                         "/tmp/foo": {Kind: "collection"},
1647                 }
1648                 cr.Container.OutputPath = "/tmp"
1649
1650                 os.MkdirAll(realTemp+"/keep1/tmp0", os.ModePerm)
1651
1652                 bindmounts, err := cr.SetupMounts()
1653                 c.Check(err, IsNil)
1654                 c.Check(am.Cmd, DeepEquals, []string{"arv-mount", "--foreground",
1655                         "--read-write", "--storage-classes", "default", "--crunchstat-interval=5", "--ram-cache",
1656                         "--file-cache", "512", "--mount-tmp", "tmp0", "--mount-by-pdh", "by_id", "--disable-event-listening", "--mount-by-id", "by_uuid", realTemp + "/keep1"})
1657                 c.Check(bindmounts, DeepEquals, map[string]bindmount{
1658                         "/tmp":     {realTemp + "/tmp2", false},
1659                         "/tmp/foo": {realTemp + "/keep1/tmp0", true},
1660                 })
1661                 os.RemoveAll(cr.ArvMountPoint)
1662                 cr.CleanupDirs()
1663                 checkEmpty()
1664         }
1665
1666         // Writable mount points copied to output_dir mount point
1667         {
1668                 i = 0
1669                 cr.ArvMountPoint = ""
1670                 cr.Container.Mounts = make(map[string]arvados.Mount)
1671                 cr.Container.Mounts = map[string]arvados.Mount{
1672                         "/tmp": {Kind: "tmp"},
1673                         "/tmp/foo": {Kind: "collection",
1674                                 PortableDataHash: "59389a8f9ee9d399be35462a0f92541c+53",
1675                                 Writable:         true},
1676                         "/tmp/bar": {Kind: "collection",
1677                                 PortableDataHash: "59389a8f9ee9d399be35462a0f92541d+53",
1678                                 Path:             "baz",
1679                                 Writable:         true},
1680                 }
1681                 cr.Container.OutputPath = "/tmp"
1682
1683                 os.MkdirAll(realTemp+"/keep1/by_id/59389a8f9ee9d399be35462a0f92541c+53", os.ModePerm)
1684                 os.MkdirAll(realTemp+"/keep1/by_id/59389a8f9ee9d399be35462a0f92541d+53/baz", os.ModePerm)
1685
1686                 rf, _ := os.Create(realTemp + "/keep1/by_id/59389a8f9ee9d399be35462a0f92541d+53/baz/quux")
1687                 rf.Write([]byte("bar"))
1688                 rf.Close()
1689
1690                 _, err := cr.SetupMounts()
1691                 c.Check(err, IsNil)
1692                 _, err = os.Stat(cr.HostOutputDir + "/foo")
1693                 c.Check(err, IsNil)
1694                 _, err = os.Stat(cr.HostOutputDir + "/bar/quux")
1695                 c.Check(err, IsNil)
1696                 os.RemoveAll(cr.ArvMountPoint)
1697                 cr.CleanupDirs()
1698                 checkEmpty()
1699         }
1700
1701         // Only mount points of kind 'collection' are allowed underneath output_dir mount point
1702         {
1703                 i = 0
1704                 cr.ArvMountPoint = ""
1705                 cr.Container.Mounts = make(map[string]arvados.Mount)
1706                 cr.Container.Mounts = map[string]arvados.Mount{
1707                         "/tmp":     {Kind: "tmp"},
1708                         "/tmp/foo": {Kind: "tmp"},
1709                 }
1710                 cr.Container.OutputPath = "/tmp"
1711
1712                 _, err := cr.SetupMounts()
1713                 c.Check(err, NotNil)
1714                 c.Check(err, ErrorMatches, `only mount points of kind 'collection', 'text' or 'json' are supported underneath the output_path.*`)
1715                 os.RemoveAll(cr.ArvMountPoint)
1716                 cr.CleanupDirs()
1717                 checkEmpty()
1718         }
1719
1720         // Only mount point of kind 'collection' is allowed for stdin
1721         {
1722                 i = 0
1723                 cr.ArvMountPoint = ""
1724                 cr.Container.Mounts = make(map[string]arvados.Mount)
1725                 cr.Container.Mounts = map[string]arvados.Mount{
1726                         "stdin": {Kind: "tmp"},
1727                 }
1728
1729                 _, err := cr.SetupMounts()
1730                 c.Check(err, NotNil)
1731                 c.Check(err, ErrorMatches, `unsupported mount kind 'tmp' for stdin.*`)
1732                 os.RemoveAll(cr.ArvMountPoint)
1733                 cr.CleanupDirs()
1734                 checkEmpty()
1735         }
1736 }
1737
1738 func (s *TestSuite) TestStdout(c *C) {
1739         helperRecord := `{
1740                 "command": ["/bin/sh", "-c", "echo $FROBIZ"],
1741                 "container_image": "` + arvadostest.DockerImage112PDH + `",
1742                 "cwd": "/bin",
1743                 "environment": {"FROBIZ": "bilbo"},
1744                 "mounts": {"/tmp": {"kind": "tmp"}, "stdout": {"kind": "file", "path": "/tmp/a/b/c.out"} },
1745                 "output_path": "/tmp",
1746                 "priority": 1,
1747                 "runtime_constraints": {},
1748                 "state": "Locked"
1749         }`
1750
1751         s.fullRunHelper(c, helperRecord, nil, func() int {
1752                 fmt.Fprintln(s.executor.created.Stdout, s.executor.created.Env["FROBIZ"])
1753                 return 0
1754         })
1755
1756         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
1757         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
1758         c.Check(s.runner.ContainerArvClient.(*ArvTestClient).CalledWith("collection.manifest_text", "./a/b 307372fa8fd5c146b22ae7a45b49bc31+6 0:6:c.out\n"), NotNil)
1759 }
1760
1761 // Used by the TestStdoutWithWrongPath*()
1762 func (s *TestSuite) stdoutErrorRunHelper(c *C, record string, fn func() int) (*ArvTestClient, *ContainerRunner, error) {
1763         err := json.Unmarshal([]byte(record), &s.api.Container)
1764         c.Assert(err, IsNil)
1765         s.executor.runFunc = fn
1766         s.runner.RunArvMount = (&ArvMountCmdLine{}).ArvMountTest
1767         s.runner.MkArvClient = func(token string) (IArvadosClient, IKeepClient, *arvados.Client, error) {
1768                 return s.api, &KeepTestClient{}, nil, nil
1769         }
1770         return s.api, s.runner, s.runner.Run()
1771 }
1772
1773 func (s *TestSuite) TestStdoutWithWrongPath(c *C) {
1774         _, _, err := s.stdoutErrorRunHelper(c, `{
1775     "mounts": {"/tmp": {"kind": "tmp"}, "stdout": {"kind": "file", "path":"/tmpa.out"} },
1776     "output_path": "/tmp",
1777     "state": "Locked"
1778 }`, func() int { return 0 })
1779         c.Check(err, ErrorMatches, ".*Stdout path does not start with OutputPath.*")
1780 }
1781
1782 func (s *TestSuite) TestStdoutWithWrongKindTmp(c *C) {
1783         _, _, err := s.stdoutErrorRunHelper(c, `{
1784     "mounts": {"/tmp": {"kind": "tmp"}, "stdout": {"kind": "tmp", "path":"/tmp/a.out"} },
1785     "output_path": "/tmp",
1786     "state": "Locked"
1787 }`, func() int { return 0 })
1788         c.Check(err, ErrorMatches, ".*unsupported mount kind 'tmp' for stdout.*")
1789 }
1790
1791 func (s *TestSuite) TestStdoutWithWrongKindCollection(c *C) {
1792         _, _, err := s.stdoutErrorRunHelper(c, `{
1793     "mounts": {"/tmp": {"kind": "tmp"}, "stdout": {"kind": "collection", "path":"/tmp/a.out"} },
1794     "output_path": "/tmp",
1795     "state": "Locked"
1796 }`, func() int { return 0 })
1797         c.Check(err, ErrorMatches, ".*unsupported mount kind 'collection' for stdout.*")
1798 }
1799
1800 func (s *TestSuite) TestFullRunWithAPI(c *C) {
1801         s.fullRunHelper(c, `{
1802     "command": ["/bin/sh", "-c", "true $ARVADOS_API_HOST"],
1803     "container_image": "`+arvadostest.DockerImage112PDH+`",
1804     "cwd": "/bin",
1805     "environment": {},
1806     "mounts": {"/tmp": {"kind": "tmp"} },
1807     "output_path": "/tmp",
1808     "priority": 1,
1809     "runtime_constraints": {"API": true},
1810     "state": "Locked"
1811 }`, nil, func() int {
1812                 c.Check(s.executor.created.Env["ARVADOS_API_HOST"], Equals, os.Getenv("ARVADOS_API_HOST"))
1813                 return 3
1814         })
1815         c.Check(s.api.CalledWith("container.exit_code", 3), NotNil)
1816         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
1817         c.Check(logFileContent(c, s.runner, "crunch-run.txt"), Matches, `(?ms).*status code 3\n.*`)
1818 }
1819
1820 func (s *TestSuite) TestFullRunSetOutput(c *C) {
1821         defer os.Setenv("ARVADOS_API_HOST", os.Getenv("ARVADOS_API_HOST"))
1822         os.Setenv("ARVADOS_API_HOST", "test.arvados.org")
1823         s.fullRunHelper(c, `{
1824     "command": ["/bin/sh", "-c", "echo $ARVADOS_API_HOST"],
1825     "container_image": "`+arvadostest.DockerImage112PDH+`",
1826     "cwd": "/bin",
1827     "environment": {},
1828     "mounts": {"/tmp": {"kind": "tmp"} },
1829     "output_path": "/tmp",
1830     "priority": 1,
1831     "runtime_constraints": {"API": true},
1832     "state": "Locked"
1833 }`, nil, func() int {
1834                 s.api.Container.Output = arvadostest.DockerImage112PDH
1835                 return 0
1836         })
1837
1838         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
1839         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
1840         c.Check(s.api.CalledWith("container.output", arvadostest.DockerImage112PDH), NotNil)
1841 }
1842
1843 func (s *TestSuite) TestArvMountRuntimeStatusWarning(c *C) {
1844         s.runner.RunArvMount = func([]string, string) (*exec.Cmd, error) {
1845                 os.Mkdir(s.runner.ArvMountPoint+"/by_id", 0666)
1846                 ioutil.WriteFile(s.runner.ArvMountPoint+"/by_id/README", nil, 0666)
1847                 return s.runner.ArvMountCmd([]string{"bash", "-c", "echo >&2 Test: Keep write error: I am a teapot; sleep 3"}, "")
1848         }
1849         s.executor.runFunc = func() int {
1850                 time.Sleep(time.Second)
1851                 return 137
1852         }
1853         record := `{
1854     "command": ["sleep", "1"],
1855     "container_image": "` + arvadostest.DockerImage112PDH + `",
1856     "cwd": "/bin",
1857     "environment": {},
1858     "mounts": {"/tmp": {"kind": "tmp"} },
1859     "output_path": "/tmp",
1860     "priority": 1,
1861     "runtime_constraints": {"API": true},
1862     "state": "Locked"
1863 }`
1864         err := json.Unmarshal([]byte(record), &s.api.Container)
1865         c.Assert(err, IsNil)
1866         err = s.runner.Run()
1867         c.Assert(err, IsNil)
1868         c.Check(s.api.CalledWith("container.exit_code", 137), NotNil)
1869         c.Check(s.api.CalledWith("container.runtime_status.warning", "arv-mount: Keep write error"), NotNil)
1870         c.Check(s.api.CalledWith("container.runtime_status.warningDetail", "Test: Keep write error: I am a teapot"), NotNil)
1871         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
1872         c.Check(logFileContent(c, s.runner, "crunch-run.txt"), Matches, `(?ms).*Container exited with status code 137 \(signal 9, SIGKILL\).*`)
1873 }
1874
1875 func (s *TestSuite) TestStdoutWithExcludeFromOutputMountPointUnderOutputDir(c *C) {
1876         helperRecord := `{
1877                 "command": ["/bin/sh", "-c", "echo $FROBIZ"],
1878                 "container_image": "` + arvadostest.DockerImage112PDH + `",
1879                 "cwd": "/bin",
1880                 "environment": {"FROBIZ": "bilbo"},
1881                 "mounts": {
1882         "/tmp": {"kind": "tmp"},
1883         "/tmp/foo": {"kind": "collection",
1884                      "portable_data_hash": "a3e8f74c6f101eae01fa08bfb4e49b3a+54",
1885                      "exclude_from_output": true
1886         },
1887         "stdout": {"kind": "file", "path": "/tmp/a/b/c.out"}
1888     },
1889                 "output_path": "/tmp",
1890                 "priority": 1,
1891                 "runtime_constraints": {},
1892                 "state": "Locked"
1893         }`
1894
1895         extraMounts := []string{"a3e8f74c6f101eae01fa08bfb4e49b3a+54"}
1896
1897         s.fullRunHelper(c, helperRecord, extraMounts, func() int {
1898                 fmt.Fprintln(s.executor.created.Stdout, s.executor.created.Env["FROBIZ"])
1899                 return 0
1900         })
1901
1902         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
1903         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
1904         c.Check(s.runner.ContainerArvClient.(*ArvTestClient).CalledWith("collection.manifest_text", "./a/b 307372fa8fd5c146b22ae7a45b49bc31+6 0:6:c.out\n"), NotNil)
1905 }
1906
1907 func (s *TestSuite) TestStdoutWithMultipleMountPointsUnderOutputDir(c *C) {
1908         helperRecord := `{
1909                 "command": ["/bin/sh", "-c", "echo $FROBIZ"],
1910                 "container_image": "` + arvadostest.DockerImage112PDH + `",
1911                 "cwd": "/bin",
1912                 "environment": {"FROBIZ": "bilbo"},
1913                 "mounts": {
1914         "/tmp": {"kind": "tmp"},
1915         "/tmp/foo/bar": {"kind": "collection", "portable_data_hash": "a0def87f80dd594d4675809e83bd4f15+367", "path":"/file2_in_main.txt"},
1916         "/tmp/foo/sub1": {"kind": "collection", "portable_data_hash": "a0def87f80dd594d4675809e83bd4f15+367", "path":"/subdir1"},
1917         "/tmp/foo/sub1file2": {"kind": "collection", "portable_data_hash": "a0def87f80dd594d4675809e83bd4f15+367", "path":"/subdir1/file2_in_subdir1.txt"},
1918         "/tmp/foo/baz/sub2file2": {"kind": "collection", "portable_data_hash": "a0def87f80dd594d4675809e83bd4f15+367", "path":"/subdir1/subdir2/file2_in_subdir2.txt"},
1919         "stdout": {"kind": "file", "path": "/tmp/a/b/c.out"}
1920     },
1921                 "output_path": "/tmp",
1922                 "priority": 1,
1923                 "runtime_constraints": {},
1924                 "state": "Locked",
1925                 "uuid": "zzzzz-dz642-202301130848001"
1926         }`
1927
1928         extraMounts := []string{
1929                 "a0def87f80dd594d4675809e83bd4f15+367/file2_in_main.txt",
1930                 "a0def87f80dd594d4675809e83bd4f15+367/subdir1/file2_in_subdir1.txt",
1931                 "a0def87f80dd594d4675809e83bd4f15+367/subdir1/subdir2/file2_in_subdir2.txt",
1932         }
1933
1934         api, _, realtemp := s.fullRunHelper(c, helperRecord, extraMounts, func() int {
1935                 fmt.Fprintln(s.executor.created.Stdout, s.executor.created.Env["FROBIZ"])
1936                 return 0
1937         })
1938
1939         c.Check(s.executor.created.BindMounts, DeepEquals, map[string]bindmount{
1940                 "/tmp":                   {realtemp + "/tmp1", false},
1941                 "/tmp/foo/bar":           {s.keepmount + "/by_id/a0def87f80dd594d4675809e83bd4f15+367/file2_in_main.txt", true},
1942                 "/tmp/foo/baz/sub2file2": {s.keepmount + "/by_id/a0def87f80dd594d4675809e83bd4f15+367/subdir1/subdir2/file2_in_subdir2.txt", true},
1943                 "/tmp/foo/sub1":          {s.keepmount + "/by_id/a0def87f80dd594d4675809e83bd4f15+367/subdir1", true},
1944                 "/tmp/foo/sub1file2":     {s.keepmount + "/by_id/a0def87f80dd594d4675809e83bd4f15+367/subdir1/file2_in_subdir1.txt", true},
1945         })
1946
1947         c.Check(api.CalledWith("container.exit_code", 0), NotNil)
1948         c.Check(api.CalledWith("container.state", "Complete"), NotNil)
1949         output_count := uint(0)
1950         for _, v := range s.runner.ContainerArvClient.(*ArvTestClient).Content {
1951                 if v["collection"] == nil {
1952                         continue
1953                 }
1954                 collection := v["collection"].(arvadosclient.Dict)
1955                 if collection["name"].(string) != "output for zzzzz-dz642-202301130848001" {
1956                         continue
1957                 }
1958                 c.Check(v["ensure_unique_name"], Equals, true)
1959                 c.Check(collection["manifest_text"].(string), Equals, `./a/b 307372fa8fd5c146b22ae7a45b49bc31+6 0:6:c.out
1960 ./foo 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396c73c0abcdefgh11234567890@569fa8c3 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396cabcdefghij6419876543234@569fa8c4 9:18:bar 36:18:sub1file2
1961 ./foo/baz 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396c73c0bcdefghijk544332211@569fa8c5 9:18:sub2file2
1962 ./foo/sub1 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396cabcdefghij6419876543234@569fa8c4 0:9:file1_in_subdir1.txt 9:18:file2_in_subdir1.txt
1963 ./foo/sub1/subdir2 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396c73c0bcdefghijk544332211@569fa8c5 0:9:file1_in_subdir2.txt 9:18:file2_in_subdir2.txt
1964 `)
1965                 output_count++
1966         }
1967         c.Check(output_count, Not(Equals), uint(0))
1968 }
1969
1970 func (s *TestSuite) TestStdoutWithMountPointsUnderOutputDirDenormalizedManifest(c *C) {
1971         helperRecord := `{
1972                 "command": ["/bin/sh", "-c", "echo $FROBIZ"],
1973                 "container_image": "` + arvadostest.DockerImage112PDH + `",
1974                 "cwd": "/bin",
1975                 "environment": {"FROBIZ": "bilbo"},
1976                 "mounts": {
1977         "/tmp": {"kind": "tmp"},
1978         "/tmp/foo/bar": {"kind": "collection", "portable_data_hash": "b0def87f80dd594d4675809e83bd4f15+367", "path": "/subdir1/file2_in_subdir1.txt"},
1979         "stdout": {"kind": "file", "path": "/tmp/a/b/c.out"}
1980     },
1981                 "output_path": "/tmp",
1982                 "priority": 1,
1983                 "runtime_constraints": {},
1984                 "state": "Locked",
1985                 "uuid": "zzzzz-dz642-202301130848002"
1986         }`
1987
1988         extraMounts := []string{
1989                 "b0def87f80dd594d4675809e83bd4f15+367/subdir1/file2_in_subdir1.txt",
1990         }
1991
1992         s.fullRunHelper(c, helperRecord, extraMounts, func() int {
1993                 fmt.Fprintln(s.executor.created.Stdout, s.executor.created.Env["FROBIZ"])
1994                 return 0
1995         })
1996
1997         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
1998         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
1999         output_count := uint(0)
2000         for _, v := range s.runner.ContainerArvClient.(*ArvTestClient).Content {
2001                 if v["collection"] == nil {
2002                         continue
2003                 }
2004                 collection := v["collection"].(arvadosclient.Dict)
2005                 if collection["name"].(string) != "output for zzzzz-dz642-202301130848002" {
2006                         continue
2007                 }
2008                 c.Check(collection["manifest_text"].(string), Equals, `./a/b 307372fa8fd5c146b22ae7a45b49bc31+6 0:6:c.out
2009 ./foo 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396c73c0abcdefgh11234567890@569fa8c3 10:17:bar
2010 `)
2011                 output_count++
2012         }
2013         c.Check(output_count, Not(Equals), uint(0))
2014 }
2015
2016 func (s *TestSuite) TestOutputError(c *C) {
2017         helperRecord := `{
2018                 "command": ["/bin/sh", "-c", "echo $FROBIZ"],
2019                 "container_image": "` + arvadostest.DockerImage112PDH + `",
2020                 "cwd": "/bin",
2021                 "environment": {"FROBIZ": "bilbo"},
2022                 "mounts": {
2023                         "/tmp": {"kind": "tmp"}
2024                 },
2025                 "output_path": "/tmp",
2026                 "priority": 1,
2027                 "runtime_constraints": {},
2028                 "state": "Locked"
2029         }`
2030         s.fullRunHelper(c, helperRecord, nil, func() int {
2031                 os.Symlink("/etc/hosts", s.runner.HostOutputDir+"/baz")
2032                 return 0
2033         })
2034
2035         c.Check(s.api.CalledWith("container.state", "Cancelled"), NotNil)
2036 }
2037
2038 func (s *TestSuite) TestStdinCollectionMountPoint(c *C) {
2039         helperRecord := `{
2040                 "command": ["/bin/sh", "-c", "echo $FROBIZ"],
2041                 "container_image": "` + arvadostest.DockerImage112PDH + `",
2042                 "cwd": "/bin",
2043                 "environment": {"FROBIZ": "bilbo"},
2044                 "mounts": {
2045         "/tmp": {"kind": "tmp"},
2046         "stdin": {"kind": "collection", "portable_data_hash": "b0def87f80dd594d4675809e83bd4f15+367", "path": "/file1_in_main.txt"},
2047         "stdout": {"kind": "file", "path": "/tmp/a/b/c.out"}
2048     },
2049                 "output_path": "/tmp",
2050                 "priority": 1,
2051                 "runtime_constraints": {},
2052                 "state": "Locked"
2053         }`
2054
2055         extraMounts := []string{
2056                 "b0def87f80dd594d4675809e83bd4f15+367/file1_in_main.txt",
2057         }
2058
2059         api, _, _ := s.fullRunHelper(c, helperRecord, extraMounts, func() int {
2060                 fmt.Fprintln(s.executor.created.Stdout, s.executor.created.Env["FROBIZ"])
2061                 return 0
2062         })
2063
2064         c.Check(api.CalledWith("container.exit_code", 0), NotNil)
2065         c.Check(api.CalledWith("container.state", "Complete"), NotNil)
2066         for _, v := range api.Content {
2067                 if v["collection"] != nil {
2068                         collection := v["collection"].(arvadosclient.Dict)
2069                         if strings.Index(collection["name"].(string), "output") == 0 {
2070                                 manifest := collection["manifest_text"].(string)
2071                                 c.Check(manifest, Equals, `./a/b 307372fa8fd5c146b22ae7a45b49bc31+6 0:6:c.out
2072 `)
2073                         }
2074                 }
2075         }
2076 }
2077
2078 func (s *TestSuite) TestStdinJsonMountPoint(c *C) {
2079         helperRecord := `{
2080                 "command": ["/bin/sh", "-c", "echo $FROBIZ"],
2081                 "container_image": "` + arvadostest.DockerImage112PDH + `",
2082                 "cwd": "/bin",
2083                 "environment": {"FROBIZ": "bilbo"},
2084                 "mounts": {
2085         "/tmp": {"kind": "tmp"},
2086         "stdin": {"kind": "json", "content": "foo"},
2087         "stdout": {"kind": "file", "path": "/tmp/a/b/c.out"}
2088     },
2089                 "output_path": "/tmp",
2090                 "priority": 1,
2091                 "runtime_constraints": {},
2092                 "state": "Locked"
2093         }`
2094
2095         api, _, _ := s.fullRunHelper(c, helperRecord, nil, func() int {
2096                 fmt.Fprintln(s.executor.created.Stdout, s.executor.created.Env["FROBIZ"])
2097                 return 0
2098         })
2099
2100         c.Check(api.CalledWith("container.exit_code", 0), NotNil)
2101         c.Check(api.CalledWith("container.state", "Complete"), NotNil)
2102         for _, v := range api.Content {
2103                 if v["collection"] != nil {
2104                         collection := v["collection"].(arvadosclient.Dict)
2105                         if strings.Index(collection["name"].(string), "output") == 0 {
2106                                 manifest := collection["manifest_text"].(string)
2107                                 c.Check(manifest, Equals, `./a/b 307372fa8fd5c146b22ae7a45b49bc31+6 0:6:c.out
2108 `)
2109                         }
2110                 }
2111         }
2112 }
2113
2114 func (s *TestSuite) TestStderrMount(c *C) {
2115         api, cr, _ := s.fullRunHelper(c, `{
2116     "command": ["/bin/sh", "-c", "echo hello;exit 1"],
2117     "container_image": "`+arvadostest.DockerImage112PDH+`",
2118     "cwd": ".",
2119     "environment": {},
2120     "mounts": {"/tmp": {"kind": "tmp"},
2121                "stdout": {"kind": "file", "path": "/tmp/a/out.txt"},
2122                "stderr": {"kind": "file", "path": "/tmp/b/err.txt"}},
2123     "output_path": "/tmp",
2124     "priority": 1,
2125     "runtime_constraints": {},
2126     "state": "Locked"
2127 }`, nil, func() int {
2128                 fmt.Fprintln(s.executor.created.Stdout, "hello")
2129                 fmt.Fprintln(s.executor.created.Stderr, "oops")
2130                 return 1
2131         })
2132
2133         final := api.CalledWith("container.state", "Complete")
2134         c.Assert(final, NotNil)
2135         c.Check(final["container"].(arvadosclient.Dict)["exit_code"], Equals, 1)
2136         c.Check(final["container"].(arvadosclient.Dict)["log"], NotNil)
2137
2138         c.Check(cr.ContainerArvClient.(*ArvTestClient).CalledWith("collection.manifest_text", "./a b1946ac92492d2347c6235b4d2611184+6 0:6:out.txt\n./b 38af5c54926b620264ab1501150cf189+5 0:5:err.txt\n"), NotNil)
2139 }
2140
2141 func (s *TestSuite) TestNumberRoundTrip(c *C) {
2142         s.api.callraw = true
2143         err := s.runner.fetchContainerRecord()
2144         c.Assert(err, IsNil)
2145         jsondata, err := json.Marshal(s.runner.Container.Mounts["/json"].Content)
2146         c.Logf("%#v", s.runner.Container)
2147         c.Check(err, IsNil)
2148         c.Check(string(jsondata), Equals, `{"number":123456789123456789}`)
2149 }
2150
2151 func (s *TestSuite) TestFullBrokenDocker(c *C) {
2152         nextState := ""
2153         for _, setup := range []func(){
2154                 func() {
2155                         c.Log("// waitErr = ocl runtime error")
2156                         s.executor.waitErr = errors.New(`Error response from daemon: oci runtime error: container_linux.go:247: starting container process caused "process_linux.go:359: container init caused \"rootfs_linux.go:54: mounting \\\"/tmp/keep453790790/by_id/99999999999999999999999999999999+99999/myGenome\\\" to rootfs \\\"/tmp/docker/overlay2/9999999999999999999999999999999999999999999999999999999999999999/merged\\\" at \\\"/tmp/docker/overlay2/9999999999999999999999999999999999999999999999999999999999999999/merged/keep/99999999999999999999999999999999+99999/myGenome\\\" caused \\\"no such file or directory\\\"\""`)
2157                         nextState = "Cancelled"
2158                 },
2159                 func() {
2160                         c.Log("// loadErr = cannot connect")
2161                         s.executor.loadErr = errors.New("Cannot connect to the Docker daemon at unix:///var/run/docker.sock. Is the docker daemon running?")
2162                         s.runner.brokenNodeHook = c.MkDir() + "/broken-node-hook"
2163                         err := ioutil.WriteFile(s.runner.brokenNodeHook, []byte("#!/bin/sh\nexec echo killme\n"), 0700)
2164                         c.Assert(err, IsNil)
2165                         nextState = "Queued"
2166                 },
2167         } {
2168                 s.SetUpTest(c)
2169                 setup()
2170                 s.fullRunHelper(c, `{
2171     "command": ["echo", "hello world"],
2172     "container_image": "`+arvadostest.DockerImage112PDH+`",
2173     "cwd": ".",
2174     "environment": {},
2175     "mounts": {"/tmp": {"kind": "tmp"} },
2176     "output_path": "/tmp",
2177     "priority": 1,
2178     "runtime_constraints": {},
2179     "state": "Locked"
2180 }`, nil, func() int { return 0 })
2181                 c.Check(s.api.CalledWith("container.state", nextState), NotNil)
2182                 logs := logFileContent(c, s.runner, "crunch-run.txt")
2183                 c.Check(logs, Matches, "(?ms).*unable to run containers.*")
2184                 if s.runner.brokenNodeHook != "" {
2185                         c.Check(logs, Matches, "(?ms).*Running broken node hook.*")
2186                         c.Check(logs, Matches, "(?ms).*killme.*")
2187                         c.Check(logs, Not(Matches), "(?ms).*Writing /var/lock/crunch-run-broken to mark node as broken.*")
2188                 } else {
2189                         c.Check(logs, Matches, "(?ms).*Writing /var/lock/crunch-run-broken to mark node as broken.*")
2190                 }
2191         }
2192 }
2193
2194 func (s *TestSuite) TestBadCommand(c *C) {
2195         for _, startError := range []string{
2196                 `panic: standard_init_linux.go:175: exec user process caused "no such file or directory"`,
2197                 `Error response from daemon: Cannot start container 41f26cbc43bcc1280f4323efb1830a394ba8660c9d1c2b564ba42bf7f7694845: [8] System error: no such file or directory`,
2198                 `Error response from daemon: Cannot start container 58099cd76c834f3dc2a4fb76c8028f049ae6d4fdf0ec373e1f2cfea030670c2d: [8] System error: exec: "foobar": executable file not found in $PATH`,
2199         } {
2200                 s.SetUpTest(c)
2201                 s.executor.startErr = errors.New(startError)
2202                 s.fullRunHelper(c, `{
2203     "command": ["echo", "hello world"],
2204     "container_image": "`+arvadostest.DockerImage112PDH+`",
2205     "cwd": ".",
2206     "environment": {},
2207     "mounts": {"/tmp": {"kind": "tmp"} },
2208     "output_path": "/tmp",
2209     "priority": 1,
2210     "runtime_constraints": {},
2211     "state": "Locked"
2212 }`, nil, func() int { return 0 })
2213                 c.Check(s.api.CalledWith("container.state", "Cancelled"), NotNil)
2214                 c.Check(logFileContent(c, s.runner, "crunch-run.txt"), Matches, "(?ms).*Possible causes:.*is missing.*")
2215         }
2216 }
2217
2218 func (s *TestSuite) TestSecretTextMountPoint(c *C) {
2219         helperRecord := `{
2220                 "command": ["true"],
2221                 "container_image": "` + arvadostest.DockerImage112PDH + `",
2222                 "cwd": "/bin",
2223                 "mounts": {
2224                     "/tmp": {"kind": "tmp"},
2225                     "/tmp/secret.conf": {"kind": "text", "content": "mypassword"}
2226                 },
2227                 "secret_mounts": {
2228                 },
2229                 "output_path": "/tmp",
2230                 "priority": 1,
2231                 "runtime_constraints": {},
2232                 "state": "Locked"
2233         }`
2234
2235         s.fullRunHelper(c, helperRecord, nil, func() int {
2236                 content, err := ioutil.ReadFile(s.runner.HostOutputDir + "/secret.conf")
2237                 c.Check(err, IsNil)
2238                 c.Check(string(content), Equals, "mypassword")
2239                 return 0
2240         })
2241
2242         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
2243         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
2244         c.Check(s.runner.ContainerArvClient.(*ArvTestClient).CalledWith("collection.manifest_text", ". 34819d7beeabb9260a5c854bc85b3e44+10 0:10:secret.conf\n"), NotNil)
2245         c.Check(s.runner.ContainerArvClient.(*ArvTestClient).CalledWith("collection.manifest_text", ""), IsNil)
2246
2247         // under secret mounts, not captured in output
2248         helperRecord = `{
2249                 "command": ["true"],
2250                 "container_image": "` + arvadostest.DockerImage112PDH + `",
2251                 "cwd": "/bin",
2252                 "mounts": {
2253                     "/tmp": {"kind": "tmp"}
2254                 },
2255                 "secret_mounts": {
2256                     "/tmp/secret.conf": {"kind": "text", "content": "mypassword"}
2257                 },
2258                 "output_path": "/tmp",
2259                 "priority": 1,
2260                 "runtime_constraints": {},
2261                 "state": "Locked"
2262         }`
2263
2264         s.SetUpTest(c)
2265         s.fullRunHelper(c, helperRecord, nil, func() int {
2266                 content, err := ioutil.ReadFile(s.runner.HostOutputDir + "/secret.conf")
2267                 c.Check(err, IsNil)
2268                 c.Check(string(content), Equals, "mypassword")
2269                 return 0
2270         })
2271
2272         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
2273         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
2274         c.Check(s.runner.ContainerArvClient.(*ArvTestClient).CalledWith("collection.manifest_text", ". 34819d7beeabb9260a5c854bc85b3e44+10 0:10:secret.conf\n"), IsNil)
2275         c.Check(s.runner.ContainerArvClient.(*ArvTestClient).CalledWith("collection.manifest_text", ""), NotNil)
2276
2277         // under secret mounts, output dir is a collection, not captured in output
2278         helperRecord = `{
2279                 "command": ["true"],
2280                 "container_image": "` + arvadostest.DockerImage112PDH + `",
2281                 "cwd": "/bin",
2282                 "mounts": {
2283                     "/tmp": {"kind": "collection", "writable": true}
2284                 },
2285                 "secret_mounts": {
2286                     "/tmp/secret.conf": {"kind": "text", "content": "mypassword"}
2287                 },
2288                 "output_path": "/tmp",
2289                 "priority": 1,
2290                 "runtime_constraints": {},
2291                 "state": "Locked"
2292         }`
2293
2294         s.SetUpTest(c)
2295         _, _, realtemp := s.fullRunHelper(c, helperRecord, nil, func() int {
2296                 // secret.conf should be provisioned as a separate
2297                 // bind mount, i.e., it should not appear in the
2298                 // (fake) fuse filesystem as viewed from the host.
2299                 content, err := ioutil.ReadFile(s.runner.HostOutputDir + "/secret.conf")
2300                 if !c.Check(errors.Is(err, os.ErrNotExist), Equals, true) {
2301                         c.Logf("secret.conf: content %q, err %#v", content, err)
2302                 }
2303                 err = ioutil.WriteFile(s.runner.HostOutputDir+"/.arvados#collection", []byte(`{"manifest_text":". acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:foo.txt\n"}`), 0700)
2304                 c.Check(err, IsNil)
2305                 return 0
2306         })
2307
2308         content, err := ioutil.ReadFile(realtemp + "/text1/mountdata.text")
2309         c.Check(err, IsNil)
2310         c.Check(string(content), Equals, "mypassword")
2311         c.Check(s.executor.created.BindMounts["/tmp/secret.conf"], DeepEquals, bindmount{realtemp + "/text1/mountdata.text", true})
2312         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
2313         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
2314         c.Check(s.runner.ContainerArvClient.(*ArvTestClient).CalledWith("collection.manifest_text", ". acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:foo.txt\n"), NotNil)
2315 }
2316
2317 func (s *TestSuite) TestCalculateCost(c *C) {
2318         defer func(s string) { lockdir = s }(lockdir)
2319         lockdir = c.MkDir()
2320         now := time.Now()
2321         cr := s.runner
2322         cr.costStartTime = now.Add(-time.Hour)
2323         var logbuf bytes.Buffer
2324         cr.CrunchLog = newLogWriter(&logbuf)
2325
2326         // if there's no InstanceType env var, cost is calculated as 0
2327         os.Unsetenv("InstanceType")
2328         cost := cr.calculateCost(now)
2329         c.Check(cost, Equals, 0.0)
2330
2331         // with InstanceType env var and loadPrices() hasn't run (or
2332         // hasn't found any data), cost is calculated based on
2333         // InstanceType env var
2334         os.Setenv("InstanceType", `{"Price":1.2}`)
2335         cost = cr.calculateCost(now)
2336         c.Check(cost, Equals, 1.2)
2337
2338         // first update tells us the spot price was $1/h until 30
2339         // minutes ago when it increased to $2/h
2340         j, err := json.Marshal([]cloud.InstancePrice{
2341                 {StartTime: now.Add(-4 * time.Hour), Price: 1.0},
2342                 {StartTime: now.Add(-time.Hour / 2), Price: 2.0},
2343         })
2344         c.Assert(err, IsNil)
2345         os.WriteFile(lockdir+"/"+pricesfile, j, 0777)
2346         cr.loadPrices()
2347         cost = cr.calculateCost(now)
2348         c.Check(cost, Equals, 1.5)
2349
2350         // next update (via --list + SIGUSR2) tells us the spot price
2351         // increased to $3/h 15 minutes ago
2352         j, err = json.Marshal([]cloud.InstancePrice{
2353                 {StartTime: now.Add(-time.Hour / 3), Price: 2.0}, // dup of -time.Hour/2 price
2354                 {StartTime: now.Add(-time.Hour / 4), Price: 3.0},
2355         })
2356         c.Assert(err, IsNil)
2357         os.WriteFile(lockdir+"/"+pricesfile, j, 0777)
2358         cr.loadPrices()
2359         cost = cr.calculateCost(now)
2360         c.Check(cost, Equals, 1.0/2+2.0/4+3.0/4)
2361
2362         cost = cr.calculateCost(now.Add(-time.Hour / 2))
2363         c.Check(cost, Equals, 0.5)
2364
2365         c.Logf("%s", logbuf.String())
2366         c.Check(logbuf.String(), Matches, `(?ms).*Instance price changed to 1\.00 at 20.* changed to 2\.00 .* changed to 3\.00 .*`)
2367         c.Check(logbuf.String(), Not(Matches), `(?ms).*changed to 2\.00 .* changed to 2\.00 .*`)
2368 }
2369
2370 func (s *TestSuite) TestSIGUSR2CostUpdate(c *C) {
2371         pid := os.Getpid()
2372         now := time.Now()
2373         pricesJSON, err := json.Marshal([]cloud.InstancePrice{
2374                 {StartTime: now.Add(-4 * time.Hour), Price: 2.4},
2375                 {StartTime: now.Add(-2 * time.Hour), Price: 2.6},
2376         })
2377         c.Assert(err, IsNil)
2378
2379         os.Setenv("InstanceType", `{"Price":2.2}`)
2380         defer func(s string) { lockdir = s }(lockdir)
2381         lockdir = c.MkDir()
2382
2383         // We can't use s.api.CalledWith because timing differences will yield
2384         // different cost values across runs. getCostUpdate iterates over API
2385         // calls until it finds one that sets the cost, then writes that value
2386         // to the next index of costUpdates.
2387         deadline := now.Add(time.Second)
2388         costUpdates := make([]float64, 2)
2389         costIndex := 0
2390         apiIndex := 0
2391         getCostUpdate := func() {
2392                 for ; time.Now().Before(deadline); time.Sleep(time.Second / 10) {
2393                         for apiIndex < len(s.api.Content) {
2394                                 update := s.api.Content[apiIndex]
2395                                 apiIndex++
2396                                 var ok bool
2397                                 var cost float64
2398                                 if update, ok = update["container"].(arvadosclient.Dict); !ok {
2399                                         continue
2400                                 }
2401                                 if cost, ok = update["cost"].(float64); !ok {
2402                                         continue
2403                                 }
2404                                 c.Logf("API call #%d updates cost to %v", apiIndex-1, cost)
2405                                 costUpdates[costIndex] = cost
2406                                 costIndex++
2407                                 return
2408                         }
2409                 }
2410         }
2411
2412         s.fullRunHelper(c, `{
2413                 "command": ["true"],
2414                 "container_image": "`+arvadostest.DockerImage112PDH+`",
2415                 "cwd": ".",
2416                 "environment": {},
2417                 "mounts": {"/tmp": {"kind": "tmp"} },
2418                 "output_path": "/tmp",
2419                 "priority": 1,
2420                 "runtime_constraints": {},
2421                 "state": "Locked",
2422                 "uuid": "zzzzz-dz642-20230320101530a"
2423         }`, nil, func() int {
2424                 s.runner.costStartTime = now.Add(-3 * time.Hour)
2425                 err := syscall.Kill(pid, syscall.SIGUSR2)
2426                 c.Check(err, IsNil, Commentf("error sending first SIGUSR2 to runner"))
2427                 getCostUpdate()
2428
2429                 err = os.WriteFile(path.Join(lockdir, pricesfile), pricesJSON, 0o700)
2430                 c.Check(err, IsNil, Commentf("error writing JSON prices file"))
2431                 err = syscall.Kill(pid, syscall.SIGUSR2)
2432                 c.Check(err, IsNil, Commentf("error sending second SIGUSR2 to runner"))
2433                 getCostUpdate()
2434
2435                 return 0
2436         })
2437         // Comparing with format strings makes it easy to ignore minor variations
2438         // in cost across runs while keeping diagnostics pretty.
2439         c.Check(fmt.Sprintf("%.3f", costUpdates[0]), Equals, "6.600")
2440         c.Check(fmt.Sprintf("%.3f", costUpdates[1]), Equals, "7.600")
2441 }
2442
2443 type FakeProcess struct {
2444         cmdLine []string
2445 }
2446
2447 func (fp FakeProcess) CmdlineSlice() ([]string, error) {
2448         return fp.cmdLine, nil
2449 }
2450
2451 func logFileContent(c *C, cr *ContainerRunner, fnm string) string {
2452         buf, err := fs.ReadFile(arvados.FS(cr.LogCollection), fnm)
2453         c.Assert(err, IsNil)
2454         return string(buf)
2455 }
2456
2457 func dumpAllLogFiles(c *C, cr *ContainerRunner) {
2458         d, err := cr.LogCollection.OpenFile("/", os.O_RDONLY, 0)
2459         c.Assert(err, IsNil)
2460         fis, err := d.Readdir(-1)
2461         c.Assert(err, IsNil)
2462         for _, fi := range fis {
2463                 c.Logf("=== %s", fi.Name())
2464                 c.Log(logFileContent(c, cr, fi.Name()))
2465         }
2466 }