21891: Remove unused sdk/go/manifest package and ManifestFileReader.
[arvados.git] / lib / crunchrun / crunchrun_test.go
1 // Copyright (C) The Arvados Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: AGPL-3.0
4
5 package crunchrun
6
7 import (
8         "bytes"
9         "context"
10         "crypto/md5"
11         "encoding/json"
12         "errors"
13         "fmt"
14         "io"
15         "io/fs"
16         "io/ioutil"
17         "math/rand"
18         "net/http"
19         "net/http/httptest"
20         "net/http/httputil"
21         "net/url"
22         "os"
23         "os/exec"
24         "path"
25         "regexp"
26         "runtime/pprof"
27         "strings"
28         "sync"
29         "sync/atomic"
30         "syscall"
31         "testing"
32         "time"
33
34         "git.arvados.org/arvados.git/lib/cloud"
35         "git.arvados.org/arvados.git/lib/cmd"
36         "git.arvados.org/arvados.git/sdk/go/arvados"
37         "git.arvados.org/arvados.git/sdk/go/arvadosclient"
38         "git.arvados.org/arvados.git/sdk/go/arvadostest"
39
40         . "gopkg.in/check.v1"
41 )
42
43 // Gocheck boilerplate
44 func TestCrunchExec(t *testing.T) {
45         TestingT(t)
46 }
47
48 const logLineStart = `(?m)(.*\n)*\d{4}-\d\d-\d\dT\d\d:\d\d:\d\d\.\d+Z `
49
50 var _ = Suite(&TestSuite{})
51
52 type TestSuite struct {
53         client                   *arvados.Client
54         api                      *ArvTestClient
55         runner                   *ContainerRunner
56         executor                 *stubExecutor
57         keepmount                string
58         keepmountTmp             []string
59         testDispatcherKeepClient KeepTestClient
60         testContainerKeepClient  KeepTestClient
61         debian12MemoryCurrent    int64
62         debian12SwapCurrent      int64
63 }
64
65 func (s *TestSuite) SetUpSuite(c *C) {
66         buf, err := os.ReadFile("../crunchstat/testdata/debian12/sys/fs/cgroup/user.slice/user-1000.slice/session-4.scope/memory.current")
67         c.Assert(err, IsNil)
68         _, err = fmt.Sscanf(string(buf), "%d", &s.debian12MemoryCurrent)
69         c.Assert(err, IsNil)
70
71         buf, err = os.ReadFile("../crunchstat/testdata/debian12/sys/fs/cgroup/user.slice/user-1000.slice/session-4.scope/memory.swap.current")
72         c.Assert(err, IsNil)
73         _, err = fmt.Sscanf(string(buf), "%d", &s.debian12SwapCurrent)
74         c.Assert(err, IsNil)
75 }
76
77 func (s *TestSuite) SetUpTest(c *C) {
78         s.client = arvados.NewClientFromEnv()
79         s.executor = &stubExecutor{}
80         var err error
81         s.api = &ArvTestClient{}
82         s.runner, err = NewContainerRunner(s.client, s.api, &s.testDispatcherKeepClient, "zzzzz-zzzzz-zzzzzzzzzzzzzzz")
83         c.Assert(err, IsNil)
84         s.runner.executor = s.executor
85         s.runner.MkArvClient = func(token string) (IArvadosClient, IKeepClient, *arvados.Client, error) {
86                 return s.api, &s.testContainerKeepClient, s.client, nil
87         }
88         s.runner.RunArvMount = func(cmd []string, tok string) (*exec.Cmd, error) {
89                 s.runner.ArvMountPoint = s.keepmount
90                 for i, opt := range cmd {
91                         if opt == "--mount-tmp" {
92                                 err := os.Mkdir(s.keepmount+"/"+cmd[i+1], 0700)
93                                 if err != nil {
94                                         return nil, err
95                                 }
96                                 s.keepmountTmp = append(s.keepmountTmp, cmd[i+1])
97                         }
98                 }
99                 return nil, nil
100         }
101         s.keepmount = c.MkDir()
102         err = os.Mkdir(s.keepmount+"/by_id", 0755)
103         s.keepmountTmp = nil
104         c.Assert(err, IsNil)
105         err = os.Mkdir(s.keepmount+"/by_id/"+arvadostest.DockerImage112PDH, 0755)
106         c.Assert(err, IsNil)
107         err = ioutil.WriteFile(s.keepmount+"/by_id/"+arvadostest.DockerImage112PDH+"/"+arvadostest.DockerImage112Filename, []byte("#notarealtarball"), 0644)
108         err = os.Mkdir(s.keepmount+"/by_id/"+fakeInputCollectionPDH, 0755)
109         c.Assert(err, IsNil)
110         err = ioutil.WriteFile(s.keepmount+"/by_id/"+fakeInputCollectionPDH+"/input.json", []byte(`{"input":true}`), 0644)
111         c.Assert(err, IsNil)
112         s.runner.ArvMountPoint = s.keepmount
113         os.Setenv("InstanceType", `{"ProviderType":"a1.2xlarge","Price":1.2}`)
114 }
115
116 type ArvTestClient struct {
117         Total   int64
118         Calls   int
119         Content []arvadosclient.Dict
120         arvados.Container
121         secretMounts []byte
122         sync.Mutex
123         WasSetRunning bool
124         callraw       bool
125 }
126
127 type KeepTestClient struct {
128         Called         bool
129         StorageClasses []string
130         blocks         sync.Map
131 }
132
133 type stubExecutor struct {
134         imageLoaded bool
135         loaded      string
136         loadErr     error
137         exitCode    int
138         createErr   error
139         created     containerSpec
140         startErr    error
141         waitSleep   time.Duration
142         waitErr     error
143         stopErr     error
144         stopped     bool
145         closed      bool
146         runFunc     func() int
147         exit        chan int
148 }
149
150 func (e *stubExecutor) LoadImage(imageId string, tarball string, container arvados.Container, keepMount string,
151         containerClient *arvados.Client) error {
152         e.loaded = tarball
153         return e.loadErr
154 }
155 func (e *stubExecutor) Runtime() string                 { return "stub" }
156 func (e *stubExecutor) Version() string                 { return "stub " + cmd.Version.String() }
157 func (e *stubExecutor) Create(spec containerSpec) error { e.created = spec; return e.createErr }
158 func (e *stubExecutor) Start() error {
159         e.exit = make(chan int, 1)
160         go func() { e.exit <- e.runFunc() }()
161         return e.startErr
162 }
163 func (e *stubExecutor) Pid() int    { return 1115883 } // matches pid in ../crunchstat/testdata/debian12/proc/
164 func (e *stubExecutor) Stop() error { e.stopped = true; go func() { e.exit <- -1 }(); return e.stopErr }
165 func (e *stubExecutor) Close()      { e.closed = true }
166 func (e *stubExecutor) Wait(context.Context) (int, error) {
167         return <-e.exit, e.waitErr
168 }
169 func (e *stubExecutor) InjectCommand(ctx context.Context, _, _ string, _ bool, _ []string) (*exec.Cmd, error) {
170         return nil, errors.New("unimplemented")
171 }
172 func (e *stubExecutor) IPAddress() (string, error) { return "", errors.New("unimplemented") }
173
174 const fakeInputCollectionPDH = "ffffffffaaaaaaaa88888888eeeeeeee+1234"
175
176 var hwManifest = ". 82ab40c24fc8df01798e57ba66795bb1+841216+Aa124ac75e5168396c73c0a18eda641a4f41791c0@569fa8c3 0:841216:9c31ee32b3d15268a0754e8edc74d4f815ee014b693bc5109058e431dd5caea7.tar\n"
177 var hwPDH = "a45557269dcb65a6b78f9ac061c0850b+120"
178 var hwImageID = "9c31ee32b3d15268a0754e8edc74d4f815ee014b693bc5109058e431dd5caea7"
179
180 var otherManifest = ". 68a84f561b1d1708c6baff5e019a9ab3+46+Ae5d0af96944a3690becb1decdf60cc1c937f556d@5693216f 0:46:md5sum.txt\n"
181 var otherPDH = "a3e8f74c6f101eae01fa08bfb4e49b3a+54"
182
183 var normalizedManifestWithSubdirs = `. 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396c73c0abcdefgh11234567890@569fa8c3 0:9:file1_in_main.txt 9:18:file2_in_main.txt 0:27:zzzzz-8i9sb-bcdefghijkdhvnk.log.txt
184 ./subdir1 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396cabcdefghij6419876543234@569fa8c4 0:9:file1_in_subdir1.txt 9:18:file2_in_subdir1.txt
185 ./subdir1/subdir2 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396c73c0bcdefghijk544332211@569fa8c5 0:9:file1_in_subdir2.txt 9:18:file2_in_subdir2.txt
186 `
187
188 var normalizedWithSubdirsPDH = "a0def87f80dd594d4675809e83bd4f15+367"
189
190 var denormalizedManifestWithSubdirs = ". 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396c73c0abcdefgh11234567890@569fa8c3 0:9:file1_in_main.txt 9:18:file2_in_main.txt 0:27:zzzzz-8i9sb-bcdefghijkdhvnk.log.txt 0:10:subdir1/file1_in_subdir1.txt 10:17:subdir1/file2_in_subdir1.txt\n"
191 var denormalizedWithSubdirsPDH = "b0def87f80dd594d4675809e83bd4f15+367"
192
193 var fakeAuthUUID = "zzzzz-gj3su-55pqoyepgi2glem"
194 var fakeAuthToken = "a3ltuwzqcu2u4sc0q7yhpc2w7s00fdcqecg5d6e0u3pfohmbjt"
195
196 func (client *ArvTestClient) Create(resourceType string,
197         parameters arvadosclient.Dict,
198         output interface{}) error {
199
200         client.Mutex.Lock()
201         defer client.Mutex.Unlock()
202
203         client.Calls++
204         client.Content = append(client.Content, parameters)
205
206         if resourceType == "logs" {
207                 panic("logs.create called")
208         }
209
210         if resourceType == "collections" && output != nil {
211                 mt := parameters["collection"].(arvadosclient.Dict)["manifest_text"].(string)
212                 md5sum := md5.Sum([]byte(mt))
213                 outmap := output.(*arvados.Collection)
214                 outmap.PortableDataHash = fmt.Sprintf("%x+%d", md5sum, len(mt))
215                 outmap.UUID = fmt.Sprintf("zzzzz-4zz18-%015x", md5sum[:7])
216         }
217
218         return nil
219 }
220
221 func (client *ArvTestClient) Call(method, resourceType, uuid, action string, parameters arvadosclient.Dict, output interface{}) error {
222         switch {
223         case method == "GET" && resourceType == "containers" && action == "auth":
224                 return json.Unmarshal([]byte(`{
225                         "kind": "arvados#api_client_authorization",
226                         "uuid": "`+fakeAuthUUID+`",
227                         "api_token": "`+fakeAuthToken+`"
228                         }`), output)
229         case method == "GET" && resourceType == "containers" && action == "secret_mounts":
230                 if client.secretMounts != nil {
231                         return json.Unmarshal(client.secretMounts, output)
232                 }
233                 return json.Unmarshal([]byte(`{"secret_mounts":{}}`), output)
234         default:
235                 return fmt.Errorf("Not found")
236         }
237 }
238
239 func (client *ArvTestClient) CallRaw(method, resourceType, uuid, action string,
240         parameters arvadosclient.Dict) (reader io.ReadCloser, err error) {
241         var j []byte
242         if method == "GET" && resourceType == "nodes" && uuid == "" && action == "" {
243                 j = []byte(`{
244                         "kind": "arvados#nodeList",
245                         "items": [{
246                                 "uuid": "zzzzz-7ekkf-2z3mc76g2q73aio",
247                                 "hostname": "compute2",
248                                 "properties": {"total_cpu_cores": 16}
249                         }]}`)
250         } else if method == "GET" && resourceType == "containers" && action == "" && !client.callraw {
251                 if uuid == "" {
252                         j, err = json.Marshal(map[string]interface{}{
253                                 "items": []interface{}{client.Container},
254                                 "kind":  "arvados#nodeList",
255                         })
256                 } else {
257                         j, err = json.Marshal(client.Container)
258                 }
259         } else {
260                 j = []byte(`{
261                         "command": ["sleep", "1"],
262                         "container_image": "` + arvadostest.DockerImage112PDH + `",
263                         "cwd": ".",
264                         "environment": {},
265                         "mounts": {"/tmp": {"kind": "tmp"}, "/json": {"kind": "json", "content": {"number": 123456789123456789}}},
266                         "output_path": "/tmp",
267                         "priority": 1,
268                         "runtime_constraints": {}
269                 }`)
270         }
271         return ioutil.NopCloser(bytes.NewReader(j)), err
272 }
273
274 func (client *ArvTestClient) Get(resourceType string, uuid string, parameters arvadosclient.Dict, output interface{}) error {
275         if resourceType == "collections" {
276                 if uuid == hwPDH {
277                         output.(*arvados.Collection).ManifestText = hwManifest
278                 } else if uuid == otherPDH {
279                         output.(*arvados.Collection).ManifestText = otherManifest
280                 } else if uuid == normalizedWithSubdirsPDH {
281                         output.(*arvados.Collection).ManifestText = normalizedManifestWithSubdirs
282                 } else if uuid == denormalizedWithSubdirsPDH {
283                         output.(*arvados.Collection).ManifestText = denormalizedManifestWithSubdirs
284                 }
285         }
286         if resourceType == "containers" {
287                 (*output.(*arvados.Container)) = client.Container
288         }
289         return nil
290 }
291
292 func (client *ArvTestClient) Update(resourceType string, uuid string, parameters arvadosclient.Dict, output interface{}) (err error) {
293         client.Mutex.Lock()
294         defer client.Mutex.Unlock()
295         client.Calls++
296         client.Content = append(client.Content, parameters)
297         if resourceType == "containers" {
298                 if parameters["container"].(arvadosclient.Dict)["state"] == "Running" {
299                         client.WasSetRunning = true
300                 }
301         } else if resourceType == "collections" && output != nil {
302                 mt := parameters["collection"].(arvadosclient.Dict)["manifest_text"].(string)
303                 output.(*arvados.Collection).UUID = uuid
304                 output.(*arvados.Collection).PortableDataHash = arvados.PortableDataHash(mt)
305         }
306         return nil
307 }
308
309 var discoveryMap = map[string]interface{}{
310         "crunchLogUpdateSize":   float64(crunchLogUpdateSize),
311         "crunchLogUpdatePeriod": float64(crunchLogUpdatePeriod.Seconds()),
312 }
313
314 func (client *ArvTestClient) Discovery(key string) (interface{}, error) {
315         return discoveryMap[key], nil
316 }
317
318 // CalledWith returns the parameters from the first API call whose
319 // parameters match jpath/string. E.g., CalledWith(c, "foo.bar",
320 // "baz") returns parameters with parameters["foo"]["bar"]=="baz". If
321 // no call matches, it returns nil.
322 func (client *ArvTestClient) CalledWith(jpath string, expect interface{}) arvadosclient.Dict {
323 call:
324         for _, content := range client.Content {
325                 var v interface{} = content
326                 for _, k := range strings.Split(jpath, ".") {
327                         if dict, ok := v.(arvadosclient.Dict); !ok {
328                                 continue call
329                         } else {
330                                 v = dict[k]
331                         }
332                 }
333                 if v == expect {
334                         return content
335                 }
336         }
337         return nil
338 }
339
340 func (client *KeepTestClient) LocalLocator(locator string) (string, error) {
341         return locator, nil
342 }
343
344 func (client *KeepTestClient) BlockWrite(_ context.Context, opts arvados.BlockWriteOptions) (arvados.BlockWriteResponse, error) {
345         locator := fmt.Sprintf("%x+%d", md5.Sum(opts.Data), len(opts.Data))
346         client.blocks.Store(locator, append([]byte(nil), opts.Data...))
347         return arvados.BlockWriteResponse{
348                 Locator: locator,
349         }, nil
350 }
351
352 func (client *KeepTestClient) ReadAt(locator string, dst []byte, offset int) (int, error) {
353         loaded, ok := client.blocks.Load(locator)
354         if !ok {
355                 return 0, os.ErrNotExist
356         }
357         data := loaded.([]byte)
358         if offset >= len(data) {
359                 return 0, io.EOF
360         }
361         return copy(dst, data[offset:]), nil
362 }
363
364 func (client *KeepTestClient) Close() {
365         client.blocks.Range(func(locator, value interface{}) bool {
366                 client.blocks.Delete(locator)
367                 return true
368         })
369 }
370
371 func (client *KeepTestClient) SetStorageClasses(sc []string) {
372         client.StorageClasses = sc
373 }
374
375 type FileWrapper struct {
376         io.ReadCloser
377         len int64
378 }
379
380 func (fw FileWrapper) Readdir(n int) ([]os.FileInfo, error) {
381         return nil, errors.New("not implemented")
382 }
383
384 func (fw FileWrapper) Seek(int64, int) (int64, error) {
385         return 0, errors.New("not implemented")
386 }
387
388 func (fw FileWrapper) Size() int64 {
389         return fw.len
390 }
391
392 func (fw FileWrapper) Stat() (os.FileInfo, error) {
393         return nil, errors.New("not implemented")
394 }
395
396 func (fw FileWrapper) Truncate(int64) error {
397         return errors.New("not implemented")
398 }
399
400 func (fw FileWrapper) Write([]byte) (int, error) {
401         return 0, errors.New("not implemented")
402 }
403
404 func (fw FileWrapper) Sync() error {
405         return errors.New("not implemented")
406 }
407
408 func (fw FileWrapper) Snapshot() (*arvados.Subtree, error) {
409         return nil, errors.New("not implemented")
410 }
411
412 func (fw FileWrapper) Splice(*arvados.Subtree) error {
413         return errors.New("not implemented")
414 }
415
416 type apiStubServer struct {
417         server    *httptest.Server
418         proxy     *httputil.ReverseProxy
419         intercept func(http.ResponseWriter, *http.Request) bool
420
421         container arvados.Container
422         logs      map[string]string
423 }
424
425 func apiStub() (*arvados.Client, *apiStubServer) {
426         client := arvados.NewClientFromEnv()
427         apistub := &apiStubServer{}
428         apistub.server = httptest.NewTLSServer(apistub)
429         apistub.proxy = httputil.NewSingleHostReverseProxy(&url.URL{Scheme: "https", Host: client.APIHost})
430         if client.Insecure {
431                 apistub.proxy.Transport = arvados.InsecureHTTPClient.Transport
432         }
433         client.APIHost = apistub.server.Listener.Addr().String()
434         return client, apistub
435 }
436
437 func (apistub *apiStubServer) ServeHTTP(w http.ResponseWriter, r *http.Request) {
438         if apistub.intercept != nil && apistub.intercept(w, r) {
439                 return
440         }
441         if r.Method == "POST" && r.URL.Path == "/arvados/v1/logs" {
442                 var body struct {
443                         Log struct {
444                                 EventType  string `json:"event_type"`
445                                 Properties struct {
446                                         Text string
447                                 }
448                         }
449                 }
450                 json.NewDecoder(r.Body).Decode(&body)
451                 apistub.logs[body.Log.EventType] += body.Log.Properties.Text
452                 return
453         }
454         if r.Method == "GET" && r.URL.Path == "/arvados/v1/collections/"+hwPDH {
455                 json.NewEncoder(w).Encode(arvados.Collection{ManifestText: hwManifest})
456                 return
457         }
458         if r.Method == "GET" && r.URL.Path == "/arvados/v1/collections/"+otherPDH {
459                 json.NewEncoder(w).Encode(arvados.Collection{ManifestText: otherManifest})
460                 return
461         }
462         if r.Method == "GET" && r.URL.Path == "/arvados/v1/collections/"+normalizedWithSubdirsPDH {
463                 json.NewEncoder(w).Encode(arvados.Collection{ManifestText: normalizedManifestWithSubdirs})
464                 return
465         }
466         if r.Method == "GET" && r.URL.Path == "/arvados/v1/collections/"+denormalizedWithSubdirsPDH {
467                 json.NewEncoder(w).Encode(arvados.Collection{ManifestText: denormalizedManifestWithSubdirs})
468                 return
469         }
470         if r.Method == "GET" && r.URL.Path == "/arvados/v1/containers/"+apistub.container.UUID {
471                 json.NewEncoder(w).Encode(apistub.container)
472                 return
473         }
474         apistub.proxy.ServeHTTP(w, r)
475 }
476
477 func (s *TestSuite) TestLoadImage(c *C) {
478         s.runner.Container.ContainerImage = arvadostest.DockerImage112PDH
479         s.runner.Container.Mounts = map[string]arvados.Mount{
480                 "/out": {Kind: "tmp", Writable: true},
481         }
482         s.runner.Container.OutputPath = "/out"
483
484         _, err := s.runner.SetupMounts()
485         c.Assert(err, IsNil)
486
487         imageID, err := s.runner.LoadImage()
488         c.Check(err, IsNil)
489         c.Check(s.executor.loaded, Matches, ".*"+regexp.QuoteMeta(arvadostest.DockerImage112Filename))
490         c.Check(imageID, Equals, strings.TrimSuffix(arvadostest.DockerImage112Filename, ".tar"))
491
492         s.runner.Container.ContainerImage = arvadostest.DockerImage112PDH
493         s.executor.imageLoaded = false
494         s.executor.loaded = ""
495         s.executor.loadErr = errors.New("bork")
496         imageID, err = s.runner.LoadImage()
497         c.Check(err, ErrorMatches, ".*bork")
498         c.Check(s.executor.loaded, Matches, ".*"+regexp.QuoteMeta(arvadostest.DockerImage112Filename))
499
500         s.runner.Container.ContainerImage = fakeInputCollectionPDH
501         s.executor.imageLoaded = false
502         s.executor.loaded = ""
503         s.executor.loadErr = nil
504         imageID, err = s.runner.LoadImage()
505         c.Check(err, ErrorMatches, "image collection does not include a \\.tar image file")
506         c.Check(s.executor.loaded, Equals, "")
507 }
508
509 type ArvErrorTestClient struct{}
510
511 func (ArvErrorTestClient) Create(resourceType string,
512         parameters arvadosclient.Dict,
513         output interface{}) error {
514         return nil
515 }
516
517 func (ArvErrorTestClient) Call(method, resourceType, uuid, action string, parameters arvadosclient.Dict, output interface{}) error {
518         if method == "GET" && resourceType == "containers" && action == "auth" {
519                 return nil
520         }
521         return errors.New("ArvError")
522 }
523
524 func (ArvErrorTestClient) CallRaw(method, resourceType, uuid, action string,
525         parameters arvadosclient.Dict) (reader io.ReadCloser, err error) {
526         return nil, errors.New("ArvError")
527 }
528
529 func (ArvErrorTestClient) Get(resourceType string, uuid string, parameters arvadosclient.Dict, output interface{}) error {
530         return errors.New("ArvError")
531 }
532
533 func (ArvErrorTestClient) Update(resourceType string, uuid string, parameters arvadosclient.Dict, output interface{}) (err error) {
534         return nil
535 }
536
537 func (ArvErrorTestClient) Discovery(key string) (interface{}, error) {
538         return discoveryMap[key], nil
539 }
540
541 type KeepErrorTestClient struct {
542         KeepTestClient
543 }
544
545 func (*KeepErrorTestClient) BlockWrite(context.Context, arvados.BlockWriteOptions) (arvados.BlockWriteResponse, error) {
546         return arvados.BlockWriteResponse{}, errors.New("KeepError")
547 }
548
549 func (*KeepErrorTestClient) LocalLocator(string) (string, error) {
550         return "", errors.New("KeepError")
551 }
552
553 type KeepReadErrorTestClient struct {
554         KeepTestClient
555 }
556
557 func (*KeepReadErrorTestClient) ReadAt(string, []byte, int) (int, error) {
558         return 0, errors.New("KeepError")
559 }
560
561 func dockerLog(fd byte, msg string) []byte {
562         by := []byte(msg)
563         header := make([]byte, 8+len(by))
564         header[0] = fd
565         header[7] = byte(len(by))
566         copy(header[8:], by)
567         return header
568 }
569
570 func (s *TestSuite) TestRunContainer(c *C) {
571         s.executor.runFunc = func() int {
572                 fmt.Fprintf(s.executor.created.Stdout, "Hello world\n")
573                 return 0
574         }
575
576         s.runner.Container.ContainerImage = arvadostest.DockerImage112PDH
577         s.runner.Container.Command = []string{"./hw"}
578         s.runner.Container.OutputStorageClasses = []string{"default"}
579
580         imageID, err := s.runner.LoadImage()
581         c.Assert(err, IsNil)
582
583         err = s.runner.CreateContainer(imageID, nil)
584         c.Assert(err, IsNil)
585
586         err = s.runner.StartContainer()
587         c.Assert(err, IsNil)
588
589         err = s.runner.WaitFinish()
590         c.Assert(err, IsNil)
591
592         c.Check(logFileContent(c, s.runner, "stdout.txt"), Matches, `2\S+Z Hello world\n`)
593         c.Check(logFileContent(c, s.runner, "stderr.txt"), Matches, ``)
594 }
595
596 func (s *TestSuite) TestCommitLogs(c *C) {
597         api := &ArvTestClient{}
598         kc := &KeepTestClient{}
599         defer kc.Close()
600         cr, err := NewContainerRunner(s.client, api, kc, "zzzzz-zzzzz-zzzzzzzzzzzzzzz")
601         c.Assert(err, IsNil)
602         f, err := cr.openLogFile("crunch-run")
603         c.Assert(err, IsNil)
604         cr.CrunchLog = newLogWriter(newTestTimestamper(f))
605
606         cr.CrunchLog.Print("Hello world!")
607         cr.CrunchLog.Print("Goodbye")
608         cr.finalState = "Complete"
609
610         err = cr.CommitLogs()
611         c.Check(err, IsNil)
612
613         c.Check(api.Calls, Equals, 1)
614         c.Check(api.Content[0]["ensure_unique_name"], Equals, true)
615         c.Check(api.Content[0]["collection"].(arvadosclient.Dict)["name"], Equals, "logs for zzzzz-zzzzz-zzzzzzzzzzzzzzz")
616         c.Check(api.Content[0]["collection"].(arvadosclient.Dict)["manifest_text"], Equals, ". 744b2e4553123b02fa7b452ec5c18993+123 0:123:crunch-run.txt\n")
617         c.Check(*cr.LogsPDH, Equals, "63da7bdacf08c40f604daad80c261e9a+60")
618 }
619
620 func (s *TestSuite) TestUpdateContainerRunning(c *C) {
621         api := &ArvTestClient{}
622         kc := &KeepTestClient{}
623         defer kc.Close()
624         cr, err := NewContainerRunner(s.client, api, kc, "zzzzz-zzzzz-zzzzzzzzzzzzzzz")
625         c.Assert(err, IsNil)
626
627         err = cr.UpdateContainerRunning("")
628         c.Check(err, IsNil)
629
630         c.Check(api.Content[0]["container"].(arvadosclient.Dict)["state"], Equals, "Running")
631 }
632
633 func (s *TestSuite) TestUpdateContainerComplete(c *C) {
634         api := &ArvTestClient{}
635         kc := &KeepTestClient{}
636         defer kc.Close()
637         cr, err := NewContainerRunner(s.client, api, kc, "zzzzz-zzzzz-zzzzzzzzzzzzzzz")
638         c.Assert(err, IsNil)
639
640         cr.LogsPDH = new(string)
641         *cr.LogsPDH = "d3a229d2fe3690c2c3e75a71a153c6a3+60"
642
643         cr.ExitCode = new(int)
644         *cr.ExitCode = 42
645         cr.finalState = "Complete"
646
647         err = cr.UpdateContainerFinal()
648         c.Check(err, IsNil)
649
650         c.Check(api.Content[0]["container"].(arvadosclient.Dict)["log"], Equals, *cr.LogsPDH)
651         c.Check(api.Content[0]["container"].(arvadosclient.Dict)["exit_code"], Equals, *cr.ExitCode)
652         c.Check(api.Content[0]["container"].(arvadosclient.Dict)["state"], Equals, "Complete")
653 }
654
655 func (s *TestSuite) TestUpdateContainerCancelled(c *C) {
656         api := &ArvTestClient{}
657         kc := &KeepTestClient{}
658         defer kc.Close()
659         cr, err := NewContainerRunner(s.client, api, kc, "zzzzz-zzzzz-zzzzzzzzzzzzzzz")
660         c.Assert(err, IsNil)
661         cr.cCancelled = true
662         cr.finalState = "Cancelled"
663
664         err = cr.UpdateContainerFinal()
665         c.Check(err, IsNil)
666
667         c.Check(api.Content[0]["container"].(arvadosclient.Dict)["log"], IsNil)
668         c.Check(api.Content[0]["container"].(arvadosclient.Dict)["exit_code"], IsNil)
669         c.Check(api.Content[0]["container"].(arvadosclient.Dict)["state"], Equals, "Cancelled")
670 }
671
672 // Used by the TestFullRun*() test below to DRY up boilerplate setup to do full
673 // dress rehearsal of the Run() function, starting from a JSON container record.
674 func (s *TestSuite) fullRunHelper(c *C, record string, extraMounts []string, fn func() int) (*ArvTestClient, *ContainerRunner, string) {
675         err := json.Unmarshal([]byte(record), &s.api.Container)
676         c.Assert(err, IsNil)
677         initialState := s.api.Container.State
678
679         var sm struct {
680                 SecretMounts map[string]arvados.Mount `json:"secret_mounts"`
681         }
682         err = json.Unmarshal([]byte(record), &sm)
683         c.Check(err, IsNil)
684         secretMounts, err := json.Marshal(sm)
685         c.Assert(err, IsNil)
686         c.Logf("SecretMounts decoded %v json %q", sm, secretMounts)
687
688         s.executor.runFunc = fn
689
690         s.runner.statInterval = 100 * time.Millisecond
691         s.runner.containerWatchdogInterval = time.Second
692
693         realTemp := c.MkDir()
694         tempcount := 0
695         s.runner.MkTempDir = func(_, prefix string) (string, error) {
696                 tempcount++
697                 d := fmt.Sprintf("%s/%s%d", realTemp, prefix, tempcount)
698                 err := os.Mkdir(d, os.ModePerm)
699                 if err != nil && strings.Contains(err.Error(), ": file exists") {
700                         // Test case must have pre-populated the tempdir
701                         err = nil
702                 }
703                 return d, err
704         }
705         client, _ := apiStub()
706         s.runner.MkArvClient = func(token string) (IArvadosClient, IKeepClient, *arvados.Client, error) {
707                 return &ArvTestClient{secretMounts: secretMounts}, &s.testContainerKeepClient, client, nil
708         }
709
710         if extraMounts != nil && len(extraMounts) > 0 {
711                 err := s.runner.SetupArvMountPoint("keep")
712                 c.Check(err, IsNil)
713
714                 for _, m := range extraMounts {
715                         os.MkdirAll(s.runner.ArvMountPoint+"/by_id/"+m, os.ModePerm)
716                 }
717         }
718
719         err = s.runner.Run()
720         if s.api.CalledWith("container.state", "Complete") != nil {
721                 c.Check(err, IsNil)
722         }
723         if s.executor.loadErr == nil && s.executor.createErr == nil && initialState != "Running" {
724                 c.Check(s.api.WasSetRunning, Equals, true)
725                 var lastupdate arvadosclient.Dict
726                 for _, content := range s.api.Content {
727                         if content["container"] != nil {
728                                 lastupdate = content["container"].(arvadosclient.Dict)
729                         }
730                 }
731                 if lastupdate["log"] == nil {
732                         c.Errorf("no container update with non-nil log -- updates were: %v", s.api.Content)
733                 }
734         }
735
736         if err != nil {
737                 dumpAllLogFiles(c, s.runner)
738         }
739
740         return s.api, s.runner, realTemp
741 }
742
743 func (s *TestSuite) TestFullRunHello(c *C) {
744         s.runner.enableMemoryLimit = true
745         s.runner.networkMode = "default"
746         s.fullRunHelper(c, `{
747     "command": ["echo", "hello world"],
748     "container_image": "`+arvadostest.DockerImage112PDH+`",
749     "cwd": ".",
750     "environment": {"foo":"bar","baz":"waz"},
751     "mounts": {"/tmp": {"kind": "tmp"} },
752     "output_path": "/tmp",
753     "priority": 1,
754     "runtime_constraints": {"vcpus":1,"ram":1000000},
755     "state": "Locked",
756     "output_storage_classes": ["default"]
757 }`, nil, func() int {
758                 c.Check(s.executor.created.Command, DeepEquals, []string{"echo", "hello world"})
759                 c.Check(s.executor.created.Image, Equals, "sha256:d8309758b8fe2c81034ffc8a10c36460b77db7bc5e7b448c4e5b684f9d95a678")
760                 c.Check(s.executor.created.Env, DeepEquals, map[string]string{"foo": "bar", "baz": "waz"})
761                 c.Check(s.executor.created.VCPUs, Equals, 1)
762                 c.Check(s.executor.created.RAM, Equals, int64(1000000))
763                 c.Check(s.executor.created.NetworkMode, Equals, "default")
764                 c.Check(s.executor.created.EnableNetwork, Equals, false)
765                 c.Check(s.executor.created.CUDADeviceCount, Equals, 0)
766                 fmt.Fprintln(s.executor.created.Stdout, "hello world")
767                 return 0
768         })
769
770         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
771         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
772         c.Check(logFileContent(c, s.runner, "stdout.txt"), Matches, `2\S+Z hello world\n`)
773         c.Check(s.testDispatcherKeepClient.StorageClasses, DeepEquals, []string{"default"})
774         c.Check(s.testContainerKeepClient.StorageClasses, DeepEquals, []string{"default"})
775 }
776
777 func (s *TestSuite) TestRunAlreadyRunning(c *C) {
778         var ran bool
779         s.fullRunHelper(c, `{
780     "command": ["sleep", "3"],
781     "container_image": "`+arvadostest.DockerImage112PDH+`",
782     "cwd": ".",
783     "environment": {},
784     "mounts": {"/tmp": {"kind": "tmp"} },
785     "output_path": "/tmp",
786     "priority": 1,
787     "runtime_constraints": {},
788     "scheduling_parameters":{"max_run_time": 1},
789     "state": "Running"
790 }`, nil, func() int {
791                 ran = true
792                 return 2
793         })
794         c.Check(s.api.CalledWith("container.state", "Cancelled"), IsNil)
795         c.Check(s.api.CalledWith("container.state", "Complete"), IsNil)
796         c.Check(ran, Equals, false)
797 }
798
799 func ec2MetadataServerStub(c *C, token *string, failureRate float64, stoptime *atomic.Value) *httptest.Server {
800         failedOnce := false
801         return httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
802                 if !failedOnce || rand.Float64() < failureRate {
803                         w.WriteHeader(http.StatusServiceUnavailable)
804                         failedOnce = true
805                         return
806                 }
807                 switch r.URL.Path {
808                 case "/latest/api/token":
809                         fmt.Fprintln(w, *token)
810                 case "/latest/meta-data/spot/instance-action":
811                         if r.Header.Get("X-aws-ec2-metadata-token") != *token {
812                                 w.WriteHeader(http.StatusUnauthorized)
813                         } else if t, _ := stoptime.Load().(time.Time); t.IsZero() {
814                                 w.WriteHeader(http.StatusNotFound)
815                         } else {
816                                 fmt.Fprintf(w, `{"action":"stop","time":"%s"}`, t.Format(time.RFC3339))
817                         }
818                 default:
819                         w.WriteHeader(http.StatusNotFound)
820                 }
821         }))
822 }
823
824 func (s *TestSuite) TestSpotInterruptionNotice(c *C) {
825         s.testSpotInterruptionNotice(c, 0.1)
826 }
827
828 func (s *TestSuite) TestSpotInterruptionNoticeNotAvailable(c *C) {
829         s.testSpotInterruptionNotice(c, 1)
830 }
831
832 func (s *TestSuite) testSpotInterruptionNotice(c *C, failureRate float64) {
833         var stoptime atomic.Value
834         token := "fake-ec2-metadata-token"
835         stub := ec2MetadataServerStub(c, &token, failureRate, &stoptime)
836         defer stub.Close()
837
838         defer func(i time.Duration, u string) {
839                 spotInterruptionCheckInterval = i
840                 ec2MetadataBaseURL = u
841         }(spotInterruptionCheckInterval, ec2MetadataBaseURL)
842         spotInterruptionCheckInterval = time.Second / 8
843         ec2MetadataBaseURL = stub.URL
844
845         checkedLogs := false
846         checkLogs := func() {
847                 checkedLogs = true
848                 c.Check(logFileContent(c, s.runner, "crunch-run.txt"), Matches, `(?ms).*Checking for spot interruptions every 125ms using instance metadata at http://.*`)
849                 c.Check(logFileContent(c, s.runner, "crunch-run.txt"), Matches, `(?ms).*Error checking spot interruptions: 503 Service Unavailable.*`)
850                 if failureRate == 1 {
851                         c.Check(logFileContent(c, s.runner, "crunch-run.txt"), Matches, `(?ms).*Giving up on checking spot interruptions after too many consecutive failures.*`)
852                         return
853                 }
854                 text := `Cloud provider scheduled instance stop at ` + stoptime.Load().(time.Time).Format(time.RFC3339)
855                 c.Check(logFileContent(c, s.runner, "crunch-run.txt"), Matches, `(?ms).*`+text+`.*`)
856                 c.Check(s.api.CalledWith("container.runtime_status.warning", "preemption notice"), NotNil)
857                 c.Check(s.api.CalledWith("container.runtime_status.warningDetail", text), NotNil)
858                 c.Check(s.api.CalledWith("container.runtime_status.preemptionNotice", text), NotNil)
859
860                 // Check that the log collection was saved, and the
861                 // container record updated with the new PDH,
862                 // immediately after the preemption notice was
863                 // received -- i.e., while the container is still
864                 // running.
865                 lastpdh := ""
866                 saved := make(map[string]string) // pdh => manifest_text
867                 for _, call := range s.api.Content {
868                         if ctr, ok := call["container"].(arvadosclient.Dict); ok {
869                                 if pdh, ok := ctr["log"].(string); ok {
870                                         lastpdh = pdh
871                                 }
872                         }
873                         if coll, ok := call["collection"].(arvadosclient.Dict); ok {
874                                 mt, _ := coll["manifest_text"].(string)
875                                 if strings.Contains(mt, ":crunch-run.txt") {
876                                         saved[arvados.PortableDataHash(mt)] = mt
877                                 }
878                         }
879                 }
880                 logfs, err := (&arvados.Collection{ManifestText: saved[lastpdh]}).FileSystem(s.runner.dispatcherClient, s.runner.DispatcherKeepClient)
881                 c.Assert(err, IsNil)
882                 log, err := fs.ReadFile(arvados.FS(logfs), "crunch-run.txt")
883                 c.Check(err, IsNil)
884                 c.Check(string(log), Matches, `(?ms).*\Q`+text+`\E.*`)
885         }
886
887         go s.runner.checkSpotInterruptionNotices()
888         s.fullRunHelper(c, `{
889     "command": ["sleep", "3"],
890     "container_image": "`+arvadostest.DockerImage112PDH+`",
891     "cwd": ".",
892     "environment": {},
893     "mounts": {"/tmp": {"kind": "tmp"} },
894     "output_path": "/tmp",
895     "priority": 1,
896     "runtime_constraints": {},
897     "state": "Locked"
898 }`, nil, func() int {
899                 time.Sleep(time.Second)
900                 stoptime.Store(time.Now().Add(time.Minute).UTC())
901                 token = "different-fake-ec2-metadata-token"
902                 time.Sleep(time.Second)
903                 checkLogs()
904                 return 0
905         })
906         c.Check(checkedLogs, Equals, true)
907 }
908
909 func (s *TestSuite) TestRunTimeExceeded(c *C) {
910         s.fullRunHelper(c, `{
911     "command": ["sleep", "3"],
912     "container_image": "`+arvadostest.DockerImage112PDH+`",
913     "cwd": ".",
914     "environment": {},
915     "mounts": {"/tmp": {"kind": "tmp"} },
916     "output_path": "/tmp",
917     "priority": 1,
918     "runtime_constraints": {},
919     "scheduling_parameters":{"max_run_time": 1},
920     "state": "Locked"
921 }`, nil, func() int {
922                 time.Sleep(3 * time.Second)
923                 return 0
924         })
925
926         c.Check(s.api.CalledWith("container.state", "Cancelled"), NotNil)
927         c.Check(logFileContent(c, s.runner, "crunch-run.txt"), Matches, "(?ms).*maximum run time exceeded.*")
928 }
929
930 func (s *TestSuite) TestContainerWaitFails(c *C) {
931         s.fullRunHelper(c, `{
932     "command": ["sleep", "3"],
933     "container_image": "`+arvadostest.DockerImage112PDH+`",
934     "cwd": ".",
935     "mounts": {"/tmp": {"kind": "tmp"} },
936     "output_path": "/tmp",
937     "priority": 1,
938     "state": "Locked"
939 }`, nil, func() int {
940                 s.executor.waitErr = errors.New("Container is not running")
941                 return 0
942         })
943
944         c.Check(s.api.CalledWith("container.state", "Cancelled"), NotNil)
945         c.Check(logFileContent(c, s.runner, "crunch-run.txt"), Matches, "(?ms).*Container is not running.*")
946 }
947
948 func (s *TestSuite) TestCrunchstat(c *C) {
949         s.runner.crunchstatFakeFS = os.DirFS("../crunchstat/testdata/debian12")
950         s.fullRunHelper(c, `{
951                 "command": ["sleep", "1"],
952                 "container_image": "`+arvadostest.DockerImage112PDH+`",
953                 "cwd": ".",
954                 "environment": {},
955                 "mounts": {"/tmp": {"kind": "tmp"} },
956                 "output_path": "/tmp",
957                 "priority": 1,
958                 "runtime_constraints": {},
959                 "state": "Locked"
960         }`, nil, func() int {
961                 time.Sleep(time.Second)
962                 return 0
963         })
964
965         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
966         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
967
968         c.Check(logFileContent(c, s.runner, "crunchstat.txt"), Matches, `(?ms).*mem \d+ swap \d+ pgmajfault \d+ rss.*`)
969
970         // Check that we called (*crunchstat.Reporter)Stop().
971         c.Check(logFileContent(c, s.runner, "crunch-run.txt"), Matches, `(?ms).*Maximum crunch-run memory rss usage was \d+ bytes\n.*`)
972 }
973
974 func (s *TestSuite) TestNodeInfoLog(c *C) {
975         s.fullRunHelper(c, `{
976                 "command": ["true"],
977                 "container_image": "`+arvadostest.DockerImage112PDH+`",
978                 "cwd": ".",
979                 "environment": {},
980                 "mounts": {"/tmp": {"kind": "tmp"} },
981                 "output_path": "/tmp",
982                 "priority": 1,
983                 "runtime_constraints": {},
984                 "state": "Locked"
985         }`, nil, func() int {
986                 return 0
987         })
988
989         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
990         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
991
992         json := logFileContent(c, s.runner, "node.json")
993         c.Check(json, Matches, `(?ms).*"ProviderType": *"a1\.2xlarge".*`)
994         c.Check(json, Matches, `(?ms).*"Price": *1\.2.*`)
995
996         nodeinfo := logFileContent(c, s.runner, "node-info.txt")
997         c.Check(nodeinfo, Matches, `(?ms).*Host Information.*`)
998         c.Check(nodeinfo, Matches, `(?ms).*CPU Information.*`)
999         c.Check(nodeinfo, Matches, `(?ms).*Memory Information.*`)
1000         c.Check(nodeinfo, Matches, `(?ms).*Disk Space.*`)
1001         c.Check(nodeinfo, Matches, `(?ms).*Disk INodes.*`)
1002 }
1003
1004 func (s *TestSuite) TestLogVersionAndRuntime(c *C) {
1005         s.fullRunHelper(c, `{
1006                 "command": ["sleep", "1"],
1007                 "container_image": "`+arvadostest.DockerImage112PDH+`",
1008                 "cwd": ".",
1009                 "environment": {},
1010                 "mounts": {"/tmp": {"kind": "tmp"} },
1011                 "output_path": "/tmp",
1012                 "priority": 1,
1013                 "runtime_constraints": {},
1014                 "state": "Locked"
1015         }`, nil, func() int {
1016                 return 0
1017         })
1018
1019         c.Check(logFileContent(c, s.runner, "crunch-run.txt"), Matches, `(?ms).*crunch-run \S+ \(go\S+\) start.*`)
1020         c.Check(logFileContent(c, s.runner, "crunch-run.txt"), Matches, `(?ms).*crunch-run process has uid=\d+\(.+\) gid=\d+\(.+\) groups=\d+\(.+\)(,\d+\(.+\))*\n.*`)
1021         c.Check(logFileContent(c, s.runner, "crunch-run.txt"), Matches, `(?ms).*Executing container: zzzzz-zzzzz-zzzzzzzzzzzzzzz.*`)
1022         c.Check(logFileContent(c, s.runner, "crunch-run.txt"), Matches, `(?ms).*Using container runtime: stub.*`)
1023 }
1024
1025 func (s *TestSuite) testLogRSSThresholds(c *C, ram int64, expected []int, notExpected int) {
1026         s.runner.crunchstatFakeFS = os.DirFS("../crunchstat/testdata/debian12")
1027         s.fullRunHelper(c, `{
1028                 "command": ["true"],
1029                 "container_image": "`+arvadostest.DockerImage112PDH+`",
1030                 "cwd": ".",
1031                 "environment": {},
1032                 "mounts": {"/tmp": {"kind": "tmp"} },
1033                 "output_path": "/tmp",
1034                 "priority": 1,
1035                 "runtime_constraints": {"ram": `+fmt.Sprintf("%d", ram)+`},
1036                 "state": "Locked"
1037         }`, nil, func() int { return 0 })
1038         logs := logFileContent(c, s.runner, "crunch-run.txt")
1039         c.Log("=== crunchstat logs")
1040         c.Log(logs)
1041         pattern := logLineStart + `Container using over %d%% of memory \(rss %d/%d bytes\)`
1042         var threshold int
1043         for _, threshold = range expected {
1044                 c.Check(logs, Matches, fmt.Sprintf(pattern, threshold, s.debian12MemoryCurrent, ram))
1045         }
1046         if notExpected > threshold {
1047                 c.Check(logs, Not(Matches), fmt.Sprintf(pattern, notExpected, s.debian12MemoryCurrent, ram))
1048         }
1049 }
1050
1051 func (s *TestSuite) TestLogNoRSSThresholds(c *C) {
1052         s.testLogRSSThresholds(c, s.debian12MemoryCurrent*10, []int{}, 90)
1053 }
1054
1055 func (s *TestSuite) TestLogSomeRSSThresholds(c *C) {
1056         onePercentRSS := s.debian12MemoryCurrent / 100
1057         s.testLogRSSThresholds(c, 102*onePercentRSS, []int{90, 95}, 99)
1058 }
1059
1060 func (s *TestSuite) TestLogAllRSSThresholds(c *C) {
1061         s.testLogRSSThresholds(c, s.debian12MemoryCurrent, []int{90, 95, 99}, 0)
1062 }
1063
1064 func (s *TestSuite) TestLogMaximaAfterRun(c *C) {
1065         s.runner.crunchstatFakeFS = os.DirFS("../crunchstat/testdata/debian12")
1066         s.runner.parentTemp = c.MkDir()
1067         s.fullRunHelper(c, `{
1068         "command": ["true"],
1069         "container_image": "`+arvadostest.DockerImage112PDH+`",
1070         "cwd": ".",
1071         "environment": {},
1072         "mounts": {"/tmp": {"kind": "tmp"} },
1073         "output_path": "/tmp",
1074         "priority": 1,
1075         "runtime_constraints": {"ram": `+fmt.Sprintf("%d", s.debian12MemoryCurrent*10)+`},
1076         "state": "Locked"
1077     }`, nil, func() int { return 0 })
1078         logs := logFileContent(c, s.runner, "crunch-run.txt")
1079         for _, expected := range []string{
1080                 `Maximum disk usage was \d+%, \d+/\d+ bytes`,
1081                 fmt.Sprintf(`Maximum container memory swap usage was %d bytes`, s.debian12SwapCurrent),
1082                 `Maximum container memory pgmajfault usage was \d+ faults`,
1083                 fmt.Sprintf(`Maximum container memory rss usage was 10%%, %d/%d bytes`, s.debian12MemoryCurrent, s.debian12MemoryCurrent*10),
1084                 `Maximum crunch-run memory rss usage was \d+ bytes`,
1085         } {
1086                 c.Check(logs, Matches, logLineStart+expected)
1087         }
1088 }
1089
1090 func (s *TestSuite) TestCommitNodeInfoBeforeStart(c *C) {
1091         var collection_create, container_update arvadosclient.Dict
1092         s.fullRunHelper(c, `{
1093                 "command": ["true"],
1094                 "container_image": "`+arvadostest.DockerImage112PDH+`",
1095                 "cwd": ".",
1096                 "environment": {},
1097                 "mounts": {"/tmp": {"kind": "tmp"} },
1098                 "output_path": "/tmp",
1099                 "priority": 1,
1100                 "runtime_constraints": {},
1101                 "state": "Locked",
1102                 "uuid": "zzzzz-dz642-202301121543210"
1103         }`, nil, func() int {
1104                 collection_create = s.api.CalledWith("ensure_unique_name", true)
1105                 container_update = s.api.CalledWith("container.state", "Running")
1106                 return 0
1107         })
1108
1109         c.Assert(collection_create, NotNil)
1110         log_collection := collection_create["collection"].(arvadosclient.Dict)
1111         c.Check(log_collection["name"], Equals, "logs for zzzzz-dz642-202301121543210")
1112         manifest_text := log_collection["manifest_text"].(string)
1113         // We check that the file size is at least two digits as an easy way to
1114         // check the file isn't empty.
1115         c.Check(manifest_text, Matches, `\. .+ \d+:\d{2,}:node-info\.txt( .+)?\n`)
1116         c.Check(manifest_text, Matches, `\. .+ \d+:\d{2,}:node\.json( .+)?\n`)
1117
1118         c.Assert(container_update, NotNil)
1119         // As of Arvados 2.5.0, the container update must specify its log in PDH
1120         // format for the API server to propagate it to container requests, which
1121         // is what we care about for this test.
1122         expect_pdh := fmt.Sprintf("%x+%d", md5.Sum([]byte(manifest_text)), len(manifest_text))
1123         c.Check(container_update["container"].(arvadosclient.Dict)["log"], Equals, expect_pdh)
1124 }
1125
1126 func (s *TestSuite) TestContainerRecordLog(c *C) {
1127         s.fullRunHelper(c, `{
1128                 "command": ["sleep", "1"],
1129                 "container_image": "`+arvadostest.DockerImage112PDH+`",
1130                 "cwd": ".",
1131                 "environment": {},
1132                 "mounts": {"/tmp": {"kind": "tmp"} },
1133                 "output_path": "/tmp",
1134                 "priority": 1,
1135                 "runtime_constraints": {},
1136                 "state": "Locked"
1137         }`, nil,
1138                 func() int {
1139                         time.Sleep(time.Second)
1140                         return 0
1141                 })
1142
1143         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
1144         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
1145
1146         c.Check(logFileContent(c, s.runner, "container.json"), Matches, `(?ms).*container_image.*`)
1147 }
1148
1149 func (s *TestSuite) TestFullRunStderr(c *C) {
1150         s.fullRunHelper(c, `{
1151     "command": ["/bin/sh", "-c", "echo hello ; echo world 1>&2 ; exit 1"],
1152     "container_image": "`+arvadostest.DockerImage112PDH+`",
1153     "cwd": ".",
1154     "environment": {},
1155     "mounts": {"/tmp": {"kind": "tmp"} },
1156     "output_path": "/tmp",
1157     "priority": 1,
1158     "runtime_constraints": {},
1159     "state": "Locked"
1160 }`, nil, func() int {
1161                 fmt.Fprintln(s.executor.created.Stdout, "hello")
1162                 fmt.Fprintln(s.executor.created.Stderr, "world")
1163                 return 1
1164         })
1165
1166         final := s.api.CalledWith("container.state", "Complete")
1167         c.Assert(final, NotNil)
1168         c.Check(final["container"].(arvadosclient.Dict)["exit_code"], Equals, 1)
1169         c.Check(final["container"].(arvadosclient.Dict)["log"], NotNil)
1170
1171         c.Check(logFileContent(c, s.runner, "stdout.txt"), Matches, ".*hello\n")
1172         c.Check(logFileContent(c, s.runner, "stderr.txt"), Matches, ".*world\n")
1173 }
1174
1175 func (s *TestSuite) TestFullRunDefaultCwd(c *C) {
1176         s.fullRunHelper(c, `{
1177     "command": ["pwd"],
1178     "container_image": "`+arvadostest.DockerImage112PDH+`",
1179     "cwd": ".",
1180     "environment": {},
1181     "mounts": {"/tmp": {"kind": "tmp"} },
1182     "output_path": "/tmp",
1183     "priority": 1,
1184     "runtime_constraints": {},
1185     "state": "Locked"
1186 }`, nil, func() int {
1187                 fmt.Fprintf(s.executor.created.Stdout, "workdir=%q", s.executor.created.WorkingDir)
1188                 return 0
1189         })
1190
1191         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
1192         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
1193         c.Check(logFileContent(c, s.runner, "stdout.txt"), Matches, `.*workdir=""`)
1194 }
1195
1196 func (s *TestSuite) TestFullRunSetCwd(c *C) {
1197         s.fullRunHelper(c, `{
1198     "command": ["pwd"],
1199     "container_image": "`+arvadostest.DockerImage112PDH+`",
1200     "cwd": "/bin",
1201     "environment": {},
1202     "mounts": {"/tmp": {"kind": "tmp"} },
1203     "output_path": "/tmp",
1204     "priority": 1,
1205     "runtime_constraints": {},
1206     "state": "Locked"
1207 }`, nil, func() int {
1208                 fmt.Fprintln(s.executor.created.Stdout, s.executor.created.WorkingDir)
1209                 return 0
1210         })
1211
1212         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
1213         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
1214         c.Check(logFileContent(c, s.runner, "stdout.txt"), Matches, ".*/bin\n")
1215 }
1216
1217 func (s *TestSuite) TestFullRunSetOutputStorageClasses(c *C) {
1218         s.fullRunHelper(c, `{
1219     "command": ["pwd"],
1220     "container_image": "`+arvadostest.DockerImage112PDH+`",
1221     "cwd": "/bin",
1222     "environment": {},
1223     "mounts": {"/tmp": {"kind": "tmp"} },
1224     "output_path": "/tmp",
1225     "priority": 1,
1226     "runtime_constraints": {},
1227     "state": "Locked",
1228     "output_storage_classes": ["foo", "bar"]
1229 }`, nil, func() int {
1230                 fmt.Fprintln(s.executor.created.Stdout, s.executor.created.WorkingDir)
1231                 return 0
1232         })
1233
1234         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
1235         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
1236         c.Check(logFileContent(c, s.runner, "stdout.txt"), Matches, ".*/bin\n")
1237         c.Check(s.testDispatcherKeepClient.StorageClasses, DeepEquals, []string{"foo", "bar"})
1238         c.Check(s.testContainerKeepClient.StorageClasses, DeepEquals, []string{"foo", "bar"})
1239 }
1240
1241 func (s *TestSuite) TestEnableCUDADeviceCount(c *C) {
1242         s.fullRunHelper(c, `{
1243     "command": ["pwd"],
1244     "container_image": "`+arvadostest.DockerImage112PDH+`",
1245     "cwd": "/bin",
1246     "environment": {},
1247     "mounts": {"/tmp": {"kind": "tmp"} },
1248     "output_path": "/tmp",
1249     "priority": 1,
1250     "runtime_constraints": {"cuda": {"device_count": 2}},
1251     "state": "Locked",
1252     "output_storage_classes": ["foo", "bar"]
1253 }`, nil, func() int {
1254                 fmt.Fprintln(s.executor.created.Stdout, "ok")
1255                 return 0
1256         })
1257         c.Check(s.executor.created.CUDADeviceCount, Equals, 2)
1258 }
1259
1260 func (s *TestSuite) TestEnableCUDAHardwareCapability(c *C) {
1261         s.fullRunHelper(c, `{
1262     "command": ["pwd"],
1263     "container_image": "`+arvadostest.DockerImage112PDH+`",
1264     "cwd": "/bin",
1265     "environment": {},
1266     "mounts": {"/tmp": {"kind": "tmp"} },
1267     "output_path": "/tmp",
1268     "priority": 1,
1269     "runtime_constraints": {"cuda": {"hardware_capability": "foo"}},
1270     "state": "Locked",
1271     "output_storage_classes": ["foo", "bar"]
1272 }`, nil, func() int {
1273                 fmt.Fprintln(s.executor.created.Stdout, "ok")
1274                 return 0
1275         })
1276         c.Check(s.executor.created.CUDADeviceCount, Equals, 0)
1277 }
1278
1279 func (s *TestSuite) TestStopOnSignal(c *C) {
1280         s.executor.runFunc = func() int {
1281                 s.executor.created.Stdout.Write([]byte("foo\n"))
1282                 s.runner.SigChan <- syscall.SIGINT
1283                 time.Sleep(10 * time.Second)
1284                 return 0
1285         }
1286         s.testStopContainer(c)
1287 }
1288
1289 func (s *TestSuite) TestStopOnArvMountDeath(c *C) {
1290         s.executor.runFunc = func() int {
1291                 s.executor.created.Stdout.Write([]byte("foo\n"))
1292                 s.runner.ArvMountExit <- nil
1293                 close(s.runner.ArvMountExit)
1294                 time.Sleep(10 * time.Second)
1295                 return 0
1296         }
1297         s.runner.ArvMountExit = make(chan error)
1298         s.testStopContainer(c)
1299 }
1300
1301 func (s *TestSuite) testStopContainer(c *C) {
1302         record := `{
1303     "command": ["/bin/sh", "-c", "echo foo && sleep 30 && echo bar"],
1304     "container_image": "` + arvadostest.DockerImage112PDH + `",
1305     "cwd": ".",
1306     "environment": {},
1307     "mounts": {"/tmp": {"kind": "tmp"} },
1308     "output_path": "/tmp",
1309     "priority": 1,
1310     "runtime_constraints": {},
1311     "state": "Locked"
1312 }`
1313
1314         err := json.Unmarshal([]byte(record), &s.api.Container)
1315         c.Assert(err, IsNil)
1316
1317         s.runner.RunArvMount = func([]string, string) (*exec.Cmd, error) { return nil, nil }
1318         s.runner.MkArvClient = func(token string) (IArvadosClient, IKeepClient, *arvados.Client, error) {
1319                 return &ArvTestClient{}, &KeepTestClient{}, nil, nil
1320         }
1321
1322         done := make(chan error)
1323         go func() {
1324                 done <- s.runner.Run()
1325         }()
1326         select {
1327         case <-time.After(20 * time.Second):
1328                 pprof.Lookup("goroutine").WriteTo(os.Stderr, 1)
1329                 c.Fatal("timed out")
1330         case err = <-done:
1331                 c.Check(err, IsNil)
1332         }
1333         dumpAllLogFiles(c, s.runner)
1334
1335         c.Check(s.api.CalledWith("container.log", nil), NotNil)
1336         c.Check(s.api.CalledWith("container.state", "Cancelled"), NotNil)
1337         c.Check(logFileContent(c, s.runner, "stdout.txt"), Matches, "(?ms).*foo\n$")
1338 }
1339
1340 func (s *TestSuite) TestFullRunSetEnv(c *C) {
1341         s.fullRunHelper(c, `{
1342     "command": ["/bin/sh", "-c", "echo $FROBIZ"],
1343     "container_image": "`+arvadostest.DockerImage112PDH+`",
1344     "cwd": "/bin",
1345     "environment": {"FROBIZ": "bilbo"},
1346     "mounts": {"/tmp": {"kind": "tmp"} },
1347     "output_path": "/tmp",
1348     "priority": 1,
1349     "runtime_constraints": {},
1350     "state": "Locked"
1351 }`, nil, func() int {
1352                 fmt.Fprintf(s.executor.created.Stdout, "%v", s.executor.created.Env)
1353                 return 0
1354         })
1355
1356         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
1357         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
1358         c.Check(logFileContent(c, s.runner, "stdout.txt"), Matches, `.*map\[FROBIZ:bilbo\]`)
1359 }
1360
1361 type ArvMountCmdLine struct {
1362         Cmd   []string
1363         token string
1364 }
1365
1366 func (am *ArvMountCmdLine) ArvMountTest(c []string, token string) (*exec.Cmd, error) {
1367         am.Cmd = c
1368         am.token = token
1369         return nil, nil
1370 }
1371
1372 func stubCert(c *C, temp string) string {
1373         path := temp + "/ca-certificates.crt"
1374         err := os.WriteFile(path, []byte{}, 0666)
1375         c.Assert(err, IsNil)
1376         os.Setenv("SSL_CERT_FILE", path)
1377         return path
1378 }
1379
1380 func (s *TestSuite) TestSetupMounts(c *C) {
1381         cr := s.runner
1382         am := &ArvMountCmdLine{}
1383         cr.RunArvMount = am.ArvMountTest
1384         cr.containerClient, _ = apiStub()
1385         cr.ContainerArvClient = &ArvTestClient{}
1386         cr.ContainerKeepClient = &KeepTestClient{}
1387         cr.Container.OutputStorageClasses = []string{"default"}
1388
1389         realTemp := c.MkDir()
1390         certTemp := c.MkDir()
1391         stubCertPath := stubCert(c, certTemp)
1392         cr.parentTemp = realTemp
1393
1394         i := 0
1395         cr.MkTempDir = func(_ string, prefix string) (string, error) {
1396                 i++
1397                 d := fmt.Sprintf("%s/%s%d", realTemp, prefix, i)
1398                 err := os.Mkdir(d, os.ModePerm)
1399                 if err != nil && strings.Contains(err.Error(), ": file exists") {
1400                         // Test case must have pre-populated the tempdir
1401                         err = nil
1402                 }
1403                 return d, err
1404         }
1405
1406         checkEmpty := func() {
1407                 // Should be deleted.
1408                 _, err := os.Stat(realTemp)
1409                 c.Assert(os.IsNotExist(err), Equals, true)
1410
1411                 // Now recreate it for the next test.
1412                 c.Assert(os.Mkdir(realTemp, 0777), IsNil)
1413         }
1414
1415         {
1416                 i = 0
1417                 cr.ArvMountPoint = ""
1418                 cr.Container.Mounts = make(map[string]arvados.Mount)
1419                 cr.Container.Mounts["/tmp"] = arvados.Mount{Kind: "tmp"}
1420                 cr.Container.OutputPath = "/tmp"
1421                 cr.statInterval = 5 * time.Second
1422                 bindmounts, err := cr.SetupMounts()
1423                 c.Check(err, IsNil)
1424                 c.Check(am.Cmd, DeepEquals, []string{"arv-mount", "--foreground",
1425                         "--read-write", "--storage-classes", "default", "--crunchstat-interval=5",
1426                         "--mount-by-pdh", "by_id", "--disable-event-listening", "--mount-by-id", "by_uuid", realTemp + "/keep1"})
1427                 c.Check(bindmounts, DeepEquals, map[string]bindmount{"/tmp": {realTemp + "/tmp2", false}})
1428                 os.RemoveAll(cr.ArvMountPoint)
1429                 cr.CleanupDirs()
1430                 checkEmpty()
1431         }
1432
1433         {
1434                 i = 0
1435                 cr.ArvMountPoint = ""
1436                 cr.Container.Mounts = make(map[string]arvados.Mount)
1437                 cr.Container.Mounts["/out"] = arvados.Mount{Kind: "tmp"}
1438                 cr.Container.Mounts["/tmp"] = arvados.Mount{Kind: "tmp"}
1439                 cr.Container.OutputPath = "/out"
1440                 cr.Container.OutputStorageClasses = []string{"foo", "bar"}
1441
1442                 bindmounts, err := cr.SetupMounts()
1443                 c.Check(err, IsNil)
1444                 c.Check(am.Cmd, DeepEquals, []string{"arv-mount", "--foreground",
1445                         "--read-write", "--storage-classes", "foo,bar", "--crunchstat-interval=5",
1446                         "--mount-by-pdh", "by_id", "--disable-event-listening", "--mount-by-id", "by_uuid", realTemp + "/keep1"})
1447                 c.Check(bindmounts, DeepEquals, map[string]bindmount{"/out": {realTemp + "/tmp2", false}, "/tmp": {realTemp + "/tmp3", false}})
1448                 os.RemoveAll(cr.ArvMountPoint)
1449                 cr.CleanupDirs()
1450                 checkEmpty()
1451         }
1452
1453         {
1454                 i = 0
1455                 cr.ArvMountPoint = ""
1456                 cr.Container.Mounts = make(map[string]arvados.Mount)
1457                 cr.Container.Mounts["/tmp"] = arvados.Mount{Kind: "tmp"}
1458                 cr.Container.OutputPath = "/tmp"
1459                 cr.Container.RuntimeConstraints.API = true
1460                 cr.Container.OutputStorageClasses = []string{"default"}
1461
1462                 bindmounts, err := cr.SetupMounts()
1463                 c.Check(err, IsNil)
1464                 c.Check(am.Cmd, DeepEquals, []string{"arv-mount", "--foreground",
1465                         "--read-write", "--storage-classes", "default", "--crunchstat-interval=5",
1466                         "--mount-by-pdh", "by_id", "--disable-event-listening", "--mount-by-id", "by_uuid", realTemp + "/keep1"})
1467                 c.Check(bindmounts, DeepEquals, map[string]bindmount{"/tmp": {realTemp + "/tmp2", false}, "/etc/arvados/ca-certificates.crt": {stubCertPath, true}})
1468                 os.RemoveAll(cr.ArvMountPoint)
1469                 cr.CleanupDirs()
1470                 checkEmpty()
1471
1472                 cr.Container.RuntimeConstraints.API = false
1473         }
1474
1475         {
1476                 i = 0
1477                 cr.ArvMountPoint = ""
1478                 cr.Container.Mounts = map[string]arvados.Mount{
1479                         "/keeptmp": {Kind: "collection", Writable: true},
1480                 }
1481                 cr.Container.OutputPath = "/keeptmp"
1482
1483                 os.MkdirAll(realTemp+"/keep1/tmp0", os.ModePerm)
1484
1485                 bindmounts, err := cr.SetupMounts()
1486                 c.Check(err, IsNil)
1487                 c.Check(am.Cmd, DeepEquals, []string{"arv-mount", "--foreground",
1488                         "--read-write", "--storage-classes", "default", "--crunchstat-interval=5",
1489                         "--mount-tmp", "tmp0", "--mount-by-pdh", "by_id", "--disable-event-listening", "--mount-by-id", "by_uuid", realTemp + "/keep1"})
1490                 c.Check(bindmounts, DeepEquals, map[string]bindmount{"/keeptmp": {realTemp + "/keep1/tmp0", false}})
1491                 os.RemoveAll(cr.ArvMountPoint)
1492                 cr.CleanupDirs()
1493                 checkEmpty()
1494         }
1495
1496         {
1497                 i = 0
1498                 cr.ArvMountPoint = ""
1499                 cr.Container.Mounts = map[string]arvados.Mount{
1500                         "/keepinp": {Kind: "collection", PortableDataHash: "59389a8f9ee9d399be35462a0f92541c+53"},
1501                         "/keepout": {Kind: "collection", Writable: true},
1502                 }
1503                 cr.Container.OutputPath = "/keepout"
1504
1505                 os.MkdirAll(realTemp+"/keep1/by_id/59389a8f9ee9d399be35462a0f92541c+53", os.ModePerm)
1506                 os.MkdirAll(realTemp+"/keep1/tmp0", os.ModePerm)
1507
1508                 bindmounts, err := cr.SetupMounts()
1509                 c.Check(err, IsNil)
1510                 c.Check(am.Cmd, DeepEquals, []string{"arv-mount", "--foreground",
1511                         "--read-write", "--storage-classes", "default", "--crunchstat-interval=5",
1512                         "--mount-tmp", "tmp0", "--mount-by-pdh", "by_id", "--disable-event-listening", "--mount-by-id", "by_uuid", realTemp + "/keep1"})
1513                 c.Check(bindmounts, DeepEquals, map[string]bindmount{
1514                         "/keepinp": {realTemp + "/keep1/by_id/59389a8f9ee9d399be35462a0f92541c+53", true},
1515                         "/keepout": {realTemp + "/keep1/tmp0", false},
1516                 })
1517                 os.RemoveAll(cr.ArvMountPoint)
1518                 cr.CleanupDirs()
1519                 checkEmpty()
1520         }
1521
1522         {
1523                 i = 0
1524                 cr.ArvMountPoint = ""
1525                 cr.Container.RuntimeConstraints.KeepCacheRAM = 512
1526                 cr.Container.Mounts = map[string]arvados.Mount{
1527                         "/keepinp": {Kind: "collection", PortableDataHash: "59389a8f9ee9d399be35462a0f92541c+53"},
1528                         "/keepout": {Kind: "collection", Writable: true},
1529                 }
1530                 cr.Container.OutputPath = "/keepout"
1531
1532                 os.MkdirAll(realTemp+"/keep1/by_id/59389a8f9ee9d399be35462a0f92541c+53", os.ModePerm)
1533                 os.MkdirAll(realTemp+"/keep1/tmp0", os.ModePerm)
1534
1535                 bindmounts, err := cr.SetupMounts()
1536                 c.Check(err, IsNil)
1537                 c.Check(am.Cmd, DeepEquals, []string{"arv-mount", "--foreground",
1538                         "--read-write", "--storage-classes", "default", "--crunchstat-interval=5", "--ram-cache",
1539                         "--file-cache", "512", "--mount-tmp", "tmp0", "--mount-by-pdh", "by_id", "--disable-event-listening", "--mount-by-id", "by_uuid", realTemp + "/keep1"})
1540                 c.Check(bindmounts, DeepEquals, map[string]bindmount{
1541                         "/keepinp": {realTemp + "/keep1/by_id/59389a8f9ee9d399be35462a0f92541c+53", true},
1542                         "/keepout": {realTemp + "/keep1/tmp0", false},
1543                 })
1544                 os.RemoveAll(cr.ArvMountPoint)
1545                 cr.CleanupDirs()
1546                 checkEmpty()
1547         }
1548
1549         for _, test := range []struct {
1550                 in  interface{}
1551                 out string
1552         }{
1553                 {in: "foo", out: `"foo"`},
1554                 {in: nil, out: `null`},
1555                 {in: map[string]int64{"foo": 123456789123456789}, out: `{"foo":123456789123456789}`},
1556         } {
1557                 i = 0
1558                 cr.ArvMountPoint = ""
1559                 cr.Container.Mounts = map[string]arvados.Mount{
1560                         "/mnt/test.json": {Kind: "json", Content: test.in},
1561                 }
1562                 bindmounts, err := cr.SetupMounts()
1563                 c.Check(err, IsNil)
1564                 c.Check(bindmounts, DeepEquals, map[string]bindmount{
1565                         "/mnt/test.json": {realTemp + "/json2/mountdata.json", true},
1566                 })
1567                 content, err := ioutil.ReadFile(realTemp + "/json2/mountdata.json")
1568                 c.Check(err, IsNil)
1569                 c.Check(content, DeepEquals, []byte(test.out))
1570                 os.RemoveAll(cr.ArvMountPoint)
1571                 cr.CleanupDirs()
1572                 checkEmpty()
1573         }
1574
1575         for _, test := range []struct {
1576                 in  interface{}
1577                 out string
1578         }{
1579                 {in: "foo", out: `foo`},
1580                 {in: nil, out: "error"},
1581                 {in: map[string]int64{"foo": 123456789123456789}, out: "error"},
1582         } {
1583                 i = 0
1584                 cr.ArvMountPoint = ""
1585                 cr.Container.Mounts = map[string]arvados.Mount{
1586                         "/mnt/test.txt": {Kind: "text", Content: test.in},
1587                 }
1588                 bindmounts, err := cr.SetupMounts()
1589                 if test.out == "error" {
1590                         c.Check(err.Error(), Equals, "content for mount \"/mnt/test.txt\" must be a string")
1591                 } else {
1592                         c.Check(err, IsNil)
1593                         c.Check(bindmounts, DeepEquals, map[string]bindmount{
1594                                 "/mnt/test.txt": {realTemp + "/text2/mountdata.text", true},
1595                         })
1596                         content, err := ioutil.ReadFile(realTemp + "/text2/mountdata.text")
1597                         c.Check(err, IsNil)
1598                         c.Check(content, DeepEquals, []byte(test.out))
1599                 }
1600                 os.RemoveAll(cr.ArvMountPoint)
1601                 cr.CleanupDirs()
1602                 checkEmpty()
1603         }
1604
1605         // Read-only mount points are allowed underneath output_dir mount point
1606         {
1607                 i = 0
1608                 cr.ArvMountPoint = ""
1609                 cr.Container.Mounts = make(map[string]arvados.Mount)
1610                 cr.Container.Mounts = map[string]arvados.Mount{
1611                         "/tmp":     {Kind: "tmp"},
1612                         "/tmp/foo": {Kind: "collection"},
1613                 }
1614                 cr.Container.OutputPath = "/tmp"
1615
1616                 os.MkdirAll(realTemp+"/keep1/tmp0", os.ModePerm)
1617
1618                 bindmounts, err := cr.SetupMounts()
1619                 c.Check(err, IsNil)
1620                 c.Check(am.Cmd, DeepEquals, []string{"arv-mount", "--foreground",
1621                         "--read-write", "--storage-classes", "default", "--crunchstat-interval=5", "--ram-cache",
1622                         "--file-cache", "512", "--mount-tmp", "tmp0", "--mount-by-pdh", "by_id", "--disable-event-listening", "--mount-by-id", "by_uuid", realTemp + "/keep1"})
1623                 c.Check(bindmounts, DeepEquals, map[string]bindmount{
1624                         "/tmp":     {realTemp + "/tmp2", false},
1625                         "/tmp/foo": {realTemp + "/keep1/tmp0", true},
1626                 })
1627                 os.RemoveAll(cr.ArvMountPoint)
1628                 cr.CleanupDirs()
1629                 checkEmpty()
1630         }
1631
1632         // Writable mount points copied to output_dir mount point
1633         {
1634                 i = 0
1635                 cr.ArvMountPoint = ""
1636                 cr.Container.Mounts = make(map[string]arvados.Mount)
1637                 cr.Container.Mounts = map[string]arvados.Mount{
1638                         "/tmp": {Kind: "tmp"},
1639                         "/tmp/foo": {Kind: "collection",
1640                                 PortableDataHash: "59389a8f9ee9d399be35462a0f92541c+53",
1641                                 Writable:         true},
1642                         "/tmp/bar": {Kind: "collection",
1643                                 PortableDataHash: "59389a8f9ee9d399be35462a0f92541d+53",
1644                                 Path:             "baz",
1645                                 Writable:         true},
1646                 }
1647                 cr.Container.OutputPath = "/tmp"
1648
1649                 os.MkdirAll(realTemp+"/keep1/by_id/59389a8f9ee9d399be35462a0f92541c+53", os.ModePerm)
1650                 os.MkdirAll(realTemp+"/keep1/by_id/59389a8f9ee9d399be35462a0f92541d+53/baz", os.ModePerm)
1651
1652                 rf, _ := os.Create(realTemp + "/keep1/by_id/59389a8f9ee9d399be35462a0f92541d+53/baz/quux")
1653                 rf.Write([]byte("bar"))
1654                 rf.Close()
1655
1656                 _, err := cr.SetupMounts()
1657                 c.Check(err, IsNil)
1658                 _, err = os.Stat(cr.HostOutputDir + "/foo")
1659                 c.Check(err, IsNil)
1660                 _, err = os.Stat(cr.HostOutputDir + "/bar/quux")
1661                 c.Check(err, IsNil)
1662                 os.RemoveAll(cr.ArvMountPoint)
1663                 cr.CleanupDirs()
1664                 checkEmpty()
1665         }
1666
1667         // Only mount points of kind 'collection' are allowed underneath output_dir mount point
1668         {
1669                 i = 0
1670                 cr.ArvMountPoint = ""
1671                 cr.Container.Mounts = make(map[string]arvados.Mount)
1672                 cr.Container.Mounts = map[string]arvados.Mount{
1673                         "/tmp":     {Kind: "tmp"},
1674                         "/tmp/foo": {Kind: "tmp"},
1675                 }
1676                 cr.Container.OutputPath = "/tmp"
1677
1678                 _, err := cr.SetupMounts()
1679                 c.Check(err, NotNil)
1680                 c.Check(err, ErrorMatches, `only mount points of kind 'collection', 'text' or 'json' are supported underneath the output_path.*`)
1681                 os.RemoveAll(cr.ArvMountPoint)
1682                 cr.CleanupDirs()
1683                 checkEmpty()
1684         }
1685
1686         // Only mount point of kind 'collection' is allowed for stdin
1687         {
1688                 i = 0
1689                 cr.ArvMountPoint = ""
1690                 cr.Container.Mounts = make(map[string]arvados.Mount)
1691                 cr.Container.Mounts = map[string]arvados.Mount{
1692                         "stdin": {Kind: "tmp"},
1693                 }
1694
1695                 _, err := cr.SetupMounts()
1696                 c.Check(err, NotNil)
1697                 c.Check(err, ErrorMatches, `unsupported mount kind 'tmp' for stdin.*`)
1698                 os.RemoveAll(cr.ArvMountPoint)
1699                 cr.CleanupDirs()
1700                 checkEmpty()
1701         }
1702 }
1703
1704 func (s *TestSuite) TestStdout(c *C) {
1705         helperRecord := `{
1706                 "command": ["/bin/sh", "-c", "echo $FROBIZ"],
1707                 "container_image": "` + arvadostest.DockerImage112PDH + `",
1708                 "cwd": "/bin",
1709                 "environment": {"FROBIZ": "bilbo"},
1710                 "mounts": {"/tmp": {"kind": "tmp"}, "stdout": {"kind": "file", "path": "/tmp/a/b/c.out"} },
1711                 "output_path": "/tmp",
1712                 "priority": 1,
1713                 "runtime_constraints": {},
1714                 "state": "Locked"
1715         }`
1716
1717         s.fullRunHelper(c, helperRecord, nil, func() int {
1718                 fmt.Fprintln(s.executor.created.Stdout, s.executor.created.Env["FROBIZ"])
1719                 return 0
1720         })
1721
1722         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
1723         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
1724         c.Check(s.runner.ContainerArvClient.(*ArvTestClient).CalledWith("collection.manifest_text", "./a/b 307372fa8fd5c146b22ae7a45b49bc31+6 0:6:c.out\n"), NotNil)
1725 }
1726
1727 // Used by the TestStdoutWithWrongPath*()
1728 func (s *TestSuite) stdoutErrorRunHelper(c *C, record string, fn func() int) (*ArvTestClient, *ContainerRunner, error) {
1729         err := json.Unmarshal([]byte(record), &s.api.Container)
1730         c.Assert(err, IsNil)
1731         s.executor.runFunc = fn
1732         s.runner.RunArvMount = (&ArvMountCmdLine{}).ArvMountTest
1733         s.runner.MkArvClient = func(token string) (IArvadosClient, IKeepClient, *arvados.Client, error) {
1734                 return s.api, &KeepTestClient{}, nil, nil
1735         }
1736         return s.api, s.runner, s.runner.Run()
1737 }
1738
1739 func (s *TestSuite) TestStdoutWithWrongPath(c *C) {
1740         _, _, err := s.stdoutErrorRunHelper(c, `{
1741     "mounts": {"/tmp": {"kind": "tmp"}, "stdout": {"kind": "file", "path":"/tmpa.out"} },
1742     "output_path": "/tmp",
1743     "state": "Locked"
1744 }`, func() int { return 0 })
1745         c.Check(err, ErrorMatches, ".*Stdout path does not start with OutputPath.*")
1746 }
1747
1748 func (s *TestSuite) TestStdoutWithWrongKindTmp(c *C) {
1749         _, _, err := s.stdoutErrorRunHelper(c, `{
1750     "mounts": {"/tmp": {"kind": "tmp"}, "stdout": {"kind": "tmp", "path":"/tmp/a.out"} },
1751     "output_path": "/tmp",
1752     "state": "Locked"
1753 }`, func() int { return 0 })
1754         c.Check(err, ErrorMatches, ".*unsupported mount kind 'tmp' for stdout.*")
1755 }
1756
1757 func (s *TestSuite) TestStdoutWithWrongKindCollection(c *C) {
1758         _, _, err := s.stdoutErrorRunHelper(c, `{
1759     "mounts": {"/tmp": {"kind": "tmp"}, "stdout": {"kind": "collection", "path":"/tmp/a.out"} },
1760     "output_path": "/tmp",
1761     "state": "Locked"
1762 }`, func() int { return 0 })
1763         c.Check(err, ErrorMatches, ".*unsupported mount kind 'collection' for stdout.*")
1764 }
1765
1766 func (s *TestSuite) TestFullRunWithAPI(c *C) {
1767         s.fullRunHelper(c, `{
1768     "command": ["/bin/sh", "-c", "true $ARVADOS_API_HOST"],
1769     "container_image": "`+arvadostest.DockerImage112PDH+`",
1770     "cwd": "/bin",
1771     "environment": {},
1772     "mounts": {"/tmp": {"kind": "tmp"} },
1773     "output_path": "/tmp",
1774     "priority": 1,
1775     "runtime_constraints": {"API": true},
1776     "state": "Locked"
1777 }`, nil, func() int {
1778                 c.Check(s.executor.created.Env["ARVADOS_API_HOST"], Equals, os.Getenv("ARVADOS_API_HOST"))
1779                 return 3
1780         })
1781         c.Check(s.api.CalledWith("container.exit_code", 3), NotNil)
1782         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
1783         c.Check(logFileContent(c, s.runner, "crunch-run.txt"), Matches, `(?ms).*status code 3\n.*`)
1784 }
1785
1786 func (s *TestSuite) TestFullRunSetOutput(c *C) {
1787         defer os.Setenv("ARVADOS_API_HOST", os.Getenv("ARVADOS_API_HOST"))
1788         os.Setenv("ARVADOS_API_HOST", "test.arvados.org")
1789         s.fullRunHelper(c, `{
1790     "command": ["/bin/sh", "-c", "echo $ARVADOS_API_HOST"],
1791     "container_image": "`+arvadostest.DockerImage112PDH+`",
1792     "cwd": "/bin",
1793     "environment": {},
1794     "mounts": {"/tmp": {"kind": "tmp"} },
1795     "output_path": "/tmp",
1796     "priority": 1,
1797     "runtime_constraints": {"API": true},
1798     "state": "Locked"
1799 }`, nil, func() int {
1800                 s.api.Container.Output = arvadostest.DockerImage112PDH
1801                 return 0
1802         })
1803
1804         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
1805         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
1806         c.Check(s.api.CalledWith("container.output", arvadostest.DockerImage112PDH), NotNil)
1807 }
1808
1809 func (s *TestSuite) TestArvMountRuntimeStatusWarning(c *C) {
1810         s.runner.RunArvMount = func([]string, string) (*exec.Cmd, error) {
1811                 os.Mkdir(s.runner.ArvMountPoint+"/by_id", 0666)
1812                 ioutil.WriteFile(s.runner.ArvMountPoint+"/by_id/README", nil, 0666)
1813                 return s.runner.ArvMountCmd([]string{"bash", "-c", "echo >&2 Test: Keep write error: I am a teapot; sleep 3"}, "")
1814         }
1815         s.executor.runFunc = func() int {
1816                 time.Sleep(time.Second)
1817                 return 137
1818         }
1819         record := `{
1820     "command": ["sleep", "1"],
1821     "container_image": "` + arvadostest.DockerImage112PDH + `",
1822     "cwd": "/bin",
1823     "environment": {},
1824     "mounts": {"/tmp": {"kind": "tmp"} },
1825     "output_path": "/tmp",
1826     "priority": 1,
1827     "runtime_constraints": {"API": true},
1828     "state": "Locked"
1829 }`
1830         err := json.Unmarshal([]byte(record), &s.api.Container)
1831         c.Assert(err, IsNil)
1832         err = s.runner.Run()
1833         c.Assert(err, IsNil)
1834         c.Check(s.api.CalledWith("container.exit_code", 137), NotNil)
1835         c.Check(s.api.CalledWith("container.runtime_status.warning", "arv-mount: Keep write error"), NotNil)
1836         c.Check(s.api.CalledWith("container.runtime_status.warningDetail", "Test: Keep write error: I am a teapot"), NotNil)
1837         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
1838         c.Check(logFileContent(c, s.runner, "crunch-run.txt"), Matches, `(?ms).*Container exited with status code 137 \(signal 9, SIGKILL\).*`)
1839 }
1840
1841 func (s *TestSuite) TestStdoutWithExcludeFromOutputMountPointUnderOutputDir(c *C) {
1842         helperRecord := `{
1843                 "command": ["/bin/sh", "-c", "echo $FROBIZ"],
1844                 "container_image": "` + arvadostest.DockerImage112PDH + `",
1845                 "cwd": "/bin",
1846                 "environment": {"FROBIZ": "bilbo"},
1847                 "mounts": {
1848         "/tmp": {"kind": "tmp"},
1849         "/tmp/foo": {"kind": "collection",
1850                      "portable_data_hash": "a3e8f74c6f101eae01fa08bfb4e49b3a+54",
1851                      "exclude_from_output": true
1852         },
1853         "stdout": {"kind": "file", "path": "/tmp/a/b/c.out"}
1854     },
1855                 "output_path": "/tmp",
1856                 "priority": 1,
1857                 "runtime_constraints": {},
1858                 "state": "Locked"
1859         }`
1860
1861         extraMounts := []string{"a3e8f74c6f101eae01fa08bfb4e49b3a+54"}
1862
1863         s.fullRunHelper(c, helperRecord, extraMounts, func() int {
1864                 fmt.Fprintln(s.executor.created.Stdout, s.executor.created.Env["FROBIZ"])
1865                 return 0
1866         })
1867
1868         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
1869         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
1870         c.Check(s.runner.ContainerArvClient.(*ArvTestClient).CalledWith("collection.manifest_text", "./a/b 307372fa8fd5c146b22ae7a45b49bc31+6 0:6:c.out\n"), NotNil)
1871 }
1872
1873 func (s *TestSuite) TestStdoutWithMultipleMountPointsUnderOutputDir(c *C) {
1874         helperRecord := `{
1875                 "command": ["/bin/sh", "-c", "echo $FROBIZ"],
1876                 "container_image": "` + arvadostest.DockerImage112PDH + `",
1877                 "cwd": "/bin",
1878                 "environment": {"FROBIZ": "bilbo"},
1879                 "mounts": {
1880         "/tmp": {"kind": "tmp"},
1881         "/tmp/foo/bar": {"kind": "collection", "portable_data_hash": "a0def87f80dd594d4675809e83bd4f15+367", "path":"/file2_in_main.txt"},
1882         "/tmp/foo/sub1": {"kind": "collection", "portable_data_hash": "a0def87f80dd594d4675809e83bd4f15+367", "path":"/subdir1"},
1883         "/tmp/foo/sub1file2": {"kind": "collection", "portable_data_hash": "a0def87f80dd594d4675809e83bd4f15+367", "path":"/subdir1/file2_in_subdir1.txt"},
1884         "/tmp/foo/baz/sub2file2": {"kind": "collection", "portable_data_hash": "a0def87f80dd594d4675809e83bd4f15+367", "path":"/subdir1/subdir2/file2_in_subdir2.txt"},
1885         "stdout": {"kind": "file", "path": "/tmp/a/b/c.out"}
1886     },
1887                 "output_path": "/tmp",
1888                 "priority": 1,
1889                 "runtime_constraints": {},
1890                 "state": "Locked",
1891                 "uuid": "zzzzz-dz642-202301130848001"
1892         }`
1893
1894         extraMounts := []string{
1895                 "a0def87f80dd594d4675809e83bd4f15+367/file2_in_main.txt",
1896                 "a0def87f80dd594d4675809e83bd4f15+367/subdir1/file2_in_subdir1.txt",
1897                 "a0def87f80dd594d4675809e83bd4f15+367/subdir1/subdir2/file2_in_subdir2.txt",
1898         }
1899
1900         api, _, realtemp := s.fullRunHelper(c, helperRecord, extraMounts, func() int {
1901                 fmt.Fprintln(s.executor.created.Stdout, s.executor.created.Env["FROBIZ"])
1902                 return 0
1903         })
1904
1905         c.Check(s.executor.created.BindMounts, DeepEquals, map[string]bindmount{
1906                 "/tmp":                   {realtemp + "/tmp1", false},
1907                 "/tmp/foo/bar":           {s.keepmount + "/by_id/a0def87f80dd594d4675809e83bd4f15+367/file2_in_main.txt", true},
1908                 "/tmp/foo/baz/sub2file2": {s.keepmount + "/by_id/a0def87f80dd594d4675809e83bd4f15+367/subdir1/subdir2/file2_in_subdir2.txt", true},
1909                 "/tmp/foo/sub1":          {s.keepmount + "/by_id/a0def87f80dd594d4675809e83bd4f15+367/subdir1", true},
1910                 "/tmp/foo/sub1file2":     {s.keepmount + "/by_id/a0def87f80dd594d4675809e83bd4f15+367/subdir1/file2_in_subdir1.txt", true},
1911         })
1912
1913         c.Check(api.CalledWith("container.exit_code", 0), NotNil)
1914         c.Check(api.CalledWith("container.state", "Complete"), NotNil)
1915         output_count := uint(0)
1916         for _, v := range s.runner.ContainerArvClient.(*ArvTestClient).Content {
1917                 if v["collection"] == nil {
1918                         continue
1919                 }
1920                 collection := v["collection"].(arvadosclient.Dict)
1921                 if collection["name"].(string) != "output for zzzzz-dz642-202301130848001" {
1922                         continue
1923                 }
1924                 c.Check(v["ensure_unique_name"], Equals, true)
1925                 c.Check(collection["manifest_text"].(string), Equals, `./a/b 307372fa8fd5c146b22ae7a45b49bc31+6 0:6:c.out
1926 ./foo 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396c73c0abcdefgh11234567890@569fa8c3 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396cabcdefghij6419876543234@569fa8c4 9:18:bar 36:18:sub1file2
1927 ./foo/baz 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396c73c0bcdefghijk544332211@569fa8c5 9:18:sub2file2
1928 ./foo/sub1 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396cabcdefghij6419876543234@569fa8c4 0:9:file1_in_subdir1.txt 9:18:file2_in_subdir1.txt
1929 ./foo/sub1/subdir2 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396c73c0bcdefghijk544332211@569fa8c5 0:9:file1_in_subdir2.txt 9:18:file2_in_subdir2.txt
1930 `)
1931                 output_count++
1932         }
1933         c.Check(output_count, Not(Equals), uint(0))
1934 }
1935
1936 func (s *TestSuite) TestStdoutWithMountPointsUnderOutputDirDenormalizedManifest(c *C) {
1937         helperRecord := `{
1938                 "command": ["/bin/sh", "-c", "echo $FROBIZ"],
1939                 "container_image": "` + arvadostest.DockerImage112PDH + `",
1940                 "cwd": "/bin",
1941                 "environment": {"FROBIZ": "bilbo"},
1942                 "mounts": {
1943         "/tmp": {"kind": "tmp"},
1944         "/tmp/foo/bar": {"kind": "collection", "portable_data_hash": "b0def87f80dd594d4675809e83bd4f15+367", "path": "/subdir1/file2_in_subdir1.txt"},
1945         "stdout": {"kind": "file", "path": "/tmp/a/b/c.out"}
1946     },
1947                 "output_path": "/tmp",
1948                 "priority": 1,
1949                 "runtime_constraints": {},
1950                 "state": "Locked",
1951                 "uuid": "zzzzz-dz642-202301130848002"
1952         }`
1953
1954         extraMounts := []string{
1955                 "b0def87f80dd594d4675809e83bd4f15+367/subdir1/file2_in_subdir1.txt",
1956         }
1957
1958         s.fullRunHelper(c, helperRecord, extraMounts, func() int {
1959                 fmt.Fprintln(s.executor.created.Stdout, s.executor.created.Env["FROBIZ"])
1960                 return 0
1961         })
1962
1963         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
1964         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
1965         output_count := uint(0)
1966         for _, v := range s.runner.ContainerArvClient.(*ArvTestClient).Content {
1967                 if v["collection"] == nil {
1968                         continue
1969                 }
1970                 collection := v["collection"].(arvadosclient.Dict)
1971                 if collection["name"].(string) != "output for zzzzz-dz642-202301130848002" {
1972                         continue
1973                 }
1974                 c.Check(collection["manifest_text"].(string), Equals, `./a/b 307372fa8fd5c146b22ae7a45b49bc31+6 0:6:c.out
1975 ./foo 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396c73c0abcdefgh11234567890@569fa8c3 10:17:bar
1976 `)
1977                 output_count++
1978         }
1979         c.Check(output_count, Not(Equals), uint(0))
1980 }
1981
1982 func (s *TestSuite) TestOutputError(c *C) {
1983         helperRecord := `{
1984                 "command": ["/bin/sh", "-c", "echo $FROBIZ"],
1985                 "container_image": "` + arvadostest.DockerImage112PDH + `",
1986                 "cwd": "/bin",
1987                 "environment": {"FROBIZ": "bilbo"},
1988                 "mounts": {
1989                         "/tmp": {"kind": "tmp"}
1990                 },
1991                 "output_path": "/tmp",
1992                 "priority": 1,
1993                 "runtime_constraints": {},
1994                 "state": "Locked"
1995         }`
1996         s.fullRunHelper(c, helperRecord, nil, func() int {
1997                 os.Symlink("/etc/hosts", s.runner.HostOutputDir+"/baz")
1998                 return 0
1999         })
2000
2001         c.Check(s.api.CalledWith("container.state", "Cancelled"), NotNil)
2002 }
2003
2004 func (s *TestSuite) TestStdinCollectionMountPoint(c *C) {
2005         helperRecord := `{
2006                 "command": ["/bin/sh", "-c", "echo $FROBIZ"],
2007                 "container_image": "` + arvadostest.DockerImage112PDH + `",
2008                 "cwd": "/bin",
2009                 "environment": {"FROBIZ": "bilbo"},
2010                 "mounts": {
2011         "/tmp": {"kind": "tmp"},
2012         "stdin": {"kind": "collection", "portable_data_hash": "b0def87f80dd594d4675809e83bd4f15+367", "path": "/file1_in_main.txt"},
2013         "stdout": {"kind": "file", "path": "/tmp/a/b/c.out"}
2014     },
2015                 "output_path": "/tmp",
2016                 "priority": 1,
2017                 "runtime_constraints": {},
2018                 "state": "Locked"
2019         }`
2020
2021         extraMounts := []string{
2022                 "b0def87f80dd594d4675809e83bd4f15+367/file1_in_main.txt",
2023         }
2024
2025         api, _, _ := s.fullRunHelper(c, helperRecord, extraMounts, func() int {
2026                 fmt.Fprintln(s.executor.created.Stdout, s.executor.created.Env["FROBIZ"])
2027                 return 0
2028         })
2029
2030         c.Check(api.CalledWith("container.exit_code", 0), NotNil)
2031         c.Check(api.CalledWith("container.state", "Complete"), NotNil)
2032         for _, v := range api.Content {
2033                 if v["collection"] != nil {
2034                         collection := v["collection"].(arvadosclient.Dict)
2035                         if strings.Index(collection["name"].(string), "output") == 0 {
2036                                 manifest := collection["manifest_text"].(string)
2037                                 c.Check(manifest, Equals, `./a/b 307372fa8fd5c146b22ae7a45b49bc31+6 0:6:c.out
2038 `)
2039                         }
2040                 }
2041         }
2042 }
2043
2044 func (s *TestSuite) TestStdinJsonMountPoint(c *C) {
2045         helperRecord := `{
2046                 "command": ["/bin/sh", "-c", "echo $FROBIZ"],
2047                 "container_image": "` + arvadostest.DockerImage112PDH + `",
2048                 "cwd": "/bin",
2049                 "environment": {"FROBIZ": "bilbo"},
2050                 "mounts": {
2051         "/tmp": {"kind": "tmp"},
2052         "stdin": {"kind": "json", "content": "foo"},
2053         "stdout": {"kind": "file", "path": "/tmp/a/b/c.out"}
2054     },
2055                 "output_path": "/tmp",
2056                 "priority": 1,
2057                 "runtime_constraints": {},
2058                 "state": "Locked"
2059         }`
2060
2061         api, _, _ := s.fullRunHelper(c, helperRecord, nil, func() int {
2062                 fmt.Fprintln(s.executor.created.Stdout, s.executor.created.Env["FROBIZ"])
2063                 return 0
2064         })
2065
2066         c.Check(api.CalledWith("container.exit_code", 0), NotNil)
2067         c.Check(api.CalledWith("container.state", "Complete"), NotNil)
2068         for _, v := range api.Content {
2069                 if v["collection"] != nil {
2070                         collection := v["collection"].(arvadosclient.Dict)
2071                         if strings.Index(collection["name"].(string), "output") == 0 {
2072                                 manifest := collection["manifest_text"].(string)
2073                                 c.Check(manifest, Equals, `./a/b 307372fa8fd5c146b22ae7a45b49bc31+6 0:6:c.out
2074 `)
2075                         }
2076                 }
2077         }
2078 }
2079
2080 func (s *TestSuite) TestStderrMount(c *C) {
2081         api, cr, _ := s.fullRunHelper(c, `{
2082     "command": ["/bin/sh", "-c", "echo hello;exit 1"],
2083     "container_image": "`+arvadostest.DockerImage112PDH+`",
2084     "cwd": ".",
2085     "environment": {},
2086     "mounts": {"/tmp": {"kind": "tmp"},
2087                "stdout": {"kind": "file", "path": "/tmp/a/out.txt"},
2088                "stderr": {"kind": "file", "path": "/tmp/b/err.txt"}},
2089     "output_path": "/tmp",
2090     "priority": 1,
2091     "runtime_constraints": {},
2092     "state": "Locked"
2093 }`, nil, func() int {
2094                 fmt.Fprintln(s.executor.created.Stdout, "hello")
2095                 fmt.Fprintln(s.executor.created.Stderr, "oops")
2096                 return 1
2097         })
2098
2099         final := api.CalledWith("container.state", "Complete")
2100         c.Assert(final, NotNil)
2101         c.Check(final["container"].(arvadosclient.Dict)["exit_code"], Equals, 1)
2102         c.Check(final["container"].(arvadosclient.Dict)["log"], NotNil)
2103
2104         c.Check(cr.ContainerArvClient.(*ArvTestClient).CalledWith("collection.manifest_text", "./a b1946ac92492d2347c6235b4d2611184+6 0:6:out.txt\n./b 38af5c54926b620264ab1501150cf189+5 0:5:err.txt\n"), NotNil)
2105 }
2106
2107 func (s *TestSuite) TestNumberRoundTrip(c *C) {
2108         s.api.callraw = true
2109         err := s.runner.fetchContainerRecord()
2110         c.Assert(err, IsNil)
2111         jsondata, err := json.Marshal(s.runner.Container.Mounts["/json"].Content)
2112         c.Logf("%#v", s.runner.Container)
2113         c.Check(err, IsNil)
2114         c.Check(string(jsondata), Equals, `{"number":123456789123456789}`)
2115 }
2116
2117 func (s *TestSuite) TestFullBrokenDocker(c *C) {
2118         nextState := ""
2119         for _, setup := range []func(){
2120                 func() {
2121                         c.Log("// waitErr = ocl runtime error")
2122                         s.executor.waitErr = errors.New(`Error response from daemon: oci runtime error: container_linux.go:247: starting container process caused "process_linux.go:359: container init caused \"rootfs_linux.go:54: mounting \\\"/tmp/keep453790790/by_id/99999999999999999999999999999999+99999/myGenome\\\" to rootfs \\\"/tmp/docker/overlay2/9999999999999999999999999999999999999999999999999999999999999999/merged\\\" at \\\"/tmp/docker/overlay2/9999999999999999999999999999999999999999999999999999999999999999/merged/keep/99999999999999999999999999999999+99999/myGenome\\\" caused \\\"no such file or directory\\\"\""`)
2123                         nextState = "Cancelled"
2124                 },
2125                 func() {
2126                         c.Log("// loadErr = cannot connect")
2127                         s.executor.loadErr = errors.New("Cannot connect to the Docker daemon at unix:///var/run/docker.sock. Is the docker daemon running?")
2128                         s.runner.brokenNodeHook = c.MkDir() + "/broken-node-hook"
2129                         err := ioutil.WriteFile(s.runner.brokenNodeHook, []byte("#!/bin/sh\nexec echo killme\n"), 0700)
2130                         c.Assert(err, IsNil)
2131                         nextState = "Queued"
2132                 },
2133         } {
2134                 s.SetUpTest(c)
2135                 setup()
2136                 s.fullRunHelper(c, `{
2137     "command": ["echo", "hello world"],
2138     "container_image": "`+arvadostest.DockerImage112PDH+`",
2139     "cwd": ".",
2140     "environment": {},
2141     "mounts": {"/tmp": {"kind": "tmp"} },
2142     "output_path": "/tmp",
2143     "priority": 1,
2144     "runtime_constraints": {},
2145     "state": "Locked"
2146 }`, nil, func() int { return 0 })
2147                 c.Check(s.api.CalledWith("container.state", nextState), NotNil)
2148                 logs := logFileContent(c, s.runner, "crunch-run.txt")
2149                 c.Check(logs, Matches, "(?ms).*unable to run containers.*")
2150                 if s.runner.brokenNodeHook != "" {
2151                         c.Check(logs, Matches, "(?ms).*Running broken node hook.*")
2152                         c.Check(logs, Matches, "(?ms).*killme.*")
2153                         c.Check(logs, Not(Matches), "(?ms).*Writing /var/lock/crunch-run-broken to mark node as broken.*")
2154                 } else {
2155                         c.Check(logs, Matches, "(?ms).*Writing /var/lock/crunch-run-broken to mark node as broken.*")
2156                 }
2157         }
2158 }
2159
2160 func (s *TestSuite) TestBadCommand(c *C) {
2161         for _, startError := range []string{
2162                 `panic: standard_init_linux.go:175: exec user process caused "no such file or directory"`,
2163                 `Error response from daemon: Cannot start container 41f26cbc43bcc1280f4323efb1830a394ba8660c9d1c2b564ba42bf7f7694845: [8] System error: no such file or directory`,
2164                 `Error response from daemon: Cannot start container 58099cd76c834f3dc2a4fb76c8028f049ae6d4fdf0ec373e1f2cfea030670c2d: [8] System error: exec: "foobar": executable file not found in $PATH`,
2165         } {
2166                 s.SetUpTest(c)
2167                 s.executor.startErr = errors.New(startError)
2168                 s.fullRunHelper(c, `{
2169     "command": ["echo", "hello world"],
2170     "container_image": "`+arvadostest.DockerImage112PDH+`",
2171     "cwd": ".",
2172     "environment": {},
2173     "mounts": {"/tmp": {"kind": "tmp"} },
2174     "output_path": "/tmp",
2175     "priority": 1,
2176     "runtime_constraints": {},
2177     "state": "Locked"
2178 }`, nil, func() int { return 0 })
2179                 c.Check(s.api.CalledWith("container.state", "Cancelled"), NotNil)
2180                 c.Check(logFileContent(c, s.runner, "crunch-run.txt"), Matches, "(?ms).*Possible causes:.*is missing.*")
2181         }
2182 }
2183
2184 func (s *TestSuite) TestSecretTextMountPoint(c *C) {
2185         helperRecord := `{
2186                 "command": ["true"],
2187                 "container_image": "` + arvadostest.DockerImage112PDH + `",
2188                 "cwd": "/bin",
2189                 "mounts": {
2190                     "/tmp": {"kind": "tmp"},
2191                     "/tmp/secret.conf": {"kind": "text", "content": "mypassword"}
2192                 },
2193                 "secret_mounts": {
2194                 },
2195                 "output_path": "/tmp",
2196                 "priority": 1,
2197                 "runtime_constraints": {},
2198                 "state": "Locked"
2199         }`
2200
2201         s.fullRunHelper(c, helperRecord, nil, func() int {
2202                 content, err := ioutil.ReadFile(s.runner.HostOutputDir + "/secret.conf")
2203                 c.Check(err, IsNil)
2204                 c.Check(string(content), Equals, "mypassword")
2205                 return 0
2206         })
2207
2208         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
2209         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
2210         c.Check(s.runner.ContainerArvClient.(*ArvTestClient).CalledWith("collection.manifest_text", ". 34819d7beeabb9260a5c854bc85b3e44+10 0:10:secret.conf\n"), NotNil)
2211         c.Check(s.runner.ContainerArvClient.(*ArvTestClient).CalledWith("collection.manifest_text", ""), IsNil)
2212
2213         // under secret mounts, not captured in output
2214         helperRecord = `{
2215                 "command": ["true"],
2216                 "container_image": "` + arvadostest.DockerImage112PDH + `",
2217                 "cwd": "/bin",
2218                 "mounts": {
2219                     "/tmp": {"kind": "tmp"}
2220                 },
2221                 "secret_mounts": {
2222                     "/tmp/secret.conf": {"kind": "text", "content": "mypassword"}
2223                 },
2224                 "output_path": "/tmp",
2225                 "priority": 1,
2226                 "runtime_constraints": {},
2227                 "state": "Locked"
2228         }`
2229
2230         s.SetUpTest(c)
2231         s.fullRunHelper(c, helperRecord, nil, func() int {
2232                 content, err := ioutil.ReadFile(s.runner.HostOutputDir + "/secret.conf")
2233                 c.Check(err, IsNil)
2234                 c.Check(string(content), Equals, "mypassword")
2235                 return 0
2236         })
2237
2238         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
2239         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
2240         c.Check(s.runner.ContainerArvClient.(*ArvTestClient).CalledWith("collection.manifest_text", ". 34819d7beeabb9260a5c854bc85b3e44+10 0:10:secret.conf\n"), IsNil)
2241         c.Check(s.runner.ContainerArvClient.(*ArvTestClient).CalledWith("collection.manifest_text", ""), NotNil)
2242
2243         // under secret mounts, output dir is a collection, not captured in output
2244         helperRecord = `{
2245                 "command": ["true"],
2246                 "container_image": "` + arvadostest.DockerImage112PDH + `",
2247                 "cwd": "/bin",
2248                 "mounts": {
2249                     "/tmp": {"kind": "collection", "writable": true}
2250                 },
2251                 "secret_mounts": {
2252                     "/tmp/secret.conf": {"kind": "text", "content": "mypassword"}
2253                 },
2254                 "output_path": "/tmp",
2255                 "priority": 1,
2256                 "runtime_constraints": {},
2257                 "state": "Locked"
2258         }`
2259
2260         s.SetUpTest(c)
2261         _, _, realtemp := s.fullRunHelper(c, helperRecord, nil, func() int {
2262                 // secret.conf should be provisioned as a separate
2263                 // bind mount, i.e., it should not appear in the
2264                 // (fake) fuse filesystem as viewed from the host.
2265                 content, err := ioutil.ReadFile(s.runner.HostOutputDir + "/secret.conf")
2266                 if !c.Check(errors.Is(err, os.ErrNotExist), Equals, true) {
2267                         c.Logf("secret.conf: content %q, err %#v", content, err)
2268                 }
2269                 err = ioutil.WriteFile(s.runner.HostOutputDir+"/.arvados#collection", []byte(`{"manifest_text":". acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:foo.txt\n"}`), 0700)
2270                 c.Check(err, IsNil)
2271                 return 0
2272         })
2273
2274         content, err := ioutil.ReadFile(realtemp + "/text1/mountdata.text")
2275         c.Check(err, IsNil)
2276         c.Check(string(content), Equals, "mypassword")
2277         c.Check(s.executor.created.BindMounts["/tmp/secret.conf"], DeepEquals, bindmount{realtemp + "/text1/mountdata.text", true})
2278         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
2279         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
2280         c.Check(s.runner.ContainerArvClient.(*ArvTestClient).CalledWith("collection.manifest_text", ". acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:foo.txt\n"), NotNil)
2281 }
2282
2283 func (s *TestSuite) TestCalculateCost(c *C) {
2284         defer func(s string) { lockdir = s }(lockdir)
2285         lockdir = c.MkDir()
2286         now := time.Now()
2287         cr := s.runner
2288         cr.costStartTime = now.Add(-time.Hour)
2289         var logbuf bytes.Buffer
2290         cr.CrunchLog = newLogWriter(&logbuf)
2291
2292         // if there's no InstanceType env var, cost is calculated as 0
2293         os.Unsetenv("InstanceType")
2294         cost := cr.calculateCost(now)
2295         c.Check(cost, Equals, 0.0)
2296
2297         // with InstanceType env var and loadPrices() hasn't run (or
2298         // hasn't found any data), cost is calculated based on
2299         // InstanceType env var
2300         os.Setenv("InstanceType", `{"Price":1.2}`)
2301         cost = cr.calculateCost(now)
2302         c.Check(cost, Equals, 1.2)
2303
2304         // first update tells us the spot price was $1/h until 30
2305         // minutes ago when it increased to $2/h
2306         j, err := json.Marshal([]cloud.InstancePrice{
2307                 {StartTime: now.Add(-4 * time.Hour), Price: 1.0},
2308                 {StartTime: now.Add(-time.Hour / 2), Price: 2.0},
2309         })
2310         c.Assert(err, IsNil)
2311         os.WriteFile(lockdir+"/"+pricesfile, j, 0777)
2312         cr.loadPrices()
2313         cost = cr.calculateCost(now)
2314         c.Check(cost, Equals, 1.5)
2315
2316         // next update (via --list + SIGUSR2) tells us the spot price
2317         // increased to $3/h 15 minutes ago
2318         j, err = json.Marshal([]cloud.InstancePrice{
2319                 {StartTime: now.Add(-time.Hour / 3), Price: 2.0}, // dup of -time.Hour/2 price
2320                 {StartTime: now.Add(-time.Hour / 4), Price: 3.0},
2321         })
2322         c.Assert(err, IsNil)
2323         os.WriteFile(lockdir+"/"+pricesfile, j, 0777)
2324         cr.loadPrices()
2325         cost = cr.calculateCost(now)
2326         c.Check(cost, Equals, 1.0/2+2.0/4+3.0/4)
2327
2328         cost = cr.calculateCost(now.Add(-time.Hour / 2))
2329         c.Check(cost, Equals, 0.5)
2330
2331         c.Logf("%s", logbuf.String())
2332         c.Check(logbuf.String(), Matches, `(?ms).*Instance price changed to 1\.00 at 20.* changed to 2\.00 .* changed to 3\.00 .*`)
2333         c.Check(logbuf.String(), Not(Matches), `(?ms).*changed to 2\.00 .* changed to 2\.00 .*`)
2334 }
2335
2336 func (s *TestSuite) TestSIGUSR2CostUpdate(c *C) {
2337         pid := os.Getpid()
2338         now := time.Now()
2339         pricesJSON, err := json.Marshal([]cloud.InstancePrice{
2340                 {StartTime: now.Add(-4 * time.Hour), Price: 2.4},
2341                 {StartTime: now.Add(-2 * time.Hour), Price: 2.6},
2342         })
2343         c.Assert(err, IsNil)
2344
2345         os.Setenv("InstanceType", `{"Price":2.2}`)
2346         defer func(s string) { lockdir = s }(lockdir)
2347         lockdir = c.MkDir()
2348
2349         // We can't use s.api.CalledWith because timing differences will yield
2350         // different cost values across runs. getCostUpdate iterates over API
2351         // calls until it finds one that sets the cost, then writes that value
2352         // to the next index of costUpdates.
2353         deadline := now.Add(time.Second)
2354         costUpdates := make([]float64, 2)
2355         costIndex := 0
2356         apiIndex := 0
2357         getCostUpdate := func() {
2358                 for ; time.Now().Before(deadline); time.Sleep(time.Second / 10) {
2359                         for apiIndex < len(s.api.Content) {
2360                                 update := s.api.Content[apiIndex]
2361                                 apiIndex++
2362                                 var ok bool
2363                                 var cost float64
2364                                 if update, ok = update["container"].(arvadosclient.Dict); !ok {
2365                                         continue
2366                                 }
2367                                 if cost, ok = update["cost"].(float64); !ok {
2368                                         continue
2369                                 }
2370                                 c.Logf("API call #%d updates cost to %v", apiIndex-1, cost)
2371                                 costUpdates[costIndex] = cost
2372                                 costIndex++
2373                                 return
2374                         }
2375                 }
2376         }
2377
2378         s.fullRunHelper(c, `{
2379                 "command": ["true"],
2380                 "container_image": "`+arvadostest.DockerImage112PDH+`",
2381                 "cwd": ".",
2382                 "environment": {},
2383                 "mounts": {"/tmp": {"kind": "tmp"} },
2384                 "output_path": "/tmp",
2385                 "priority": 1,
2386                 "runtime_constraints": {},
2387                 "state": "Locked",
2388                 "uuid": "zzzzz-dz642-20230320101530a"
2389         }`, nil, func() int {
2390                 s.runner.costStartTime = now.Add(-3 * time.Hour)
2391                 err := syscall.Kill(pid, syscall.SIGUSR2)
2392                 c.Check(err, IsNil, Commentf("error sending first SIGUSR2 to runner"))
2393                 getCostUpdate()
2394
2395                 err = os.WriteFile(path.Join(lockdir, pricesfile), pricesJSON, 0o700)
2396                 c.Check(err, IsNil, Commentf("error writing JSON prices file"))
2397                 err = syscall.Kill(pid, syscall.SIGUSR2)
2398                 c.Check(err, IsNil, Commentf("error sending second SIGUSR2 to runner"))
2399                 getCostUpdate()
2400
2401                 return 0
2402         })
2403         // Comparing with format strings makes it easy to ignore minor variations
2404         // in cost across runs while keeping diagnostics pretty.
2405         c.Check(fmt.Sprintf("%.3f", costUpdates[0]), Equals, "6.600")
2406         c.Check(fmt.Sprintf("%.3f", costUpdates[1]), Equals, "7.600")
2407 }
2408
2409 type FakeProcess struct {
2410         cmdLine []string
2411 }
2412
2413 func (fp FakeProcess) CmdlineSlice() ([]string, error) {
2414         return fp.cmdLine, nil
2415 }
2416
2417 func logFileContent(c *C, cr *ContainerRunner, fnm string) string {
2418         buf, err := fs.ReadFile(arvados.FS(cr.LogCollection), fnm)
2419         c.Assert(err, IsNil)
2420         return string(buf)
2421 }
2422
2423 func dumpAllLogFiles(c *C, cr *ContainerRunner) {
2424         d, err := cr.LogCollection.OpenFile("/", os.O_RDONLY, 0)
2425         c.Assert(err, IsNil)
2426         fis, err := d.Readdir(-1)
2427         c.Assert(err, IsNil)
2428         for _, fi := range fis {
2429                 c.Logf("=== %s", fi.Name())
2430                 c.Log(logFileContent(c, cr, fi.Name()))
2431         }
2432 }