21204: Merge branch '21204-stable-log-sort' from arvados-workbench2.git
[arvados.git] / lib / crunchrun / crunchrun_test.go
1 // Copyright (C) The Arvados Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: AGPL-3.0
4
5 package crunchrun
6
7 import (
8         "bytes"
9         "context"
10         "crypto/md5"
11         "encoding/json"
12         "errors"
13         "fmt"
14         "io"
15         "io/ioutil"
16         "log"
17         "math/rand"
18         "net/http"
19         "net/http/httptest"
20         "net/http/httputil"
21         "net/url"
22         "os"
23         "os/exec"
24         "path"
25         "regexp"
26         "runtime/pprof"
27         "strings"
28         "sync"
29         "sync/atomic"
30         "syscall"
31         "testing"
32         "time"
33
34         "git.arvados.org/arvados.git/lib/cloud"
35         "git.arvados.org/arvados.git/lib/cmd"
36         "git.arvados.org/arvados.git/sdk/go/arvados"
37         "git.arvados.org/arvados.git/sdk/go/arvadosclient"
38         "git.arvados.org/arvados.git/sdk/go/arvadostest"
39         "git.arvados.org/arvados.git/sdk/go/manifest"
40
41         . "gopkg.in/check.v1"
42         git_client "gopkg.in/src-d/go-git.v4/plumbing/transport/client"
43         git_http "gopkg.in/src-d/go-git.v4/plumbing/transport/http"
44 )
45
46 // Gocheck boilerplate
47 func TestCrunchExec(t *testing.T) {
48         TestingT(t)
49 }
50
51 const logLineStart = `(?m)(.*\n)*\d{4}-\d\d-\d\dT\d\d:\d\d:\d\d\.\d+Z `
52
53 var _ = Suite(&TestSuite{})
54
55 type TestSuite struct {
56         client                   *arvados.Client
57         api                      *ArvTestClient
58         runner                   *ContainerRunner
59         executor                 *stubExecutor
60         keepmount                string
61         keepmountTmp             []string
62         testDispatcherKeepClient KeepTestClient
63         testContainerKeepClient  KeepTestClient
64         debian12MemoryCurrent    int64
65         debian12SwapCurrent      int64
66 }
67
68 func (s *TestSuite) SetUpSuite(c *C) {
69         buf, err := os.ReadFile("../crunchstat/testdata/debian12/sys/fs/cgroup/user.slice/user-1000.slice/session-4.scope/memory.current")
70         c.Assert(err, IsNil)
71         _, err = fmt.Sscanf(string(buf), "%d", &s.debian12MemoryCurrent)
72         c.Assert(err, IsNil)
73
74         buf, err = os.ReadFile("../crunchstat/testdata/debian12/sys/fs/cgroup/user.slice/user-1000.slice/session-4.scope/memory.swap.current")
75         c.Assert(err, IsNil)
76         _, err = fmt.Sscanf(string(buf), "%d", &s.debian12SwapCurrent)
77         c.Assert(err, IsNil)
78 }
79
80 func (s *TestSuite) SetUpTest(c *C) {
81         s.client = arvados.NewClientFromEnv()
82         s.executor = &stubExecutor{}
83         var err error
84         s.api = &ArvTestClient{}
85         s.runner, err = NewContainerRunner(s.client, s.api, &s.testDispatcherKeepClient, "zzzzz-zzzzz-zzzzzzzzzzzzzzz")
86         c.Assert(err, IsNil)
87         s.runner.executor = s.executor
88         s.runner.MkArvClient = func(token string) (IArvadosClient, IKeepClient, *arvados.Client, error) {
89                 return s.api, &s.testContainerKeepClient, s.client, nil
90         }
91         s.runner.RunArvMount = func(cmd []string, tok string) (*exec.Cmd, error) {
92                 s.runner.ArvMountPoint = s.keepmount
93                 for i, opt := range cmd {
94                         if opt == "--mount-tmp" {
95                                 err := os.Mkdir(s.keepmount+"/"+cmd[i+1], 0700)
96                                 if err != nil {
97                                         return nil, err
98                                 }
99                                 s.keepmountTmp = append(s.keepmountTmp, cmd[i+1])
100                         }
101                 }
102                 return nil, nil
103         }
104         s.keepmount = c.MkDir()
105         err = os.Mkdir(s.keepmount+"/by_id", 0755)
106         s.keepmountTmp = nil
107         c.Assert(err, IsNil)
108         err = os.Mkdir(s.keepmount+"/by_id/"+arvadostest.DockerImage112PDH, 0755)
109         c.Assert(err, IsNil)
110         err = ioutil.WriteFile(s.keepmount+"/by_id/"+arvadostest.DockerImage112PDH+"/"+arvadostest.DockerImage112Filename, []byte("#notarealtarball"), 0644)
111         err = os.Mkdir(s.keepmount+"/by_id/"+fakeInputCollectionPDH, 0755)
112         c.Assert(err, IsNil)
113         err = ioutil.WriteFile(s.keepmount+"/by_id/"+fakeInputCollectionPDH+"/input.json", []byte(`{"input":true}`), 0644)
114         c.Assert(err, IsNil)
115         s.runner.ArvMountPoint = s.keepmount
116 }
117
118 type ArvTestClient struct {
119         Total   int64
120         Calls   int
121         Content []arvadosclient.Dict
122         arvados.Container
123         secretMounts []byte
124         Logs         map[string]*bytes.Buffer
125         sync.Mutex
126         WasSetRunning bool
127         callraw       bool
128 }
129
130 type KeepTestClient struct {
131         Called         bool
132         Content        []byte
133         StorageClasses []string
134 }
135
136 type stubExecutor struct {
137         imageLoaded bool
138         loaded      string
139         loadErr     error
140         exitCode    int
141         createErr   error
142         created     containerSpec
143         startErr    error
144         waitSleep   time.Duration
145         waitErr     error
146         stopErr     error
147         stopped     bool
148         closed      bool
149         runFunc     func() int
150         exit        chan int
151 }
152
153 func (e *stubExecutor) LoadImage(imageId string, tarball string, container arvados.Container, keepMount string,
154         containerClient *arvados.Client) error {
155         e.loaded = tarball
156         return e.loadErr
157 }
158 func (e *stubExecutor) Runtime() string                 { return "stub" }
159 func (e *stubExecutor) Version() string                 { return "stub " + cmd.Version.String() }
160 func (e *stubExecutor) Create(spec containerSpec) error { e.created = spec; return e.createErr }
161 func (e *stubExecutor) Start() error {
162         e.exit = make(chan int, 1)
163         go func() { e.exit <- e.runFunc() }()
164         return e.startErr
165 }
166 func (e *stubExecutor) Pid() int    { return 1115883 } // matches pid in ../crunchstat/testdata/debian12/proc/
167 func (e *stubExecutor) Stop() error { e.stopped = true; go func() { e.exit <- -1 }(); return e.stopErr }
168 func (e *stubExecutor) Close()      { e.closed = true }
169 func (e *stubExecutor) Wait(context.Context) (int, error) {
170         return <-e.exit, e.waitErr
171 }
172 func (e *stubExecutor) InjectCommand(ctx context.Context, _, _ string, _ bool, _ []string) (*exec.Cmd, error) {
173         return nil, errors.New("unimplemented")
174 }
175 func (e *stubExecutor) IPAddress() (string, error) { return "", errors.New("unimplemented") }
176
177 const fakeInputCollectionPDH = "ffffffffaaaaaaaa88888888eeeeeeee+1234"
178
179 var hwManifest = ". 82ab40c24fc8df01798e57ba66795bb1+841216+Aa124ac75e5168396c73c0a18eda641a4f41791c0@569fa8c3 0:841216:9c31ee32b3d15268a0754e8edc74d4f815ee014b693bc5109058e431dd5caea7.tar\n"
180 var hwPDH = "a45557269dcb65a6b78f9ac061c0850b+120"
181 var hwImageID = "9c31ee32b3d15268a0754e8edc74d4f815ee014b693bc5109058e431dd5caea7"
182
183 var otherManifest = ". 68a84f561b1d1708c6baff5e019a9ab3+46+Ae5d0af96944a3690becb1decdf60cc1c937f556d@5693216f 0:46:md5sum.txt\n"
184 var otherPDH = "a3e8f74c6f101eae01fa08bfb4e49b3a+54"
185
186 var normalizedManifestWithSubdirs = `. 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396c73c0abcdefgh11234567890@569fa8c3 0:9:file1_in_main.txt 9:18:file2_in_main.txt 0:27:zzzzz-8i9sb-bcdefghijkdhvnk.log.txt
187 ./subdir1 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396cabcdefghij6419876543234@569fa8c4 0:9:file1_in_subdir1.txt 9:18:file2_in_subdir1.txt
188 ./subdir1/subdir2 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396c73c0bcdefghijk544332211@569fa8c5 0:9:file1_in_subdir2.txt 9:18:file2_in_subdir2.txt
189 `
190
191 var normalizedWithSubdirsPDH = "a0def87f80dd594d4675809e83bd4f15+367"
192
193 var denormalizedManifestWithSubdirs = ". 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396c73c0abcdefgh11234567890@569fa8c3 0:9:file1_in_main.txt 9:18:file2_in_main.txt 0:27:zzzzz-8i9sb-bcdefghijkdhvnk.log.txt 0:10:subdir1/file1_in_subdir1.txt 10:17:subdir1/file2_in_subdir1.txt\n"
194 var denormalizedWithSubdirsPDH = "b0def87f80dd594d4675809e83bd4f15+367"
195
196 var fakeAuthUUID = "zzzzz-gj3su-55pqoyepgi2glem"
197 var fakeAuthToken = "a3ltuwzqcu2u4sc0q7yhpc2w7s00fdcqecg5d6e0u3pfohmbjt"
198
199 func (client *ArvTestClient) Create(resourceType string,
200         parameters arvadosclient.Dict,
201         output interface{}) error {
202
203         client.Mutex.Lock()
204         defer client.Mutex.Unlock()
205
206         client.Calls++
207         client.Content = append(client.Content, parameters)
208
209         if resourceType == "logs" {
210                 et := parameters["log"].(arvadosclient.Dict)["event_type"].(string)
211                 if client.Logs == nil {
212                         client.Logs = make(map[string]*bytes.Buffer)
213                 }
214                 if client.Logs[et] == nil {
215                         client.Logs[et] = &bytes.Buffer{}
216                 }
217                 client.Logs[et].Write([]byte(parameters["log"].(arvadosclient.Dict)["properties"].(map[string]string)["text"]))
218         }
219
220         if resourceType == "collections" && output != nil {
221                 mt := parameters["collection"].(arvadosclient.Dict)["manifest_text"].(string)
222                 md5sum := md5.Sum([]byte(mt))
223                 outmap := output.(*arvados.Collection)
224                 outmap.PortableDataHash = fmt.Sprintf("%x+%d", md5sum, len(mt))
225                 outmap.UUID = fmt.Sprintf("zzzzz-4zz18-%015x", md5sum[:7])
226         }
227
228         return nil
229 }
230
231 func (client *ArvTestClient) Call(method, resourceType, uuid, action string, parameters arvadosclient.Dict, output interface{}) error {
232         switch {
233         case method == "GET" && resourceType == "containers" && action == "auth":
234                 return json.Unmarshal([]byte(`{
235                         "kind": "arvados#api_client_authorization",
236                         "uuid": "`+fakeAuthUUID+`",
237                         "api_token": "`+fakeAuthToken+`"
238                         }`), output)
239         case method == "GET" && resourceType == "containers" && action == "secret_mounts":
240                 if client.secretMounts != nil {
241                         return json.Unmarshal(client.secretMounts, output)
242                 }
243                 return json.Unmarshal([]byte(`{"secret_mounts":{}}`), output)
244         default:
245                 return fmt.Errorf("Not found")
246         }
247 }
248
249 func (client *ArvTestClient) CallRaw(method, resourceType, uuid, action string,
250         parameters arvadosclient.Dict) (reader io.ReadCloser, err error) {
251         var j []byte
252         if method == "GET" && resourceType == "nodes" && uuid == "" && action == "" {
253                 j = []byte(`{
254                         "kind": "arvados#nodeList",
255                         "items": [{
256                                 "uuid": "zzzzz-7ekkf-2z3mc76g2q73aio",
257                                 "hostname": "compute2",
258                                 "properties": {"total_cpu_cores": 16}
259                         }]}`)
260         } else if method == "GET" && resourceType == "containers" && action == "" && !client.callraw {
261                 if uuid == "" {
262                         j, err = json.Marshal(map[string]interface{}{
263                                 "items": []interface{}{client.Container},
264                                 "kind":  "arvados#nodeList",
265                         })
266                 } else {
267                         j, err = json.Marshal(client.Container)
268                 }
269         } else {
270                 j = []byte(`{
271                         "command": ["sleep", "1"],
272                         "container_image": "` + arvadostest.DockerImage112PDH + `",
273                         "cwd": ".",
274                         "environment": {},
275                         "mounts": {"/tmp": {"kind": "tmp"}, "/json": {"kind": "json", "content": {"number": 123456789123456789}}},
276                         "output_path": "/tmp",
277                         "priority": 1,
278                         "runtime_constraints": {}
279                 }`)
280         }
281         return ioutil.NopCloser(bytes.NewReader(j)), err
282 }
283
284 func (client *ArvTestClient) Get(resourceType string, uuid string, parameters arvadosclient.Dict, output interface{}) error {
285         if resourceType == "collections" {
286                 if uuid == hwPDH {
287                         output.(*arvados.Collection).ManifestText = hwManifest
288                 } else if uuid == otherPDH {
289                         output.(*arvados.Collection).ManifestText = otherManifest
290                 } else if uuid == normalizedWithSubdirsPDH {
291                         output.(*arvados.Collection).ManifestText = normalizedManifestWithSubdirs
292                 } else if uuid == denormalizedWithSubdirsPDH {
293                         output.(*arvados.Collection).ManifestText = denormalizedManifestWithSubdirs
294                 }
295         }
296         if resourceType == "containers" {
297                 (*output.(*arvados.Container)) = client.Container
298         }
299         return nil
300 }
301
302 func (client *ArvTestClient) Update(resourceType string, uuid string, parameters arvadosclient.Dict, output interface{}) (err error) {
303         client.Mutex.Lock()
304         defer client.Mutex.Unlock()
305         client.Calls++
306         client.Content = append(client.Content, parameters)
307         if resourceType == "containers" {
308                 if parameters["container"].(arvadosclient.Dict)["state"] == "Running" {
309                         client.WasSetRunning = true
310                 }
311         } else if resourceType == "collections" && output != nil {
312                 mt := parameters["collection"].(arvadosclient.Dict)["manifest_text"].(string)
313                 output.(*arvados.Collection).UUID = uuid
314                 output.(*arvados.Collection).PortableDataHash = fmt.Sprintf("%x", md5.Sum([]byte(mt)))
315         }
316         return nil
317 }
318
319 var discoveryMap = map[string]interface{}{
320         "defaultTrashLifetime":               float64(1209600),
321         "crunchLimitLogBytesPerJob":          float64(67108864),
322         "crunchLogThrottleBytes":             float64(65536),
323         "crunchLogThrottlePeriod":            float64(60),
324         "crunchLogThrottleLines":             float64(1024),
325         "crunchLogPartialLineThrottlePeriod": float64(5),
326         "crunchLogBytesPerEvent":             float64(4096),
327         "crunchLogSecondsBetweenEvents":      float64(1),
328 }
329
330 func (client *ArvTestClient) Discovery(key string) (interface{}, error) {
331         return discoveryMap[key], nil
332 }
333
334 // CalledWith returns the parameters from the first API call whose
335 // parameters match jpath/string. E.g., CalledWith(c, "foo.bar",
336 // "baz") returns parameters with parameters["foo"]["bar"]=="baz". If
337 // no call matches, it returns nil.
338 func (client *ArvTestClient) CalledWith(jpath string, expect interface{}) arvadosclient.Dict {
339 call:
340         for _, content := range client.Content {
341                 var v interface{} = content
342                 for _, k := range strings.Split(jpath, ".") {
343                         if dict, ok := v.(arvadosclient.Dict); !ok {
344                                 continue call
345                         } else {
346                                 v = dict[k]
347                         }
348                 }
349                 if v == expect {
350                         return content
351                 }
352         }
353         return nil
354 }
355
356 func (client *KeepTestClient) LocalLocator(locator string) (string, error) {
357         return locator, nil
358 }
359
360 func (client *KeepTestClient) BlockWrite(_ context.Context, opts arvados.BlockWriteOptions) (arvados.BlockWriteResponse, error) {
361         client.Content = opts.Data
362         return arvados.BlockWriteResponse{
363                 Locator: fmt.Sprintf("%x+%d", md5.Sum(opts.Data), len(opts.Data)),
364         }, nil
365 }
366
367 func (client *KeepTestClient) ReadAt(string, []byte, int) (int, error) {
368         return 0, errors.New("not implemented")
369 }
370
371 func (client *KeepTestClient) ClearBlockCache() {
372 }
373
374 func (client *KeepTestClient) Close() {
375         client.Content = nil
376 }
377
378 func (client *KeepTestClient) SetStorageClasses(sc []string) {
379         client.StorageClasses = sc
380 }
381
382 type FileWrapper struct {
383         io.ReadCloser
384         len int64
385 }
386
387 func (fw FileWrapper) Readdir(n int) ([]os.FileInfo, error) {
388         return nil, errors.New("not implemented")
389 }
390
391 func (fw FileWrapper) Seek(int64, int) (int64, error) {
392         return 0, errors.New("not implemented")
393 }
394
395 func (fw FileWrapper) Size() int64 {
396         return fw.len
397 }
398
399 func (fw FileWrapper) Stat() (os.FileInfo, error) {
400         return nil, errors.New("not implemented")
401 }
402
403 func (fw FileWrapper) Truncate(int64) error {
404         return errors.New("not implemented")
405 }
406
407 func (fw FileWrapper) Write([]byte) (int, error) {
408         return 0, errors.New("not implemented")
409 }
410
411 func (fw FileWrapper) Sync() error {
412         return errors.New("not implemented")
413 }
414
415 func (fw FileWrapper) Snapshot() (*arvados.Subtree, error) {
416         return nil, errors.New("not implemented")
417 }
418
419 func (fw FileWrapper) Splice(*arvados.Subtree) error {
420         return errors.New("not implemented")
421 }
422
423 func (client *KeepTestClient) ManifestFileReader(m manifest.Manifest, filename string) (arvados.File, error) {
424         if filename == hwImageID+".tar" {
425                 rdr := ioutil.NopCloser(&bytes.Buffer{})
426                 client.Called = true
427                 return FileWrapper{rdr, 1321984}, nil
428         } else if filename == "/file1_in_main.txt" {
429                 rdr := ioutil.NopCloser(strings.NewReader("foo"))
430                 client.Called = true
431                 return FileWrapper{rdr, 3}, nil
432         }
433         return nil, nil
434 }
435
436 type apiStubServer struct {
437         server    *httptest.Server
438         proxy     *httputil.ReverseProxy
439         intercept func(http.ResponseWriter, *http.Request) bool
440
441         container arvados.Container
442         logs      map[string]string
443 }
444
445 func apiStub() (*arvados.Client, *apiStubServer) {
446         client := arvados.NewClientFromEnv()
447         apistub := &apiStubServer{}
448         apistub.server = httptest.NewTLSServer(apistub)
449         apistub.proxy = httputil.NewSingleHostReverseProxy(&url.URL{Scheme: "https", Host: client.APIHost})
450         if client.Insecure {
451                 apistub.proxy.Transport = arvados.InsecureHTTPClient.Transport
452         }
453         client.APIHost = apistub.server.Listener.Addr().String()
454         return client, apistub
455 }
456
457 func (apistub *apiStubServer) ServeHTTP(w http.ResponseWriter, r *http.Request) {
458         if apistub.intercept != nil && apistub.intercept(w, r) {
459                 return
460         }
461         if r.Method == "POST" && r.URL.Path == "/arvados/v1/logs" {
462                 var body struct {
463                         Log struct {
464                                 EventType  string `json:"event_type"`
465                                 Properties struct {
466                                         Text string
467                                 }
468                         }
469                 }
470                 json.NewDecoder(r.Body).Decode(&body)
471                 apistub.logs[body.Log.EventType] += body.Log.Properties.Text
472                 return
473         }
474         if r.Method == "GET" && r.URL.Path == "/arvados/v1/collections/"+hwPDH {
475                 json.NewEncoder(w).Encode(arvados.Collection{ManifestText: hwManifest})
476                 return
477         }
478         if r.Method == "GET" && r.URL.Path == "/arvados/v1/collections/"+otherPDH {
479                 json.NewEncoder(w).Encode(arvados.Collection{ManifestText: otherManifest})
480                 return
481         }
482         if r.Method == "GET" && r.URL.Path == "/arvados/v1/collections/"+normalizedWithSubdirsPDH {
483                 json.NewEncoder(w).Encode(arvados.Collection{ManifestText: normalizedManifestWithSubdirs})
484                 return
485         }
486         if r.Method == "GET" && r.URL.Path == "/arvados/v1/collections/"+denormalizedWithSubdirsPDH {
487                 json.NewEncoder(w).Encode(arvados.Collection{ManifestText: denormalizedManifestWithSubdirs})
488                 return
489         }
490         if r.Method == "GET" && r.URL.Path == "/arvados/v1/containers/"+apistub.container.UUID {
491                 json.NewEncoder(w).Encode(apistub.container)
492                 return
493         }
494         apistub.proxy.ServeHTTP(w, r)
495 }
496
497 func (s *TestSuite) TestLoadImage(c *C) {
498         s.runner.Container.ContainerImage = arvadostest.DockerImage112PDH
499         s.runner.Container.Mounts = map[string]arvados.Mount{
500                 "/out": {Kind: "tmp", Writable: true},
501         }
502         s.runner.Container.OutputPath = "/out"
503
504         _, err := s.runner.SetupMounts()
505         c.Assert(err, IsNil)
506
507         imageID, err := s.runner.LoadImage()
508         c.Check(err, IsNil)
509         c.Check(s.executor.loaded, Matches, ".*"+regexp.QuoteMeta(arvadostest.DockerImage112Filename))
510         c.Check(imageID, Equals, strings.TrimSuffix(arvadostest.DockerImage112Filename, ".tar"))
511
512         s.runner.Container.ContainerImage = arvadostest.DockerImage112PDH
513         s.executor.imageLoaded = false
514         s.executor.loaded = ""
515         s.executor.loadErr = errors.New("bork")
516         imageID, err = s.runner.LoadImage()
517         c.Check(err, ErrorMatches, ".*bork")
518         c.Check(s.executor.loaded, Matches, ".*"+regexp.QuoteMeta(arvadostest.DockerImage112Filename))
519
520         s.runner.Container.ContainerImage = fakeInputCollectionPDH
521         s.executor.imageLoaded = false
522         s.executor.loaded = ""
523         s.executor.loadErr = nil
524         imageID, err = s.runner.LoadImage()
525         c.Check(err, ErrorMatches, "image collection does not include a \\.tar image file")
526         c.Check(s.executor.loaded, Equals, "")
527 }
528
529 type ArvErrorTestClient struct{}
530
531 func (ArvErrorTestClient) Create(resourceType string,
532         parameters arvadosclient.Dict,
533         output interface{}) error {
534         return nil
535 }
536
537 func (ArvErrorTestClient) Call(method, resourceType, uuid, action string, parameters arvadosclient.Dict, output interface{}) error {
538         if method == "GET" && resourceType == "containers" && action == "auth" {
539                 return nil
540         }
541         return errors.New("ArvError")
542 }
543
544 func (ArvErrorTestClient) CallRaw(method, resourceType, uuid, action string,
545         parameters arvadosclient.Dict) (reader io.ReadCloser, err error) {
546         return nil, errors.New("ArvError")
547 }
548
549 func (ArvErrorTestClient) Get(resourceType string, uuid string, parameters arvadosclient.Dict, output interface{}) error {
550         return errors.New("ArvError")
551 }
552
553 func (ArvErrorTestClient) Update(resourceType string, uuid string, parameters arvadosclient.Dict, output interface{}) (err error) {
554         return nil
555 }
556
557 func (ArvErrorTestClient) Discovery(key string) (interface{}, error) {
558         return discoveryMap[key], nil
559 }
560
561 type KeepErrorTestClient struct {
562         KeepTestClient
563 }
564
565 func (*KeepErrorTestClient) ManifestFileReader(manifest.Manifest, string) (arvados.File, error) {
566         return nil, errors.New("KeepError")
567 }
568
569 func (*KeepErrorTestClient) BlockWrite(context.Context, arvados.BlockWriteOptions) (arvados.BlockWriteResponse, error) {
570         return arvados.BlockWriteResponse{}, errors.New("KeepError")
571 }
572
573 func (*KeepErrorTestClient) LocalLocator(string) (string, error) {
574         return "", errors.New("KeepError")
575 }
576
577 type KeepReadErrorTestClient struct {
578         KeepTestClient
579 }
580
581 func (*KeepReadErrorTestClient) ReadAt(string, []byte, int) (int, error) {
582         return 0, errors.New("KeepError")
583 }
584
585 type ErrorReader struct {
586         FileWrapper
587 }
588
589 func (ErrorReader) Read(p []byte) (n int, err error) {
590         return 0, errors.New("ErrorReader")
591 }
592
593 func (ErrorReader) Seek(int64, int) (int64, error) {
594         return 0, errors.New("ErrorReader")
595 }
596
597 func (KeepReadErrorTestClient) ManifestFileReader(m manifest.Manifest, filename string) (arvados.File, error) {
598         return ErrorReader{}, nil
599 }
600
601 type ClosableBuffer struct {
602         bytes.Buffer
603 }
604
605 func (*ClosableBuffer) Close() error {
606         return nil
607 }
608
609 type TestLogs struct {
610         Stdout ClosableBuffer
611         Stderr ClosableBuffer
612 }
613
614 func (tl *TestLogs) NewTestLoggingWriter(logstr string) (io.WriteCloser, error) {
615         if logstr == "stdout" {
616                 return &tl.Stdout, nil
617         }
618         if logstr == "stderr" {
619                 return &tl.Stderr, nil
620         }
621         return nil, errors.New("???")
622 }
623
624 func dockerLog(fd byte, msg string) []byte {
625         by := []byte(msg)
626         header := make([]byte, 8+len(by))
627         header[0] = fd
628         header[7] = byte(len(by))
629         copy(header[8:], by)
630         return header
631 }
632
633 func (s *TestSuite) TestRunContainer(c *C) {
634         s.executor.runFunc = func() int {
635                 fmt.Fprintf(s.executor.created.Stdout, "Hello world\n")
636                 return 0
637         }
638
639         var logs TestLogs
640         s.runner.NewLogWriter = logs.NewTestLoggingWriter
641         s.runner.Container.ContainerImage = arvadostest.DockerImage112PDH
642         s.runner.Container.Command = []string{"./hw"}
643         s.runner.Container.OutputStorageClasses = []string{"default"}
644
645         imageID, err := s.runner.LoadImage()
646         c.Assert(err, IsNil)
647
648         err = s.runner.CreateContainer(imageID, nil)
649         c.Assert(err, IsNil)
650
651         err = s.runner.StartContainer()
652         c.Assert(err, IsNil)
653
654         err = s.runner.WaitFinish()
655         c.Assert(err, IsNil)
656
657         c.Check(logs.Stdout.String(), Matches, ".*Hello world\n")
658         c.Check(logs.Stderr.String(), Equals, "")
659 }
660
661 func (s *TestSuite) TestCommitLogs(c *C) {
662         api := &ArvTestClient{}
663         kc := &KeepTestClient{}
664         defer kc.Close()
665         cr, err := NewContainerRunner(s.client, api, kc, "zzzzz-zzzzz-zzzzzzzzzzzzzzz")
666         c.Assert(err, IsNil)
667         cr.CrunchLog.Timestamper = (&TestTimestamper{}).Timestamp
668
669         cr.CrunchLog.Print("Hello world!")
670         cr.CrunchLog.Print("Goodbye")
671         cr.finalState = "Complete"
672
673         err = cr.CommitLogs()
674         c.Check(err, IsNil)
675
676         c.Check(api.Calls, Equals, 2)
677         c.Check(api.Content[1]["ensure_unique_name"], Equals, true)
678         c.Check(api.Content[1]["collection"].(arvadosclient.Dict)["name"], Equals, "logs for zzzzz-zzzzz-zzzzzzzzzzzzzzz")
679         c.Check(api.Content[1]["collection"].(arvadosclient.Dict)["manifest_text"], Equals, ". 744b2e4553123b02fa7b452ec5c18993+123 0:123:crunch-run.txt\n")
680         c.Check(*cr.LogsPDH, Equals, "63da7bdacf08c40f604daad80c261e9a+60")
681 }
682
683 func (s *TestSuite) TestUpdateContainerRunning(c *C) {
684         api := &ArvTestClient{}
685         kc := &KeepTestClient{}
686         defer kc.Close()
687         cr, err := NewContainerRunner(s.client, api, kc, "zzzzz-zzzzz-zzzzzzzzzzzzzzz")
688         c.Assert(err, IsNil)
689
690         err = cr.UpdateContainerRunning("")
691         c.Check(err, IsNil)
692
693         c.Check(api.Content[0]["container"].(arvadosclient.Dict)["state"], Equals, "Running")
694 }
695
696 func (s *TestSuite) TestUpdateContainerComplete(c *C) {
697         api := &ArvTestClient{}
698         kc := &KeepTestClient{}
699         defer kc.Close()
700         cr, err := NewContainerRunner(s.client, api, kc, "zzzzz-zzzzz-zzzzzzzzzzzzzzz")
701         c.Assert(err, IsNil)
702
703         cr.LogsPDH = new(string)
704         *cr.LogsPDH = "d3a229d2fe3690c2c3e75a71a153c6a3+60"
705
706         cr.ExitCode = new(int)
707         *cr.ExitCode = 42
708         cr.finalState = "Complete"
709
710         err = cr.UpdateContainerFinal()
711         c.Check(err, IsNil)
712
713         c.Check(api.Content[0]["container"].(arvadosclient.Dict)["log"], Equals, *cr.LogsPDH)
714         c.Check(api.Content[0]["container"].(arvadosclient.Dict)["exit_code"], Equals, *cr.ExitCode)
715         c.Check(api.Content[0]["container"].(arvadosclient.Dict)["state"], Equals, "Complete")
716 }
717
718 func (s *TestSuite) TestUpdateContainerCancelled(c *C) {
719         api := &ArvTestClient{}
720         kc := &KeepTestClient{}
721         defer kc.Close()
722         cr, err := NewContainerRunner(s.client, api, kc, "zzzzz-zzzzz-zzzzzzzzzzzzzzz")
723         c.Assert(err, IsNil)
724         cr.cCancelled = true
725         cr.finalState = "Cancelled"
726
727         err = cr.UpdateContainerFinal()
728         c.Check(err, IsNil)
729
730         c.Check(api.Content[0]["container"].(arvadosclient.Dict)["log"], IsNil)
731         c.Check(api.Content[0]["container"].(arvadosclient.Dict)["exit_code"], IsNil)
732         c.Check(api.Content[0]["container"].(arvadosclient.Dict)["state"], Equals, "Cancelled")
733 }
734
735 // Used by the TestFullRun*() test below to DRY up boilerplate setup to do full
736 // dress rehearsal of the Run() function, starting from a JSON container record.
737 func (s *TestSuite) fullRunHelper(c *C, record string, extraMounts []string, fn func() int) (*ArvTestClient, *ContainerRunner, string) {
738         err := json.Unmarshal([]byte(record), &s.api.Container)
739         c.Assert(err, IsNil)
740         initialState := s.api.Container.State
741
742         var sm struct {
743                 SecretMounts map[string]arvados.Mount `json:"secret_mounts"`
744         }
745         err = json.Unmarshal([]byte(record), &sm)
746         c.Check(err, IsNil)
747         secretMounts, err := json.Marshal(sm)
748         c.Assert(err, IsNil)
749         c.Logf("SecretMounts decoded %v json %q", sm, secretMounts)
750
751         s.executor.runFunc = fn
752
753         s.runner.statInterval = 100 * time.Millisecond
754         s.runner.containerWatchdogInterval = time.Second
755
756         realTemp := c.MkDir()
757         tempcount := 0
758         s.runner.MkTempDir = func(_, prefix string) (string, error) {
759                 tempcount++
760                 d := fmt.Sprintf("%s/%s%d", realTemp, prefix, tempcount)
761                 err := os.Mkdir(d, os.ModePerm)
762                 if err != nil && strings.Contains(err.Error(), ": file exists") {
763                         // Test case must have pre-populated the tempdir
764                         err = nil
765                 }
766                 return d, err
767         }
768         client, _ := apiStub()
769         s.runner.MkArvClient = func(token string) (IArvadosClient, IKeepClient, *arvados.Client, error) {
770                 return &ArvTestClient{secretMounts: secretMounts}, &s.testContainerKeepClient, client, nil
771         }
772
773         if extraMounts != nil && len(extraMounts) > 0 {
774                 err := s.runner.SetupArvMountPoint("keep")
775                 c.Check(err, IsNil)
776
777                 for _, m := range extraMounts {
778                         os.MkdirAll(s.runner.ArvMountPoint+"/by_id/"+m, os.ModePerm)
779                 }
780         }
781
782         err = s.runner.Run()
783         if s.api.CalledWith("container.state", "Complete") != nil {
784                 c.Check(err, IsNil)
785         }
786         if s.executor.loadErr == nil && s.executor.createErr == nil && initialState != "Running" {
787                 c.Check(s.api.WasSetRunning, Equals, true)
788                 var lastupdate arvadosclient.Dict
789                 for _, content := range s.api.Content {
790                         if content["container"] != nil {
791                                 lastupdate = content["container"].(arvadosclient.Dict)
792                         }
793                 }
794                 if lastupdate["log"] == nil {
795                         c.Errorf("no container update with non-nil log -- updates were: %v", s.api.Content)
796                 }
797         }
798
799         if err != nil {
800                 for k, v := range s.api.Logs {
801                         c.Log(k)
802                         c.Log(v.String())
803                 }
804         }
805
806         return s.api, s.runner, realTemp
807 }
808
809 func (s *TestSuite) TestFullRunHello(c *C) {
810         s.runner.enableMemoryLimit = true
811         s.runner.networkMode = "default"
812         s.fullRunHelper(c, `{
813     "command": ["echo", "hello world"],
814     "container_image": "`+arvadostest.DockerImage112PDH+`",
815     "cwd": ".",
816     "environment": {"foo":"bar","baz":"waz"},
817     "mounts": {"/tmp": {"kind": "tmp"} },
818     "output_path": "/tmp",
819     "priority": 1,
820     "runtime_constraints": {"vcpus":1,"ram":1000000},
821     "state": "Locked",
822     "output_storage_classes": ["default"]
823 }`, nil, func() int {
824                 c.Check(s.executor.created.Command, DeepEquals, []string{"echo", "hello world"})
825                 c.Check(s.executor.created.Image, Equals, "sha256:d8309758b8fe2c81034ffc8a10c36460b77db7bc5e7b448c4e5b684f9d95a678")
826                 c.Check(s.executor.created.Env, DeepEquals, map[string]string{"foo": "bar", "baz": "waz"})
827                 c.Check(s.executor.created.VCPUs, Equals, 1)
828                 c.Check(s.executor.created.RAM, Equals, int64(1000000))
829                 c.Check(s.executor.created.NetworkMode, Equals, "default")
830                 c.Check(s.executor.created.EnableNetwork, Equals, false)
831                 c.Check(s.executor.created.CUDADeviceCount, Equals, 0)
832                 fmt.Fprintln(s.executor.created.Stdout, "hello world")
833                 return 0
834         })
835
836         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
837         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
838         c.Check(s.api.Logs["stdout"].String(), Matches, ".*hello world\n")
839         c.Check(s.testDispatcherKeepClient.StorageClasses, DeepEquals, []string{"default"})
840         c.Check(s.testContainerKeepClient.StorageClasses, DeepEquals, []string{"default"})
841 }
842
843 func (s *TestSuite) TestRunAlreadyRunning(c *C) {
844         var ran bool
845         s.fullRunHelper(c, `{
846     "command": ["sleep", "3"],
847     "container_image": "`+arvadostest.DockerImage112PDH+`",
848     "cwd": ".",
849     "environment": {},
850     "mounts": {"/tmp": {"kind": "tmp"} },
851     "output_path": "/tmp",
852     "priority": 1,
853     "runtime_constraints": {},
854     "scheduling_parameters":{"max_run_time": 1},
855     "state": "Running"
856 }`, nil, func() int {
857                 ran = true
858                 return 2
859         })
860         c.Check(s.api.CalledWith("container.state", "Cancelled"), IsNil)
861         c.Check(s.api.CalledWith("container.state", "Complete"), IsNil)
862         c.Check(ran, Equals, false)
863 }
864
865 func ec2MetadataServerStub(c *C, token *string, failureRate float64, stoptime *atomic.Value) *httptest.Server {
866         failedOnce := false
867         return httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
868                 if !failedOnce || rand.Float64() < failureRate {
869                         w.WriteHeader(http.StatusServiceUnavailable)
870                         failedOnce = true
871                         return
872                 }
873                 switch r.URL.Path {
874                 case "/latest/api/token":
875                         fmt.Fprintln(w, *token)
876                 case "/latest/meta-data/spot/instance-action":
877                         if r.Header.Get("X-aws-ec2-metadata-token") != *token {
878                                 w.WriteHeader(http.StatusUnauthorized)
879                         } else if t, _ := stoptime.Load().(time.Time); t.IsZero() {
880                                 w.WriteHeader(http.StatusNotFound)
881                         } else {
882                                 fmt.Fprintf(w, `{"action":"stop","time":"%s"}`, t.Format(time.RFC3339))
883                         }
884                 default:
885                         w.WriteHeader(http.StatusNotFound)
886                 }
887         }))
888 }
889
890 func (s *TestSuite) TestSpotInterruptionNotice(c *C) {
891         s.testSpotInterruptionNotice(c, 0.1)
892 }
893
894 func (s *TestSuite) TestSpotInterruptionNoticeNotAvailable(c *C) {
895         s.testSpotInterruptionNotice(c, 1)
896 }
897
898 func (s *TestSuite) testSpotInterruptionNotice(c *C, failureRate float64) {
899         var stoptime atomic.Value
900         token := "fake-ec2-metadata-token"
901         stub := ec2MetadataServerStub(c, &token, failureRate, &stoptime)
902         defer stub.Close()
903
904         defer func(i time.Duration, u string) {
905                 spotInterruptionCheckInterval = i
906                 ec2MetadataBaseURL = u
907         }(spotInterruptionCheckInterval, ec2MetadataBaseURL)
908         spotInterruptionCheckInterval = time.Second / 8
909         ec2MetadataBaseURL = stub.URL
910
911         go s.runner.checkSpotInterruptionNotices()
912         s.fullRunHelper(c, `{
913     "command": ["sleep", "3"],
914     "container_image": "`+arvadostest.DockerImage112PDH+`",
915     "cwd": ".",
916     "environment": {},
917     "mounts": {"/tmp": {"kind": "tmp"} },
918     "output_path": "/tmp",
919     "priority": 1,
920     "runtime_constraints": {},
921     "state": "Locked"
922 }`, nil, func() int {
923                 time.Sleep(time.Second)
924                 stoptime.Store(time.Now().Add(time.Minute).UTC())
925                 token = "different-fake-ec2-metadata-token"
926                 time.Sleep(time.Second)
927                 return 0
928         })
929         c.Check(s.api.Logs["crunch-run"].String(), Matches, `(?ms).*Checking for spot interruptions every 125ms using instance metadata at http://.*`)
930         c.Check(s.api.Logs["crunch-run"].String(), Matches, `(?ms).*Error checking spot interruptions: 503 Service Unavailable.*`)
931         if failureRate == 1 {
932                 c.Check(s.api.Logs["crunch-run"].String(), Matches, `(?ms).*Giving up on checking spot interruptions after too many consecutive failures.*`)
933         } else {
934                 text := `Cloud provider scheduled instance stop at ` + stoptime.Load().(time.Time).Format(time.RFC3339)
935                 c.Check(s.api.Logs["crunch-run"].String(), Matches, `(?ms).*`+text+`.*`)
936                 c.Check(s.api.CalledWith("container.runtime_status.warning", "preemption notice"), NotNil)
937                 c.Check(s.api.CalledWith("container.runtime_status.warningDetail", text), NotNil)
938                 c.Check(s.api.CalledWith("container.runtime_status.preemptionNotice", text), NotNil)
939         }
940 }
941
942 func (s *TestSuite) TestRunTimeExceeded(c *C) {
943         s.fullRunHelper(c, `{
944     "command": ["sleep", "3"],
945     "container_image": "`+arvadostest.DockerImage112PDH+`",
946     "cwd": ".",
947     "environment": {},
948     "mounts": {"/tmp": {"kind": "tmp"} },
949     "output_path": "/tmp",
950     "priority": 1,
951     "runtime_constraints": {},
952     "scheduling_parameters":{"max_run_time": 1},
953     "state": "Locked"
954 }`, nil, func() int {
955                 time.Sleep(3 * time.Second)
956                 return 0
957         })
958
959         c.Check(s.api.CalledWith("container.state", "Cancelled"), NotNil)
960         c.Check(s.api.Logs["crunch-run"].String(), Matches, "(?ms).*maximum run time exceeded.*")
961 }
962
963 func (s *TestSuite) TestContainerWaitFails(c *C) {
964         s.fullRunHelper(c, `{
965     "command": ["sleep", "3"],
966     "container_image": "`+arvadostest.DockerImage112PDH+`",
967     "cwd": ".",
968     "mounts": {"/tmp": {"kind": "tmp"} },
969     "output_path": "/tmp",
970     "priority": 1,
971     "state": "Locked"
972 }`, nil, func() int {
973                 s.executor.waitErr = errors.New("Container is not running")
974                 return 0
975         })
976
977         c.Check(s.api.CalledWith("container.state", "Cancelled"), NotNil)
978         c.Check(s.api.Logs["crunch-run"].String(), Matches, "(?ms).*Container is not running.*")
979 }
980
981 func (s *TestSuite) TestCrunchstat(c *C) {
982         s.runner.crunchstatFakeFS = os.DirFS("../crunchstat/testdata/debian12")
983         s.fullRunHelper(c, `{
984                 "command": ["sleep", "1"],
985                 "container_image": "`+arvadostest.DockerImage112PDH+`",
986                 "cwd": ".",
987                 "environment": {},
988                 "mounts": {"/tmp": {"kind": "tmp"} },
989                 "output_path": "/tmp",
990                 "priority": 1,
991                 "runtime_constraints": {},
992                 "state": "Locked"
993         }`, nil, func() int {
994                 time.Sleep(time.Second)
995                 return 0
996         })
997
998         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
999         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
1000
1001         c.Assert(s.api.Logs["crunchstat"], NotNil)
1002         c.Check(s.api.Logs["crunchstat"].String(), Matches, `(?ms).*mem \d+ swap \d+ pgmajfault \d+ rss.*`)
1003
1004         // Check that we called (*crunchstat.Reporter)Stop().
1005         c.Check(s.api.Logs["crunch-run"].String(), Matches, `(?ms).*Maximum crunch-run memory rss usage was \d+ bytes\n.*`)
1006 }
1007
1008 func (s *TestSuite) TestNodeInfoLog(c *C) {
1009         os.Setenv("SLURMD_NODENAME", "compute2")
1010         s.fullRunHelper(c, `{
1011                 "command": ["sleep", "1"],
1012                 "container_image": "`+arvadostest.DockerImage112PDH+`",
1013                 "cwd": ".",
1014                 "environment": {},
1015                 "mounts": {"/tmp": {"kind": "tmp"} },
1016                 "output_path": "/tmp",
1017                 "priority": 1,
1018                 "runtime_constraints": {},
1019                 "state": "Locked"
1020         }`, nil, func() int {
1021                 time.Sleep(time.Second)
1022                 return 0
1023         })
1024
1025         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
1026         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
1027
1028         c.Assert(s.api.Logs["node"], NotNil)
1029         json := s.api.Logs["node"].String()
1030         c.Check(json, Matches, `(?ms).*"uuid": *"zzzzz-7ekkf-2z3mc76g2q73aio".*`)
1031         c.Check(json, Matches, `(?ms).*"total_cpu_cores": *16.*`)
1032         c.Check(json, Not(Matches), `(?ms).*"info":.*`)
1033
1034         c.Assert(s.api.Logs["node-info"], NotNil)
1035         json = s.api.Logs["node-info"].String()
1036         c.Check(json, Matches, `(?ms).*Host Information.*`)
1037         c.Check(json, Matches, `(?ms).*CPU Information.*`)
1038         c.Check(json, Matches, `(?ms).*Memory Information.*`)
1039         c.Check(json, Matches, `(?ms).*Disk Space.*`)
1040         c.Check(json, Matches, `(?ms).*Disk INodes.*`)
1041 }
1042
1043 func (s *TestSuite) TestLogVersionAndRuntime(c *C) {
1044         s.fullRunHelper(c, `{
1045                 "command": ["sleep", "1"],
1046                 "container_image": "`+arvadostest.DockerImage112PDH+`",
1047                 "cwd": ".",
1048                 "environment": {},
1049                 "mounts": {"/tmp": {"kind": "tmp"} },
1050                 "output_path": "/tmp",
1051                 "priority": 1,
1052                 "runtime_constraints": {},
1053                 "state": "Locked"
1054         }`, nil, func() int {
1055                 return 0
1056         })
1057
1058         c.Assert(s.api.Logs["crunch-run"], NotNil)
1059         c.Check(s.api.Logs["crunch-run"].String(), Matches, `(?ms).*crunch-run \S+ \(go\S+\) start.*`)
1060         c.Check(s.api.Logs["crunch-run"].String(), Matches, `(?ms).*crunch-run process has uid=\d+\(.+\) gid=\d+\(.+\) groups=\d+\(.+\)(,\d+\(.+\))*\n.*`)
1061         c.Check(s.api.Logs["crunch-run"].String(), Matches, `(?ms).*Executing container: zzzzz-zzzzz-zzzzzzzzzzzzzzz.*`)
1062         c.Check(s.api.Logs["crunch-run"].String(), Matches, `(?ms).*Using container runtime: stub.*`)
1063 }
1064
1065 func (s *TestSuite) testLogRSSThresholds(c *C, ram int64, expected []int, notExpected int) {
1066         s.runner.crunchstatFakeFS = os.DirFS("../crunchstat/testdata/debian12")
1067         s.fullRunHelper(c, `{
1068                 "command": ["true"],
1069                 "container_image": "`+arvadostest.DockerImage112PDH+`",
1070                 "cwd": ".",
1071                 "environment": {},
1072                 "mounts": {"/tmp": {"kind": "tmp"} },
1073                 "output_path": "/tmp",
1074                 "priority": 1,
1075                 "runtime_constraints": {"ram": `+fmt.Sprintf("%d", ram)+`},
1076                 "state": "Locked"
1077         }`, nil, func() int { return 0 })
1078         c.Logf("=== crunchstat logs\n%s\n", s.api.Logs["crunchstat"].String())
1079         logs := s.api.Logs["crunch-run"].String()
1080         pattern := logLineStart + `Container using over %d%% of memory \(rss %d/%d bytes\)`
1081         var threshold int
1082         for _, threshold = range expected {
1083                 c.Check(logs, Matches, fmt.Sprintf(pattern, threshold, s.debian12MemoryCurrent, ram))
1084         }
1085         if notExpected > threshold {
1086                 c.Check(logs, Not(Matches), fmt.Sprintf(pattern, notExpected, s.debian12MemoryCurrent, ram))
1087         }
1088 }
1089
1090 func (s *TestSuite) TestLogNoRSSThresholds(c *C) {
1091         s.testLogRSSThresholds(c, s.debian12MemoryCurrent*10, []int{}, 90)
1092 }
1093
1094 func (s *TestSuite) TestLogSomeRSSThresholds(c *C) {
1095         onePercentRSS := s.debian12MemoryCurrent / 100
1096         s.testLogRSSThresholds(c, 102*onePercentRSS, []int{90, 95}, 99)
1097 }
1098
1099 func (s *TestSuite) TestLogAllRSSThresholds(c *C) {
1100         s.testLogRSSThresholds(c, s.debian12MemoryCurrent, []int{90, 95, 99}, 0)
1101 }
1102
1103 func (s *TestSuite) TestLogMaximaAfterRun(c *C) {
1104         s.runner.crunchstatFakeFS = os.DirFS("../crunchstat/testdata/debian12")
1105         s.runner.parentTemp = c.MkDir()
1106         s.fullRunHelper(c, `{
1107         "command": ["true"],
1108         "container_image": "`+arvadostest.DockerImage112PDH+`",
1109         "cwd": ".",
1110         "environment": {},
1111         "mounts": {"/tmp": {"kind": "tmp"} },
1112         "output_path": "/tmp",
1113         "priority": 1,
1114         "runtime_constraints": {"ram": `+fmt.Sprintf("%d", s.debian12MemoryCurrent*10)+`},
1115         "state": "Locked"
1116     }`, nil, func() int { return 0 })
1117         logs := s.api.Logs["crunch-run"].String()
1118         for _, expected := range []string{
1119                 `Maximum disk usage was \d+%, \d+/\d+ bytes`,
1120                 fmt.Sprintf(`Maximum container memory swap usage was %d bytes`, s.debian12SwapCurrent),
1121                 `Maximum container memory pgmajfault usage was \d+ faults`,
1122                 fmt.Sprintf(`Maximum container memory rss usage was 10%%, %d/%d bytes`, s.debian12MemoryCurrent, s.debian12MemoryCurrent*10),
1123                 `Maximum crunch-run memory rss usage was \d+ bytes`,
1124         } {
1125                 c.Check(logs, Matches, logLineStart+expected)
1126         }
1127 }
1128
1129 func (s *TestSuite) TestCommitNodeInfoBeforeStart(c *C) {
1130         var collection_create, container_update arvadosclient.Dict
1131         s.fullRunHelper(c, `{
1132                 "command": ["true"],
1133                 "container_image": "`+arvadostest.DockerImage112PDH+`",
1134                 "cwd": ".",
1135                 "environment": {},
1136                 "mounts": {"/tmp": {"kind": "tmp"} },
1137                 "output_path": "/tmp",
1138                 "priority": 1,
1139                 "runtime_constraints": {},
1140                 "state": "Locked",
1141                 "uuid": "zzzzz-dz642-202301121543210"
1142         }`, nil, func() int {
1143                 collection_create = s.api.CalledWith("ensure_unique_name", true)
1144                 container_update = s.api.CalledWith("container.state", "Running")
1145                 return 0
1146         })
1147
1148         c.Assert(collection_create, NotNil)
1149         log_collection := collection_create["collection"].(arvadosclient.Dict)
1150         c.Check(log_collection["name"], Equals, "logs for zzzzz-dz642-202301121543210")
1151         manifest_text := log_collection["manifest_text"].(string)
1152         // We check that the file size is at least two digits as an easy way to
1153         // check the file isn't empty.
1154         c.Check(manifest_text, Matches, `\. .+ \d+:\d{2,}:node-info\.txt( .+)?\n`)
1155         c.Check(manifest_text, Matches, `\. .+ \d+:\d{2,}:node\.json( .+)?\n`)
1156
1157         c.Assert(container_update, NotNil)
1158         // As of Arvados 2.5.0, the container update must specify its log in PDH
1159         // format for the API server to propagate it to container requests, which
1160         // is what we care about for this test.
1161         expect_pdh := fmt.Sprintf("%x+%d", md5.Sum([]byte(manifest_text)), len(manifest_text))
1162         c.Check(container_update["container"].(arvadosclient.Dict)["log"], Equals, expect_pdh)
1163 }
1164
1165 func (s *TestSuite) TestContainerRecordLog(c *C) {
1166         s.fullRunHelper(c, `{
1167                 "command": ["sleep", "1"],
1168                 "container_image": "`+arvadostest.DockerImage112PDH+`",
1169                 "cwd": ".",
1170                 "environment": {},
1171                 "mounts": {"/tmp": {"kind": "tmp"} },
1172                 "output_path": "/tmp",
1173                 "priority": 1,
1174                 "runtime_constraints": {},
1175                 "state": "Locked"
1176         }`, nil,
1177                 func() int {
1178                         time.Sleep(time.Second)
1179                         return 0
1180                 })
1181
1182         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
1183         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
1184
1185         c.Assert(s.api.Logs["container"], NotNil)
1186         c.Check(s.api.Logs["container"].String(), Matches, `(?ms).*container_image.*`)
1187 }
1188
1189 func (s *TestSuite) TestFullRunStderr(c *C) {
1190         s.fullRunHelper(c, `{
1191     "command": ["/bin/sh", "-c", "echo hello ; echo world 1>&2 ; exit 1"],
1192     "container_image": "`+arvadostest.DockerImage112PDH+`",
1193     "cwd": ".",
1194     "environment": {},
1195     "mounts": {"/tmp": {"kind": "tmp"} },
1196     "output_path": "/tmp",
1197     "priority": 1,
1198     "runtime_constraints": {},
1199     "state": "Locked"
1200 }`, nil, func() int {
1201                 fmt.Fprintln(s.executor.created.Stdout, "hello")
1202                 fmt.Fprintln(s.executor.created.Stderr, "world")
1203                 return 1
1204         })
1205
1206         final := s.api.CalledWith("container.state", "Complete")
1207         c.Assert(final, NotNil)
1208         c.Check(final["container"].(arvadosclient.Dict)["exit_code"], Equals, 1)
1209         c.Check(final["container"].(arvadosclient.Dict)["log"], NotNil)
1210
1211         c.Check(s.api.Logs["stdout"].String(), Matches, ".*hello\n")
1212         c.Check(s.api.Logs["stderr"].String(), Matches, ".*world\n")
1213 }
1214
1215 func (s *TestSuite) TestFullRunDefaultCwd(c *C) {
1216         s.fullRunHelper(c, `{
1217     "command": ["pwd"],
1218     "container_image": "`+arvadostest.DockerImage112PDH+`",
1219     "cwd": ".",
1220     "environment": {},
1221     "mounts": {"/tmp": {"kind": "tmp"} },
1222     "output_path": "/tmp",
1223     "priority": 1,
1224     "runtime_constraints": {},
1225     "state": "Locked"
1226 }`, nil, func() int {
1227                 fmt.Fprintf(s.executor.created.Stdout, "workdir=%q", s.executor.created.WorkingDir)
1228                 return 0
1229         })
1230
1231         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
1232         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
1233         c.Log(s.api.Logs["stdout"])
1234         c.Check(s.api.Logs["stdout"].String(), Matches, `.*workdir=""\n`)
1235 }
1236
1237 func (s *TestSuite) TestFullRunSetCwd(c *C) {
1238         s.fullRunHelper(c, `{
1239     "command": ["pwd"],
1240     "container_image": "`+arvadostest.DockerImage112PDH+`",
1241     "cwd": "/bin",
1242     "environment": {},
1243     "mounts": {"/tmp": {"kind": "tmp"} },
1244     "output_path": "/tmp",
1245     "priority": 1,
1246     "runtime_constraints": {},
1247     "state": "Locked"
1248 }`, nil, func() int {
1249                 fmt.Fprintln(s.executor.created.Stdout, s.executor.created.WorkingDir)
1250                 return 0
1251         })
1252
1253         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
1254         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
1255         c.Check(s.api.Logs["stdout"].String(), Matches, ".*/bin\n")
1256 }
1257
1258 func (s *TestSuite) TestFullRunSetOutputStorageClasses(c *C) {
1259         s.fullRunHelper(c, `{
1260     "command": ["pwd"],
1261     "container_image": "`+arvadostest.DockerImage112PDH+`",
1262     "cwd": "/bin",
1263     "environment": {},
1264     "mounts": {"/tmp": {"kind": "tmp"} },
1265     "output_path": "/tmp",
1266     "priority": 1,
1267     "runtime_constraints": {},
1268     "state": "Locked",
1269     "output_storage_classes": ["foo", "bar"]
1270 }`, nil, func() int {
1271                 fmt.Fprintln(s.executor.created.Stdout, s.executor.created.WorkingDir)
1272                 return 0
1273         })
1274
1275         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
1276         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
1277         c.Check(s.api.Logs["stdout"].String(), Matches, ".*/bin\n")
1278         c.Check(s.testDispatcherKeepClient.StorageClasses, DeepEquals, []string{"foo", "bar"})
1279         c.Check(s.testContainerKeepClient.StorageClasses, DeepEquals, []string{"foo", "bar"})
1280 }
1281
1282 func (s *TestSuite) TestEnableCUDADeviceCount(c *C) {
1283         s.fullRunHelper(c, `{
1284     "command": ["pwd"],
1285     "container_image": "`+arvadostest.DockerImage112PDH+`",
1286     "cwd": "/bin",
1287     "environment": {},
1288     "mounts": {"/tmp": {"kind": "tmp"} },
1289     "output_path": "/tmp",
1290     "priority": 1,
1291     "runtime_constraints": {"cuda": {"device_count": 2}},
1292     "state": "Locked",
1293     "output_storage_classes": ["foo", "bar"]
1294 }`, nil, func() int {
1295                 fmt.Fprintln(s.executor.created.Stdout, "ok")
1296                 return 0
1297         })
1298         c.Check(s.executor.created.CUDADeviceCount, Equals, 2)
1299 }
1300
1301 func (s *TestSuite) TestEnableCUDAHardwareCapability(c *C) {
1302         s.fullRunHelper(c, `{
1303     "command": ["pwd"],
1304     "container_image": "`+arvadostest.DockerImage112PDH+`",
1305     "cwd": "/bin",
1306     "environment": {},
1307     "mounts": {"/tmp": {"kind": "tmp"} },
1308     "output_path": "/tmp",
1309     "priority": 1,
1310     "runtime_constraints": {"cuda": {"hardware_capability": "foo"}},
1311     "state": "Locked",
1312     "output_storage_classes": ["foo", "bar"]
1313 }`, nil, func() int {
1314                 fmt.Fprintln(s.executor.created.Stdout, "ok")
1315                 return 0
1316         })
1317         c.Check(s.executor.created.CUDADeviceCount, Equals, 0)
1318 }
1319
1320 func (s *TestSuite) TestStopOnSignal(c *C) {
1321         s.executor.runFunc = func() int {
1322                 s.executor.created.Stdout.Write([]byte("foo\n"))
1323                 s.runner.SigChan <- syscall.SIGINT
1324                 time.Sleep(10 * time.Second)
1325                 return 0
1326         }
1327         s.testStopContainer(c)
1328 }
1329
1330 func (s *TestSuite) TestStopOnArvMountDeath(c *C) {
1331         s.executor.runFunc = func() int {
1332                 s.executor.created.Stdout.Write([]byte("foo\n"))
1333                 s.runner.ArvMountExit <- nil
1334                 close(s.runner.ArvMountExit)
1335                 time.Sleep(10 * time.Second)
1336                 return 0
1337         }
1338         s.runner.ArvMountExit = make(chan error)
1339         s.testStopContainer(c)
1340 }
1341
1342 func (s *TestSuite) testStopContainer(c *C) {
1343         record := `{
1344     "command": ["/bin/sh", "-c", "echo foo && sleep 30 && echo bar"],
1345     "container_image": "` + arvadostest.DockerImage112PDH + `",
1346     "cwd": ".",
1347     "environment": {},
1348     "mounts": {"/tmp": {"kind": "tmp"} },
1349     "output_path": "/tmp",
1350     "priority": 1,
1351     "runtime_constraints": {},
1352     "state": "Locked"
1353 }`
1354
1355         err := json.Unmarshal([]byte(record), &s.api.Container)
1356         c.Assert(err, IsNil)
1357
1358         s.runner.RunArvMount = func([]string, string) (*exec.Cmd, error) { return nil, nil }
1359         s.runner.MkArvClient = func(token string) (IArvadosClient, IKeepClient, *arvados.Client, error) {
1360                 return &ArvTestClient{}, &KeepTestClient{}, nil, nil
1361         }
1362
1363         done := make(chan error)
1364         go func() {
1365                 done <- s.runner.Run()
1366         }()
1367         select {
1368         case <-time.After(20 * time.Second):
1369                 pprof.Lookup("goroutine").WriteTo(os.Stderr, 1)
1370                 c.Fatal("timed out")
1371         case err = <-done:
1372                 c.Check(err, IsNil)
1373         }
1374         for k, v := range s.api.Logs {
1375                 c.Log(k)
1376                 c.Log(v.String(), "\n")
1377         }
1378
1379         c.Check(s.api.CalledWith("container.log", nil), NotNil)
1380         c.Check(s.api.CalledWith("container.state", "Cancelled"), NotNil)
1381         c.Check(s.api.Logs["stdout"].String(), Matches, "(?ms).*foo\n$")
1382 }
1383
1384 func (s *TestSuite) TestFullRunSetEnv(c *C) {
1385         s.fullRunHelper(c, `{
1386     "command": ["/bin/sh", "-c", "echo $FROBIZ"],
1387     "container_image": "`+arvadostest.DockerImage112PDH+`",
1388     "cwd": "/bin",
1389     "environment": {"FROBIZ": "bilbo"},
1390     "mounts": {"/tmp": {"kind": "tmp"} },
1391     "output_path": "/tmp",
1392     "priority": 1,
1393     "runtime_constraints": {},
1394     "state": "Locked"
1395 }`, nil, func() int {
1396                 fmt.Fprintf(s.executor.created.Stdout, "%v", s.executor.created.Env)
1397                 return 0
1398         })
1399
1400         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
1401         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
1402         c.Check(s.api.Logs["stdout"].String(), Matches, `.*map\[FROBIZ:bilbo\]\n`)
1403 }
1404
1405 type ArvMountCmdLine struct {
1406         Cmd   []string
1407         token string
1408 }
1409
1410 func (am *ArvMountCmdLine) ArvMountTest(c []string, token string) (*exec.Cmd, error) {
1411         am.Cmd = c
1412         am.token = token
1413         return nil, nil
1414 }
1415
1416 func stubCert(c *C, temp string) string {
1417         path := temp + "/ca-certificates.crt"
1418         err := os.WriteFile(path, []byte{}, 0666)
1419         c.Assert(err, IsNil)
1420         os.Setenv("SSL_CERT_FILE", path)
1421         return path
1422 }
1423
1424 func (s *TestSuite) TestSetupMounts(c *C) {
1425         cr := s.runner
1426         am := &ArvMountCmdLine{}
1427         cr.RunArvMount = am.ArvMountTest
1428         cr.containerClient, _ = apiStub()
1429         cr.ContainerArvClient = &ArvTestClient{}
1430         cr.ContainerKeepClient = &KeepTestClient{}
1431         cr.Container.OutputStorageClasses = []string{"default"}
1432
1433         realTemp := c.MkDir()
1434         certTemp := c.MkDir()
1435         stubCertPath := stubCert(c, certTemp)
1436         cr.parentTemp = realTemp
1437
1438         i := 0
1439         cr.MkTempDir = func(_ string, prefix string) (string, error) {
1440                 i++
1441                 d := fmt.Sprintf("%s/%s%d", realTemp, prefix, i)
1442                 err := os.Mkdir(d, os.ModePerm)
1443                 if err != nil && strings.Contains(err.Error(), ": file exists") {
1444                         // Test case must have pre-populated the tempdir
1445                         err = nil
1446                 }
1447                 return d, err
1448         }
1449
1450         checkEmpty := func() {
1451                 // Should be deleted.
1452                 _, err := os.Stat(realTemp)
1453                 c.Assert(os.IsNotExist(err), Equals, true)
1454
1455                 // Now recreate it for the next test.
1456                 c.Assert(os.Mkdir(realTemp, 0777), IsNil)
1457         }
1458
1459         {
1460                 i = 0
1461                 cr.ArvMountPoint = ""
1462                 cr.Container.Mounts = make(map[string]arvados.Mount)
1463                 cr.Container.Mounts["/tmp"] = arvados.Mount{Kind: "tmp"}
1464                 cr.Container.OutputPath = "/tmp"
1465                 cr.statInterval = 5 * time.Second
1466                 bindmounts, err := cr.SetupMounts()
1467                 c.Check(err, IsNil)
1468                 c.Check(am.Cmd, DeepEquals, []string{"arv-mount", "--foreground",
1469                         "--read-write", "--storage-classes", "default", "--crunchstat-interval=5",
1470                         "--mount-by-pdh", "by_id", "--disable-event-listening", "--mount-by-id", "by_uuid", realTemp + "/keep1"})
1471                 c.Check(bindmounts, DeepEquals, map[string]bindmount{"/tmp": {realTemp + "/tmp2", false}})
1472                 os.RemoveAll(cr.ArvMountPoint)
1473                 cr.CleanupDirs()
1474                 checkEmpty()
1475         }
1476
1477         {
1478                 i = 0
1479                 cr.ArvMountPoint = ""
1480                 cr.Container.Mounts = make(map[string]arvados.Mount)
1481                 cr.Container.Mounts["/out"] = arvados.Mount{Kind: "tmp"}
1482                 cr.Container.Mounts["/tmp"] = arvados.Mount{Kind: "tmp"}
1483                 cr.Container.OutputPath = "/out"
1484                 cr.Container.OutputStorageClasses = []string{"foo", "bar"}
1485
1486                 bindmounts, err := cr.SetupMounts()
1487                 c.Check(err, IsNil)
1488                 c.Check(am.Cmd, DeepEquals, []string{"arv-mount", "--foreground",
1489                         "--read-write", "--storage-classes", "foo,bar", "--crunchstat-interval=5",
1490                         "--mount-by-pdh", "by_id", "--disable-event-listening", "--mount-by-id", "by_uuid", realTemp + "/keep1"})
1491                 c.Check(bindmounts, DeepEquals, map[string]bindmount{"/out": {realTemp + "/tmp2", false}, "/tmp": {realTemp + "/tmp3", false}})
1492                 os.RemoveAll(cr.ArvMountPoint)
1493                 cr.CleanupDirs()
1494                 checkEmpty()
1495         }
1496
1497         {
1498                 i = 0
1499                 cr.ArvMountPoint = ""
1500                 cr.Container.Mounts = make(map[string]arvados.Mount)
1501                 cr.Container.Mounts["/tmp"] = arvados.Mount{Kind: "tmp"}
1502                 cr.Container.OutputPath = "/tmp"
1503                 cr.Container.RuntimeConstraints.API = true
1504                 cr.Container.OutputStorageClasses = []string{"default"}
1505
1506                 bindmounts, err := cr.SetupMounts()
1507                 c.Check(err, IsNil)
1508                 c.Check(am.Cmd, DeepEquals, []string{"arv-mount", "--foreground",
1509                         "--read-write", "--storage-classes", "default", "--crunchstat-interval=5",
1510                         "--mount-by-pdh", "by_id", "--disable-event-listening", "--mount-by-id", "by_uuid", realTemp + "/keep1"})
1511                 c.Check(bindmounts, DeepEquals, map[string]bindmount{"/tmp": {realTemp + "/tmp2", false}, "/etc/arvados/ca-certificates.crt": {stubCertPath, true}})
1512                 os.RemoveAll(cr.ArvMountPoint)
1513                 cr.CleanupDirs()
1514                 checkEmpty()
1515
1516                 cr.Container.RuntimeConstraints.API = false
1517         }
1518
1519         {
1520                 i = 0
1521                 cr.ArvMountPoint = ""
1522                 cr.Container.Mounts = map[string]arvados.Mount{
1523                         "/keeptmp": {Kind: "collection", Writable: true},
1524                 }
1525                 cr.Container.OutputPath = "/keeptmp"
1526
1527                 os.MkdirAll(realTemp+"/keep1/tmp0", os.ModePerm)
1528
1529                 bindmounts, err := cr.SetupMounts()
1530                 c.Check(err, IsNil)
1531                 c.Check(am.Cmd, DeepEquals, []string{"arv-mount", "--foreground",
1532                         "--read-write", "--storage-classes", "default", "--crunchstat-interval=5",
1533                         "--mount-tmp", "tmp0", "--mount-by-pdh", "by_id", "--disable-event-listening", "--mount-by-id", "by_uuid", realTemp + "/keep1"})
1534                 c.Check(bindmounts, DeepEquals, map[string]bindmount{"/keeptmp": {realTemp + "/keep1/tmp0", false}})
1535                 os.RemoveAll(cr.ArvMountPoint)
1536                 cr.CleanupDirs()
1537                 checkEmpty()
1538         }
1539
1540         {
1541                 i = 0
1542                 cr.ArvMountPoint = ""
1543                 cr.Container.Mounts = map[string]arvados.Mount{
1544                         "/keepinp": {Kind: "collection", PortableDataHash: "59389a8f9ee9d399be35462a0f92541c+53"},
1545                         "/keepout": {Kind: "collection", Writable: true},
1546                 }
1547                 cr.Container.OutputPath = "/keepout"
1548
1549                 os.MkdirAll(realTemp+"/keep1/by_id/59389a8f9ee9d399be35462a0f92541c+53", os.ModePerm)
1550                 os.MkdirAll(realTemp+"/keep1/tmp0", os.ModePerm)
1551
1552                 bindmounts, err := cr.SetupMounts()
1553                 c.Check(err, IsNil)
1554                 c.Check(am.Cmd, DeepEquals, []string{"arv-mount", "--foreground",
1555                         "--read-write", "--storage-classes", "default", "--crunchstat-interval=5",
1556                         "--mount-tmp", "tmp0", "--mount-by-pdh", "by_id", "--disable-event-listening", "--mount-by-id", "by_uuid", realTemp + "/keep1"})
1557                 c.Check(bindmounts, DeepEquals, map[string]bindmount{
1558                         "/keepinp": {realTemp + "/keep1/by_id/59389a8f9ee9d399be35462a0f92541c+53", true},
1559                         "/keepout": {realTemp + "/keep1/tmp0", false},
1560                 })
1561                 os.RemoveAll(cr.ArvMountPoint)
1562                 cr.CleanupDirs()
1563                 checkEmpty()
1564         }
1565
1566         {
1567                 i = 0
1568                 cr.ArvMountPoint = ""
1569                 cr.Container.RuntimeConstraints.KeepCacheRAM = 512
1570                 cr.Container.Mounts = map[string]arvados.Mount{
1571                         "/keepinp": {Kind: "collection", PortableDataHash: "59389a8f9ee9d399be35462a0f92541c+53"},
1572                         "/keepout": {Kind: "collection", Writable: true},
1573                 }
1574                 cr.Container.OutputPath = "/keepout"
1575
1576                 os.MkdirAll(realTemp+"/keep1/by_id/59389a8f9ee9d399be35462a0f92541c+53", os.ModePerm)
1577                 os.MkdirAll(realTemp+"/keep1/tmp0", os.ModePerm)
1578
1579                 bindmounts, err := cr.SetupMounts()
1580                 c.Check(err, IsNil)
1581                 c.Check(am.Cmd, DeepEquals, []string{"arv-mount", "--foreground",
1582                         "--read-write", "--storage-classes", "default", "--crunchstat-interval=5", "--ram-cache",
1583                         "--file-cache", "512", "--mount-tmp", "tmp0", "--mount-by-pdh", "by_id", "--disable-event-listening", "--mount-by-id", "by_uuid", realTemp + "/keep1"})
1584                 c.Check(bindmounts, DeepEquals, map[string]bindmount{
1585                         "/keepinp": {realTemp + "/keep1/by_id/59389a8f9ee9d399be35462a0f92541c+53", true},
1586                         "/keepout": {realTemp + "/keep1/tmp0", false},
1587                 })
1588                 os.RemoveAll(cr.ArvMountPoint)
1589                 cr.CleanupDirs()
1590                 checkEmpty()
1591         }
1592
1593         for _, test := range []struct {
1594                 in  interface{}
1595                 out string
1596         }{
1597                 {in: "foo", out: `"foo"`},
1598                 {in: nil, out: `null`},
1599                 {in: map[string]int64{"foo": 123456789123456789}, out: `{"foo":123456789123456789}`},
1600         } {
1601                 i = 0
1602                 cr.ArvMountPoint = ""
1603                 cr.Container.Mounts = map[string]arvados.Mount{
1604                         "/mnt/test.json": {Kind: "json", Content: test.in},
1605                 }
1606                 bindmounts, err := cr.SetupMounts()
1607                 c.Check(err, IsNil)
1608                 c.Check(bindmounts, DeepEquals, map[string]bindmount{
1609                         "/mnt/test.json": {realTemp + "/json2/mountdata.json", true},
1610                 })
1611                 content, err := ioutil.ReadFile(realTemp + "/json2/mountdata.json")
1612                 c.Check(err, IsNil)
1613                 c.Check(content, DeepEquals, []byte(test.out))
1614                 os.RemoveAll(cr.ArvMountPoint)
1615                 cr.CleanupDirs()
1616                 checkEmpty()
1617         }
1618
1619         for _, test := range []struct {
1620                 in  interface{}
1621                 out string
1622         }{
1623                 {in: "foo", out: `foo`},
1624                 {in: nil, out: "error"},
1625                 {in: map[string]int64{"foo": 123456789123456789}, out: "error"},
1626         } {
1627                 i = 0
1628                 cr.ArvMountPoint = ""
1629                 cr.Container.Mounts = map[string]arvados.Mount{
1630                         "/mnt/test.txt": {Kind: "text", Content: test.in},
1631                 }
1632                 bindmounts, err := cr.SetupMounts()
1633                 if test.out == "error" {
1634                         c.Check(err.Error(), Equals, "content for mount \"/mnt/test.txt\" must be a string")
1635                 } else {
1636                         c.Check(err, IsNil)
1637                         c.Check(bindmounts, DeepEquals, map[string]bindmount{
1638                                 "/mnt/test.txt": {realTemp + "/text2/mountdata.text", true},
1639                         })
1640                         content, err := ioutil.ReadFile(realTemp + "/text2/mountdata.text")
1641                         c.Check(err, IsNil)
1642                         c.Check(content, DeepEquals, []byte(test.out))
1643                 }
1644                 os.RemoveAll(cr.ArvMountPoint)
1645                 cr.CleanupDirs()
1646                 checkEmpty()
1647         }
1648
1649         // Read-only mount points are allowed underneath output_dir mount point
1650         {
1651                 i = 0
1652                 cr.ArvMountPoint = ""
1653                 cr.Container.Mounts = make(map[string]arvados.Mount)
1654                 cr.Container.Mounts = map[string]arvados.Mount{
1655                         "/tmp":     {Kind: "tmp"},
1656                         "/tmp/foo": {Kind: "collection"},
1657                 }
1658                 cr.Container.OutputPath = "/tmp"
1659
1660                 os.MkdirAll(realTemp+"/keep1/tmp0", os.ModePerm)
1661
1662                 bindmounts, err := cr.SetupMounts()
1663                 c.Check(err, IsNil)
1664                 c.Check(am.Cmd, DeepEquals, []string{"arv-mount", "--foreground",
1665                         "--read-write", "--storage-classes", "default", "--crunchstat-interval=5", "--ram-cache",
1666                         "--file-cache", "512", "--mount-tmp", "tmp0", "--mount-by-pdh", "by_id", "--disable-event-listening", "--mount-by-id", "by_uuid", realTemp + "/keep1"})
1667                 c.Check(bindmounts, DeepEquals, map[string]bindmount{
1668                         "/tmp":     {realTemp + "/tmp2", false},
1669                         "/tmp/foo": {realTemp + "/keep1/tmp0", true},
1670                 })
1671                 os.RemoveAll(cr.ArvMountPoint)
1672                 cr.CleanupDirs()
1673                 checkEmpty()
1674         }
1675
1676         // Writable mount points copied to output_dir mount point
1677         {
1678                 i = 0
1679                 cr.ArvMountPoint = ""
1680                 cr.Container.Mounts = make(map[string]arvados.Mount)
1681                 cr.Container.Mounts = map[string]arvados.Mount{
1682                         "/tmp": {Kind: "tmp"},
1683                         "/tmp/foo": {Kind: "collection",
1684                                 PortableDataHash: "59389a8f9ee9d399be35462a0f92541c+53",
1685                                 Writable:         true},
1686                         "/tmp/bar": {Kind: "collection",
1687                                 PortableDataHash: "59389a8f9ee9d399be35462a0f92541d+53",
1688                                 Path:             "baz",
1689                                 Writable:         true},
1690                 }
1691                 cr.Container.OutputPath = "/tmp"
1692
1693                 os.MkdirAll(realTemp+"/keep1/by_id/59389a8f9ee9d399be35462a0f92541c+53", os.ModePerm)
1694                 os.MkdirAll(realTemp+"/keep1/by_id/59389a8f9ee9d399be35462a0f92541d+53/baz", os.ModePerm)
1695
1696                 rf, _ := os.Create(realTemp + "/keep1/by_id/59389a8f9ee9d399be35462a0f92541d+53/baz/quux")
1697                 rf.Write([]byte("bar"))
1698                 rf.Close()
1699
1700                 _, err := cr.SetupMounts()
1701                 c.Check(err, IsNil)
1702                 _, err = os.Stat(cr.HostOutputDir + "/foo")
1703                 c.Check(err, IsNil)
1704                 _, err = os.Stat(cr.HostOutputDir + "/bar/quux")
1705                 c.Check(err, IsNil)
1706                 os.RemoveAll(cr.ArvMountPoint)
1707                 cr.CleanupDirs()
1708                 checkEmpty()
1709         }
1710
1711         // Only mount points of kind 'collection' are allowed underneath output_dir mount point
1712         {
1713                 i = 0
1714                 cr.ArvMountPoint = ""
1715                 cr.Container.Mounts = make(map[string]arvados.Mount)
1716                 cr.Container.Mounts = map[string]arvados.Mount{
1717                         "/tmp":     {Kind: "tmp"},
1718                         "/tmp/foo": {Kind: "tmp"},
1719                 }
1720                 cr.Container.OutputPath = "/tmp"
1721
1722                 _, err := cr.SetupMounts()
1723                 c.Check(err, NotNil)
1724                 c.Check(err, ErrorMatches, `only mount points of kind 'collection', 'text' or 'json' are supported underneath the output_path.*`)
1725                 os.RemoveAll(cr.ArvMountPoint)
1726                 cr.CleanupDirs()
1727                 checkEmpty()
1728         }
1729
1730         // Only mount point of kind 'collection' is allowed for stdin
1731         {
1732                 i = 0
1733                 cr.ArvMountPoint = ""
1734                 cr.Container.Mounts = make(map[string]arvados.Mount)
1735                 cr.Container.Mounts = map[string]arvados.Mount{
1736                         "stdin": {Kind: "tmp"},
1737                 }
1738
1739                 _, err := cr.SetupMounts()
1740                 c.Check(err, NotNil)
1741                 c.Check(err, ErrorMatches, `unsupported mount kind 'tmp' for stdin.*`)
1742                 os.RemoveAll(cr.ArvMountPoint)
1743                 cr.CleanupDirs()
1744                 checkEmpty()
1745         }
1746
1747         // git_tree mounts
1748         {
1749                 i = 0
1750                 cr.ArvMountPoint = ""
1751                 git_client.InstallProtocol("https", git_http.NewClient(arvados.InsecureHTTPClient))
1752                 cr.token = arvadostest.ActiveToken
1753                 cr.Container.Mounts = make(map[string]arvados.Mount)
1754                 cr.Container.Mounts = map[string]arvados.Mount{
1755                         "/tip": {
1756                                 Kind:   "git_tree",
1757                                 UUID:   arvadostest.Repository2UUID,
1758                                 Commit: "fd3531f42995344f36c30b79f55f27b502f3d344",
1759                                 Path:   "/",
1760                         },
1761                         "/non-tip": {
1762                                 Kind:   "git_tree",
1763                                 UUID:   arvadostest.Repository2UUID,
1764                                 Commit: "5ebfab0522851df01fec11ec55a6d0f4877b542e",
1765                                 Path:   "/",
1766                         },
1767                 }
1768                 cr.Container.OutputPath = "/tmp"
1769
1770                 bindmounts, err := cr.SetupMounts()
1771                 c.Check(err, IsNil)
1772
1773                 for path, mount := range bindmounts {
1774                         c.Check(mount.ReadOnly, Equals, !cr.Container.Mounts[path].Writable, Commentf("%s %#v", path, mount))
1775                 }
1776
1777                 data, err := ioutil.ReadFile(bindmounts["/tip"].HostPath + "/dir1/dir2/file with mode 0644")
1778                 c.Check(err, IsNil)
1779                 c.Check(string(data), Equals, "\000\001\002\003")
1780                 _, err = ioutil.ReadFile(bindmounts["/tip"].HostPath + "/file only on testbranch")
1781                 c.Check(err, FitsTypeOf, &os.PathError{})
1782                 c.Check(os.IsNotExist(err), Equals, true)
1783
1784                 data, err = ioutil.ReadFile(bindmounts["/non-tip"].HostPath + "/dir1/dir2/file with mode 0644")
1785                 c.Check(err, IsNil)
1786                 c.Check(string(data), Equals, "\000\001\002\003")
1787                 data, err = ioutil.ReadFile(bindmounts["/non-tip"].HostPath + "/file only on testbranch")
1788                 c.Check(err, IsNil)
1789                 c.Check(string(data), Equals, "testfile\n")
1790
1791                 cr.CleanupDirs()
1792                 checkEmpty()
1793         }
1794 }
1795
1796 func (s *TestSuite) TestStdout(c *C) {
1797         helperRecord := `{
1798                 "command": ["/bin/sh", "-c", "echo $FROBIZ"],
1799                 "container_image": "` + arvadostest.DockerImage112PDH + `",
1800                 "cwd": "/bin",
1801                 "environment": {"FROBIZ": "bilbo"},
1802                 "mounts": {"/tmp": {"kind": "tmp"}, "stdout": {"kind": "file", "path": "/tmp/a/b/c.out"} },
1803                 "output_path": "/tmp",
1804                 "priority": 1,
1805                 "runtime_constraints": {},
1806                 "state": "Locked"
1807         }`
1808
1809         s.fullRunHelper(c, helperRecord, nil, func() int {
1810                 fmt.Fprintln(s.executor.created.Stdout, s.executor.created.Env["FROBIZ"])
1811                 return 0
1812         })
1813
1814         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
1815         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
1816         c.Check(s.runner.ContainerArvClient.(*ArvTestClient).CalledWith("collection.manifest_text", "./a/b 307372fa8fd5c146b22ae7a45b49bc31+6 0:6:c.out\n"), NotNil)
1817 }
1818
1819 // Used by the TestStdoutWithWrongPath*()
1820 func (s *TestSuite) stdoutErrorRunHelper(c *C, record string, fn func() int) (*ArvTestClient, *ContainerRunner, error) {
1821         err := json.Unmarshal([]byte(record), &s.api.Container)
1822         c.Assert(err, IsNil)
1823         s.executor.runFunc = fn
1824         s.runner.RunArvMount = (&ArvMountCmdLine{}).ArvMountTest
1825         s.runner.MkArvClient = func(token string) (IArvadosClient, IKeepClient, *arvados.Client, error) {
1826                 return s.api, &KeepTestClient{}, nil, nil
1827         }
1828         return s.api, s.runner, s.runner.Run()
1829 }
1830
1831 func (s *TestSuite) TestStdoutWithWrongPath(c *C) {
1832         _, _, err := s.stdoutErrorRunHelper(c, `{
1833     "mounts": {"/tmp": {"kind": "tmp"}, "stdout": {"kind": "file", "path":"/tmpa.out"} },
1834     "output_path": "/tmp",
1835     "state": "Locked"
1836 }`, func() int { return 0 })
1837         c.Check(err, ErrorMatches, ".*Stdout path does not start with OutputPath.*")
1838 }
1839
1840 func (s *TestSuite) TestStdoutWithWrongKindTmp(c *C) {
1841         _, _, err := s.stdoutErrorRunHelper(c, `{
1842     "mounts": {"/tmp": {"kind": "tmp"}, "stdout": {"kind": "tmp", "path":"/tmp/a.out"} },
1843     "output_path": "/tmp",
1844     "state": "Locked"
1845 }`, func() int { return 0 })
1846         c.Check(err, ErrorMatches, ".*unsupported mount kind 'tmp' for stdout.*")
1847 }
1848
1849 func (s *TestSuite) TestStdoutWithWrongKindCollection(c *C) {
1850         _, _, err := s.stdoutErrorRunHelper(c, `{
1851     "mounts": {"/tmp": {"kind": "tmp"}, "stdout": {"kind": "collection", "path":"/tmp/a.out"} },
1852     "output_path": "/tmp",
1853     "state": "Locked"
1854 }`, func() int { return 0 })
1855         c.Check(err, ErrorMatches, ".*unsupported mount kind 'collection' for stdout.*")
1856 }
1857
1858 func (s *TestSuite) TestFullRunWithAPI(c *C) {
1859         s.fullRunHelper(c, `{
1860     "command": ["/bin/sh", "-c", "true $ARVADOS_API_HOST"],
1861     "container_image": "`+arvadostest.DockerImage112PDH+`",
1862     "cwd": "/bin",
1863     "environment": {},
1864     "mounts": {"/tmp": {"kind": "tmp"} },
1865     "output_path": "/tmp",
1866     "priority": 1,
1867     "runtime_constraints": {"API": true},
1868     "state": "Locked"
1869 }`, nil, func() int {
1870                 c.Check(s.executor.created.Env["ARVADOS_API_HOST"], Equals, os.Getenv("ARVADOS_API_HOST"))
1871                 return 3
1872         })
1873         c.Check(s.api.CalledWith("container.exit_code", 3), NotNil)
1874         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
1875         c.Check(s.api.Logs["crunch-run"].String(), Matches, `(?ms).*status code 3\n.*`)
1876 }
1877
1878 func (s *TestSuite) TestFullRunSetOutput(c *C) {
1879         defer os.Setenv("ARVADOS_API_HOST", os.Getenv("ARVADOS_API_HOST"))
1880         os.Setenv("ARVADOS_API_HOST", "test.arvados.org")
1881         s.fullRunHelper(c, `{
1882     "command": ["/bin/sh", "-c", "echo $ARVADOS_API_HOST"],
1883     "container_image": "`+arvadostest.DockerImage112PDH+`",
1884     "cwd": "/bin",
1885     "environment": {},
1886     "mounts": {"/tmp": {"kind": "tmp"} },
1887     "output_path": "/tmp",
1888     "priority": 1,
1889     "runtime_constraints": {"API": true},
1890     "state": "Locked"
1891 }`, nil, func() int {
1892                 s.api.Container.Output = arvadostest.DockerImage112PDH
1893                 return 0
1894         })
1895
1896         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
1897         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
1898         c.Check(s.api.CalledWith("container.output", arvadostest.DockerImage112PDH), NotNil)
1899 }
1900
1901 func (s *TestSuite) TestArvMountRuntimeStatusWarning(c *C) {
1902         s.runner.RunArvMount = func([]string, string) (*exec.Cmd, error) {
1903                 os.Mkdir(s.runner.ArvMountPoint+"/by_id", 0666)
1904                 ioutil.WriteFile(s.runner.ArvMountPoint+"/by_id/README", nil, 0666)
1905                 return s.runner.ArvMountCmd([]string{"bash", "-c", "echo >&2 Test: Keep write error: I am a teapot; sleep 3"}, "")
1906         }
1907         s.executor.runFunc = func() int {
1908                 time.Sleep(time.Second)
1909                 return 137
1910         }
1911         record := `{
1912     "command": ["sleep", "1"],
1913     "container_image": "` + arvadostest.DockerImage112PDH + `",
1914     "cwd": "/bin",
1915     "environment": {},
1916     "mounts": {"/tmp": {"kind": "tmp"} },
1917     "output_path": "/tmp",
1918     "priority": 1,
1919     "runtime_constraints": {"API": true},
1920     "state": "Locked"
1921 }`
1922         err := json.Unmarshal([]byte(record), &s.api.Container)
1923         c.Assert(err, IsNil)
1924         err = s.runner.Run()
1925         c.Assert(err, IsNil)
1926         c.Check(s.api.CalledWith("container.exit_code", 137), NotNil)
1927         c.Check(s.api.CalledWith("container.runtime_status.warning", "arv-mount: Keep write error"), NotNil)
1928         c.Check(s.api.CalledWith("container.runtime_status.warningDetail", "Test: Keep write error: I am a teapot"), NotNil)
1929         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
1930         c.Check(s.api.Logs["crunch-run"].String(), Matches, `(?ms).*Container exited with status code 137 \(signal 9, SIGKILL\).*`)
1931 }
1932
1933 func (s *TestSuite) TestStdoutWithExcludeFromOutputMountPointUnderOutputDir(c *C) {
1934         helperRecord := `{
1935                 "command": ["/bin/sh", "-c", "echo $FROBIZ"],
1936                 "container_image": "` + arvadostest.DockerImage112PDH + `",
1937                 "cwd": "/bin",
1938                 "environment": {"FROBIZ": "bilbo"},
1939                 "mounts": {
1940         "/tmp": {"kind": "tmp"},
1941         "/tmp/foo": {"kind": "collection",
1942                      "portable_data_hash": "a3e8f74c6f101eae01fa08bfb4e49b3a+54",
1943                      "exclude_from_output": true
1944         },
1945         "stdout": {"kind": "file", "path": "/tmp/a/b/c.out"}
1946     },
1947                 "output_path": "/tmp",
1948                 "priority": 1,
1949                 "runtime_constraints": {},
1950                 "state": "Locked"
1951         }`
1952
1953         extraMounts := []string{"a3e8f74c6f101eae01fa08bfb4e49b3a+54"}
1954
1955         s.fullRunHelper(c, helperRecord, extraMounts, func() int {
1956                 fmt.Fprintln(s.executor.created.Stdout, s.executor.created.Env["FROBIZ"])
1957                 return 0
1958         })
1959
1960         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
1961         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
1962         c.Check(s.runner.ContainerArvClient.(*ArvTestClient).CalledWith("collection.manifest_text", "./a/b 307372fa8fd5c146b22ae7a45b49bc31+6 0:6:c.out\n"), NotNil)
1963 }
1964
1965 func (s *TestSuite) TestStdoutWithMultipleMountPointsUnderOutputDir(c *C) {
1966         helperRecord := `{
1967                 "command": ["/bin/sh", "-c", "echo $FROBIZ"],
1968                 "container_image": "` + arvadostest.DockerImage112PDH + `",
1969                 "cwd": "/bin",
1970                 "environment": {"FROBIZ": "bilbo"},
1971                 "mounts": {
1972         "/tmp": {"kind": "tmp"},
1973         "/tmp/foo/bar": {"kind": "collection", "portable_data_hash": "a0def87f80dd594d4675809e83bd4f15+367", "path":"/file2_in_main.txt"},
1974         "/tmp/foo/sub1": {"kind": "collection", "portable_data_hash": "a0def87f80dd594d4675809e83bd4f15+367", "path":"/subdir1"},
1975         "/tmp/foo/sub1file2": {"kind": "collection", "portable_data_hash": "a0def87f80dd594d4675809e83bd4f15+367", "path":"/subdir1/file2_in_subdir1.txt"},
1976         "/tmp/foo/baz/sub2file2": {"kind": "collection", "portable_data_hash": "a0def87f80dd594d4675809e83bd4f15+367", "path":"/subdir1/subdir2/file2_in_subdir2.txt"},
1977         "stdout": {"kind": "file", "path": "/tmp/a/b/c.out"}
1978     },
1979                 "output_path": "/tmp",
1980                 "priority": 1,
1981                 "runtime_constraints": {},
1982                 "state": "Locked",
1983                 "uuid": "zzzzz-dz642-202301130848001"
1984         }`
1985
1986         extraMounts := []string{
1987                 "a0def87f80dd594d4675809e83bd4f15+367/file2_in_main.txt",
1988                 "a0def87f80dd594d4675809e83bd4f15+367/subdir1/file2_in_subdir1.txt",
1989                 "a0def87f80dd594d4675809e83bd4f15+367/subdir1/subdir2/file2_in_subdir2.txt",
1990         }
1991
1992         api, _, realtemp := s.fullRunHelper(c, helperRecord, extraMounts, func() int {
1993                 fmt.Fprintln(s.executor.created.Stdout, s.executor.created.Env["FROBIZ"])
1994                 return 0
1995         })
1996
1997         c.Check(s.executor.created.BindMounts, DeepEquals, map[string]bindmount{
1998                 "/tmp":                   {realtemp + "/tmp1", false},
1999                 "/tmp/foo/bar":           {s.keepmount + "/by_id/a0def87f80dd594d4675809e83bd4f15+367/file2_in_main.txt", true},
2000                 "/tmp/foo/baz/sub2file2": {s.keepmount + "/by_id/a0def87f80dd594d4675809e83bd4f15+367/subdir1/subdir2/file2_in_subdir2.txt", true},
2001                 "/tmp/foo/sub1":          {s.keepmount + "/by_id/a0def87f80dd594d4675809e83bd4f15+367/subdir1", true},
2002                 "/tmp/foo/sub1file2":     {s.keepmount + "/by_id/a0def87f80dd594d4675809e83bd4f15+367/subdir1/file2_in_subdir1.txt", true},
2003         })
2004
2005         c.Check(api.CalledWith("container.exit_code", 0), NotNil)
2006         c.Check(api.CalledWith("container.state", "Complete"), NotNil)
2007         output_count := uint(0)
2008         for _, v := range s.runner.ContainerArvClient.(*ArvTestClient).Content {
2009                 if v["collection"] == nil {
2010                         continue
2011                 }
2012                 collection := v["collection"].(arvadosclient.Dict)
2013                 if collection["name"].(string) != "output for zzzzz-dz642-202301130848001" {
2014                         continue
2015                 }
2016                 c.Check(v["ensure_unique_name"], Equals, true)
2017                 c.Check(collection["manifest_text"].(string), Equals, `./a/b 307372fa8fd5c146b22ae7a45b49bc31+6 0:6:c.out
2018 ./foo 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396c73c0abcdefgh11234567890@569fa8c3 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396cabcdefghij6419876543234@569fa8c4 9:18:bar 36:18:sub1file2
2019 ./foo/baz 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396c73c0bcdefghijk544332211@569fa8c5 9:18:sub2file2
2020 ./foo/sub1 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396cabcdefghij6419876543234@569fa8c4 0:9:file1_in_subdir1.txt 9:18:file2_in_subdir1.txt
2021 ./foo/sub1/subdir2 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396c73c0bcdefghijk544332211@569fa8c5 0:9:file1_in_subdir2.txt 9:18:file2_in_subdir2.txt
2022 `)
2023                 output_count++
2024         }
2025         c.Check(output_count, Not(Equals), uint(0))
2026 }
2027
2028 func (s *TestSuite) TestStdoutWithMountPointsUnderOutputDirDenormalizedManifest(c *C) {
2029         helperRecord := `{
2030                 "command": ["/bin/sh", "-c", "echo $FROBIZ"],
2031                 "container_image": "` + arvadostest.DockerImage112PDH + `",
2032                 "cwd": "/bin",
2033                 "environment": {"FROBIZ": "bilbo"},
2034                 "mounts": {
2035         "/tmp": {"kind": "tmp"},
2036         "/tmp/foo/bar": {"kind": "collection", "portable_data_hash": "b0def87f80dd594d4675809e83bd4f15+367", "path": "/subdir1/file2_in_subdir1.txt"},
2037         "stdout": {"kind": "file", "path": "/tmp/a/b/c.out"}
2038     },
2039                 "output_path": "/tmp",
2040                 "priority": 1,
2041                 "runtime_constraints": {},
2042                 "state": "Locked",
2043                 "uuid": "zzzzz-dz642-202301130848002"
2044         }`
2045
2046         extraMounts := []string{
2047                 "b0def87f80dd594d4675809e83bd4f15+367/subdir1/file2_in_subdir1.txt",
2048         }
2049
2050         s.fullRunHelper(c, helperRecord, extraMounts, func() int {
2051                 fmt.Fprintln(s.executor.created.Stdout, s.executor.created.Env["FROBIZ"])
2052                 return 0
2053         })
2054
2055         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
2056         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
2057         output_count := uint(0)
2058         for _, v := range s.runner.ContainerArvClient.(*ArvTestClient).Content {
2059                 if v["collection"] == nil {
2060                         continue
2061                 }
2062                 collection := v["collection"].(arvadosclient.Dict)
2063                 if collection["name"].(string) != "output for zzzzz-dz642-202301130848002" {
2064                         continue
2065                 }
2066                 c.Check(collection["manifest_text"].(string), Equals, `./a/b 307372fa8fd5c146b22ae7a45b49bc31+6 0:6:c.out
2067 ./foo 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396c73c0abcdefgh11234567890@569fa8c3 10:17:bar
2068 `)
2069                 output_count++
2070         }
2071         c.Check(output_count, Not(Equals), uint(0))
2072 }
2073
2074 func (s *TestSuite) TestOutputError(c *C) {
2075         helperRecord := `{
2076                 "command": ["/bin/sh", "-c", "echo $FROBIZ"],
2077                 "container_image": "` + arvadostest.DockerImage112PDH + `",
2078                 "cwd": "/bin",
2079                 "environment": {"FROBIZ": "bilbo"},
2080                 "mounts": {
2081                         "/tmp": {"kind": "tmp"}
2082                 },
2083                 "output_path": "/tmp",
2084                 "priority": 1,
2085                 "runtime_constraints": {},
2086                 "state": "Locked"
2087         }`
2088         s.fullRunHelper(c, helperRecord, nil, func() int {
2089                 os.Symlink("/etc/hosts", s.runner.HostOutputDir+"/baz")
2090                 return 0
2091         })
2092
2093         c.Check(s.api.CalledWith("container.state", "Cancelled"), NotNil)
2094 }
2095
2096 func (s *TestSuite) TestStdinCollectionMountPoint(c *C) {
2097         helperRecord := `{
2098                 "command": ["/bin/sh", "-c", "echo $FROBIZ"],
2099                 "container_image": "` + arvadostest.DockerImage112PDH + `",
2100                 "cwd": "/bin",
2101                 "environment": {"FROBIZ": "bilbo"},
2102                 "mounts": {
2103         "/tmp": {"kind": "tmp"},
2104         "stdin": {"kind": "collection", "portable_data_hash": "b0def87f80dd594d4675809e83bd4f15+367", "path": "/file1_in_main.txt"},
2105         "stdout": {"kind": "file", "path": "/tmp/a/b/c.out"}
2106     },
2107                 "output_path": "/tmp",
2108                 "priority": 1,
2109                 "runtime_constraints": {},
2110                 "state": "Locked"
2111         }`
2112
2113         extraMounts := []string{
2114                 "b0def87f80dd594d4675809e83bd4f15+367/file1_in_main.txt",
2115         }
2116
2117         api, _, _ := s.fullRunHelper(c, helperRecord, extraMounts, func() int {
2118                 fmt.Fprintln(s.executor.created.Stdout, s.executor.created.Env["FROBIZ"])
2119                 return 0
2120         })
2121
2122         c.Check(api.CalledWith("container.exit_code", 0), NotNil)
2123         c.Check(api.CalledWith("container.state", "Complete"), NotNil)
2124         for _, v := range api.Content {
2125                 if v["collection"] != nil {
2126                         collection := v["collection"].(arvadosclient.Dict)
2127                         if strings.Index(collection["name"].(string), "output") == 0 {
2128                                 manifest := collection["manifest_text"].(string)
2129                                 c.Check(manifest, Equals, `./a/b 307372fa8fd5c146b22ae7a45b49bc31+6 0:6:c.out
2130 `)
2131                         }
2132                 }
2133         }
2134 }
2135
2136 func (s *TestSuite) TestStdinJsonMountPoint(c *C) {
2137         helperRecord := `{
2138                 "command": ["/bin/sh", "-c", "echo $FROBIZ"],
2139                 "container_image": "` + arvadostest.DockerImage112PDH + `",
2140                 "cwd": "/bin",
2141                 "environment": {"FROBIZ": "bilbo"},
2142                 "mounts": {
2143         "/tmp": {"kind": "tmp"},
2144         "stdin": {"kind": "json", "content": "foo"},
2145         "stdout": {"kind": "file", "path": "/tmp/a/b/c.out"}
2146     },
2147                 "output_path": "/tmp",
2148                 "priority": 1,
2149                 "runtime_constraints": {},
2150                 "state": "Locked"
2151         }`
2152
2153         api, _, _ := s.fullRunHelper(c, helperRecord, nil, func() int {
2154                 fmt.Fprintln(s.executor.created.Stdout, s.executor.created.Env["FROBIZ"])
2155                 return 0
2156         })
2157
2158         c.Check(api.CalledWith("container.exit_code", 0), NotNil)
2159         c.Check(api.CalledWith("container.state", "Complete"), NotNil)
2160         for _, v := range api.Content {
2161                 if v["collection"] != nil {
2162                         collection := v["collection"].(arvadosclient.Dict)
2163                         if strings.Index(collection["name"].(string), "output") == 0 {
2164                                 manifest := collection["manifest_text"].(string)
2165                                 c.Check(manifest, Equals, `./a/b 307372fa8fd5c146b22ae7a45b49bc31+6 0:6:c.out
2166 `)
2167                         }
2168                 }
2169         }
2170 }
2171
2172 func (s *TestSuite) TestStderrMount(c *C) {
2173         api, cr, _ := s.fullRunHelper(c, `{
2174     "command": ["/bin/sh", "-c", "echo hello;exit 1"],
2175     "container_image": "`+arvadostest.DockerImage112PDH+`",
2176     "cwd": ".",
2177     "environment": {},
2178     "mounts": {"/tmp": {"kind": "tmp"},
2179                "stdout": {"kind": "file", "path": "/tmp/a/out.txt"},
2180                "stderr": {"kind": "file", "path": "/tmp/b/err.txt"}},
2181     "output_path": "/tmp",
2182     "priority": 1,
2183     "runtime_constraints": {},
2184     "state": "Locked"
2185 }`, nil, func() int {
2186                 fmt.Fprintln(s.executor.created.Stdout, "hello")
2187                 fmt.Fprintln(s.executor.created.Stderr, "oops")
2188                 return 1
2189         })
2190
2191         final := api.CalledWith("container.state", "Complete")
2192         c.Assert(final, NotNil)
2193         c.Check(final["container"].(arvadosclient.Dict)["exit_code"], Equals, 1)
2194         c.Check(final["container"].(arvadosclient.Dict)["log"], NotNil)
2195
2196         c.Check(cr.ContainerArvClient.(*ArvTestClient).CalledWith("collection.manifest_text", "./a b1946ac92492d2347c6235b4d2611184+6 0:6:out.txt\n./b 38af5c54926b620264ab1501150cf189+5 0:5:err.txt\n"), NotNil)
2197 }
2198
2199 func (s *TestSuite) TestNumberRoundTrip(c *C) {
2200         s.api.callraw = true
2201         err := s.runner.fetchContainerRecord()
2202         c.Assert(err, IsNil)
2203         jsondata, err := json.Marshal(s.runner.Container.Mounts["/json"].Content)
2204         c.Logf("%#v", s.runner.Container)
2205         c.Check(err, IsNil)
2206         c.Check(string(jsondata), Equals, `{"number":123456789123456789}`)
2207 }
2208
2209 func (s *TestSuite) TestFullBrokenDocker(c *C) {
2210         nextState := ""
2211         for _, setup := range []func(){
2212                 func() {
2213                         c.Log("// waitErr = ocl runtime error")
2214                         s.executor.waitErr = errors.New(`Error response from daemon: oci runtime error: container_linux.go:247: starting container process caused "process_linux.go:359: container init caused \"rootfs_linux.go:54: mounting \\\"/tmp/keep453790790/by_id/99999999999999999999999999999999+99999/myGenome\\\" to rootfs \\\"/tmp/docker/overlay2/9999999999999999999999999999999999999999999999999999999999999999/merged\\\" at \\\"/tmp/docker/overlay2/9999999999999999999999999999999999999999999999999999999999999999/merged/keep/99999999999999999999999999999999+99999/myGenome\\\" caused \\\"no such file or directory\\\"\""`)
2215                         nextState = "Cancelled"
2216                 },
2217                 func() {
2218                         c.Log("// loadErr = cannot connect")
2219                         s.executor.loadErr = errors.New("Cannot connect to the Docker daemon at unix:///var/run/docker.sock. Is the docker daemon running?")
2220                         s.runner.brokenNodeHook = c.MkDir() + "/broken-node-hook"
2221                         err := ioutil.WriteFile(s.runner.brokenNodeHook, []byte("#!/bin/sh\nexec echo killme\n"), 0700)
2222                         c.Assert(err, IsNil)
2223                         nextState = "Queued"
2224                 },
2225         } {
2226                 s.SetUpTest(c)
2227                 setup()
2228                 s.fullRunHelper(c, `{
2229     "command": ["echo", "hello world"],
2230     "container_image": "`+arvadostest.DockerImage112PDH+`",
2231     "cwd": ".",
2232     "environment": {},
2233     "mounts": {"/tmp": {"kind": "tmp"} },
2234     "output_path": "/tmp",
2235     "priority": 1,
2236     "runtime_constraints": {},
2237     "state": "Locked"
2238 }`, nil, func() int { return 0 })
2239                 c.Check(s.api.CalledWith("container.state", nextState), NotNil)
2240                 c.Check(s.api.Logs["crunch-run"].String(), Matches, "(?ms).*unable to run containers.*")
2241                 if s.runner.brokenNodeHook != "" {
2242                         c.Check(s.api.Logs["crunch-run"].String(), Matches, "(?ms).*Running broken node hook.*")
2243                         c.Check(s.api.Logs["crunch-run"].String(), Matches, "(?ms).*killme.*")
2244                         c.Check(s.api.Logs["crunch-run"].String(), Not(Matches), "(?ms).*Writing /var/lock/crunch-run-broken to mark node as broken.*")
2245                 } else {
2246                         c.Check(s.api.Logs["crunch-run"].String(), Matches, "(?ms).*Writing /var/lock/crunch-run-broken to mark node as broken.*")
2247                 }
2248         }
2249 }
2250
2251 func (s *TestSuite) TestBadCommand(c *C) {
2252         for _, startError := range []string{
2253                 `panic: standard_init_linux.go:175: exec user process caused "no such file or directory"`,
2254                 `Error response from daemon: Cannot start container 41f26cbc43bcc1280f4323efb1830a394ba8660c9d1c2b564ba42bf7f7694845: [8] System error: no such file or directory`,
2255                 `Error response from daemon: Cannot start container 58099cd76c834f3dc2a4fb76c8028f049ae6d4fdf0ec373e1f2cfea030670c2d: [8] System error: exec: "foobar": executable file not found in $PATH`,
2256         } {
2257                 s.SetUpTest(c)
2258                 s.executor.startErr = errors.New(startError)
2259                 s.fullRunHelper(c, `{
2260     "command": ["echo", "hello world"],
2261     "container_image": "`+arvadostest.DockerImage112PDH+`",
2262     "cwd": ".",
2263     "environment": {},
2264     "mounts": {"/tmp": {"kind": "tmp"} },
2265     "output_path": "/tmp",
2266     "priority": 1,
2267     "runtime_constraints": {},
2268     "state": "Locked"
2269 }`, nil, func() int { return 0 })
2270                 c.Check(s.api.CalledWith("container.state", "Cancelled"), NotNil)
2271                 c.Check(s.api.Logs["crunch-run"].String(), Matches, "(?ms).*Possible causes:.*is missing.*")
2272         }
2273 }
2274
2275 func (s *TestSuite) TestSecretTextMountPoint(c *C) {
2276         helperRecord := `{
2277                 "command": ["true"],
2278                 "container_image": "` + arvadostest.DockerImage112PDH + `",
2279                 "cwd": "/bin",
2280                 "mounts": {
2281                     "/tmp": {"kind": "tmp"},
2282                     "/tmp/secret.conf": {"kind": "text", "content": "mypassword"}
2283                 },
2284                 "secret_mounts": {
2285                 },
2286                 "output_path": "/tmp",
2287                 "priority": 1,
2288                 "runtime_constraints": {},
2289                 "state": "Locked"
2290         }`
2291
2292         s.fullRunHelper(c, helperRecord, nil, func() int {
2293                 content, err := ioutil.ReadFile(s.runner.HostOutputDir + "/secret.conf")
2294                 c.Check(err, IsNil)
2295                 c.Check(string(content), Equals, "mypassword")
2296                 return 0
2297         })
2298
2299         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
2300         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
2301         c.Check(s.runner.ContainerArvClient.(*ArvTestClient).CalledWith("collection.manifest_text", ". 34819d7beeabb9260a5c854bc85b3e44+10 0:10:secret.conf\n"), NotNil)
2302         c.Check(s.runner.ContainerArvClient.(*ArvTestClient).CalledWith("collection.manifest_text", ""), IsNil)
2303
2304         // under secret mounts, not captured in output
2305         helperRecord = `{
2306                 "command": ["true"],
2307                 "container_image": "` + arvadostest.DockerImage112PDH + `",
2308                 "cwd": "/bin",
2309                 "mounts": {
2310                     "/tmp": {"kind": "tmp"}
2311                 },
2312                 "secret_mounts": {
2313                     "/tmp/secret.conf": {"kind": "text", "content": "mypassword"}
2314                 },
2315                 "output_path": "/tmp",
2316                 "priority": 1,
2317                 "runtime_constraints": {},
2318                 "state": "Locked"
2319         }`
2320
2321         s.SetUpTest(c)
2322         s.fullRunHelper(c, helperRecord, nil, func() int {
2323                 content, err := ioutil.ReadFile(s.runner.HostOutputDir + "/secret.conf")
2324                 c.Check(err, IsNil)
2325                 c.Check(string(content), Equals, "mypassword")
2326                 return 0
2327         })
2328
2329         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
2330         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
2331         c.Check(s.runner.ContainerArvClient.(*ArvTestClient).CalledWith("collection.manifest_text", ". 34819d7beeabb9260a5c854bc85b3e44+10 0:10:secret.conf\n"), IsNil)
2332         c.Check(s.runner.ContainerArvClient.(*ArvTestClient).CalledWith("collection.manifest_text", ""), NotNil)
2333
2334         // under secret mounts, output dir is a collection, not captured in output
2335         helperRecord = `{
2336                 "command": ["true"],
2337                 "container_image": "` + arvadostest.DockerImage112PDH + `",
2338                 "cwd": "/bin",
2339                 "mounts": {
2340                     "/tmp": {"kind": "collection", "writable": true}
2341                 },
2342                 "secret_mounts": {
2343                     "/tmp/secret.conf": {"kind": "text", "content": "mypassword"}
2344                 },
2345                 "output_path": "/tmp",
2346                 "priority": 1,
2347                 "runtime_constraints": {},
2348                 "state": "Locked"
2349         }`
2350
2351         s.SetUpTest(c)
2352         _, _, realtemp := s.fullRunHelper(c, helperRecord, nil, func() int {
2353                 // secret.conf should be provisioned as a separate
2354                 // bind mount, i.e., it should not appear in the
2355                 // (fake) fuse filesystem as viewed from the host.
2356                 content, err := ioutil.ReadFile(s.runner.HostOutputDir + "/secret.conf")
2357                 if !c.Check(errors.Is(err, os.ErrNotExist), Equals, true) {
2358                         c.Logf("secret.conf: content %q, err %#v", content, err)
2359                 }
2360                 err = ioutil.WriteFile(s.runner.HostOutputDir+"/.arvados#collection", []byte(`{"manifest_text":". acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:foo.txt\n"}`), 0700)
2361                 c.Check(err, IsNil)
2362                 return 0
2363         })
2364
2365         content, err := ioutil.ReadFile(realtemp + "/text1/mountdata.text")
2366         c.Check(err, IsNil)
2367         c.Check(string(content), Equals, "mypassword")
2368         c.Check(s.executor.created.BindMounts["/tmp/secret.conf"], DeepEquals, bindmount{realtemp + "/text1/mountdata.text", true})
2369         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
2370         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
2371         c.Check(s.runner.ContainerArvClient.(*ArvTestClient).CalledWith("collection.manifest_text", ". acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:foo.txt\n"), NotNil)
2372 }
2373
2374 func (s *TestSuite) TestCalculateCost(c *C) {
2375         defer func(s string) { lockdir = s }(lockdir)
2376         lockdir = c.MkDir()
2377         now := time.Now()
2378         cr := s.runner
2379         cr.costStartTime = now.Add(-time.Hour)
2380         var logbuf bytes.Buffer
2381         cr.CrunchLog.Immediate = log.New(&logbuf, "", 0)
2382
2383         // if there's no InstanceType env var, cost is calculated as 0
2384         os.Unsetenv("InstanceType")
2385         cost := cr.calculateCost(now)
2386         c.Check(cost, Equals, 0.0)
2387
2388         // with InstanceType env var and loadPrices() hasn't run (or
2389         // hasn't found any data), cost is calculated based on
2390         // InstanceType env var
2391         os.Setenv("InstanceType", `{"Price":1.2}`)
2392         defer os.Unsetenv("InstanceType")
2393         cost = cr.calculateCost(now)
2394         c.Check(cost, Equals, 1.2)
2395
2396         // first update tells us the spot price was $1/h until 30
2397         // minutes ago when it increased to $2/h
2398         j, err := json.Marshal([]cloud.InstancePrice{
2399                 {StartTime: now.Add(-4 * time.Hour), Price: 1.0},
2400                 {StartTime: now.Add(-time.Hour / 2), Price: 2.0},
2401         })
2402         c.Assert(err, IsNil)
2403         os.WriteFile(lockdir+"/"+pricesfile, j, 0777)
2404         cr.loadPrices()
2405         cost = cr.calculateCost(now)
2406         c.Check(cost, Equals, 1.5)
2407
2408         // next update (via --list + SIGUSR2) tells us the spot price
2409         // increased to $3/h 15 minutes ago
2410         j, err = json.Marshal([]cloud.InstancePrice{
2411                 {StartTime: now.Add(-time.Hour / 3), Price: 2.0}, // dup of -time.Hour/2 price
2412                 {StartTime: now.Add(-time.Hour / 4), Price: 3.0},
2413         })
2414         c.Assert(err, IsNil)
2415         os.WriteFile(lockdir+"/"+pricesfile, j, 0777)
2416         cr.loadPrices()
2417         cost = cr.calculateCost(now)
2418         c.Check(cost, Equals, 1.0/2+2.0/4+3.0/4)
2419
2420         cost = cr.calculateCost(now.Add(-time.Hour / 2))
2421         c.Check(cost, Equals, 0.5)
2422
2423         c.Logf("%s", logbuf.String())
2424         c.Check(logbuf.String(), Matches, `(?ms).*Instance price changed to 1\.00 at 20.* changed to 2\.00 .* changed to 3\.00 .*`)
2425         c.Check(logbuf.String(), Not(Matches), `(?ms).*changed to 2\.00 .* changed to 2\.00 .*`)
2426 }
2427
2428 func (s *TestSuite) TestSIGUSR2CostUpdate(c *C) {
2429         pid := os.Getpid()
2430         now := time.Now()
2431         pricesJSON, err := json.Marshal([]cloud.InstancePrice{
2432                 {StartTime: now.Add(-4 * time.Hour), Price: 2.4},
2433                 {StartTime: now.Add(-2 * time.Hour), Price: 2.6},
2434         })
2435         c.Assert(err, IsNil)
2436
2437         os.Setenv("InstanceType", `{"Price":2.2}`)
2438         defer os.Unsetenv("InstanceType")
2439         defer func(s string) { lockdir = s }(lockdir)
2440         lockdir = c.MkDir()
2441
2442         // We can't use s.api.CalledWith because timing differences will yield
2443         // different cost values across runs. getCostUpdate iterates over API
2444         // calls until it finds one that sets the cost, then writes that value
2445         // to the next index of costUpdates.
2446         deadline := now.Add(time.Second)
2447         costUpdates := make([]float64, 2)
2448         costIndex := 0
2449         apiIndex := 0
2450         getCostUpdate := func() {
2451                 for ; time.Now().Before(deadline); time.Sleep(time.Second / 10) {
2452                         for apiIndex < len(s.api.Content) {
2453                                 update := s.api.Content[apiIndex]
2454                                 apiIndex++
2455                                 var ok bool
2456                                 var cost float64
2457                                 if update, ok = update["container"].(arvadosclient.Dict); !ok {
2458                                         continue
2459                                 }
2460                                 if cost, ok = update["cost"].(float64); !ok {
2461                                         continue
2462                                 }
2463                                 c.Logf("API call #%d updates cost to %v", apiIndex-1, cost)
2464                                 costUpdates[costIndex] = cost
2465                                 costIndex++
2466                                 return
2467                         }
2468                 }
2469         }
2470
2471         s.fullRunHelper(c, `{
2472                 "command": ["true"],
2473                 "container_image": "`+arvadostest.DockerImage112PDH+`",
2474                 "cwd": ".",
2475                 "environment": {},
2476                 "mounts": {"/tmp": {"kind": "tmp"} },
2477                 "output_path": "/tmp",
2478                 "priority": 1,
2479                 "runtime_constraints": {},
2480                 "state": "Locked",
2481                 "uuid": "zzzzz-dz642-20230320101530a"
2482         }`, nil, func() int {
2483                 s.runner.costStartTime = now.Add(-3 * time.Hour)
2484                 err := syscall.Kill(pid, syscall.SIGUSR2)
2485                 c.Check(err, IsNil, Commentf("error sending first SIGUSR2 to runner"))
2486                 getCostUpdate()
2487
2488                 err = os.WriteFile(path.Join(lockdir, pricesfile), pricesJSON, 0o700)
2489                 c.Check(err, IsNil, Commentf("error writing JSON prices file"))
2490                 err = syscall.Kill(pid, syscall.SIGUSR2)
2491                 c.Check(err, IsNil, Commentf("error sending second SIGUSR2 to runner"))
2492                 getCostUpdate()
2493
2494                 return 0
2495         })
2496         // Comparing with format strings makes it easy to ignore minor variations
2497         // in cost across runs while keeping diagnostics pretty.
2498         c.Check(fmt.Sprintf("%.3f", costUpdates[0]), Equals, "6.600")
2499         c.Check(fmt.Sprintf("%.3f", costUpdates[1]), Equals, "7.600")
2500 }
2501
2502 type FakeProcess struct {
2503         cmdLine []string
2504 }
2505
2506 func (fp FakeProcess) CmdlineSlice() ([]string, error) {
2507         return fp.cmdLine, nil
2508 }