Merge branch '21541-arv-mount-keyerror-rebase' refs #21541
[arvados.git] / lib / crunchrun / crunchrun_test.go
1 // Copyright (C) The Arvados Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: AGPL-3.0
4
5 package crunchrun
6
7 import (
8         "bytes"
9         "context"
10         "crypto/md5"
11         "encoding/json"
12         "errors"
13         "fmt"
14         "io"
15         "io/ioutil"
16         "log"
17         "math/rand"
18         "net/http"
19         "net/http/httptest"
20         "net/http/httputil"
21         "net/url"
22         "os"
23         "os/exec"
24         "path"
25         "regexp"
26         "runtime/pprof"
27         "strings"
28         "sync"
29         "sync/atomic"
30         "syscall"
31         "testing"
32         "time"
33
34         "git.arvados.org/arvados.git/lib/cloud"
35         "git.arvados.org/arvados.git/lib/cmd"
36         "git.arvados.org/arvados.git/sdk/go/arvados"
37         "git.arvados.org/arvados.git/sdk/go/arvadosclient"
38         "git.arvados.org/arvados.git/sdk/go/arvadostest"
39         "git.arvados.org/arvados.git/sdk/go/manifest"
40
41         . "gopkg.in/check.v1"
42         git_client "gopkg.in/src-d/go-git.v4/plumbing/transport/client"
43         git_http "gopkg.in/src-d/go-git.v4/plumbing/transport/http"
44 )
45
// TestCrunchExec is the gocheck boilerplate entry point: it hands
// control to gocheck so that "go test" runs the suites registered with
// Suite() in this package.
func TestCrunchExec(t *testing.T) {
	TestingT(t)
}
50
// logLineStart is a regexp prefix that skips over any number of
// complete leading lines and then matches a timestamp of the form
// YYYY-MM-DDTHH:MM:SS.fffZ followed by a single space.
const logLineStart = `(?m)(.*\n)*\d{4}-\d\d-\d\dT\d\d:\d\d:\d\d\.\d+Z `

// Register TestSuite with gocheck.
var _ = Suite(&TestSuite{})
54
// TestSuite holds the fixtures shared by the tests in this file. Most
// fields are (re)initialized by SetUpTest before each test.
type TestSuite struct {
	// client is built from the environment in SetUpTest.
	client *arvados.Client
	// api is the stub API client handed to the runner.
	api *ArvTestClient
	// runner is the ContainerRunner under test.
	runner *ContainerRunner
	// executor is the stub container executor installed in runner.
	executor *stubExecutor
	// keepmount is a temporary directory standing in for the
	// arv-mount mount point.
	keepmount string
	// keepmountTmp lists the names given with --mount-tmp to the
	// stubbed RunArvMount.
	keepmountTmp []string
	// testDispatcherKeepClient is the keep client passed to
	// NewContainerRunner.
	testDispatcherKeepClient KeepTestClient
	// testContainerKeepClient is the keep client returned by the
	// stubbed MkArvClient.
	testContainerKeepClient KeepTestClient
	// debian12MemoryCurrent and debian12SwapCurrent are read from
	// the crunchstat debian12 fixture files in SetUpSuite.
	debian12MemoryCurrent int64
	debian12SwapCurrent   int64
}
67
68 func (s *TestSuite) SetUpSuite(c *C) {
69         buf, err := os.ReadFile("../crunchstat/testdata/debian12/sys/fs/cgroup/user.slice/user-1000.slice/session-4.scope/memory.current")
70         c.Assert(err, IsNil)
71         _, err = fmt.Sscanf(string(buf), "%d", &s.debian12MemoryCurrent)
72         c.Assert(err, IsNil)
73
74         buf, err = os.ReadFile("../crunchstat/testdata/debian12/sys/fs/cgroup/user.slice/user-1000.slice/session-4.scope/memory.swap.current")
75         c.Assert(err, IsNil)
76         _, err = fmt.Sscanf(string(buf), "%d", &s.debian12SwapCurrent)
77         c.Assert(err, IsNil)
78 }
79
80 func (s *TestSuite) SetUpTest(c *C) {
81         s.client = arvados.NewClientFromEnv()
82         s.executor = &stubExecutor{}
83         var err error
84         s.api = &ArvTestClient{}
85         s.runner, err = NewContainerRunner(s.client, s.api, &s.testDispatcherKeepClient, "zzzzz-zzzzz-zzzzzzzzzzzzzzz")
86         c.Assert(err, IsNil)
87         s.runner.executor = s.executor
88         s.runner.MkArvClient = func(token string) (IArvadosClient, IKeepClient, *arvados.Client, error) {
89                 return s.api, &s.testContainerKeepClient, s.client, nil
90         }
91         s.runner.RunArvMount = func(cmd []string, tok string) (*exec.Cmd, error) {
92                 s.runner.ArvMountPoint = s.keepmount
93                 for i, opt := range cmd {
94                         if opt == "--mount-tmp" {
95                                 err := os.Mkdir(s.keepmount+"/"+cmd[i+1], 0700)
96                                 if err != nil {
97                                         return nil, err
98                                 }
99                                 s.keepmountTmp = append(s.keepmountTmp, cmd[i+1])
100                         }
101                 }
102                 return nil, nil
103         }
104         s.keepmount = c.MkDir()
105         err = os.Mkdir(s.keepmount+"/by_id", 0755)
106         s.keepmountTmp = nil
107         c.Assert(err, IsNil)
108         err = os.Mkdir(s.keepmount+"/by_id/"+arvadostest.DockerImage112PDH, 0755)
109         c.Assert(err, IsNil)
110         err = ioutil.WriteFile(s.keepmount+"/by_id/"+arvadostest.DockerImage112PDH+"/"+arvadostest.DockerImage112Filename, []byte("#notarealtarball"), 0644)
111         err = os.Mkdir(s.keepmount+"/by_id/"+fakeInputCollectionPDH, 0755)
112         c.Assert(err, IsNil)
113         err = ioutil.WriteFile(s.keepmount+"/by_id/"+fakeInputCollectionPDH+"/input.json", []byte(`{"input":true}`), 0644)
114         c.Assert(err, IsNil)
115         s.runner.ArvMountPoint = s.keepmount
116 }
117
// ArvTestClient is a stub API client that records the requests it
// receives and answers from canned data.
type ArvTestClient struct {
	Total int64
	// Calls counts Create and Update invocations.
	Calls int
	// Content holds the parameters of each Create and Update call,
	// in order.
	Content []arvadosclient.Dict
	// Container is the record returned for container lookups by
	// Get and CallRaw.
	arvados.Container
	// secretMounts, if non-nil, is the JSON returned by Call for
	// the "secret_mounts" action.
	secretMounts []byte
	// Logs accumulates the text of created "logs" records, keyed
	// by event type.
	Logs map[string]*bytes.Buffer
	// Mutex is held by Create and Update while they modify the
	// fields above.
	sync.Mutex
	// WasSetRunning is set when a container update with state
	// "Running" is seen.
	WasSetRunning bool
	// callraw makes CallRaw answer container lookups with its
	// canned record instead of Container.
	callraw bool
}
129
// KeepTestClient is a stub keep client that records what it was asked
// to do instead of talking to keepstore.
type KeepTestClient struct {
	// Called is set by ManifestFileReader when it serves one of
	// its known files.
	Called bool
	// Content is the data of the most recent BlockWrite; Close
	// resets it to nil.
	Content []byte
	// StorageClasses is the value last given to SetStorageClasses.
	StorageClasses []string
}
135
// stubExecutor is a fake container executor. Tests set the *Err
// fields and runFunc to control what each method returns, and inspect
// the remaining fields to see what the runner asked for.
type stubExecutor struct {
	imageLoaded bool
	loaded      string        // tarball path passed to LoadImage
	loadErr     error         // returned by LoadImage
	exitCode    int
	createErr   error         // returned by Create
	created     containerSpec // spec passed to Create
	startErr    error         // returned by Start
	waitSleep   time.Duration
	waitErr     error         // returned by Wait
	stopErr     error         // returned by Stop
	stopped     bool          // set by Stop
	closed      bool          // set by Close
	runFunc     func() int    // run in a goroutine by Start; its result is sent to exit
	exit        chan int      // created by Start; read by Wait
}
152
153 func (e *stubExecutor) LoadImage(imageId string, tarball string, container arvados.Container, keepMount string,
154         containerClient *arvados.Client) error {
155         e.loaded = tarball
156         return e.loadErr
157 }
158 func (e *stubExecutor) Runtime() string                 { return "stub" }
159 func (e *stubExecutor) Version() string                 { return "stub " + cmd.Version.String() }
160 func (e *stubExecutor) Create(spec containerSpec) error { e.created = spec; return e.createErr }
161 func (e *stubExecutor) Start() error {
162         e.exit = make(chan int, 1)
163         go func() { e.exit <- e.runFunc() }()
164         return e.startErr
165 }
166 func (e *stubExecutor) Pid() int    { return 1115883 } // matches pid in ../crunchstat/testdata/debian12/proc/
167 func (e *stubExecutor) Stop() error { e.stopped = true; go func() { e.exit <- -1 }(); return e.stopErr }
168 func (e *stubExecutor) Close()      { e.closed = true }
169 func (e *stubExecutor) Wait(context.Context) (int, error) {
170         return <-e.exit, e.waitErr
171 }
172 func (e *stubExecutor) InjectCommand(ctx context.Context, _, _ string, _ bool, _ []string) (*exec.Cmd, error) {
173         return nil, errors.New("unimplemented")
174 }
175 func (e *stubExecutor) IPAddress() (string, error) { return "", errors.New("unimplemented") }
176
// fakeInputCollectionPDH names the by_id directory that SetUpTest
// populates with input.json.
const fakeInputCollectionPDH = "ffffffffaaaaaaaa88888888eeeeeeee+1234"

// hwManifest, hwPDH and hwImageID describe a collection holding a
// single image tarball named after hwImageID. The stub API clients
// return hwManifest when asked for hwPDH.
var hwManifest = ". 82ab40c24fc8df01798e57ba66795bb1+841216+Aa124ac75e5168396c73c0a18eda641a4f41791c0@569fa8c3 0:841216:9c31ee32b3d15268a0754e8edc74d4f815ee014b693bc5109058e431dd5caea7.tar\n"
var hwPDH = "a45557269dcb65a6b78f9ac061c0850b+120"
var hwImageID = "9c31ee32b3d15268a0754e8edc74d4f815ee014b693bc5109058e431dd5caea7"

// otherManifest is returned by the stub API clients when asked for
// otherPDH.
var otherManifest = ". 68a84f561b1d1708c6baff5e019a9ab3+46+Ae5d0af96944a3690becb1decdf60cc1c937f556d@5693216f 0:46:md5sum.txt\n"
var otherPDH = "a3e8f74c6f101eae01fa08bfb4e49b3a+54"

// normalizedManifestWithSubdirs has one stream per directory (".",
// "./subdir1", "./subdir1/subdir2").
var normalizedManifestWithSubdirs = `. 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396c73c0abcdefgh11234567890@569fa8c3 0:9:file1_in_main.txt 9:18:file2_in_main.txt 0:27:zzzzz-8i9sb-bcdefghijkdhvnk.log.txt
./subdir1 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396cabcdefghij6419876543234@569fa8c4 0:9:file1_in_subdir1.txt 9:18:file2_in_subdir1.txt
./subdir1/subdir2 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396c73c0bcdefghijk544332211@569fa8c5 0:9:file1_in_subdir2.txt 9:18:file2_in_subdir2.txt
`

var normalizedWithSubdirsPDH = "a0def87f80dd594d4675809e83bd4f15+367"

// denormalizedManifestWithSubdirs lists the subdir1 files inside the
// "." stream instead of in a stream of their own.
var denormalizedManifestWithSubdirs = ". 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396c73c0abcdefgh11234567890@569fa8c3 0:9:file1_in_main.txt 9:18:file2_in_main.txt 0:27:zzzzz-8i9sb-bcdefghijkdhvnk.log.txt 0:10:subdir1/file1_in_subdir1.txt 10:17:subdir1/file2_in_subdir1.txt\n"
var denormalizedWithSubdirsPDH = "b0def87f80dd594d4675809e83bd4f15+367"

// fakeAuthUUID and fakeAuthToken are returned by ArvTestClient.Call
// for the container "auth" action.
var fakeAuthUUID = "zzzzz-gj3su-55pqoyepgi2glem"
var fakeAuthToken = "a3ltuwzqcu2u4sc0q7yhpc2w7s00fdcqecg5d6e0u3pfohmbjt"
198
199 func (client *ArvTestClient) Create(resourceType string,
200         parameters arvadosclient.Dict,
201         output interface{}) error {
202
203         client.Mutex.Lock()
204         defer client.Mutex.Unlock()
205
206         client.Calls++
207         client.Content = append(client.Content, parameters)
208
209         if resourceType == "logs" {
210                 et := parameters["log"].(arvadosclient.Dict)["event_type"].(string)
211                 if client.Logs == nil {
212                         client.Logs = make(map[string]*bytes.Buffer)
213                 }
214                 if client.Logs[et] == nil {
215                         client.Logs[et] = &bytes.Buffer{}
216                 }
217                 client.Logs[et].Write([]byte(parameters["log"].(arvadosclient.Dict)["properties"].(map[string]string)["text"]))
218         }
219
220         if resourceType == "collections" && output != nil {
221                 mt := parameters["collection"].(arvadosclient.Dict)["manifest_text"].(string)
222                 md5sum := md5.Sum([]byte(mt))
223                 outmap := output.(*arvados.Collection)
224                 outmap.PortableDataHash = fmt.Sprintf("%x+%d", md5sum, len(mt))
225                 outmap.UUID = fmt.Sprintf("zzzzz-4zz18-%015x", md5sum[:7])
226         }
227
228         return nil
229 }
230
231 func (client *ArvTestClient) Call(method, resourceType, uuid, action string, parameters arvadosclient.Dict, output interface{}) error {
232         switch {
233         case method == "GET" && resourceType == "containers" && action == "auth":
234                 return json.Unmarshal([]byte(`{
235                         "kind": "arvados#api_client_authorization",
236                         "uuid": "`+fakeAuthUUID+`",
237                         "api_token": "`+fakeAuthToken+`"
238                         }`), output)
239         case method == "GET" && resourceType == "containers" && action == "secret_mounts":
240                 if client.secretMounts != nil {
241                         return json.Unmarshal(client.secretMounts, output)
242                 }
243                 return json.Unmarshal([]byte(`{"secret_mounts":{}}`), output)
244         default:
245                 return fmt.Errorf("Not found")
246         }
247 }
248
249 func (client *ArvTestClient) CallRaw(method, resourceType, uuid, action string,
250         parameters arvadosclient.Dict) (reader io.ReadCloser, err error) {
251         var j []byte
252         if method == "GET" && resourceType == "nodes" && uuid == "" && action == "" {
253                 j = []byte(`{
254                         "kind": "arvados#nodeList",
255                         "items": [{
256                                 "uuid": "zzzzz-7ekkf-2z3mc76g2q73aio",
257                                 "hostname": "compute2",
258                                 "properties": {"total_cpu_cores": 16}
259                         }]}`)
260         } else if method == "GET" && resourceType == "containers" && action == "" && !client.callraw {
261                 if uuid == "" {
262                         j, err = json.Marshal(map[string]interface{}{
263                                 "items": []interface{}{client.Container},
264                                 "kind":  "arvados#nodeList",
265                         })
266                 } else {
267                         j, err = json.Marshal(client.Container)
268                 }
269         } else {
270                 j = []byte(`{
271                         "command": ["sleep", "1"],
272                         "container_image": "` + arvadostest.DockerImage112PDH + `",
273                         "cwd": ".",
274                         "environment": {},
275                         "mounts": {"/tmp": {"kind": "tmp"}, "/json": {"kind": "json", "content": {"number": 123456789123456789}}},
276                         "output_path": "/tmp",
277                         "priority": 1,
278                         "runtime_constraints": {}
279                 }`)
280         }
281         return ioutil.NopCloser(bytes.NewReader(j)), err
282 }
283
284 func (client *ArvTestClient) Get(resourceType string, uuid string, parameters arvadosclient.Dict, output interface{}) error {
285         if resourceType == "collections" {
286                 if uuid == hwPDH {
287                         output.(*arvados.Collection).ManifestText = hwManifest
288                 } else if uuid == otherPDH {
289                         output.(*arvados.Collection).ManifestText = otherManifest
290                 } else if uuid == normalizedWithSubdirsPDH {
291                         output.(*arvados.Collection).ManifestText = normalizedManifestWithSubdirs
292                 } else if uuid == denormalizedWithSubdirsPDH {
293                         output.(*arvados.Collection).ManifestText = denormalizedManifestWithSubdirs
294                 }
295         }
296         if resourceType == "containers" {
297                 (*output.(*arvados.Container)) = client.Container
298         }
299         return nil
300 }
301
302 func (client *ArvTestClient) Update(resourceType string, uuid string, parameters arvadosclient.Dict, output interface{}) (err error) {
303         client.Mutex.Lock()
304         defer client.Mutex.Unlock()
305         client.Calls++
306         client.Content = append(client.Content, parameters)
307         if resourceType == "containers" {
308                 if parameters["container"].(arvadosclient.Dict)["state"] == "Running" {
309                         client.WasSetRunning = true
310                 }
311         } else if resourceType == "collections" && output != nil {
312                 mt := parameters["collection"].(arvadosclient.Dict)["manifest_text"].(string)
313                 output.(*arvados.Collection).UUID = uuid
314                 output.(*arvados.Collection).PortableDataHash = fmt.Sprintf("%x", md5.Sum([]byte(mt)))
315         }
316         return nil
317 }
318
// discoveryMap holds the canned discovery document values served by
// the stub API clients' Discovery methods.
var discoveryMap = map[string]interface{}{
	"defaultTrashLifetime":               float64(1209600),
	"crunchLimitLogBytesPerJob":          float64(67108864),
	"crunchLogThrottleBytes":             float64(65536),
	"crunchLogThrottlePeriod":            float64(60),
	"crunchLogThrottleLines":             float64(1024),
	"crunchLogPartialLineThrottlePeriod": float64(5),
	"crunchLogBytesPerEvent":             float64(4096),
	"crunchLogSecondsBetweenEvents":      float64(1),
}

// Discovery returns the discoveryMap entry for key (nil if the key is
// not present) and never returns an error.
func (client *ArvTestClient) Discovery(key string) (interface{}, error) {
	return discoveryMap[key], nil
}
333
334 // CalledWith returns the parameters from the first API call whose
335 // parameters match jpath/string. E.g., CalledWith(c, "foo.bar",
336 // "baz") returns parameters with parameters["foo"]["bar"]=="baz". If
337 // no call matches, it returns nil.
338 func (client *ArvTestClient) CalledWith(jpath string, expect interface{}) arvadosclient.Dict {
339 call:
340         for _, content := range client.Content {
341                 var v interface{} = content
342                 for _, k := range strings.Split(jpath, ".") {
343                         if dict, ok := v.(arvadosclient.Dict); !ok {
344                                 continue call
345                         } else {
346                                 v = dict[k]
347                         }
348                 }
349                 if v == expect {
350                         return content
351                 }
352         }
353         return nil
354 }
355
356 func (client *KeepTestClient) LocalLocator(locator string) (string, error) {
357         return locator, nil
358 }
359
360 func (client *KeepTestClient) BlockWrite(_ context.Context, opts arvados.BlockWriteOptions) (arvados.BlockWriteResponse, error) {
361         client.Content = opts.Data
362         return arvados.BlockWriteResponse{
363                 Locator: fmt.Sprintf("%x+%d", md5.Sum(opts.Data), len(opts.Data)),
364         }, nil
365 }
366
367 func (client *KeepTestClient) ReadAt(string, []byte, int) (int, error) {
368         return 0, errors.New("not implemented")
369 }
370
371 func (client *KeepTestClient) Close() {
372         client.Content = nil
373 }
374
375 func (client *KeepTestClient) SetStorageClasses(sc []string) {
376         client.StorageClasses = sc
377 }
378
// FileWrapper wraps an io.ReadCloser with a fixed size. Only Read,
// Close (from the embedded ReadCloser) and Size do anything; every
// other method defined below returns a "not implemented" error.
type FileWrapper struct {
	io.ReadCloser
	len int64 // value reported by Size
}

// Readdir is not implemented.
func (fw FileWrapper) Readdir(n int) ([]os.FileInfo, error) {
	return nil, errors.New("not implemented")
}

// Seek is not implemented.
func (fw FileWrapper) Seek(int64, int) (int64, error) {
	return 0, errors.New("not implemented")
}

// Size returns the length given when the wrapper was constructed.
func (fw FileWrapper) Size() int64 {
	return fw.len
}

// Stat is not implemented.
func (fw FileWrapper) Stat() (os.FileInfo, error) {
	return nil, errors.New("not implemented")
}

// Truncate is not implemented.
func (fw FileWrapper) Truncate(int64) error {
	return errors.New("not implemented")
}

// Write is not implemented.
func (fw FileWrapper) Write([]byte) (int, error) {
	return 0, errors.New("not implemented")
}

// Sync is not implemented.
func (fw FileWrapper) Sync() error {
	return errors.New("not implemented")
}

// Snapshot is not implemented.
func (fw FileWrapper) Snapshot() (*arvados.Subtree, error) {
	return nil, errors.New("not implemented")
}

// Splice is not implemented.
func (fw FileWrapper) Splice(*arvados.Subtree) error {
	return errors.New("not implemented")
}
419
420 func (client *KeepTestClient) ManifestFileReader(m manifest.Manifest, filename string) (arvados.File, error) {
421         if filename == hwImageID+".tar" {
422                 rdr := ioutil.NopCloser(&bytes.Buffer{})
423                 client.Called = true
424                 return FileWrapper{rdr, 1321984}, nil
425         } else if filename == "/file1_in_main.txt" {
426                 rdr := ioutil.NopCloser(strings.NewReader("foo"))
427                 client.Called = true
428                 return FileWrapper{rdr, 3}, nil
429         }
430         return nil, nil
431 }
432
// apiStubServer is an HTTP handler that answers a few API requests
// from canned data and proxies everything else to a real API server.
type apiStubServer struct {
	// server is the TLS test server this handler is attached to.
	server *httptest.Server
	// proxy forwards requests that the stub does not answer itself.
	proxy *httputil.ReverseProxy
	// intercept, if non-nil, sees each request first; returning
	// true means it has handled the request.
	intercept func(http.ResponseWriter, *http.Request) bool

	// container is served for GET requests on its own UUID.
	container arvados.Container
	// logs accumulates posted log text, keyed by event type.
	logs map[string]string
}
441
442 func apiStub() (*arvados.Client, *apiStubServer) {
443         client := arvados.NewClientFromEnv()
444         apistub := &apiStubServer{}
445         apistub.server = httptest.NewTLSServer(apistub)
446         apistub.proxy = httputil.NewSingleHostReverseProxy(&url.URL{Scheme: "https", Host: client.APIHost})
447         if client.Insecure {
448                 apistub.proxy.Transport = arvados.InsecureHTTPClient.Transport
449         }
450         client.APIHost = apistub.server.Listener.Addr().String()
451         return client, apistub
452 }
453
454 func (apistub *apiStubServer) ServeHTTP(w http.ResponseWriter, r *http.Request) {
455         if apistub.intercept != nil && apistub.intercept(w, r) {
456                 return
457         }
458         if r.Method == "POST" && r.URL.Path == "/arvados/v1/logs" {
459                 var body struct {
460                         Log struct {
461                                 EventType  string `json:"event_type"`
462                                 Properties struct {
463                                         Text string
464                                 }
465                         }
466                 }
467                 json.NewDecoder(r.Body).Decode(&body)
468                 apistub.logs[body.Log.EventType] += body.Log.Properties.Text
469                 return
470         }
471         if r.Method == "GET" && r.URL.Path == "/arvados/v1/collections/"+hwPDH {
472                 json.NewEncoder(w).Encode(arvados.Collection{ManifestText: hwManifest})
473                 return
474         }
475         if r.Method == "GET" && r.URL.Path == "/arvados/v1/collections/"+otherPDH {
476                 json.NewEncoder(w).Encode(arvados.Collection{ManifestText: otherManifest})
477                 return
478         }
479         if r.Method == "GET" && r.URL.Path == "/arvados/v1/collections/"+normalizedWithSubdirsPDH {
480                 json.NewEncoder(w).Encode(arvados.Collection{ManifestText: normalizedManifestWithSubdirs})
481                 return
482         }
483         if r.Method == "GET" && r.URL.Path == "/arvados/v1/collections/"+denormalizedWithSubdirsPDH {
484                 json.NewEncoder(w).Encode(arvados.Collection{ManifestText: denormalizedManifestWithSubdirs})
485                 return
486         }
487         if r.Method == "GET" && r.URL.Path == "/arvados/v1/containers/"+apistub.container.UUID {
488                 json.NewEncoder(w).Encode(apistub.container)
489                 return
490         }
491         apistub.proxy.ServeHTTP(w, r)
492 }
493
494 func (s *TestSuite) TestLoadImage(c *C) {
495         s.runner.Container.ContainerImage = arvadostest.DockerImage112PDH
496         s.runner.Container.Mounts = map[string]arvados.Mount{
497                 "/out": {Kind: "tmp", Writable: true},
498         }
499         s.runner.Container.OutputPath = "/out"
500
501         _, err := s.runner.SetupMounts()
502         c.Assert(err, IsNil)
503
504         imageID, err := s.runner.LoadImage()
505         c.Check(err, IsNil)
506         c.Check(s.executor.loaded, Matches, ".*"+regexp.QuoteMeta(arvadostest.DockerImage112Filename))
507         c.Check(imageID, Equals, strings.TrimSuffix(arvadostest.DockerImage112Filename, ".tar"))
508
509         s.runner.Container.ContainerImage = arvadostest.DockerImage112PDH
510         s.executor.imageLoaded = false
511         s.executor.loaded = ""
512         s.executor.loadErr = errors.New("bork")
513         imageID, err = s.runner.LoadImage()
514         c.Check(err, ErrorMatches, ".*bork")
515         c.Check(s.executor.loaded, Matches, ".*"+regexp.QuoteMeta(arvadostest.DockerImage112Filename))
516
517         s.runner.Container.ContainerImage = fakeInputCollectionPDH
518         s.executor.imageLoaded = false
519         s.executor.loaded = ""
520         s.executor.loadErr = nil
521         imageID, err = s.runner.LoadImage()
522         c.Check(err, ErrorMatches, "image collection does not include a \\.tar image file")
523         c.Check(s.executor.loaded, Equals, "")
524 }
525
526 type ArvErrorTestClient struct{}
527
528 func (ArvErrorTestClient) Create(resourceType string,
529         parameters arvadosclient.Dict,
530         output interface{}) error {
531         return nil
532 }
533
534 func (ArvErrorTestClient) Call(method, resourceType, uuid, action string, parameters arvadosclient.Dict, output interface{}) error {
535         if method == "GET" && resourceType == "containers" && action == "auth" {
536                 return nil
537         }
538         return errors.New("ArvError")
539 }
540
541 func (ArvErrorTestClient) CallRaw(method, resourceType, uuid, action string,
542         parameters arvadosclient.Dict) (reader io.ReadCloser, err error) {
543         return nil, errors.New("ArvError")
544 }
545
546 func (ArvErrorTestClient) Get(resourceType string, uuid string, parameters arvadosclient.Dict, output interface{}) error {
547         return errors.New("ArvError")
548 }
549
550 func (ArvErrorTestClient) Update(resourceType string, uuid string, parameters arvadosclient.Dict, output interface{}) (err error) {
551         return nil
552 }
553
554 func (ArvErrorTestClient) Discovery(key string) (interface{}, error) {
555         return discoveryMap[key], nil
556 }
557
// KeepErrorTestClient is a KeepTestClient whose ManifestFileReader,
// BlockWrite and LocalLocator always fail with "KeepError".
type KeepErrorTestClient struct {
	KeepTestClient
}

// ManifestFileReader always fails with "KeepError".
func (*KeepErrorTestClient) ManifestFileReader(manifest.Manifest, string) (arvados.File, error) {
	return nil, errors.New("KeepError")
}

// BlockWrite always fails with "KeepError".
func (*KeepErrorTestClient) BlockWrite(context.Context, arvados.BlockWriteOptions) (arvados.BlockWriteResponse, error) {
	return arvados.BlockWriteResponse{}, errors.New("KeepError")
}

// LocalLocator always fails with "KeepError".
func (*KeepErrorTestClient) LocalLocator(string) (string, error) {
	return "", errors.New("KeepError")
}

// KeepReadErrorTestClient is a KeepTestClient whose ReadAt fails with
// "KeepError" and whose ManifestFileReader hands out files that fail
// on Read and Seek.
type KeepReadErrorTestClient struct {
	KeepTestClient
}

// ReadAt always fails with "KeepError".
func (*KeepReadErrorTestClient) ReadAt(string, []byte, int) (int, error) {
	return 0, errors.New("KeepError")
}

// ErrorReader is a file whose Read and Seek always fail with
// "ErrorReader".
type ErrorReader struct {
	FileWrapper
}

// Read always fails with "ErrorReader".
func (ErrorReader) Read(p []byte) (n int, err error) {
	return 0, errors.New("ErrorReader")
}

// Seek always fails with "ErrorReader".
func (ErrorReader) Seek(int64, int) (int64, error) {
	return 0, errors.New("ErrorReader")
}

// ManifestFileReader returns an ErrorReader regardless of the
// requested file.
func (KeepReadErrorTestClient) ManifestFileReader(m manifest.Manifest, filename string) (arvados.File, error) {
	return ErrorReader{}, nil
}
597
// ClosableBuffer is a bytes.Buffer with a no-op Close method, so that
// a pointer to it can be used as an io.WriteCloser.
type ClosableBuffer struct {
	bytes.Buffer
}

// Close does nothing and returns nil.
func (*ClosableBuffer) Close() error {
	return nil
}

// TestLogs captures what is written to the "stdout" and "stderr" log
// writers handed out by NewTestLoggingWriter.
type TestLogs struct {
	Stdout ClosableBuffer
	Stderr ClosableBuffer
}
610
611 func (tl *TestLogs) NewTestLoggingWriter(logstr string) (io.WriteCloser, error) {
612         if logstr == "stdout" {
613                 return &tl.Stdout, nil
614         }
615         if logstr == "stderr" {
616                 return &tl.Stderr, nil
617         }
618         return nil, errors.New("???")
619 }
620
// dockerLog frames msg the way Docker's multiplexed log stream does:
// an 8-byte header followed by the payload. Header byte 0 is the
// stream identifier fd, bytes 1-3 are zero, and bytes 4-7 hold the
// payload length as a big-endian 32-bit integer.
//
// The length used to be stored in byte 7 only, which silently
// truncated it for payloads of 256 bytes or more. Output for shorter
// payloads is unchanged.
func dockerLog(fd byte, msg string) []byte {
	payload := []byte(msg)
	size := uint32(len(payload))
	frame := make([]byte, 8+len(payload))
	frame[0] = fd
	frame[4] = byte(size >> 24)
	frame[5] = byte(size >> 16)
	frame[6] = byte(size >> 8)
	frame[7] = byte(size)
	copy(frame[8:], payload)
	return frame
}
629
630 func (s *TestSuite) TestRunContainer(c *C) {
631         s.executor.runFunc = func() int {
632                 fmt.Fprintf(s.executor.created.Stdout, "Hello world\n")
633                 return 0
634         }
635
636         var logs TestLogs
637         s.runner.NewLogWriter = logs.NewTestLoggingWriter
638         s.runner.Container.ContainerImage = arvadostest.DockerImage112PDH
639         s.runner.Container.Command = []string{"./hw"}
640         s.runner.Container.OutputStorageClasses = []string{"default"}
641
642         imageID, err := s.runner.LoadImage()
643         c.Assert(err, IsNil)
644
645         err = s.runner.CreateContainer(imageID, nil)
646         c.Assert(err, IsNil)
647
648         err = s.runner.StartContainer()
649         c.Assert(err, IsNil)
650
651         err = s.runner.WaitFinish()
652         c.Assert(err, IsNil)
653
654         c.Check(logs.Stdout.String(), Matches, ".*Hello world\n")
655         c.Check(logs.Stderr.String(), Equals, "")
656 }
657
658 func (s *TestSuite) TestCommitLogs(c *C) {
659         api := &ArvTestClient{}
660         kc := &KeepTestClient{}
661         defer kc.Close()
662         cr, err := NewContainerRunner(s.client, api, kc, "zzzzz-zzzzz-zzzzzzzzzzzzzzz")
663         c.Assert(err, IsNil)
664         cr.CrunchLog.Timestamper = (&TestTimestamper{}).Timestamp
665
666         cr.CrunchLog.Print("Hello world!")
667         cr.CrunchLog.Print("Goodbye")
668         cr.finalState = "Complete"
669
670         err = cr.CommitLogs()
671         c.Check(err, IsNil)
672
673         c.Check(api.Calls, Equals, 2)
674         c.Check(api.Content[1]["ensure_unique_name"], Equals, true)
675         c.Check(api.Content[1]["collection"].(arvadosclient.Dict)["name"], Equals, "logs for zzzzz-zzzzz-zzzzzzzzzzzzzzz")
676         c.Check(api.Content[1]["collection"].(arvadosclient.Dict)["manifest_text"], Equals, ". 744b2e4553123b02fa7b452ec5c18993+123 0:123:crunch-run.txt\n")
677         c.Check(*cr.LogsPDH, Equals, "63da7bdacf08c40f604daad80c261e9a+60")
678 }
679
680 func (s *TestSuite) TestUpdateContainerRunning(c *C) {
681         api := &ArvTestClient{}
682         kc := &KeepTestClient{}
683         defer kc.Close()
684         cr, err := NewContainerRunner(s.client, api, kc, "zzzzz-zzzzz-zzzzzzzzzzzzzzz")
685         c.Assert(err, IsNil)
686
687         err = cr.UpdateContainerRunning("")
688         c.Check(err, IsNil)
689
690         c.Check(api.Content[0]["container"].(arvadosclient.Dict)["state"], Equals, "Running")
691 }
692
693 func (s *TestSuite) TestUpdateContainerComplete(c *C) {
694         api := &ArvTestClient{}
695         kc := &KeepTestClient{}
696         defer kc.Close()
697         cr, err := NewContainerRunner(s.client, api, kc, "zzzzz-zzzzz-zzzzzzzzzzzzzzz")
698         c.Assert(err, IsNil)
699
700         cr.LogsPDH = new(string)
701         *cr.LogsPDH = "d3a229d2fe3690c2c3e75a71a153c6a3+60"
702
703         cr.ExitCode = new(int)
704         *cr.ExitCode = 42
705         cr.finalState = "Complete"
706
707         err = cr.UpdateContainerFinal()
708         c.Check(err, IsNil)
709
710         c.Check(api.Content[0]["container"].(arvadosclient.Dict)["log"], Equals, *cr.LogsPDH)
711         c.Check(api.Content[0]["container"].(arvadosclient.Dict)["exit_code"], Equals, *cr.ExitCode)
712         c.Check(api.Content[0]["container"].(arvadosclient.Dict)["state"], Equals, "Complete")
713 }
714
715 func (s *TestSuite) TestUpdateContainerCancelled(c *C) {
716         api := &ArvTestClient{}
717         kc := &KeepTestClient{}
718         defer kc.Close()
719         cr, err := NewContainerRunner(s.client, api, kc, "zzzzz-zzzzz-zzzzzzzzzzzzzzz")
720         c.Assert(err, IsNil)
721         cr.cCancelled = true
722         cr.finalState = "Cancelled"
723
724         err = cr.UpdateContainerFinal()
725         c.Check(err, IsNil)
726
727         c.Check(api.Content[0]["container"].(arvadosclient.Dict)["log"], IsNil)
728         c.Check(api.Content[0]["container"].(arvadosclient.Dict)["exit_code"], IsNil)
729         c.Check(api.Content[0]["container"].(arvadosclient.Dict)["state"], Equals, "Cancelled")
730 }
731
732 // Used by the TestFullRun*() test below to DRY up boilerplate setup to do full
733 // dress rehearsal of the Run() function, starting from a JSON container record.
734 func (s *TestSuite) fullRunHelper(c *C, record string, extraMounts []string, fn func() int) (*ArvTestClient, *ContainerRunner, string) {
735         err := json.Unmarshal([]byte(record), &s.api.Container)
736         c.Assert(err, IsNil)
737         initialState := s.api.Container.State
738
739         var sm struct {
740                 SecretMounts map[string]arvados.Mount `json:"secret_mounts"`
741         }
742         err = json.Unmarshal([]byte(record), &sm)
743         c.Check(err, IsNil)
744         secretMounts, err := json.Marshal(sm)
745         c.Assert(err, IsNil)
746         c.Logf("SecretMounts decoded %v json %q", sm, secretMounts)
747
748         s.executor.runFunc = fn
749
750         s.runner.statInterval = 100 * time.Millisecond
751         s.runner.containerWatchdogInterval = time.Second
752
753         realTemp := c.MkDir()
754         tempcount := 0
755         s.runner.MkTempDir = func(_, prefix string) (string, error) {
756                 tempcount++
757                 d := fmt.Sprintf("%s/%s%d", realTemp, prefix, tempcount)
758                 err := os.Mkdir(d, os.ModePerm)
759                 if err != nil && strings.Contains(err.Error(), ": file exists") {
760                         // Test case must have pre-populated the tempdir
761                         err = nil
762                 }
763                 return d, err
764         }
765         client, _ := apiStub()
766         s.runner.MkArvClient = func(token string) (IArvadosClient, IKeepClient, *arvados.Client, error) {
767                 return &ArvTestClient{secretMounts: secretMounts}, &s.testContainerKeepClient, client, nil
768         }
769
770         if extraMounts != nil && len(extraMounts) > 0 {
771                 err := s.runner.SetupArvMountPoint("keep")
772                 c.Check(err, IsNil)
773
774                 for _, m := range extraMounts {
775                         os.MkdirAll(s.runner.ArvMountPoint+"/by_id/"+m, os.ModePerm)
776                 }
777         }
778
779         err = s.runner.Run()
780         if s.api.CalledWith("container.state", "Complete") != nil {
781                 c.Check(err, IsNil)
782         }
783         if s.executor.loadErr == nil && s.executor.createErr == nil && initialState != "Running" {
784                 c.Check(s.api.WasSetRunning, Equals, true)
785                 var lastupdate arvadosclient.Dict
786                 for _, content := range s.api.Content {
787                         if content["container"] != nil {
788                                 lastupdate = content["container"].(arvadosclient.Dict)
789                         }
790                 }
791                 if lastupdate["log"] == nil {
792                         c.Errorf("no container update with non-nil log -- updates were: %v", s.api.Content)
793                 }
794         }
795
796         if err != nil {
797                 for k, v := range s.api.Logs {
798                         c.Log(k)
799                         c.Log(v.String())
800                 }
801         }
802
803         return s.api, s.runner, realTemp
804 }
805
806 func (s *TestSuite) TestFullRunHello(c *C) {
807         s.runner.enableMemoryLimit = true
808         s.runner.networkMode = "default"
809         s.fullRunHelper(c, `{
810     "command": ["echo", "hello world"],
811     "container_image": "`+arvadostest.DockerImage112PDH+`",
812     "cwd": ".",
813     "environment": {"foo":"bar","baz":"waz"},
814     "mounts": {"/tmp": {"kind": "tmp"} },
815     "output_path": "/tmp",
816     "priority": 1,
817     "runtime_constraints": {"vcpus":1,"ram":1000000},
818     "state": "Locked",
819     "output_storage_classes": ["default"]
820 }`, nil, func() int {
821                 c.Check(s.executor.created.Command, DeepEquals, []string{"echo", "hello world"})
822                 c.Check(s.executor.created.Image, Equals, "sha256:d8309758b8fe2c81034ffc8a10c36460b77db7bc5e7b448c4e5b684f9d95a678")
823                 c.Check(s.executor.created.Env, DeepEquals, map[string]string{"foo": "bar", "baz": "waz"})
824                 c.Check(s.executor.created.VCPUs, Equals, 1)
825                 c.Check(s.executor.created.RAM, Equals, int64(1000000))
826                 c.Check(s.executor.created.NetworkMode, Equals, "default")
827                 c.Check(s.executor.created.EnableNetwork, Equals, false)
828                 c.Check(s.executor.created.CUDADeviceCount, Equals, 0)
829                 fmt.Fprintln(s.executor.created.Stdout, "hello world")
830                 return 0
831         })
832
833         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
834         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
835         c.Check(s.api.Logs["stdout"].String(), Matches, ".*hello world\n")
836         c.Check(s.testDispatcherKeepClient.StorageClasses, DeepEquals, []string{"default"})
837         c.Check(s.testContainerKeepClient.StorageClasses, DeepEquals, []string{"default"})
838 }
839
840 func (s *TestSuite) TestRunAlreadyRunning(c *C) {
841         var ran bool
842         s.fullRunHelper(c, `{
843     "command": ["sleep", "3"],
844     "container_image": "`+arvadostest.DockerImage112PDH+`",
845     "cwd": ".",
846     "environment": {},
847     "mounts": {"/tmp": {"kind": "tmp"} },
848     "output_path": "/tmp",
849     "priority": 1,
850     "runtime_constraints": {},
851     "scheduling_parameters":{"max_run_time": 1},
852     "state": "Running"
853 }`, nil, func() int {
854                 ran = true
855                 return 2
856         })
857         c.Check(s.api.CalledWith("container.state", "Cancelled"), IsNil)
858         c.Check(s.api.CalledWith("container.state", "Complete"), IsNil)
859         c.Check(ran, Equals, false)
860 }
861
862 func ec2MetadataServerStub(c *C, token *string, failureRate float64, stoptime *atomic.Value) *httptest.Server {
863         failedOnce := false
864         return httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
865                 if !failedOnce || rand.Float64() < failureRate {
866                         w.WriteHeader(http.StatusServiceUnavailable)
867                         failedOnce = true
868                         return
869                 }
870                 switch r.URL.Path {
871                 case "/latest/api/token":
872                         fmt.Fprintln(w, *token)
873                 case "/latest/meta-data/spot/instance-action":
874                         if r.Header.Get("X-aws-ec2-metadata-token") != *token {
875                                 w.WriteHeader(http.StatusUnauthorized)
876                         } else if t, _ := stoptime.Load().(time.Time); t.IsZero() {
877                                 w.WriteHeader(http.StatusNotFound)
878                         } else {
879                                 fmt.Fprintf(w, `{"action":"stop","time":"%s"}`, t.Format(time.RFC3339))
880                         }
881                 default:
882                         w.WriteHeader(http.StatusNotFound)
883                 }
884         }))
885 }
886
887 func (s *TestSuite) TestSpotInterruptionNotice(c *C) {
888         s.testSpotInterruptionNotice(c, 0.1)
889 }
890
891 func (s *TestSuite) TestSpotInterruptionNoticeNotAvailable(c *C) {
892         s.testSpotInterruptionNotice(c, 1)
893 }
894
895 func (s *TestSuite) testSpotInterruptionNotice(c *C, failureRate float64) {
896         var stoptime atomic.Value
897         token := "fake-ec2-metadata-token"
898         stub := ec2MetadataServerStub(c, &token, failureRate, &stoptime)
899         defer stub.Close()
900
901         defer func(i time.Duration, u string) {
902                 spotInterruptionCheckInterval = i
903                 ec2MetadataBaseURL = u
904         }(spotInterruptionCheckInterval, ec2MetadataBaseURL)
905         spotInterruptionCheckInterval = time.Second / 8
906         ec2MetadataBaseURL = stub.URL
907
908         go s.runner.checkSpotInterruptionNotices()
909         s.fullRunHelper(c, `{
910     "command": ["sleep", "3"],
911     "container_image": "`+arvadostest.DockerImage112PDH+`",
912     "cwd": ".",
913     "environment": {},
914     "mounts": {"/tmp": {"kind": "tmp"} },
915     "output_path": "/tmp",
916     "priority": 1,
917     "runtime_constraints": {},
918     "state": "Locked"
919 }`, nil, func() int {
920                 time.Sleep(time.Second)
921                 stoptime.Store(time.Now().Add(time.Minute).UTC())
922                 token = "different-fake-ec2-metadata-token"
923                 time.Sleep(time.Second)
924                 return 0
925         })
926         c.Check(s.api.Logs["crunch-run"].String(), Matches, `(?ms).*Checking for spot interruptions every 125ms using instance metadata at http://.*`)
927         c.Check(s.api.Logs["crunch-run"].String(), Matches, `(?ms).*Error checking spot interruptions: 503 Service Unavailable.*`)
928         if failureRate == 1 {
929                 c.Check(s.api.Logs["crunch-run"].String(), Matches, `(?ms).*Giving up on checking spot interruptions after too many consecutive failures.*`)
930         } else {
931                 text := `Cloud provider scheduled instance stop at ` + stoptime.Load().(time.Time).Format(time.RFC3339)
932                 c.Check(s.api.Logs["crunch-run"].String(), Matches, `(?ms).*`+text+`.*`)
933                 c.Check(s.api.CalledWith("container.runtime_status.warning", "preemption notice"), NotNil)
934                 c.Check(s.api.CalledWith("container.runtime_status.warningDetail", text), NotNil)
935                 c.Check(s.api.CalledWith("container.runtime_status.preemptionNotice", text), NotNil)
936         }
937 }
938
939 func (s *TestSuite) TestRunTimeExceeded(c *C) {
940         s.fullRunHelper(c, `{
941     "command": ["sleep", "3"],
942     "container_image": "`+arvadostest.DockerImage112PDH+`",
943     "cwd": ".",
944     "environment": {},
945     "mounts": {"/tmp": {"kind": "tmp"} },
946     "output_path": "/tmp",
947     "priority": 1,
948     "runtime_constraints": {},
949     "scheduling_parameters":{"max_run_time": 1},
950     "state": "Locked"
951 }`, nil, func() int {
952                 time.Sleep(3 * time.Second)
953                 return 0
954         })
955
956         c.Check(s.api.CalledWith("container.state", "Cancelled"), NotNil)
957         c.Check(s.api.Logs["crunch-run"].String(), Matches, "(?ms).*maximum run time exceeded.*")
958 }
959
960 func (s *TestSuite) TestContainerWaitFails(c *C) {
961         s.fullRunHelper(c, `{
962     "command": ["sleep", "3"],
963     "container_image": "`+arvadostest.DockerImage112PDH+`",
964     "cwd": ".",
965     "mounts": {"/tmp": {"kind": "tmp"} },
966     "output_path": "/tmp",
967     "priority": 1,
968     "state": "Locked"
969 }`, nil, func() int {
970                 s.executor.waitErr = errors.New("Container is not running")
971                 return 0
972         })
973
974         c.Check(s.api.CalledWith("container.state", "Cancelled"), NotNil)
975         c.Check(s.api.Logs["crunch-run"].String(), Matches, "(?ms).*Container is not running.*")
976 }
977
978 func (s *TestSuite) TestCrunchstat(c *C) {
979         s.runner.crunchstatFakeFS = os.DirFS("../crunchstat/testdata/debian12")
980         s.fullRunHelper(c, `{
981                 "command": ["sleep", "1"],
982                 "container_image": "`+arvadostest.DockerImage112PDH+`",
983                 "cwd": ".",
984                 "environment": {},
985                 "mounts": {"/tmp": {"kind": "tmp"} },
986                 "output_path": "/tmp",
987                 "priority": 1,
988                 "runtime_constraints": {},
989                 "state": "Locked"
990         }`, nil, func() int {
991                 time.Sleep(time.Second)
992                 return 0
993         })
994
995         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
996         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
997
998         c.Assert(s.api.Logs["crunchstat"], NotNil)
999         c.Check(s.api.Logs["crunchstat"].String(), Matches, `(?ms).*mem \d+ swap \d+ pgmajfault \d+ rss.*`)
1000
1001         // Check that we called (*crunchstat.Reporter)Stop().
1002         c.Check(s.api.Logs["crunch-run"].String(), Matches, `(?ms).*Maximum crunch-run memory rss usage was \d+ bytes\n.*`)
1003 }
1004
1005 func (s *TestSuite) TestNodeInfoLog(c *C) {
1006         os.Setenv("SLURMD_NODENAME", "compute2")
1007         s.fullRunHelper(c, `{
1008                 "command": ["sleep", "1"],
1009                 "container_image": "`+arvadostest.DockerImage112PDH+`",
1010                 "cwd": ".",
1011                 "environment": {},
1012                 "mounts": {"/tmp": {"kind": "tmp"} },
1013                 "output_path": "/tmp",
1014                 "priority": 1,
1015                 "runtime_constraints": {},
1016                 "state": "Locked"
1017         }`, nil, func() int {
1018                 time.Sleep(time.Second)
1019                 return 0
1020         })
1021
1022         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
1023         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
1024
1025         c.Assert(s.api.Logs["node"], NotNil)
1026         json := s.api.Logs["node"].String()
1027         c.Check(json, Matches, `(?ms).*"uuid": *"zzzzz-7ekkf-2z3mc76g2q73aio".*`)
1028         c.Check(json, Matches, `(?ms).*"total_cpu_cores": *16.*`)
1029         c.Check(json, Not(Matches), `(?ms).*"info":.*`)
1030
1031         c.Assert(s.api.Logs["node-info"], NotNil)
1032         json = s.api.Logs["node-info"].String()
1033         c.Check(json, Matches, `(?ms).*Host Information.*`)
1034         c.Check(json, Matches, `(?ms).*CPU Information.*`)
1035         c.Check(json, Matches, `(?ms).*Memory Information.*`)
1036         c.Check(json, Matches, `(?ms).*Disk Space.*`)
1037         c.Check(json, Matches, `(?ms).*Disk INodes.*`)
1038 }
1039
1040 func (s *TestSuite) TestLogVersionAndRuntime(c *C) {
1041         s.fullRunHelper(c, `{
1042                 "command": ["sleep", "1"],
1043                 "container_image": "`+arvadostest.DockerImage112PDH+`",
1044                 "cwd": ".",
1045                 "environment": {},
1046                 "mounts": {"/tmp": {"kind": "tmp"} },
1047                 "output_path": "/tmp",
1048                 "priority": 1,
1049                 "runtime_constraints": {},
1050                 "state": "Locked"
1051         }`, nil, func() int {
1052                 return 0
1053         })
1054
1055         c.Assert(s.api.Logs["crunch-run"], NotNil)
1056         c.Check(s.api.Logs["crunch-run"].String(), Matches, `(?ms).*crunch-run \S+ \(go\S+\) start.*`)
1057         c.Check(s.api.Logs["crunch-run"].String(), Matches, `(?ms).*crunch-run process has uid=\d+\(.+\) gid=\d+\(.+\) groups=\d+\(.+\)(,\d+\(.+\))*\n.*`)
1058         c.Check(s.api.Logs["crunch-run"].String(), Matches, `(?ms).*Executing container: zzzzz-zzzzz-zzzzzzzzzzzzzzz.*`)
1059         c.Check(s.api.Logs["crunch-run"].String(), Matches, `(?ms).*Using container runtime: stub.*`)
1060 }
1061
1062 func (s *TestSuite) testLogRSSThresholds(c *C, ram int64, expected []int, notExpected int) {
1063         s.runner.crunchstatFakeFS = os.DirFS("../crunchstat/testdata/debian12")
1064         s.fullRunHelper(c, `{
1065                 "command": ["true"],
1066                 "container_image": "`+arvadostest.DockerImage112PDH+`",
1067                 "cwd": ".",
1068                 "environment": {},
1069                 "mounts": {"/tmp": {"kind": "tmp"} },
1070                 "output_path": "/tmp",
1071                 "priority": 1,
1072                 "runtime_constraints": {"ram": `+fmt.Sprintf("%d", ram)+`},
1073                 "state": "Locked"
1074         }`, nil, func() int { return 0 })
1075         c.Logf("=== crunchstat logs\n%s\n", s.api.Logs["crunchstat"].String())
1076         logs := s.api.Logs["crunch-run"].String()
1077         pattern := logLineStart + `Container using over %d%% of memory \(rss %d/%d bytes\)`
1078         var threshold int
1079         for _, threshold = range expected {
1080                 c.Check(logs, Matches, fmt.Sprintf(pattern, threshold, s.debian12MemoryCurrent, ram))
1081         }
1082         if notExpected > threshold {
1083                 c.Check(logs, Not(Matches), fmt.Sprintf(pattern, notExpected, s.debian12MemoryCurrent, ram))
1084         }
1085 }
1086
1087 func (s *TestSuite) TestLogNoRSSThresholds(c *C) {
1088         s.testLogRSSThresholds(c, s.debian12MemoryCurrent*10, []int{}, 90)
1089 }
1090
1091 func (s *TestSuite) TestLogSomeRSSThresholds(c *C) {
1092         onePercentRSS := s.debian12MemoryCurrent / 100
1093         s.testLogRSSThresholds(c, 102*onePercentRSS, []int{90, 95}, 99)
1094 }
1095
1096 func (s *TestSuite) TestLogAllRSSThresholds(c *C) {
1097         s.testLogRSSThresholds(c, s.debian12MemoryCurrent, []int{90, 95, 99}, 0)
1098 }
1099
1100 func (s *TestSuite) TestLogMaximaAfterRun(c *C) {
1101         s.runner.crunchstatFakeFS = os.DirFS("../crunchstat/testdata/debian12")
1102         s.runner.parentTemp = c.MkDir()
1103         s.fullRunHelper(c, `{
1104         "command": ["true"],
1105         "container_image": "`+arvadostest.DockerImage112PDH+`",
1106         "cwd": ".",
1107         "environment": {},
1108         "mounts": {"/tmp": {"kind": "tmp"} },
1109         "output_path": "/tmp",
1110         "priority": 1,
1111         "runtime_constraints": {"ram": `+fmt.Sprintf("%d", s.debian12MemoryCurrent*10)+`},
1112         "state": "Locked"
1113     }`, nil, func() int { return 0 })
1114         logs := s.api.Logs["crunch-run"].String()
1115         for _, expected := range []string{
1116                 `Maximum disk usage was \d+%, \d+/\d+ bytes`,
1117                 fmt.Sprintf(`Maximum container memory swap usage was %d bytes`, s.debian12SwapCurrent),
1118                 `Maximum container memory pgmajfault usage was \d+ faults`,
1119                 fmt.Sprintf(`Maximum container memory rss usage was 10%%, %d/%d bytes`, s.debian12MemoryCurrent, s.debian12MemoryCurrent*10),
1120                 `Maximum crunch-run memory rss usage was \d+ bytes`,
1121         } {
1122                 c.Check(logs, Matches, logLineStart+expected)
1123         }
1124 }
1125
1126 func (s *TestSuite) TestCommitNodeInfoBeforeStart(c *C) {
1127         var collection_create, container_update arvadosclient.Dict
1128         s.fullRunHelper(c, `{
1129                 "command": ["true"],
1130                 "container_image": "`+arvadostest.DockerImage112PDH+`",
1131                 "cwd": ".",
1132                 "environment": {},
1133                 "mounts": {"/tmp": {"kind": "tmp"} },
1134                 "output_path": "/tmp",
1135                 "priority": 1,
1136                 "runtime_constraints": {},
1137                 "state": "Locked",
1138                 "uuid": "zzzzz-dz642-202301121543210"
1139         }`, nil, func() int {
1140                 collection_create = s.api.CalledWith("ensure_unique_name", true)
1141                 container_update = s.api.CalledWith("container.state", "Running")
1142                 return 0
1143         })
1144
1145         c.Assert(collection_create, NotNil)
1146         log_collection := collection_create["collection"].(arvadosclient.Dict)
1147         c.Check(log_collection["name"], Equals, "logs for zzzzz-dz642-202301121543210")
1148         manifest_text := log_collection["manifest_text"].(string)
1149         // We check that the file size is at least two digits as an easy way to
1150         // check the file isn't empty.
1151         c.Check(manifest_text, Matches, `\. .+ \d+:\d{2,}:node-info\.txt( .+)?\n`)
1152         c.Check(manifest_text, Matches, `\. .+ \d+:\d{2,}:node\.json( .+)?\n`)
1153
1154         c.Assert(container_update, NotNil)
1155         // As of Arvados 2.5.0, the container update must specify its log in PDH
1156         // format for the API server to propagate it to container requests, which
1157         // is what we care about for this test.
1158         expect_pdh := fmt.Sprintf("%x+%d", md5.Sum([]byte(manifest_text)), len(manifest_text))
1159         c.Check(container_update["container"].(arvadosclient.Dict)["log"], Equals, expect_pdh)
1160 }
1161
1162 func (s *TestSuite) TestContainerRecordLog(c *C) {
1163         s.fullRunHelper(c, `{
1164                 "command": ["sleep", "1"],
1165                 "container_image": "`+arvadostest.DockerImage112PDH+`",
1166                 "cwd": ".",
1167                 "environment": {},
1168                 "mounts": {"/tmp": {"kind": "tmp"} },
1169                 "output_path": "/tmp",
1170                 "priority": 1,
1171                 "runtime_constraints": {},
1172                 "state": "Locked"
1173         }`, nil,
1174                 func() int {
1175                         time.Sleep(time.Second)
1176                         return 0
1177                 })
1178
1179         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
1180         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
1181
1182         c.Assert(s.api.Logs["container"], NotNil)
1183         c.Check(s.api.Logs["container"].String(), Matches, `(?ms).*container_image.*`)
1184 }
1185
1186 func (s *TestSuite) TestFullRunStderr(c *C) {
1187         s.fullRunHelper(c, `{
1188     "command": ["/bin/sh", "-c", "echo hello ; echo world 1>&2 ; exit 1"],
1189     "container_image": "`+arvadostest.DockerImage112PDH+`",
1190     "cwd": ".",
1191     "environment": {},
1192     "mounts": {"/tmp": {"kind": "tmp"} },
1193     "output_path": "/tmp",
1194     "priority": 1,
1195     "runtime_constraints": {},
1196     "state": "Locked"
1197 }`, nil, func() int {
1198                 fmt.Fprintln(s.executor.created.Stdout, "hello")
1199                 fmt.Fprintln(s.executor.created.Stderr, "world")
1200                 return 1
1201         })
1202
1203         final := s.api.CalledWith("container.state", "Complete")
1204         c.Assert(final, NotNil)
1205         c.Check(final["container"].(arvadosclient.Dict)["exit_code"], Equals, 1)
1206         c.Check(final["container"].(arvadosclient.Dict)["log"], NotNil)
1207
1208         c.Check(s.api.Logs["stdout"].String(), Matches, ".*hello\n")
1209         c.Check(s.api.Logs["stderr"].String(), Matches, ".*world\n")
1210 }
1211
1212 func (s *TestSuite) TestFullRunDefaultCwd(c *C) {
1213         s.fullRunHelper(c, `{
1214     "command": ["pwd"],
1215     "container_image": "`+arvadostest.DockerImage112PDH+`",
1216     "cwd": ".",
1217     "environment": {},
1218     "mounts": {"/tmp": {"kind": "tmp"} },
1219     "output_path": "/tmp",
1220     "priority": 1,
1221     "runtime_constraints": {},
1222     "state": "Locked"
1223 }`, nil, func() int {
1224                 fmt.Fprintf(s.executor.created.Stdout, "workdir=%q", s.executor.created.WorkingDir)
1225                 return 0
1226         })
1227
1228         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
1229         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
1230         c.Log(s.api.Logs["stdout"])
1231         c.Check(s.api.Logs["stdout"].String(), Matches, `.*workdir=""\n`)
1232 }
1233
1234 func (s *TestSuite) TestFullRunSetCwd(c *C) {
1235         s.fullRunHelper(c, `{
1236     "command": ["pwd"],
1237     "container_image": "`+arvadostest.DockerImage112PDH+`",
1238     "cwd": "/bin",
1239     "environment": {},
1240     "mounts": {"/tmp": {"kind": "tmp"} },
1241     "output_path": "/tmp",
1242     "priority": 1,
1243     "runtime_constraints": {},
1244     "state": "Locked"
1245 }`, nil, func() int {
1246                 fmt.Fprintln(s.executor.created.Stdout, s.executor.created.WorkingDir)
1247                 return 0
1248         })
1249
1250         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
1251         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
1252         c.Check(s.api.Logs["stdout"].String(), Matches, ".*/bin\n")
1253 }
1254
1255 func (s *TestSuite) TestFullRunSetOutputStorageClasses(c *C) {
1256         s.fullRunHelper(c, `{
1257     "command": ["pwd"],
1258     "container_image": "`+arvadostest.DockerImage112PDH+`",
1259     "cwd": "/bin",
1260     "environment": {},
1261     "mounts": {"/tmp": {"kind": "tmp"} },
1262     "output_path": "/tmp",
1263     "priority": 1,
1264     "runtime_constraints": {},
1265     "state": "Locked",
1266     "output_storage_classes": ["foo", "bar"]
1267 }`, nil, func() int {
1268                 fmt.Fprintln(s.executor.created.Stdout, s.executor.created.WorkingDir)
1269                 return 0
1270         })
1271
1272         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
1273         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
1274         c.Check(s.api.Logs["stdout"].String(), Matches, ".*/bin\n")
1275         c.Check(s.testDispatcherKeepClient.StorageClasses, DeepEquals, []string{"foo", "bar"})
1276         c.Check(s.testContainerKeepClient.StorageClasses, DeepEquals, []string{"foo", "bar"})
1277 }
1278
1279 func (s *TestSuite) TestEnableCUDADeviceCount(c *C) {
1280         s.fullRunHelper(c, `{
1281     "command": ["pwd"],
1282     "container_image": "`+arvadostest.DockerImage112PDH+`",
1283     "cwd": "/bin",
1284     "environment": {},
1285     "mounts": {"/tmp": {"kind": "tmp"} },
1286     "output_path": "/tmp",
1287     "priority": 1,
1288     "runtime_constraints": {"cuda": {"device_count": 2}},
1289     "state": "Locked",
1290     "output_storage_classes": ["foo", "bar"]
1291 }`, nil, func() int {
1292                 fmt.Fprintln(s.executor.created.Stdout, "ok")
1293                 return 0
1294         })
1295         c.Check(s.executor.created.CUDADeviceCount, Equals, 2)
1296 }
1297
1298 func (s *TestSuite) TestEnableCUDAHardwareCapability(c *C) {
1299         s.fullRunHelper(c, `{
1300     "command": ["pwd"],
1301     "container_image": "`+arvadostest.DockerImage112PDH+`",
1302     "cwd": "/bin",
1303     "environment": {},
1304     "mounts": {"/tmp": {"kind": "tmp"} },
1305     "output_path": "/tmp",
1306     "priority": 1,
1307     "runtime_constraints": {"cuda": {"hardware_capability": "foo"}},
1308     "state": "Locked",
1309     "output_storage_classes": ["foo", "bar"]
1310 }`, nil, func() int {
1311                 fmt.Fprintln(s.executor.created.Stdout, "ok")
1312                 return 0
1313         })
1314         c.Check(s.executor.created.CUDADeviceCount, Equals, 0)
1315 }
1316
1317 func (s *TestSuite) TestStopOnSignal(c *C) {
1318         s.executor.runFunc = func() int {
1319                 s.executor.created.Stdout.Write([]byte("foo\n"))
1320                 s.runner.SigChan <- syscall.SIGINT
1321                 time.Sleep(10 * time.Second)
1322                 return 0
1323         }
1324         s.testStopContainer(c)
1325 }
1326
1327 func (s *TestSuite) TestStopOnArvMountDeath(c *C) {
1328         s.executor.runFunc = func() int {
1329                 s.executor.created.Stdout.Write([]byte("foo\n"))
1330                 s.runner.ArvMountExit <- nil
1331                 close(s.runner.ArvMountExit)
1332                 time.Sleep(10 * time.Second)
1333                 return 0
1334         }
1335         s.runner.ArvMountExit = make(chan error)
1336         s.testStopContainer(c)
1337 }
1338
1339 func (s *TestSuite) testStopContainer(c *C) {
1340         record := `{
1341     "command": ["/bin/sh", "-c", "echo foo && sleep 30 && echo bar"],
1342     "container_image": "` + arvadostest.DockerImage112PDH + `",
1343     "cwd": ".",
1344     "environment": {},
1345     "mounts": {"/tmp": {"kind": "tmp"} },
1346     "output_path": "/tmp",
1347     "priority": 1,
1348     "runtime_constraints": {},
1349     "state": "Locked"
1350 }`
1351
1352         err := json.Unmarshal([]byte(record), &s.api.Container)
1353         c.Assert(err, IsNil)
1354
1355         s.runner.RunArvMount = func([]string, string) (*exec.Cmd, error) { return nil, nil }
1356         s.runner.MkArvClient = func(token string) (IArvadosClient, IKeepClient, *arvados.Client, error) {
1357                 return &ArvTestClient{}, &KeepTestClient{}, nil, nil
1358         }
1359
1360         done := make(chan error)
1361         go func() {
1362                 done <- s.runner.Run()
1363         }()
1364         select {
1365         case <-time.After(20 * time.Second):
1366                 pprof.Lookup("goroutine").WriteTo(os.Stderr, 1)
1367                 c.Fatal("timed out")
1368         case err = <-done:
1369                 c.Check(err, IsNil)
1370         }
1371         for k, v := range s.api.Logs {
1372                 c.Log(k)
1373                 c.Log(v.String(), "\n")
1374         }
1375
1376         c.Check(s.api.CalledWith("container.log", nil), NotNil)
1377         c.Check(s.api.CalledWith("container.state", "Cancelled"), NotNil)
1378         c.Check(s.api.Logs["stdout"].String(), Matches, "(?ms).*foo\n$")
1379 }
1380
1381 func (s *TestSuite) TestFullRunSetEnv(c *C) {
1382         s.fullRunHelper(c, `{
1383     "command": ["/bin/sh", "-c", "echo $FROBIZ"],
1384     "container_image": "`+arvadostest.DockerImage112PDH+`",
1385     "cwd": "/bin",
1386     "environment": {"FROBIZ": "bilbo"},
1387     "mounts": {"/tmp": {"kind": "tmp"} },
1388     "output_path": "/tmp",
1389     "priority": 1,
1390     "runtime_constraints": {},
1391     "state": "Locked"
1392 }`, nil, func() int {
1393                 fmt.Fprintf(s.executor.created.Stdout, "%v", s.executor.created.Env)
1394                 return 0
1395         })
1396
1397         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
1398         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
1399         c.Check(s.api.Logs["stdout"].String(), Matches, `.*map\[FROBIZ:bilbo\]\n`)
1400 }
1401
// ArvMountCmdLine records the arguments of the most recent
// ArvMountTest call, so tests can inspect them afterwards.
type ArvMountCmdLine struct {
	// Cmd is the command line passed to ArvMountTest.
	Cmd []string
	// token is the token passed to ArvMountTest.
	token string
}

// ArvMountTest stores the given command line and token on the receiver
// without starting any process. It always returns a nil command and a
// nil error.
func (am *ArvMountCmdLine) ArvMountTest(c []string, token string) (*exec.Cmd, error) {
	am.Cmd, am.token = c, token
	return nil, nil
}
1412
1413 func stubCert(c *C, temp string) string {
1414         path := temp + "/ca-certificates.crt"
1415         err := os.WriteFile(path, []byte{}, 0666)
1416         c.Assert(err, IsNil)
1417         os.Setenv("SSL_CERT_FILE", path)
1418         return path
1419 }
1420
1421 func (s *TestSuite) TestSetupMounts(c *C) {
1422         cr := s.runner
1423         am := &ArvMountCmdLine{}
1424         cr.RunArvMount = am.ArvMountTest
1425         cr.containerClient, _ = apiStub()
1426         cr.ContainerArvClient = &ArvTestClient{}
1427         cr.ContainerKeepClient = &KeepTestClient{}
1428         cr.Container.OutputStorageClasses = []string{"default"}
1429
1430         realTemp := c.MkDir()
1431         certTemp := c.MkDir()
1432         stubCertPath := stubCert(c, certTemp)
1433         cr.parentTemp = realTemp
1434
1435         i := 0
1436         cr.MkTempDir = func(_ string, prefix string) (string, error) {
1437                 i++
1438                 d := fmt.Sprintf("%s/%s%d", realTemp, prefix, i)
1439                 err := os.Mkdir(d, os.ModePerm)
1440                 if err != nil && strings.Contains(err.Error(), ": file exists") {
1441                         // Test case must have pre-populated the tempdir
1442                         err = nil
1443                 }
1444                 return d, err
1445         }
1446
1447         checkEmpty := func() {
1448                 // Should be deleted.
1449                 _, err := os.Stat(realTemp)
1450                 c.Assert(os.IsNotExist(err), Equals, true)
1451
1452                 // Now recreate it for the next test.
1453                 c.Assert(os.Mkdir(realTemp, 0777), IsNil)
1454         }
1455
1456         {
1457                 i = 0
1458                 cr.ArvMountPoint = ""
1459                 cr.Container.Mounts = make(map[string]arvados.Mount)
1460                 cr.Container.Mounts["/tmp"] = arvados.Mount{Kind: "tmp"}
1461                 cr.Container.OutputPath = "/tmp"
1462                 cr.statInterval = 5 * time.Second
1463                 bindmounts, err := cr.SetupMounts()
1464                 c.Check(err, IsNil)
1465                 c.Check(am.Cmd, DeepEquals, []string{"arv-mount", "--foreground",
1466                         "--read-write", "--storage-classes", "default", "--crunchstat-interval=5",
1467                         "--mount-by-pdh", "by_id", "--disable-event-listening", "--mount-by-id", "by_uuid", realTemp + "/keep1"})
1468                 c.Check(bindmounts, DeepEquals, map[string]bindmount{"/tmp": {realTemp + "/tmp2", false}})
1469                 os.RemoveAll(cr.ArvMountPoint)
1470                 cr.CleanupDirs()
1471                 checkEmpty()
1472         }
1473
1474         {
1475                 i = 0
1476                 cr.ArvMountPoint = ""
1477                 cr.Container.Mounts = make(map[string]arvados.Mount)
1478                 cr.Container.Mounts["/out"] = arvados.Mount{Kind: "tmp"}
1479                 cr.Container.Mounts["/tmp"] = arvados.Mount{Kind: "tmp"}
1480                 cr.Container.OutputPath = "/out"
1481                 cr.Container.OutputStorageClasses = []string{"foo", "bar"}
1482
1483                 bindmounts, err := cr.SetupMounts()
1484                 c.Check(err, IsNil)
1485                 c.Check(am.Cmd, DeepEquals, []string{"arv-mount", "--foreground",
1486                         "--read-write", "--storage-classes", "foo,bar", "--crunchstat-interval=5",
1487                         "--mount-by-pdh", "by_id", "--disable-event-listening", "--mount-by-id", "by_uuid", realTemp + "/keep1"})
1488                 c.Check(bindmounts, DeepEquals, map[string]bindmount{"/out": {realTemp + "/tmp2", false}, "/tmp": {realTemp + "/tmp3", false}})
1489                 os.RemoveAll(cr.ArvMountPoint)
1490                 cr.CleanupDirs()
1491                 checkEmpty()
1492         }
1493
1494         {
1495                 i = 0
1496                 cr.ArvMountPoint = ""
1497                 cr.Container.Mounts = make(map[string]arvados.Mount)
1498                 cr.Container.Mounts["/tmp"] = arvados.Mount{Kind: "tmp"}
1499                 cr.Container.OutputPath = "/tmp"
1500                 cr.Container.RuntimeConstraints.API = true
1501                 cr.Container.OutputStorageClasses = []string{"default"}
1502
1503                 bindmounts, err := cr.SetupMounts()
1504                 c.Check(err, IsNil)
1505                 c.Check(am.Cmd, DeepEquals, []string{"arv-mount", "--foreground",
1506                         "--read-write", "--storage-classes", "default", "--crunchstat-interval=5",
1507                         "--mount-by-pdh", "by_id", "--disable-event-listening", "--mount-by-id", "by_uuid", realTemp + "/keep1"})
1508                 c.Check(bindmounts, DeepEquals, map[string]bindmount{"/tmp": {realTemp + "/tmp2", false}, "/etc/arvados/ca-certificates.crt": {stubCertPath, true}})
1509                 os.RemoveAll(cr.ArvMountPoint)
1510                 cr.CleanupDirs()
1511                 checkEmpty()
1512
1513                 cr.Container.RuntimeConstraints.API = false
1514         }
1515
1516         {
1517                 i = 0
1518                 cr.ArvMountPoint = ""
1519                 cr.Container.Mounts = map[string]arvados.Mount{
1520                         "/keeptmp": {Kind: "collection", Writable: true},
1521                 }
1522                 cr.Container.OutputPath = "/keeptmp"
1523
1524                 os.MkdirAll(realTemp+"/keep1/tmp0", os.ModePerm)
1525
1526                 bindmounts, err := cr.SetupMounts()
1527                 c.Check(err, IsNil)
1528                 c.Check(am.Cmd, DeepEquals, []string{"arv-mount", "--foreground",
1529                         "--read-write", "--storage-classes", "default", "--crunchstat-interval=5",
1530                         "--mount-tmp", "tmp0", "--mount-by-pdh", "by_id", "--disable-event-listening", "--mount-by-id", "by_uuid", realTemp + "/keep1"})
1531                 c.Check(bindmounts, DeepEquals, map[string]bindmount{"/keeptmp": {realTemp + "/keep1/tmp0", false}})
1532                 os.RemoveAll(cr.ArvMountPoint)
1533                 cr.CleanupDirs()
1534                 checkEmpty()
1535         }
1536
1537         {
1538                 i = 0
1539                 cr.ArvMountPoint = ""
1540                 cr.Container.Mounts = map[string]arvados.Mount{
1541                         "/keepinp": {Kind: "collection", PortableDataHash: "59389a8f9ee9d399be35462a0f92541c+53"},
1542                         "/keepout": {Kind: "collection", Writable: true},
1543                 }
1544                 cr.Container.OutputPath = "/keepout"
1545
1546                 os.MkdirAll(realTemp+"/keep1/by_id/59389a8f9ee9d399be35462a0f92541c+53", os.ModePerm)
1547                 os.MkdirAll(realTemp+"/keep1/tmp0", os.ModePerm)
1548
1549                 bindmounts, err := cr.SetupMounts()
1550                 c.Check(err, IsNil)
1551                 c.Check(am.Cmd, DeepEquals, []string{"arv-mount", "--foreground",
1552                         "--read-write", "--storage-classes", "default", "--crunchstat-interval=5",
1553                         "--mount-tmp", "tmp0", "--mount-by-pdh", "by_id", "--disable-event-listening", "--mount-by-id", "by_uuid", realTemp + "/keep1"})
1554                 c.Check(bindmounts, DeepEquals, map[string]bindmount{
1555                         "/keepinp": {realTemp + "/keep1/by_id/59389a8f9ee9d399be35462a0f92541c+53", true},
1556                         "/keepout": {realTemp + "/keep1/tmp0", false},
1557                 })
1558                 os.RemoveAll(cr.ArvMountPoint)
1559                 cr.CleanupDirs()
1560                 checkEmpty()
1561         }
1562
1563         {
1564                 i = 0
1565                 cr.ArvMountPoint = ""
1566                 cr.Container.RuntimeConstraints.KeepCacheRAM = 512
1567                 cr.Container.Mounts = map[string]arvados.Mount{
1568                         "/keepinp": {Kind: "collection", PortableDataHash: "59389a8f9ee9d399be35462a0f92541c+53"},
1569                         "/keepout": {Kind: "collection", Writable: true},
1570                 }
1571                 cr.Container.OutputPath = "/keepout"
1572
1573                 os.MkdirAll(realTemp+"/keep1/by_id/59389a8f9ee9d399be35462a0f92541c+53", os.ModePerm)
1574                 os.MkdirAll(realTemp+"/keep1/tmp0", os.ModePerm)
1575
1576                 bindmounts, err := cr.SetupMounts()
1577                 c.Check(err, IsNil)
1578                 c.Check(am.Cmd, DeepEquals, []string{"arv-mount", "--foreground",
1579                         "--read-write", "--storage-classes", "default", "--crunchstat-interval=5", "--ram-cache",
1580                         "--file-cache", "512", "--mount-tmp", "tmp0", "--mount-by-pdh", "by_id", "--disable-event-listening", "--mount-by-id", "by_uuid", realTemp + "/keep1"})
1581                 c.Check(bindmounts, DeepEquals, map[string]bindmount{
1582                         "/keepinp": {realTemp + "/keep1/by_id/59389a8f9ee9d399be35462a0f92541c+53", true},
1583                         "/keepout": {realTemp + "/keep1/tmp0", false},
1584                 })
1585                 os.RemoveAll(cr.ArvMountPoint)
1586                 cr.CleanupDirs()
1587                 checkEmpty()
1588         }
1589
1590         for _, test := range []struct {
1591                 in  interface{}
1592                 out string
1593         }{
1594                 {in: "foo", out: `"foo"`},
1595                 {in: nil, out: `null`},
1596                 {in: map[string]int64{"foo": 123456789123456789}, out: `{"foo":123456789123456789}`},
1597         } {
1598                 i = 0
1599                 cr.ArvMountPoint = ""
1600                 cr.Container.Mounts = map[string]arvados.Mount{
1601                         "/mnt/test.json": {Kind: "json", Content: test.in},
1602                 }
1603                 bindmounts, err := cr.SetupMounts()
1604                 c.Check(err, IsNil)
1605                 c.Check(bindmounts, DeepEquals, map[string]bindmount{
1606                         "/mnt/test.json": {realTemp + "/json2/mountdata.json", true},
1607                 })
1608                 content, err := ioutil.ReadFile(realTemp + "/json2/mountdata.json")
1609                 c.Check(err, IsNil)
1610                 c.Check(content, DeepEquals, []byte(test.out))
1611                 os.RemoveAll(cr.ArvMountPoint)
1612                 cr.CleanupDirs()
1613                 checkEmpty()
1614         }
1615
1616         for _, test := range []struct {
1617                 in  interface{}
1618                 out string
1619         }{
1620                 {in: "foo", out: `foo`},
1621                 {in: nil, out: "error"},
1622                 {in: map[string]int64{"foo": 123456789123456789}, out: "error"},
1623         } {
1624                 i = 0
1625                 cr.ArvMountPoint = ""
1626                 cr.Container.Mounts = map[string]arvados.Mount{
1627                         "/mnt/test.txt": {Kind: "text", Content: test.in},
1628                 }
1629                 bindmounts, err := cr.SetupMounts()
1630                 if test.out == "error" {
1631                         c.Check(err.Error(), Equals, "content for mount \"/mnt/test.txt\" must be a string")
1632                 } else {
1633                         c.Check(err, IsNil)
1634                         c.Check(bindmounts, DeepEquals, map[string]bindmount{
1635                                 "/mnt/test.txt": {realTemp + "/text2/mountdata.text", true},
1636                         })
1637                         content, err := ioutil.ReadFile(realTemp + "/text2/mountdata.text")
1638                         c.Check(err, IsNil)
1639                         c.Check(content, DeepEquals, []byte(test.out))
1640                 }
1641                 os.RemoveAll(cr.ArvMountPoint)
1642                 cr.CleanupDirs()
1643                 checkEmpty()
1644         }
1645
1646         // Read-only mount points are allowed underneath output_dir mount point
1647         {
1648                 i = 0
1649                 cr.ArvMountPoint = ""
1650                 cr.Container.Mounts = make(map[string]arvados.Mount)
1651                 cr.Container.Mounts = map[string]arvados.Mount{
1652                         "/tmp":     {Kind: "tmp"},
1653                         "/tmp/foo": {Kind: "collection"},
1654                 }
1655                 cr.Container.OutputPath = "/tmp"
1656
1657                 os.MkdirAll(realTemp+"/keep1/tmp0", os.ModePerm)
1658
1659                 bindmounts, err := cr.SetupMounts()
1660                 c.Check(err, IsNil)
1661                 c.Check(am.Cmd, DeepEquals, []string{"arv-mount", "--foreground",
1662                         "--read-write", "--storage-classes", "default", "--crunchstat-interval=5", "--ram-cache",
1663                         "--file-cache", "512", "--mount-tmp", "tmp0", "--mount-by-pdh", "by_id", "--disable-event-listening", "--mount-by-id", "by_uuid", realTemp + "/keep1"})
1664                 c.Check(bindmounts, DeepEquals, map[string]bindmount{
1665                         "/tmp":     {realTemp + "/tmp2", false},
1666                         "/tmp/foo": {realTemp + "/keep1/tmp0", true},
1667                 })
1668                 os.RemoveAll(cr.ArvMountPoint)
1669                 cr.CleanupDirs()
1670                 checkEmpty()
1671         }
1672
1673         // Writable mount points copied to output_dir mount point
1674         {
1675                 i = 0
1676                 cr.ArvMountPoint = ""
1677                 cr.Container.Mounts = make(map[string]arvados.Mount)
1678                 cr.Container.Mounts = map[string]arvados.Mount{
1679                         "/tmp": {Kind: "tmp"},
1680                         "/tmp/foo": {Kind: "collection",
1681                                 PortableDataHash: "59389a8f9ee9d399be35462a0f92541c+53",
1682                                 Writable:         true},
1683                         "/tmp/bar": {Kind: "collection",
1684                                 PortableDataHash: "59389a8f9ee9d399be35462a0f92541d+53",
1685                                 Path:             "baz",
1686                                 Writable:         true},
1687                 }
1688                 cr.Container.OutputPath = "/tmp"
1689
1690                 os.MkdirAll(realTemp+"/keep1/by_id/59389a8f9ee9d399be35462a0f92541c+53", os.ModePerm)
1691                 os.MkdirAll(realTemp+"/keep1/by_id/59389a8f9ee9d399be35462a0f92541d+53/baz", os.ModePerm)
1692
1693                 rf, _ := os.Create(realTemp + "/keep1/by_id/59389a8f9ee9d399be35462a0f92541d+53/baz/quux")
1694                 rf.Write([]byte("bar"))
1695                 rf.Close()
1696
1697                 _, err := cr.SetupMounts()
1698                 c.Check(err, IsNil)
1699                 _, err = os.Stat(cr.HostOutputDir + "/foo")
1700                 c.Check(err, IsNil)
1701                 _, err = os.Stat(cr.HostOutputDir + "/bar/quux")
1702                 c.Check(err, IsNil)
1703                 os.RemoveAll(cr.ArvMountPoint)
1704                 cr.CleanupDirs()
1705                 checkEmpty()
1706         }
1707
1708         // Only mount points of kind 'collection' are allowed underneath output_dir mount point
1709         {
1710                 i = 0
1711                 cr.ArvMountPoint = ""
1712                 cr.Container.Mounts = make(map[string]arvados.Mount)
1713                 cr.Container.Mounts = map[string]arvados.Mount{
1714                         "/tmp":     {Kind: "tmp"},
1715                         "/tmp/foo": {Kind: "tmp"},
1716                 }
1717                 cr.Container.OutputPath = "/tmp"
1718
1719                 _, err := cr.SetupMounts()
1720                 c.Check(err, NotNil)
1721                 c.Check(err, ErrorMatches, `only mount points of kind 'collection', 'text' or 'json' are supported underneath the output_path.*`)
1722                 os.RemoveAll(cr.ArvMountPoint)
1723                 cr.CleanupDirs()
1724                 checkEmpty()
1725         }
1726
1727         // Only mount point of kind 'collection' is allowed for stdin
1728         {
1729                 i = 0
1730                 cr.ArvMountPoint = ""
1731                 cr.Container.Mounts = make(map[string]arvados.Mount)
1732                 cr.Container.Mounts = map[string]arvados.Mount{
1733                         "stdin": {Kind: "tmp"},
1734                 }
1735
1736                 _, err := cr.SetupMounts()
1737                 c.Check(err, NotNil)
1738                 c.Check(err, ErrorMatches, `unsupported mount kind 'tmp' for stdin.*`)
1739                 os.RemoveAll(cr.ArvMountPoint)
1740                 cr.CleanupDirs()
1741                 checkEmpty()
1742         }
1743
1744         // git_tree mounts
1745         {
1746                 i = 0
1747                 cr.ArvMountPoint = ""
1748                 git_client.InstallProtocol("https", git_http.NewClient(arvados.InsecureHTTPClient))
1749                 cr.token = arvadostest.ActiveToken
1750                 cr.Container.Mounts = make(map[string]arvados.Mount)
1751                 cr.Container.Mounts = map[string]arvados.Mount{
1752                         "/tip": {
1753                                 Kind:   "git_tree",
1754                                 UUID:   arvadostest.Repository2UUID,
1755                                 Commit: "fd3531f42995344f36c30b79f55f27b502f3d344",
1756                                 Path:   "/",
1757                         },
1758                         "/non-tip": {
1759                                 Kind:   "git_tree",
1760                                 UUID:   arvadostest.Repository2UUID,
1761                                 Commit: "5ebfab0522851df01fec11ec55a6d0f4877b542e",
1762                                 Path:   "/",
1763                         },
1764                 }
1765                 cr.Container.OutputPath = "/tmp"
1766
1767                 bindmounts, err := cr.SetupMounts()
1768                 c.Check(err, IsNil)
1769
1770                 for path, mount := range bindmounts {
1771                         c.Check(mount.ReadOnly, Equals, !cr.Container.Mounts[path].Writable, Commentf("%s %#v", path, mount))
1772                 }
1773
1774                 data, err := ioutil.ReadFile(bindmounts["/tip"].HostPath + "/dir1/dir2/file with mode 0644")
1775                 c.Check(err, IsNil)
1776                 c.Check(string(data), Equals, "\000\001\002\003")
1777                 _, err = ioutil.ReadFile(bindmounts["/tip"].HostPath + "/file only on testbranch")
1778                 c.Check(err, FitsTypeOf, &os.PathError{})
1779                 c.Check(os.IsNotExist(err), Equals, true)
1780
1781                 data, err = ioutil.ReadFile(bindmounts["/non-tip"].HostPath + "/dir1/dir2/file with mode 0644")
1782                 c.Check(err, IsNil)
1783                 c.Check(string(data), Equals, "\000\001\002\003")
1784                 data, err = ioutil.ReadFile(bindmounts["/non-tip"].HostPath + "/file only on testbranch")
1785                 c.Check(err, IsNil)
1786                 c.Check(string(data), Equals, "testfile\n")
1787
1788                 cr.CleanupDirs()
1789                 checkEmpty()
1790         }
1791 }
1792
1793 func (s *TestSuite) TestStdout(c *C) {
1794         helperRecord := `{
1795                 "command": ["/bin/sh", "-c", "echo $FROBIZ"],
1796                 "container_image": "` + arvadostest.DockerImage112PDH + `",
1797                 "cwd": "/bin",
1798                 "environment": {"FROBIZ": "bilbo"},
1799                 "mounts": {"/tmp": {"kind": "tmp"}, "stdout": {"kind": "file", "path": "/tmp/a/b/c.out"} },
1800                 "output_path": "/tmp",
1801                 "priority": 1,
1802                 "runtime_constraints": {},
1803                 "state": "Locked"
1804         }`
1805
1806         s.fullRunHelper(c, helperRecord, nil, func() int {
1807                 fmt.Fprintln(s.executor.created.Stdout, s.executor.created.Env["FROBIZ"])
1808                 return 0
1809         })
1810
1811         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
1812         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
1813         c.Check(s.runner.ContainerArvClient.(*ArvTestClient).CalledWith("collection.manifest_text", "./a/b 307372fa8fd5c146b22ae7a45b49bc31+6 0:6:c.out\n"), NotNil)
1814 }
1815
1816 // Used by the TestStdoutWithWrongPath*()
1817 func (s *TestSuite) stdoutErrorRunHelper(c *C, record string, fn func() int) (*ArvTestClient, *ContainerRunner, error) {
1818         err := json.Unmarshal([]byte(record), &s.api.Container)
1819         c.Assert(err, IsNil)
1820         s.executor.runFunc = fn
1821         s.runner.RunArvMount = (&ArvMountCmdLine{}).ArvMountTest
1822         s.runner.MkArvClient = func(token string) (IArvadosClient, IKeepClient, *arvados.Client, error) {
1823                 return s.api, &KeepTestClient{}, nil, nil
1824         }
1825         return s.api, s.runner, s.runner.Run()
1826 }
1827
1828 func (s *TestSuite) TestStdoutWithWrongPath(c *C) {
1829         _, _, err := s.stdoutErrorRunHelper(c, `{
1830     "mounts": {"/tmp": {"kind": "tmp"}, "stdout": {"kind": "file", "path":"/tmpa.out"} },
1831     "output_path": "/tmp",
1832     "state": "Locked"
1833 }`, func() int { return 0 })
1834         c.Check(err, ErrorMatches, ".*Stdout path does not start with OutputPath.*")
1835 }
1836
1837 func (s *TestSuite) TestStdoutWithWrongKindTmp(c *C) {
1838         _, _, err := s.stdoutErrorRunHelper(c, `{
1839     "mounts": {"/tmp": {"kind": "tmp"}, "stdout": {"kind": "tmp", "path":"/tmp/a.out"} },
1840     "output_path": "/tmp",
1841     "state": "Locked"
1842 }`, func() int { return 0 })
1843         c.Check(err, ErrorMatches, ".*unsupported mount kind 'tmp' for stdout.*")
1844 }
1845
1846 func (s *TestSuite) TestStdoutWithWrongKindCollection(c *C) {
1847         _, _, err := s.stdoutErrorRunHelper(c, `{
1848     "mounts": {"/tmp": {"kind": "tmp"}, "stdout": {"kind": "collection", "path":"/tmp/a.out"} },
1849     "output_path": "/tmp",
1850     "state": "Locked"
1851 }`, func() int { return 0 })
1852         c.Check(err, ErrorMatches, ".*unsupported mount kind 'collection' for stdout.*")
1853 }
1854
1855 func (s *TestSuite) TestFullRunWithAPI(c *C) {
1856         s.fullRunHelper(c, `{
1857     "command": ["/bin/sh", "-c", "true $ARVADOS_API_HOST"],
1858     "container_image": "`+arvadostest.DockerImage112PDH+`",
1859     "cwd": "/bin",
1860     "environment": {},
1861     "mounts": {"/tmp": {"kind": "tmp"} },
1862     "output_path": "/tmp",
1863     "priority": 1,
1864     "runtime_constraints": {"API": true},
1865     "state": "Locked"
1866 }`, nil, func() int {
1867                 c.Check(s.executor.created.Env["ARVADOS_API_HOST"], Equals, os.Getenv("ARVADOS_API_HOST"))
1868                 return 3
1869         })
1870         c.Check(s.api.CalledWith("container.exit_code", 3), NotNil)
1871         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
1872         c.Check(s.api.Logs["crunch-run"].String(), Matches, `(?ms).*status code 3\n.*`)
1873 }
1874
1875 func (s *TestSuite) TestFullRunSetOutput(c *C) {
1876         defer os.Setenv("ARVADOS_API_HOST", os.Getenv("ARVADOS_API_HOST"))
1877         os.Setenv("ARVADOS_API_HOST", "test.arvados.org")
1878         s.fullRunHelper(c, `{
1879     "command": ["/bin/sh", "-c", "echo $ARVADOS_API_HOST"],
1880     "container_image": "`+arvadostest.DockerImage112PDH+`",
1881     "cwd": "/bin",
1882     "environment": {},
1883     "mounts": {"/tmp": {"kind": "tmp"} },
1884     "output_path": "/tmp",
1885     "priority": 1,
1886     "runtime_constraints": {"API": true},
1887     "state": "Locked"
1888 }`, nil, func() int {
1889                 s.api.Container.Output = arvadostest.DockerImage112PDH
1890                 return 0
1891         })
1892
1893         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
1894         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
1895         c.Check(s.api.CalledWith("container.output", arvadostest.DockerImage112PDH), NotNil)
1896 }
1897
// TestArvMountRuntimeStatusWarning checks that a "Keep write error"
// line written to stderr by a stubbed arv-mount process is reported
// through the container's runtime_status "warning" and "warningDetail"
// fields, and that the run still ends in state Complete with the stub
// executor's exit code (137) described in the crunch-run log as
// signal 9, SIGKILL.
func (s *TestSuite) TestArvMountRuntimeStatusWarning(c *C) {
	s.runner.RunArvMount = func([]string, string) (*exec.Cmd, error) {
		// Pre-create by_id/README under the mount point before
		// starting the fake arv-mount. Presumably this is the file
		// the runner looks for to decide the mount is ready --
		// confirm against ArvMountCmd.
		//
		// NOTE(review): both return values are discarded, and mode
		// 0666 gives the directory no execute/search bit, so creating
		// README inside it may fail when not running as root --
		// confirm this fixture does what is intended.
		os.Mkdir(s.runner.ArvMountPoint+"/by_id", 0666)
		ioutil.WriteFile(s.runner.ArvMountPoint+"/by_id/README", nil, 0666)
		// Fake arv-mount: print one error line on stderr, then stay
		// alive for 3 seconds (longer than the 1-second stub
		// container run below).
		return s.runner.ArvMountCmd([]string{"bash", "-c", "echo >&2 Test: Keep write error: I am a teapot; sleep 3"}, "")
	}
	// Stub container process: run for one second, then report exit
	// status 137.
	s.executor.runFunc = func() int {
		time.Sleep(time.Second)
		return 137
	}
	record := `{
    "command": ["sleep", "1"],
    "container_image": "` + arvadostest.DockerImage112PDH + `",
    "cwd": "/bin",
    "environment": {},
    "mounts": {"/tmp": {"kind": "tmp"} },
    "output_path": "/tmp",
    "priority": 1,
    "runtime_constraints": {"API": true},
    "state": "Locked"
}`
	err := json.Unmarshal([]byte(record), &s.api.Container)
	c.Assert(err, IsNil)
	err = s.runner.Run()
	c.Assert(err, IsNil)
	c.Check(s.api.CalledWith("container.exit_code", 137), NotNil)
	// The warning is the generic category; warningDetail carries the
	// exact stderr line emitted by the fake arv-mount.
	c.Check(s.api.CalledWith("container.runtime_status.warning", "arv-mount: Keep write error"), NotNil)
	c.Check(s.api.CalledWith("container.runtime_status.warningDetail", "Test: Keep write error: I am a teapot"), NotNil)
	c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
	c.Check(s.api.Logs["crunch-run"].String(), Matches, `(?ms).*Container exited with status code 137 \(signal 9, SIGKILL\).*`)
}
1929
1930 func (s *TestSuite) TestStdoutWithExcludeFromOutputMountPointUnderOutputDir(c *C) {
1931         helperRecord := `{
1932                 "command": ["/bin/sh", "-c", "echo $FROBIZ"],
1933                 "container_image": "` + arvadostest.DockerImage112PDH + `",
1934                 "cwd": "/bin",
1935                 "environment": {"FROBIZ": "bilbo"},
1936                 "mounts": {
1937         "/tmp": {"kind": "tmp"},
1938         "/tmp/foo": {"kind": "collection",
1939                      "portable_data_hash": "a3e8f74c6f101eae01fa08bfb4e49b3a+54",
1940                      "exclude_from_output": true
1941         },
1942         "stdout": {"kind": "file", "path": "/tmp/a/b/c.out"}
1943     },
1944                 "output_path": "/tmp",
1945                 "priority": 1,
1946                 "runtime_constraints": {},
1947                 "state": "Locked"
1948         }`
1949
1950         extraMounts := []string{"a3e8f74c6f101eae01fa08bfb4e49b3a+54"}
1951
1952         s.fullRunHelper(c, helperRecord, extraMounts, func() int {
1953                 fmt.Fprintln(s.executor.created.Stdout, s.executor.created.Env["FROBIZ"])
1954                 return 0
1955         })
1956
1957         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
1958         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
1959         c.Check(s.runner.ContainerArvClient.(*ArvTestClient).CalledWith("collection.manifest_text", "./a/b 307372fa8fd5c146b22ae7a45b49bc31+6 0:6:c.out\n"), NotNil)
1960 }
1961
1962 func (s *TestSuite) TestStdoutWithMultipleMountPointsUnderOutputDir(c *C) {
1963         helperRecord := `{
1964                 "command": ["/bin/sh", "-c", "echo $FROBIZ"],
1965                 "container_image": "` + arvadostest.DockerImage112PDH + `",
1966                 "cwd": "/bin",
1967                 "environment": {"FROBIZ": "bilbo"},
1968                 "mounts": {
1969         "/tmp": {"kind": "tmp"},
1970         "/tmp/foo/bar": {"kind": "collection", "portable_data_hash": "a0def87f80dd594d4675809e83bd4f15+367", "path":"/file2_in_main.txt"},
1971         "/tmp/foo/sub1": {"kind": "collection", "portable_data_hash": "a0def87f80dd594d4675809e83bd4f15+367", "path":"/subdir1"},
1972         "/tmp/foo/sub1file2": {"kind": "collection", "portable_data_hash": "a0def87f80dd594d4675809e83bd4f15+367", "path":"/subdir1/file2_in_subdir1.txt"},
1973         "/tmp/foo/baz/sub2file2": {"kind": "collection", "portable_data_hash": "a0def87f80dd594d4675809e83bd4f15+367", "path":"/subdir1/subdir2/file2_in_subdir2.txt"},
1974         "stdout": {"kind": "file", "path": "/tmp/a/b/c.out"}
1975     },
1976                 "output_path": "/tmp",
1977                 "priority": 1,
1978                 "runtime_constraints": {},
1979                 "state": "Locked",
1980                 "uuid": "zzzzz-dz642-202301130848001"
1981         }`
1982
1983         extraMounts := []string{
1984                 "a0def87f80dd594d4675809e83bd4f15+367/file2_in_main.txt",
1985                 "a0def87f80dd594d4675809e83bd4f15+367/subdir1/file2_in_subdir1.txt",
1986                 "a0def87f80dd594d4675809e83bd4f15+367/subdir1/subdir2/file2_in_subdir2.txt",
1987         }
1988
1989         api, _, realtemp := s.fullRunHelper(c, helperRecord, extraMounts, func() int {
1990                 fmt.Fprintln(s.executor.created.Stdout, s.executor.created.Env["FROBIZ"])
1991                 return 0
1992         })
1993
1994         c.Check(s.executor.created.BindMounts, DeepEquals, map[string]bindmount{
1995                 "/tmp":                   {realtemp + "/tmp1", false},
1996                 "/tmp/foo/bar":           {s.keepmount + "/by_id/a0def87f80dd594d4675809e83bd4f15+367/file2_in_main.txt", true},
1997                 "/tmp/foo/baz/sub2file2": {s.keepmount + "/by_id/a0def87f80dd594d4675809e83bd4f15+367/subdir1/subdir2/file2_in_subdir2.txt", true},
1998                 "/tmp/foo/sub1":          {s.keepmount + "/by_id/a0def87f80dd594d4675809e83bd4f15+367/subdir1", true},
1999                 "/tmp/foo/sub1file2":     {s.keepmount + "/by_id/a0def87f80dd594d4675809e83bd4f15+367/subdir1/file2_in_subdir1.txt", true},
2000         })
2001
2002         c.Check(api.CalledWith("container.exit_code", 0), NotNil)
2003         c.Check(api.CalledWith("container.state", "Complete"), NotNil)
2004         output_count := uint(0)
2005         for _, v := range s.runner.ContainerArvClient.(*ArvTestClient).Content {
2006                 if v["collection"] == nil {
2007                         continue
2008                 }
2009                 collection := v["collection"].(arvadosclient.Dict)
2010                 if collection["name"].(string) != "output for zzzzz-dz642-202301130848001" {
2011                         continue
2012                 }
2013                 c.Check(v["ensure_unique_name"], Equals, true)
2014                 c.Check(collection["manifest_text"].(string), Equals, `./a/b 307372fa8fd5c146b22ae7a45b49bc31+6 0:6:c.out
2015 ./foo 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396c73c0abcdefgh11234567890@569fa8c3 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396cabcdefghij6419876543234@569fa8c4 9:18:bar 36:18:sub1file2
2016 ./foo/baz 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396c73c0bcdefghijk544332211@569fa8c5 9:18:sub2file2
2017 ./foo/sub1 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396cabcdefghij6419876543234@569fa8c4 0:9:file1_in_subdir1.txt 9:18:file2_in_subdir1.txt
2018 ./foo/sub1/subdir2 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396c73c0bcdefghijk544332211@569fa8c5 0:9:file1_in_subdir2.txt 9:18:file2_in_subdir2.txt
2019 `)
2020                 output_count++
2021         }
2022         c.Check(output_count, Not(Equals), uint(0))
2023 }
2024
2025 func (s *TestSuite) TestStdoutWithMountPointsUnderOutputDirDenormalizedManifest(c *C) {
2026         helperRecord := `{
2027                 "command": ["/bin/sh", "-c", "echo $FROBIZ"],
2028                 "container_image": "` + arvadostest.DockerImage112PDH + `",
2029                 "cwd": "/bin",
2030                 "environment": {"FROBIZ": "bilbo"},
2031                 "mounts": {
2032         "/tmp": {"kind": "tmp"},
2033         "/tmp/foo/bar": {"kind": "collection", "portable_data_hash": "b0def87f80dd594d4675809e83bd4f15+367", "path": "/subdir1/file2_in_subdir1.txt"},
2034         "stdout": {"kind": "file", "path": "/tmp/a/b/c.out"}
2035     },
2036                 "output_path": "/tmp",
2037                 "priority": 1,
2038                 "runtime_constraints": {},
2039                 "state": "Locked",
2040                 "uuid": "zzzzz-dz642-202301130848002"
2041         }`
2042
2043         extraMounts := []string{
2044                 "b0def87f80dd594d4675809e83bd4f15+367/subdir1/file2_in_subdir1.txt",
2045         }
2046
2047         s.fullRunHelper(c, helperRecord, extraMounts, func() int {
2048                 fmt.Fprintln(s.executor.created.Stdout, s.executor.created.Env["FROBIZ"])
2049                 return 0
2050         })
2051
2052         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
2053         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
2054         output_count := uint(0)
2055         for _, v := range s.runner.ContainerArvClient.(*ArvTestClient).Content {
2056                 if v["collection"] == nil {
2057                         continue
2058                 }
2059                 collection := v["collection"].(arvadosclient.Dict)
2060                 if collection["name"].(string) != "output for zzzzz-dz642-202301130848002" {
2061                         continue
2062                 }
2063                 c.Check(collection["manifest_text"].(string), Equals, `./a/b 307372fa8fd5c146b22ae7a45b49bc31+6 0:6:c.out
2064 ./foo 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396c73c0abcdefgh11234567890@569fa8c3 10:17:bar
2065 `)
2066                 output_count++
2067         }
2068         c.Check(output_count, Not(Equals), uint(0))
2069 }
2070
2071 func (s *TestSuite) TestOutputError(c *C) {
2072         helperRecord := `{
2073                 "command": ["/bin/sh", "-c", "echo $FROBIZ"],
2074                 "container_image": "` + arvadostest.DockerImage112PDH + `",
2075                 "cwd": "/bin",
2076                 "environment": {"FROBIZ": "bilbo"},
2077                 "mounts": {
2078                         "/tmp": {"kind": "tmp"}
2079                 },
2080                 "output_path": "/tmp",
2081                 "priority": 1,
2082                 "runtime_constraints": {},
2083                 "state": "Locked"
2084         }`
2085         s.fullRunHelper(c, helperRecord, nil, func() int {
2086                 os.Symlink("/etc/hosts", s.runner.HostOutputDir+"/baz")
2087                 return 0
2088         })
2089
2090         c.Check(s.api.CalledWith("container.state", "Cancelled"), NotNil)
2091 }
2092
2093 func (s *TestSuite) TestStdinCollectionMountPoint(c *C) {
2094         helperRecord := `{
2095                 "command": ["/bin/sh", "-c", "echo $FROBIZ"],
2096                 "container_image": "` + arvadostest.DockerImage112PDH + `",
2097                 "cwd": "/bin",
2098                 "environment": {"FROBIZ": "bilbo"},
2099                 "mounts": {
2100         "/tmp": {"kind": "tmp"},
2101         "stdin": {"kind": "collection", "portable_data_hash": "b0def87f80dd594d4675809e83bd4f15+367", "path": "/file1_in_main.txt"},
2102         "stdout": {"kind": "file", "path": "/tmp/a/b/c.out"}
2103     },
2104                 "output_path": "/tmp",
2105                 "priority": 1,
2106                 "runtime_constraints": {},
2107                 "state": "Locked"
2108         }`
2109
2110         extraMounts := []string{
2111                 "b0def87f80dd594d4675809e83bd4f15+367/file1_in_main.txt",
2112         }
2113
2114         api, _, _ := s.fullRunHelper(c, helperRecord, extraMounts, func() int {
2115                 fmt.Fprintln(s.executor.created.Stdout, s.executor.created.Env["FROBIZ"])
2116                 return 0
2117         })
2118
2119         c.Check(api.CalledWith("container.exit_code", 0), NotNil)
2120         c.Check(api.CalledWith("container.state", "Complete"), NotNil)
2121         for _, v := range api.Content {
2122                 if v["collection"] != nil {
2123                         collection := v["collection"].(arvadosclient.Dict)
2124                         if strings.Index(collection["name"].(string), "output") == 0 {
2125                                 manifest := collection["manifest_text"].(string)
2126                                 c.Check(manifest, Equals, `./a/b 307372fa8fd5c146b22ae7a45b49bc31+6 0:6:c.out
2127 `)
2128                         }
2129                 }
2130         }
2131 }
2132
2133 func (s *TestSuite) TestStdinJsonMountPoint(c *C) {
2134         helperRecord := `{
2135                 "command": ["/bin/sh", "-c", "echo $FROBIZ"],
2136                 "container_image": "` + arvadostest.DockerImage112PDH + `",
2137                 "cwd": "/bin",
2138                 "environment": {"FROBIZ": "bilbo"},
2139                 "mounts": {
2140         "/tmp": {"kind": "tmp"},
2141         "stdin": {"kind": "json", "content": "foo"},
2142         "stdout": {"kind": "file", "path": "/tmp/a/b/c.out"}
2143     },
2144                 "output_path": "/tmp",
2145                 "priority": 1,
2146                 "runtime_constraints": {},
2147                 "state": "Locked"
2148         }`
2149
2150         api, _, _ := s.fullRunHelper(c, helperRecord, nil, func() int {
2151                 fmt.Fprintln(s.executor.created.Stdout, s.executor.created.Env["FROBIZ"])
2152                 return 0
2153         })
2154
2155         c.Check(api.CalledWith("container.exit_code", 0), NotNil)
2156         c.Check(api.CalledWith("container.state", "Complete"), NotNil)
2157         for _, v := range api.Content {
2158                 if v["collection"] != nil {
2159                         collection := v["collection"].(arvadosclient.Dict)
2160                         if strings.Index(collection["name"].(string), "output") == 0 {
2161                                 manifest := collection["manifest_text"].(string)
2162                                 c.Check(manifest, Equals, `./a/b 307372fa8fd5c146b22ae7a45b49bc31+6 0:6:c.out
2163 `)
2164                         }
2165                 }
2166         }
2167 }
2168
2169 func (s *TestSuite) TestStderrMount(c *C) {
2170         api, cr, _ := s.fullRunHelper(c, `{
2171     "command": ["/bin/sh", "-c", "echo hello;exit 1"],
2172     "container_image": "`+arvadostest.DockerImage112PDH+`",
2173     "cwd": ".",
2174     "environment": {},
2175     "mounts": {"/tmp": {"kind": "tmp"},
2176                "stdout": {"kind": "file", "path": "/tmp/a/out.txt"},
2177                "stderr": {"kind": "file", "path": "/tmp/b/err.txt"}},
2178     "output_path": "/tmp",
2179     "priority": 1,
2180     "runtime_constraints": {},
2181     "state": "Locked"
2182 }`, nil, func() int {
2183                 fmt.Fprintln(s.executor.created.Stdout, "hello")
2184                 fmt.Fprintln(s.executor.created.Stderr, "oops")
2185                 return 1
2186         })
2187
2188         final := api.CalledWith("container.state", "Complete")
2189         c.Assert(final, NotNil)
2190         c.Check(final["container"].(arvadosclient.Dict)["exit_code"], Equals, 1)
2191         c.Check(final["container"].(arvadosclient.Dict)["log"], NotNil)
2192
2193         c.Check(cr.ContainerArvClient.(*ArvTestClient).CalledWith("collection.manifest_text", "./a b1946ac92492d2347c6235b4d2611184+6 0:6:out.txt\n./b 38af5c54926b620264ab1501150cf189+5 0:5:err.txt\n"), NotNil)
2194 }
2195
2196 func (s *TestSuite) TestNumberRoundTrip(c *C) {
2197         s.api.callraw = true
2198         err := s.runner.fetchContainerRecord()
2199         c.Assert(err, IsNil)
2200         jsondata, err := json.Marshal(s.runner.Container.Mounts["/json"].Content)
2201         c.Logf("%#v", s.runner.Container)
2202         c.Check(err, IsNil)
2203         c.Check(string(jsondata), Equals, `{"number":123456789123456789}`)
2204 }
2205
2206 func (s *TestSuite) TestFullBrokenDocker(c *C) {
2207         nextState := ""
2208         for _, setup := range []func(){
2209                 func() {
2210                         c.Log("// waitErr = ocl runtime error")
2211                         s.executor.waitErr = errors.New(`Error response from daemon: oci runtime error: container_linux.go:247: starting container process caused "process_linux.go:359: container init caused \"rootfs_linux.go:54: mounting \\\"/tmp/keep453790790/by_id/99999999999999999999999999999999+99999/myGenome\\\" to rootfs \\\"/tmp/docker/overlay2/9999999999999999999999999999999999999999999999999999999999999999/merged\\\" at \\\"/tmp/docker/overlay2/9999999999999999999999999999999999999999999999999999999999999999/merged/keep/99999999999999999999999999999999+99999/myGenome\\\" caused \\\"no such file or directory\\\"\""`)
2212                         nextState = "Cancelled"
2213                 },
2214                 func() {
2215                         c.Log("// loadErr = cannot connect")
2216                         s.executor.loadErr = errors.New("Cannot connect to the Docker daemon at unix:///var/run/docker.sock. Is the docker daemon running?")
2217                         s.runner.brokenNodeHook = c.MkDir() + "/broken-node-hook"
2218                         err := ioutil.WriteFile(s.runner.brokenNodeHook, []byte("#!/bin/sh\nexec echo killme\n"), 0700)
2219                         c.Assert(err, IsNil)
2220                         nextState = "Queued"
2221                 },
2222         } {
2223                 s.SetUpTest(c)
2224                 setup()
2225                 s.fullRunHelper(c, `{
2226     "command": ["echo", "hello world"],
2227     "container_image": "`+arvadostest.DockerImage112PDH+`",
2228     "cwd": ".",
2229     "environment": {},
2230     "mounts": {"/tmp": {"kind": "tmp"} },
2231     "output_path": "/tmp",
2232     "priority": 1,
2233     "runtime_constraints": {},
2234     "state": "Locked"
2235 }`, nil, func() int { return 0 })
2236                 c.Check(s.api.CalledWith("container.state", nextState), NotNil)
2237                 c.Check(s.api.Logs["crunch-run"].String(), Matches, "(?ms).*unable to run containers.*")
2238                 if s.runner.brokenNodeHook != "" {
2239                         c.Check(s.api.Logs["crunch-run"].String(), Matches, "(?ms).*Running broken node hook.*")
2240                         c.Check(s.api.Logs["crunch-run"].String(), Matches, "(?ms).*killme.*")
2241                         c.Check(s.api.Logs["crunch-run"].String(), Not(Matches), "(?ms).*Writing /var/lock/crunch-run-broken to mark node as broken.*")
2242                 } else {
2243                         c.Check(s.api.Logs["crunch-run"].String(), Matches, "(?ms).*Writing /var/lock/crunch-run-broken to mark node as broken.*")
2244                 }
2245         }
2246 }
2247
2248 func (s *TestSuite) TestBadCommand(c *C) {
2249         for _, startError := range []string{
2250                 `panic: standard_init_linux.go:175: exec user process caused "no such file or directory"`,
2251                 `Error response from daemon: Cannot start container 41f26cbc43bcc1280f4323efb1830a394ba8660c9d1c2b564ba42bf7f7694845: [8] System error: no such file or directory`,
2252                 `Error response from daemon: Cannot start container 58099cd76c834f3dc2a4fb76c8028f049ae6d4fdf0ec373e1f2cfea030670c2d: [8] System error: exec: "foobar": executable file not found in $PATH`,
2253         } {
2254                 s.SetUpTest(c)
2255                 s.executor.startErr = errors.New(startError)
2256                 s.fullRunHelper(c, `{
2257     "command": ["echo", "hello world"],
2258     "container_image": "`+arvadostest.DockerImage112PDH+`",
2259     "cwd": ".",
2260     "environment": {},
2261     "mounts": {"/tmp": {"kind": "tmp"} },
2262     "output_path": "/tmp",
2263     "priority": 1,
2264     "runtime_constraints": {},
2265     "state": "Locked"
2266 }`, nil, func() int { return 0 })
2267                 c.Check(s.api.CalledWith("container.state", "Cancelled"), NotNil)
2268                 c.Check(s.api.Logs["crunch-run"].String(), Matches, "(?ms).*Possible causes:.*is missing.*")
2269         }
2270 }
2271
2272 func (s *TestSuite) TestSecretTextMountPoint(c *C) {
2273         helperRecord := `{
2274                 "command": ["true"],
2275                 "container_image": "` + arvadostest.DockerImage112PDH + `",
2276                 "cwd": "/bin",
2277                 "mounts": {
2278                     "/tmp": {"kind": "tmp"},
2279                     "/tmp/secret.conf": {"kind": "text", "content": "mypassword"}
2280                 },
2281                 "secret_mounts": {
2282                 },
2283                 "output_path": "/tmp",
2284                 "priority": 1,
2285                 "runtime_constraints": {},
2286                 "state": "Locked"
2287         }`
2288
2289         s.fullRunHelper(c, helperRecord, nil, func() int {
2290                 content, err := ioutil.ReadFile(s.runner.HostOutputDir + "/secret.conf")
2291                 c.Check(err, IsNil)
2292                 c.Check(string(content), Equals, "mypassword")
2293                 return 0
2294         })
2295
2296         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
2297         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
2298         c.Check(s.runner.ContainerArvClient.(*ArvTestClient).CalledWith("collection.manifest_text", ". 34819d7beeabb9260a5c854bc85b3e44+10 0:10:secret.conf\n"), NotNil)
2299         c.Check(s.runner.ContainerArvClient.(*ArvTestClient).CalledWith("collection.manifest_text", ""), IsNil)
2300
2301         // under secret mounts, not captured in output
2302         helperRecord = `{
2303                 "command": ["true"],
2304                 "container_image": "` + arvadostest.DockerImage112PDH + `",
2305                 "cwd": "/bin",
2306                 "mounts": {
2307                     "/tmp": {"kind": "tmp"}
2308                 },
2309                 "secret_mounts": {
2310                     "/tmp/secret.conf": {"kind": "text", "content": "mypassword"}
2311                 },
2312                 "output_path": "/tmp",
2313                 "priority": 1,
2314                 "runtime_constraints": {},
2315                 "state": "Locked"
2316         }`
2317
2318         s.SetUpTest(c)
2319         s.fullRunHelper(c, helperRecord, nil, func() int {
2320                 content, err := ioutil.ReadFile(s.runner.HostOutputDir + "/secret.conf")
2321                 c.Check(err, IsNil)
2322                 c.Check(string(content), Equals, "mypassword")
2323                 return 0
2324         })
2325
2326         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
2327         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
2328         c.Check(s.runner.ContainerArvClient.(*ArvTestClient).CalledWith("collection.manifest_text", ". 34819d7beeabb9260a5c854bc85b3e44+10 0:10:secret.conf\n"), IsNil)
2329         c.Check(s.runner.ContainerArvClient.(*ArvTestClient).CalledWith("collection.manifest_text", ""), NotNil)
2330
2331         // under secret mounts, output dir is a collection, not captured in output
2332         helperRecord = `{
2333                 "command": ["true"],
2334                 "container_image": "` + arvadostest.DockerImage112PDH + `",
2335                 "cwd": "/bin",
2336                 "mounts": {
2337                     "/tmp": {"kind": "collection", "writable": true}
2338                 },
2339                 "secret_mounts": {
2340                     "/tmp/secret.conf": {"kind": "text", "content": "mypassword"}
2341                 },
2342                 "output_path": "/tmp",
2343                 "priority": 1,
2344                 "runtime_constraints": {},
2345                 "state": "Locked"
2346         }`
2347
2348         s.SetUpTest(c)
2349         _, _, realtemp := s.fullRunHelper(c, helperRecord, nil, func() int {
2350                 // secret.conf should be provisioned as a separate
2351                 // bind mount, i.e., it should not appear in the
2352                 // (fake) fuse filesystem as viewed from the host.
2353                 content, err := ioutil.ReadFile(s.runner.HostOutputDir + "/secret.conf")
2354                 if !c.Check(errors.Is(err, os.ErrNotExist), Equals, true) {
2355                         c.Logf("secret.conf: content %q, err %#v", content, err)
2356                 }
2357                 err = ioutil.WriteFile(s.runner.HostOutputDir+"/.arvados#collection", []byte(`{"manifest_text":". acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:foo.txt\n"}`), 0700)
2358                 c.Check(err, IsNil)
2359                 return 0
2360         })
2361
2362         content, err := ioutil.ReadFile(realtemp + "/text1/mountdata.text")
2363         c.Check(err, IsNil)
2364         c.Check(string(content), Equals, "mypassword")
2365         c.Check(s.executor.created.BindMounts["/tmp/secret.conf"], DeepEquals, bindmount{realtemp + "/text1/mountdata.text", true})
2366         c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
2367         c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
2368         c.Check(s.runner.ContainerArvClient.(*ArvTestClient).CalledWith("collection.manifest_text", ". acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:foo.txt\n"), NotNil)
2369 }
2370
2371 func (s *TestSuite) TestCalculateCost(c *C) {
2372         defer func(s string) { lockdir = s }(lockdir)
2373         lockdir = c.MkDir()
2374         now := time.Now()
2375         cr := s.runner
2376         cr.costStartTime = now.Add(-time.Hour)
2377         var logbuf bytes.Buffer
2378         cr.CrunchLog.Immediate = log.New(&logbuf, "", 0)
2379
2380         // if there's no InstanceType env var, cost is calculated as 0
2381         os.Unsetenv("InstanceType")
2382         cost := cr.calculateCost(now)
2383         c.Check(cost, Equals, 0.0)
2384
2385         // with InstanceType env var and loadPrices() hasn't run (or
2386         // hasn't found any data), cost is calculated based on
2387         // InstanceType env var
2388         os.Setenv("InstanceType", `{"Price":1.2}`)
2389         defer os.Unsetenv("InstanceType")
2390         cost = cr.calculateCost(now)
2391         c.Check(cost, Equals, 1.2)
2392
2393         // first update tells us the spot price was $1/h until 30
2394         // minutes ago when it increased to $2/h
2395         j, err := json.Marshal([]cloud.InstancePrice{
2396                 {StartTime: now.Add(-4 * time.Hour), Price: 1.0},
2397                 {StartTime: now.Add(-time.Hour / 2), Price: 2.0},
2398         })
2399         c.Assert(err, IsNil)
2400         os.WriteFile(lockdir+"/"+pricesfile, j, 0777)
2401         cr.loadPrices()
2402         cost = cr.calculateCost(now)
2403         c.Check(cost, Equals, 1.5)
2404
2405         // next update (via --list + SIGUSR2) tells us the spot price
2406         // increased to $3/h 15 minutes ago
2407         j, err = json.Marshal([]cloud.InstancePrice{
2408                 {StartTime: now.Add(-time.Hour / 3), Price: 2.0}, // dup of -time.Hour/2 price
2409                 {StartTime: now.Add(-time.Hour / 4), Price: 3.0},
2410         })
2411         c.Assert(err, IsNil)
2412         os.WriteFile(lockdir+"/"+pricesfile, j, 0777)
2413         cr.loadPrices()
2414         cost = cr.calculateCost(now)
2415         c.Check(cost, Equals, 1.0/2+2.0/4+3.0/4)
2416
2417         cost = cr.calculateCost(now.Add(-time.Hour / 2))
2418         c.Check(cost, Equals, 0.5)
2419
2420         c.Logf("%s", logbuf.String())
2421         c.Check(logbuf.String(), Matches, `(?ms).*Instance price changed to 1\.00 at 20.* changed to 2\.00 .* changed to 3\.00 .*`)
2422         c.Check(logbuf.String(), Not(Matches), `(?ms).*changed to 2\.00 .* changed to 2\.00 .*`)
2423 }
2424
2425 func (s *TestSuite) TestSIGUSR2CostUpdate(c *C) {
2426         pid := os.Getpid()
2427         now := time.Now()
2428         pricesJSON, err := json.Marshal([]cloud.InstancePrice{
2429                 {StartTime: now.Add(-4 * time.Hour), Price: 2.4},
2430                 {StartTime: now.Add(-2 * time.Hour), Price: 2.6},
2431         })
2432         c.Assert(err, IsNil)
2433
2434         os.Setenv("InstanceType", `{"Price":2.2}`)
2435         defer os.Unsetenv("InstanceType")
2436         defer func(s string) { lockdir = s }(lockdir)
2437         lockdir = c.MkDir()
2438
2439         // We can't use s.api.CalledWith because timing differences will yield
2440         // different cost values across runs. getCostUpdate iterates over API
2441         // calls until it finds one that sets the cost, then writes that value
2442         // to the next index of costUpdates.
2443         deadline := now.Add(time.Second)
2444         costUpdates := make([]float64, 2)
2445         costIndex := 0
2446         apiIndex := 0
2447         getCostUpdate := func() {
2448                 for ; time.Now().Before(deadline); time.Sleep(time.Second / 10) {
2449                         for apiIndex < len(s.api.Content) {
2450                                 update := s.api.Content[apiIndex]
2451                                 apiIndex++
2452                                 var ok bool
2453                                 var cost float64
2454                                 if update, ok = update["container"].(arvadosclient.Dict); !ok {
2455                                         continue
2456                                 }
2457                                 if cost, ok = update["cost"].(float64); !ok {
2458                                         continue
2459                                 }
2460                                 c.Logf("API call #%d updates cost to %v", apiIndex-1, cost)
2461                                 costUpdates[costIndex] = cost
2462                                 costIndex++
2463                                 return
2464                         }
2465                 }
2466         }
2467
2468         s.fullRunHelper(c, `{
2469                 "command": ["true"],
2470                 "container_image": "`+arvadostest.DockerImage112PDH+`",
2471                 "cwd": ".",
2472                 "environment": {},
2473                 "mounts": {"/tmp": {"kind": "tmp"} },
2474                 "output_path": "/tmp",
2475                 "priority": 1,
2476                 "runtime_constraints": {},
2477                 "state": "Locked",
2478                 "uuid": "zzzzz-dz642-20230320101530a"
2479         }`, nil, func() int {
2480                 s.runner.costStartTime = now.Add(-3 * time.Hour)
2481                 err := syscall.Kill(pid, syscall.SIGUSR2)
2482                 c.Check(err, IsNil, Commentf("error sending first SIGUSR2 to runner"))
2483                 getCostUpdate()
2484
2485                 err = os.WriteFile(path.Join(lockdir, pricesfile), pricesJSON, 0o700)
2486                 c.Check(err, IsNil, Commentf("error writing JSON prices file"))
2487                 err = syscall.Kill(pid, syscall.SIGUSR2)
2488                 c.Check(err, IsNil, Commentf("error sending second SIGUSR2 to runner"))
2489                 getCostUpdate()
2490
2491                 return 0
2492         })
2493         // Comparing with format strings makes it easy to ignore minor variations
2494         // in cost across runs while keeping diagnostics pretty.
2495         c.Check(fmt.Sprintf("%.3f", costUpdates[0]), Equals, "6.600")
2496         c.Check(fmt.Sprintf("%.3f", costUpdates[1]), Equals, "7.600")
2497 }
2498
// FakeProcess is a stand-in for a process whose command line is
// whatever was stored in cmdLine.
type FakeProcess struct {
	cmdLine []string
}

// CmdlineSlice returns the stored command line. It never fails.
func (p FakeProcess) CmdlineSlice() ([]string, error) {
	args := p.cmdLine
	return args, nil
}