19094: Note docker/singularity/arv-mount versions in container log.
[arvados.git] / lib / crunchrun / crunchrun_test.go
index bb982cdee76c32cb9321ce88e8fa47fa0588f2f1..347703a95baeefd6a2023562aae3ebe19f6e986d 100644 (file)
@@ -22,6 +22,7 @@ import (
        "testing"
        "time"
 
+       "git.arvados.org/arvados.git/lib/cmd"
        "git.arvados.org/arvados.git/sdk/go/arvados"
        "git.arvados.org/arvados.git/sdk/go/arvadosclient"
        "git.arvados.org/arvados.git/sdk/go/arvadostest"
@@ -44,12 +45,12 @@ type TestSuite struct {
        runner                   *ContainerRunner
        executor                 *stubExecutor
        keepmount                string
+       keepmountTmp             []string
        testDispatcherKeepClient KeepTestClient
        testContainerKeepClient  KeepTestClient
 }
 
 func (s *TestSuite) SetUpTest(c *C) {
-       *brokenNodeHook = ""
        s.client = arvados.NewClientFromEnv()
        s.executor = &stubExecutor{}
        var err error
@@ -62,10 +63,20 @@ func (s *TestSuite) SetUpTest(c *C) {
        }
        s.runner.RunArvMount = func(cmd []string, tok string) (*exec.Cmd, error) {
                s.runner.ArvMountPoint = s.keepmount
+               for i, opt := range cmd {
+                       if opt == "--mount-tmp" {
+                               err := os.Mkdir(s.keepmount+"/"+cmd[i+1], 0700)
+                               if err != nil {
+                                       return nil, err
+                               }
+                               s.keepmountTmp = append(s.keepmountTmp, cmd[i+1])
+                       }
+               }
                return nil, nil
        }
        s.keepmount = c.MkDir()
        err = os.Mkdir(s.keepmount+"/by_id", 0755)
+       s.keepmountTmp = nil
        c.Assert(err, IsNil)
        err = os.Mkdir(s.keepmount+"/by_id/"+arvadostest.DockerImage112PDH, 0755)
        c.Assert(err, IsNil)
@@ -117,6 +128,8 @@ func (e *stubExecutor) LoadImage(imageId string, tarball string, container arvad
        e.loaded = tarball
        return e.loadErr
 }
+func (e *stubExecutor) Runtime() string                 { return "stub" }
+func (e *stubExecutor) Version() string                 { return "stub " + cmd.Version.String() }
 func (e *stubExecutor) Create(spec containerSpec) error { e.created = spec; return e.createErr }
 func (e *stubExecutor) Start() error                    { e.exit = make(chan int, 1); go e.runFunc(); return e.startErr }
 func (e *stubExecutor) CgroupID() string                { return "cgroupid" }
@@ -363,6 +376,14 @@ func (fw FileWrapper) Sync() error {
        return errors.New("not implemented")
 }
 
+func (fw FileWrapper) Snapshot() (*arvados.Subtree, error) {
+       return nil, errors.New("not implemented")
+}
+
+func (fw FileWrapper) Splice(*arvados.Subtree) error {
+       return errors.New("not implemented")
+}
+
 func (client *KeepTestClient) ManifestFileReader(m manifest.Manifest, filename string) (arvados.File, error) {
        if filename == hwImageID+".tar" {
                rdr := ioutil.NopCloser(&bytes.Buffer{})
@@ -637,8 +658,6 @@ func (s *TestSuite) fullRunHelper(c *C, record string, extraMounts []string, exi
 
        s.runner.statInterval = 100 * time.Millisecond
        s.runner.containerWatchdogInterval = time.Second
-       am := &ArvMountCmdLine{}
-       s.runner.RunArvMount = am.ArvMountTest
 
        realTemp := c.MkDir()
        tempcount := 0
@@ -714,6 +733,7 @@ func (s *TestSuite) TestFullRunHello(c *C) {
                c.Check(s.executor.created.RAM, Equals, int64(1000000))
                c.Check(s.executor.created.NetworkMode, Equals, "default")
                c.Check(s.executor.created.EnableNetwork, Equals, false)
+               c.Check(s.executor.created.CUDADeviceCount, Equals, 0)
                fmt.Fprintln(s.executor.created.Stdout, "hello world")
        })
 
@@ -849,6 +869,28 @@ func (s *TestSuite) TestNodeInfoLog(c *C) {
        c.Check(json, Matches, `(?ms).*Disk INodes.*`)
 }
 
+func (s *TestSuite) TestLogVersionAndRuntime(c *C) {
+       s.fullRunHelper(c, `{
+               "command": ["sleep", "1"],
+               "container_image": "`+arvadostest.DockerImage112PDH+`",
+               "cwd": ".",
+               "environment": {},
+               "mounts": {"/tmp": {"kind": "tmp"} },
+               "output_path": "/tmp",
+               "priority": 1,
+               "runtime_constraints": {},
+               "state": "Locked"
+       }`, nil, 0,
+               func() {
+               })
+
+       c.Assert(s.api.Logs["crunch-run"], NotNil)
+       c.Check(s.api.Logs["crunch-run"].String(), Matches, `(?ms).*crunch-run \S+ \(go\S+\) start.*`)
+       c.Check(s.api.Logs["crunch-run"].String(), Matches, `(?ms).*crunch-run process has uid=\d+\(.+\) gid=\d+\(.+\) groups=\d+\(.+\)(,\d+\(.+\))*\n.*`)
+       c.Check(s.api.Logs["crunch-run"].String(), Matches, `(?ms).*Executing container: zzzzz-zzzzz-zzzzzzzzzzzzzzz.*`)
+       c.Check(s.api.Logs["crunch-run"].String(), Matches, `(?ms).*Using container runtime: stub.*`)
+}
+
 func (s *TestSuite) TestContainerRecordLog(c *C) {
        s.fullRunHelper(c, `{
                "command": ["sleep", "1"],
@@ -961,6 +1003,42 @@ func (s *TestSuite) TestFullRunSetOutputStorageClasses(c *C) {
        c.Check(s.testContainerKeepClient.StorageClasses, DeepEquals, []string{"foo", "bar"})
 }
 
+func (s *TestSuite) TestEnableCUDADeviceCount(c *C) {
+       s.fullRunHelper(c, `{
+    "command": ["pwd"],
+    "container_image": "`+arvadostest.DockerImage112PDH+`",
+    "cwd": "/bin",
+    "environment": {},
+    "mounts": {"/tmp": {"kind": "tmp"} },
+    "output_path": "/tmp",
+    "priority": 1,
+    "runtime_constraints": {"cuda": {"device_count": 2}},
+    "state": "Locked",
+    "output_storage_classes": ["foo", "bar"]
+}`, nil, 0, func() {
+               fmt.Fprintln(s.executor.created.Stdout, "ok")
+       })
+       c.Check(s.executor.created.CUDADeviceCount, Equals, 2)
+}
+
+func (s *TestSuite) TestEnableCUDAHardwareCapability(c *C) {
+       s.fullRunHelper(c, `{
+    "command": ["pwd"],
+    "container_image": "`+arvadostest.DockerImage112PDH+`",
+    "cwd": "/bin",
+    "environment": {},
+    "mounts": {"/tmp": {"kind": "tmp"} },
+    "output_path": "/tmp",
+    "priority": 1,
+    "runtime_constraints": {"cuda": {"hardware_capability": "foo"}},
+    "state": "Locked",
+    "output_storage_classes": ["foo", "bar"]
+}`, nil, 0, func() {
+               fmt.Fprintln(s.executor.created.Stdout, "ok")
+       })
+       c.Check(s.executor.created.CUDADeviceCount, Equals, 0)
+}
+
 func (s *TestSuite) TestStopOnSignal(c *C) {
        s.executor.runFunc = func() {
                s.executor.created.Stdout.Write([]byte("foo\n"))
@@ -1103,9 +1181,9 @@ func (s *TestSuite) TestSetupMounts(c *C) {
                cr.statInterval = 5 * time.Second
                bindmounts, err := cr.SetupMounts()
                c.Check(err, IsNil)
-               c.Check(am.Cmd, DeepEquals, []string{"--foreground", "--allow-other",
+               c.Check(am.Cmd, DeepEquals, []string{"arv-mount", "--foreground",
                        "--read-write", "--storage-classes", "default", "--crunchstat-interval=5",
-                       "--mount-by-pdh", "by_id", "--mount-by-id", "by_uuid", realTemp + "/keep1"})
+                       "--mount-by-pdh", "by_id", "--disable-event-listening", "--mount-by-id", "by_uuid", realTemp + "/keep1"})
                c.Check(bindmounts, DeepEquals, map[string]bindmount{"/tmp": {realTemp + "/tmp2", false}})
                os.RemoveAll(cr.ArvMountPoint)
                cr.CleanupDirs()
@@ -1123,9 +1201,9 @@ func (s *TestSuite) TestSetupMounts(c *C) {
 
                bindmounts, err := cr.SetupMounts()
                c.Check(err, IsNil)
-               c.Check(am.Cmd, DeepEquals, []string{"--foreground", "--allow-other",
+               c.Check(am.Cmd, DeepEquals, []string{"arv-mount", "--foreground",
                        "--read-write", "--storage-classes", "foo,bar", "--crunchstat-interval=5",
-                       "--mount-by-pdh", "by_id", "--mount-by-id", "by_uuid", realTemp + "/keep1"})
+                       "--mount-by-pdh", "by_id", "--disable-event-listening", "--mount-by-id", "by_uuid", realTemp + "/keep1"})
                c.Check(bindmounts, DeepEquals, map[string]bindmount{"/out": {realTemp + "/tmp2", false}, "/tmp": {realTemp + "/tmp3", false}})
                os.RemoveAll(cr.ArvMountPoint)
                cr.CleanupDirs()
@@ -1143,9 +1221,9 @@ func (s *TestSuite) TestSetupMounts(c *C) {
 
                bindmounts, err := cr.SetupMounts()
                c.Check(err, IsNil)
-               c.Check(am.Cmd, DeepEquals, []string{"--foreground", "--allow-other",
+               c.Check(am.Cmd, DeepEquals, []string{"arv-mount", "--foreground",
                        "--read-write", "--storage-classes", "default", "--crunchstat-interval=5",
-                       "--mount-by-pdh", "by_id", "--mount-by-id", "by_uuid", realTemp + "/keep1"})
+                       "--mount-by-pdh", "by_id", "--disable-event-listening", "--mount-by-id", "by_uuid", realTemp + "/keep1"})
                c.Check(bindmounts, DeepEquals, map[string]bindmount{"/tmp": {realTemp + "/tmp2", false}, "/etc/arvados/ca-certificates.crt": {stubCertPath, true}})
                os.RemoveAll(cr.ArvMountPoint)
                cr.CleanupDirs()
@@ -1166,9 +1244,9 @@ func (s *TestSuite) TestSetupMounts(c *C) {
 
                bindmounts, err := cr.SetupMounts()
                c.Check(err, IsNil)
-               c.Check(am.Cmd, DeepEquals, []string{"--foreground", "--allow-other",
+               c.Check(am.Cmd, DeepEquals, []string{"arv-mount", "--foreground",
                        "--read-write", "--storage-classes", "default", "--crunchstat-interval=5",
-                       "--mount-tmp", "tmp0", "--mount-by-pdh", "by_id", "--mount-by-id", "by_uuid", realTemp + "/keep1"})
+                       "--mount-tmp", "tmp0", "--mount-by-pdh", "by_id", "--disable-event-listening", "--mount-by-id", "by_uuid", realTemp + "/keep1"})
                c.Check(bindmounts, DeepEquals, map[string]bindmount{"/keeptmp": {realTemp + "/keep1/tmp0", false}})
                os.RemoveAll(cr.ArvMountPoint)
                cr.CleanupDirs()
@@ -1189,9 +1267,9 @@ func (s *TestSuite) TestSetupMounts(c *C) {
 
                bindmounts, err := cr.SetupMounts()
                c.Check(err, IsNil)
-               c.Check(am.Cmd, DeepEquals, []string{"--foreground", "--allow-other",
+               c.Check(am.Cmd, DeepEquals, []string{"arv-mount", "--foreground",
                        "--read-write", "--storage-classes", "default", "--crunchstat-interval=5",
-                       "--mount-tmp", "tmp0", "--mount-by-pdh", "by_id", "--mount-by-id", "by_uuid", realTemp + "/keep1"})
+                       "--mount-tmp", "tmp0", "--mount-by-pdh", "by_id", "--disable-event-listening", "--mount-by-id", "by_uuid", realTemp + "/keep1"})
                c.Check(bindmounts, DeepEquals, map[string]bindmount{
                        "/keepinp": {realTemp + "/keep1/by_id/59389a8f9ee9d399be35462a0f92541c+53", true},
                        "/keepout": {realTemp + "/keep1/tmp0", false},
@@ -1216,9 +1294,9 @@ func (s *TestSuite) TestSetupMounts(c *C) {
 
                bindmounts, err := cr.SetupMounts()
                c.Check(err, IsNil)
-               c.Check(am.Cmd, DeepEquals, []string{"--foreground", "--allow-other",
+               c.Check(am.Cmd, DeepEquals, []string{"arv-mount", "--foreground",
                        "--read-write", "--storage-classes", "default", "--crunchstat-interval=5",
-                       "--file-cache", "512", "--mount-tmp", "tmp0", "--mount-by-pdh", "by_id", "--mount-by-id", "by_uuid", realTemp + "/keep1"})
+                       "--file-cache", "512", "--mount-tmp", "tmp0", "--mount-by-pdh", "by_id", "--disable-event-listening", "--mount-by-id", "by_uuid", realTemp + "/keep1"})
                c.Check(bindmounts, DeepEquals, map[string]bindmount{
                        "/keepinp": {realTemp + "/keep1/by_id/59389a8f9ee9d399be35462a0f92541c+53", true},
                        "/keepout": {realTemp + "/keep1/tmp0", false},
@@ -1299,9 +1377,9 @@ func (s *TestSuite) TestSetupMounts(c *C) {
 
                bindmounts, err := cr.SetupMounts()
                c.Check(err, IsNil)
-               c.Check(am.Cmd, DeepEquals, []string{"--foreground", "--allow-other",
+               c.Check(am.Cmd, DeepEquals, []string{"arv-mount", "--foreground",
                        "--read-write", "--storage-classes", "default", "--crunchstat-interval=5",
-                       "--file-cache", "512", "--mount-tmp", "tmp0", "--mount-by-pdh", "by_id", "--mount-by-id", "by_uuid", realTemp + "/keep1"})
+                       "--file-cache", "512", "--mount-tmp", "tmp0", "--mount-by-pdh", "by_id", "--disable-event-listening", "--mount-by-id", "by_uuid", realTemp + "/keep1"})
                c.Check(bindmounts, DeepEquals, map[string]bindmount{
                        "/tmp":     {realTemp + "/tmp2", false},
                        "/tmp/foo": {realTemp + "/keep1/tmp0", true},
@@ -1509,6 +1587,7 @@ func (s *TestSuite) TestFullRunWithAPI(c *C) {
        })
        c.Check(s.api.CalledWith("container.exit_code", 3), NotNil)
        c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
+       c.Check(s.api.Logs["crunch-run"].String(), Matches, `(?ms).*status code 3\n.*`)
 }
 
 func (s *TestSuite) TestFullRunSetOutput(c *C) {
@@ -1533,6 +1612,38 @@ func (s *TestSuite) TestFullRunSetOutput(c *C) {
        c.Check(s.api.CalledWith("container.output", arvadostest.DockerImage112PDH), NotNil)
 }
 
+func (s *TestSuite) TestArvMountRuntimeStatusWarning(c *C) {
+       s.runner.RunArvMount = func([]string, string) (*exec.Cmd, error) {
+               os.Mkdir(s.runner.ArvMountPoint+"/by_id", 0666)
+               ioutil.WriteFile(s.runner.ArvMountPoint+"/by_id/README", nil, 0666)
+               return s.runner.ArvMountCmd([]string{"bash", "-c", "echo >&2 Test: Keep write error: I am a teapot; sleep 3"}, "")
+       }
+       s.executor.runFunc = func() {
+               time.Sleep(time.Second)
+               s.executor.exit <- 137
+       }
+       record := `{
+    "command": ["sleep", "1"],
+    "container_image": "` + arvadostest.DockerImage112PDH + `",
+    "cwd": "/bin",
+    "environment": {},
+    "mounts": {"/tmp": {"kind": "tmp"} },
+    "output_path": "/tmp",
+    "priority": 1,
+    "runtime_constraints": {"API": true},
+    "state": "Locked"
+}`
+       err := json.Unmarshal([]byte(record), &s.api.Container)
+       c.Assert(err, IsNil)
+       err = s.runner.Run()
+       c.Assert(err, IsNil)
+       c.Check(s.api.CalledWith("container.exit_code", 137), NotNil)
+       c.Check(s.api.CalledWith("container.runtime_status.warning", "arv-mount: Keep write error"), NotNil)
+       c.Check(s.api.CalledWith("container.runtime_status.warningDetail", "Test: Keep write error: I am a teapot"), NotNil)
+       c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
+       c.Check(s.api.Logs["crunch-run"].String(), Matches, `(?ms).*Container exited with status code 137 \(signal 9, SIGKILL\).*`)
+}
+
 func (s *TestSuite) TestStdoutWithExcludeFromOutputMountPointUnderOutputDir(c *C) {
        helperRecord := `{
                "command": ["/bin/sh", "-c", "echo $FROBIZ"],
@@ -1805,8 +1916,8 @@ func (s *TestSuite) TestFullBrokenDocker(c *C) {
                func() {
                        c.Log("// loadErr = cannot connect")
                        s.executor.loadErr = errors.New("Cannot connect to the Docker daemon at unix:///var/run/docker.sock. Is the docker daemon running?")
-                       *brokenNodeHook = c.MkDir() + "/broken-node-hook"
-                       err := ioutil.WriteFile(*brokenNodeHook, []byte("#!/bin/sh\nexec echo killme\n"), 0700)
+                       s.runner.brokenNodeHook = c.MkDir() + "/broken-node-hook"
+                       err := ioutil.WriteFile(s.runner.brokenNodeHook, []byte("#!/bin/sh\nexec echo killme\n"), 0700)
                        c.Assert(err, IsNil)
                        nextState = "Queued"
                },
@@ -1826,7 +1937,7 @@ func (s *TestSuite) TestFullBrokenDocker(c *C) {
 }`, nil, 0, func() {})
                c.Check(s.api.CalledWith("container.state", nextState), NotNil)
                c.Check(s.api.Logs["crunch-run"].String(), Matches, "(?ms).*unable to run containers.*")
-               if *brokenNodeHook != "" {
+               if s.runner.brokenNodeHook != "" {
                        c.Check(s.api.Logs["crunch-run"].String(), Matches, "(?ms).*Running broken node hook.*")
                        c.Check(s.api.Logs["crunch-run"].String(), Matches, "(?ms).*killme.*")
                        c.Check(s.api.Logs["crunch-run"].String(), Not(Matches), "(?ms).*Writing /var/lock/crunch-run-broken to mark node as broken.*")
@@ -1916,6 +2027,44 @@ func (s *TestSuite) TestSecretTextMountPoint(c *C) {
        c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
        c.Check(s.runner.ContainerArvClient.(*ArvTestClient).CalledWith("collection.manifest_text", ". 34819d7beeabb9260a5c854bc85b3e44+10 0:10:secret.conf\n"), IsNil)
        c.Check(s.runner.ContainerArvClient.(*ArvTestClient).CalledWith("collection.manifest_text", ""), NotNil)
+
+       // under secret mounts, output dir is a collection, not captured in output
+       helperRecord = `{
+               "command": ["true"],
+               "container_image": "` + arvadostest.DockerImage112PDH + `",
+               "cwd": "/bin",
+               "mounts": {
+                    "/tmp": {"kind": "collection", "writable": true}
+                },
+                "secret_mounts": {
+                    "/tmp/secret.conf": {"kind": "text", "content": "mypassword"}
+                },
+               "output_path": "/tmp",
+               "priority": 1,
+               "runtime_constraints": {},
+               "state": "Locked"
+       }`
+
+       s.SetUpTest(c)
+       _, _, realtemp := s.fullRunHelper(c, helperRecord, nil, 0, func() {
+               // secret.conf should be provisioned as a separate
+               // bind mount, i.e., it should not appear in the
+               // (fake) fuse filesystem as viewed from the host.
+               content, err := ioutil.ReadFile(s.runner.HostOutputDir + "/secret.conf")
+               if !c.Check(errors.Is(err, os.ErrNotExist), Equals, true) {
+                       c.Logf("secret.conf: content %q, err %#v", content, err)
+               }
+               err = ioutil.WriteFile(s.runner.HostOutputDir+"/.arvados#collection", []byte(`{"manifest_text":". acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:foo.txt\n"}`), 0700)
+               c.Check(err, IsNil)
+       })
+
+       content, err := ioutil.ReadFile(realtemp + "/text1/mountdata.text")
+       c.Check(err, IsNil)
+       c.Check(string(content), Equals, "mypassword")
+       c.Check(s.executor.created.BindMounts["/tmp/secret.conf"], DeepEquals, bindmount{realtemp + "/text1/mountdata.text", true})
+       c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
+       c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
+       c.Check(s.runner.ContainerArvClient.(*ArvTestClient).CalledWith("collection.manifest_text", ". acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:foo.txt\n"), NotNil)
 }
 
 type FakeProcess struct {