From 3b9c4641a985a53347696b7a77bcde28a92d6e79 Mon Sep 17 00:00:00 2001 From: Tom Clegg Date: Tue, 18 Jan 2022 10:13:50 -0500 Subject: [PATCH] 18513: Decipher status code in crunch-run.txt log. Container exited with status code 137 (signal 9, SIGKILL) Previously, docker mode just said "exited with code: 137" and singularity mode didn't mention it at all. Arvados-DCO-1.1-Signed-off-by: Tom Clegg --- lib/crunchrun/crunchrun.go | 15 +++++++++++++++ lib/crunchrun/crunchrun_test.go | 6 ++++-- lib/crunchrun/docker.go | 1 - lib/crunchrun/executor_test.go | 16 ++++++++++++++++ 4 files changed, 35 insertions(+), 3 deletions(-) diff --git a/lib/crunchrun/crunchrun.go b/lib/crunchrun/crunchrun.go index fb2200a56b..af0d49c80e 100644 --- a/lib/crunchrun/crunchrun.go +++ b/lib/crunchrun/crunchrun.go @@ -36,6 +36,7 @@ import ( "git.arvados.org/arvados.git/sdk/go/arvadosclient" "git.arvados.org/arvados.git/sdk/go/keepclient" "git.arvados.org/arvados.git/sdk/go/manifest" + "golang.org/x/sys/unix" ) type command struct{} @@ -1056,6 +1057,20 @@ func (runner *ContainerRunner) WaitFinish() error { } runner.ExitCode = &exitcode + extra := "" + if exitcode&0x80 != 0 { + // Convert raw exit status (0x80 + signal number) to a + // string to log after the code, like " (signal 101)" + // or " (signal 9, killed)" + sig := syscall.WaitStatus(exitcode).Signal() + if name := unix.SignalName(sig); name != "" { + extra = fmt.Sprintf(" (signal %d, %s)", sig, name) + } else { + extra = fmt.Sprintf(" (signal %d)", sig) + } + } + runner.CrunchLog.Printf("Container exited with status code %d%s", exitcode, extra) + var returnErr error if err = runner.executorStdin.Close(); err != nil { err = fmt.Errorf("error closing container stdin: %s", err) diff --git a/lib/crunchrun/crunchrun_test.go b/lib/crunchrun/crunchrun_test.go index 97f91103a0..2ec35f0559 100644 --- a/lib/crunchrun/crunchrun_test.go +++ b/lib/crunchrun/crunchrun_test.go @@ -1567,6 +1567,7 @@ func (s *TestSuite) TestFullRunWithAPI(c *C) { }) c.Check(s.api.CalledWith("container.exit_code", 3), NotNil) c.Check(s.api.CalledWith("container.state", "Complete"), NotNil) + c.Check(s.api.Logs["crunch-run"].String(), Matches, `(?ms).*status code 3\n.*`) } func (s *TestSuite) TestFullRunSetOutput(c *C) { @@ -1599,7 +1600,7 @@ func (s *TestSuite) TestArvMountRuntimeStatusWarning(c *C) { } s.executor.runFunc = func() { time.Sleep(time.Second) - s.executor.exit <- 0 + s.executor.exit <- 137 } record := `{ "command": ["sleep", "1"], @@ -1616,10 +1617,11 @@ func (s *TestSuite) TestArvMountRuntimeStatusWarning(c *C) { c.Assert(err, IsNil) err = s.runner.Run() c.Assert(err, IsNil) - c.Check(s.api.CalledWith("container.exit_code", 0), NotNil) + c.Check(s.api.CalledWith("container.exit_code", 137), NotNil) c.Check(s.api.CalledWith("container.runtime_status.warning", "arv-mount: Keep write error"), NotNil) c.Check(s.api.CalledWith("container.runtime_status.warningDetail", "Test: Keep write error: I am a teapot"), NotNil) c.Check(s.api.CalledWith("container.state", "Complete"), NotNil) + c.Check(s.api.Logs["crunch-run"].String(), Matches, `(?ms).*Container exited with status code 137 \(signal 9, SIGKILL\).*`) } func (s *TestSuite) TestStdoutWithExcludeFromOutputMountPointUnderOutputDir(c *C) { diff --git a/lib/crunchrun/docker.go b/lib/crunchrun/docker.go index 06e5b5b1ec..e62f2a39ba 100644 --- a/lib/crunchrun/docker.go +++ b/lib/crunchrun/docker.go @@ -215,7 +215,6 @@ func (e *dockerExecutor) Wait(ctx context.Context) (int, error) { for { select { case waitBody := <-waitOk: - e.logf("Container exited with code: %v", waitBody.StatusCode) // wait for stdout/stderr to complete <-e.doneIO return int(waitBody.StatusCode), nil diff --git a/lib/crunchrun/executor_test.go b/lib/crunchrun/executor_test.go index 08c7140add..99af0530ff 100644 --- a/lib/crunchrun/executor_test.go +++ b/lib/crunchrun/executor_test.go @@ -86,6 +86,22 @@ func (s *executorSuite) TestExecTrivialContainer(c *C) { c.Check(s.stderr.String(), Equals, "") } +func (s *executorSuite) TestExitStatus(c *C) { + s.spec.Command = []string{"false"} + s.checkRun(c, 1) +} + +func (s *executorSuite) TestSignalExitStatus(c *C) { + if _, isdocker := s.executor.(*dockerExecutor); isdocker { + // It's not quite this easy to make busybox kill + // itself in docker where it's pid 1. + c.Skip("kill -9 $$ doesn't work on busybox with pid=1 in docker") + return + } + s.spec.Command = []string{"sh", "-c", "kill -9 $$"} + s.checkRun(c, 0x80+9) +} + func (s *executorSuite) TestExecStop(c *C) { s.spec.Command = []string{"sh", "-c", "sleep 10; echo ok"} err := s.executor.Create(s.spec) -- 2.30.2