X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/1071f4f96fcb2084424c4b29dd5915880c650254..6d03fdac5674dc88eff821f5e8ac70642f39a895:/lib/crunchrun/crunchrun_test.go

Summary (commentary only; the patch payload starts at the "diff --git" line):

  This patch touches only lib/crunchrun/crunchrun_test.go.

  1. Imports: adds "context", "path" and "strconv", and drops
     "golang.org/x/net/context".
  2. Adds the logLineStart constant, a (?m) regexp prefix that accepts any
     number of preceding lines followed by a YYYY-MM-DDThh:mm:ss.fffZ
     timestamp and a space.
  3. Adds the testLogRSSThresholds helper and three callers
     (TestLogNoRSSThresholds, TestLogSomeRSSThresholds,
     TestLogAllRSSThresholds). The helper points the runner's cgroupRoot at
     testdata/fakestat, runs a container with the given "ram" constraint, and
     checks which "Container using over N% of memory" lines are present in
     the crunch-run log. The notExpected threshold is only checked when it is
     greater than the last expected threshold.
  4. Adds TestLogMaximaAfterRun, which checks the "Maximum ... usage" lines
     in the crunch-run log after a run against the same fake stats.
  5. Adds TestSIGUSR2CostUpdate, which sends SIGUSR2 to the test process
     twice (the second time after writing a JSON prices file into lockdir)
     and checks that the two cost updates found in the API calls format as
     "6.600" and "7.600".

  Line breaks and indentation were restored so that every hunk matches the
  line counts in its "@@" header. Indentation inside the raw-string JSON
  literals is a best-effort reconstruction and is not significant to the JSON.

diff --git a/lib/crunchrun/crunchrun_test.go b/lib/crunchrun/crunchrun_test.go
index 4d127f19c2..9c4fe20bc1 100644
--- a/lib/crunchrun/crunchrun_test.go
+++ b/lib/crunchrun/crunchrun_test.go
@@ -6,6 +6,7 @@ package crunchrun
 
 import (
 	"bytes"
+	"context"
 	"crypto/md5"
 	"encoding/json"
 	"errors"
@@ -18,8 +19,10 @@ import (
 	"net/http/httptest"
 	"os"
 	"os/exec"
+	"path"
 	"regexp"
 	"runtime/pprof"
+	"strconv"
 	"strings"
 	"sync"
 	"sync/atomic"
@@ -33,7 +36,6 @@ import (
 	"git.arvados.org/arvados.git/sdk/go/arvadosclient"
 	"git.arvados.org/arvados.git/sdk/go/arvadostest"
 	"git.arvados.org/arvados.git/sdk/go/manifest"
-	"golang.org/x/net/context"
 
 	. "gopkg.in/check.v1"
 )
@@ -43,6 +45,8 @@ func TestCrunchExec(t *testing.T) {
 	TestingT(t)
 }
 
+const logLineStart = `(?m)(.*\n)*\d{4}-\d\d-\d\dT\d\d:\d\d:\d\d\.\d+Z `
+
 var _ = Suite(&TestSuite{})
 
 type TestSuite struct {
@@ -985,6 +989,70 @@ func (s *TestSuite) TestLogVersionAndRuntime(c *C) {
 	c.Check(s.api.Logs["crunch-run"].String(), Matches, `(?ms).*Using container runtime: stub.*`)
 }
 
+func (s *TestSuite) testLogRSSThresholds(c *C, ram int, expected []int, notExpected int) {
+	s.runner.cgroupRoot = "testdata/fakestat"
+	s.fullRunHelper(c, `{
+		"command": ["true"],
+		"container_image": "`+arvadostest.DockerImage112PDH+`",
+		"cwd": ".",
+		"environment": {},
+		"mounts": {"/tmp": {"kind": "tmp"} },
+		"output_path": "/tmp",
+		"priority": 1,
+		"runtime_constraints": {"ram": `+strconv.Itoa(ram)+`},
+		"state": "Locked"
+	}`, nil, func() int { return 0 })
+	logs := s.api.Logs["crunch-run"].String()
+	pattern := logLineStart + `Container using over %d%% of memory \(rss 734003200/%d bytes\)`
+	var threshold int
+	for _, threshold = range expected {
+		c.Check(logs, Matches, fmt.Sprintf(pattern, threshold, ram))
+	}
+	if notExpected > threshold {
+		c.Check(logs, Not(Matches), fmt.Sprintf(pattern, notExpected, ram))
+	}
+}
+
+func (s *TestSuite) TestLogNoRSSThresholds(c *C) {
+	s.testLogRSSThresholds(c, 7340032000, []int{}, 90)
+}
+
+func (s *TestSuite) TestLogSomeRSSThresholds(c *C) {
+	onePercentRSS := 7340032
+	s.testLogRSSThresholds(c, 102*onePercentRSS, []int{90, 95}, 99)
+}
+
+func (s *TestSuite) TestLogAllRSSThresholds(c *C) {
+	s.testLogRSSThresholds(c, 734003299, []int{90, 95, 99}, 0)
+}
+
+func (s *TestSuite) TestLogMaximaAfterRun(c *C) {
+	s.runner.cgroupRoot = "testdata/fakestat"
+	s.runner.parentTemp = c.MkDir()
+	s.fullRunHelper(c, `{
+		"command": ["true"],
+		"container_image": "`+arvadostest.DockerImage112PDH+`",
+		"cwd": ".",
+		"environment": {},
+		"mounts": {"/tmp": {"kind": "tmp"} },
+		"output_path": "/tmp",
+		"priority": 1,
+		"runtime_constraints": {"ram": 7340032000},
+		"state": "Locked"
+	}`, nil, func() int { return 0 })
+	logs := s.api.Logs["crunch-run"].String()
+	for _, expected := range []string{
+		`Maximum disk usage was \d+%, \d+/\d+ bytes`,
+		`Maximum container memory cache usage was 73400320 bytes`,
+		`Maximum container memory swap usage was 320 bytes`,
+		`Maximum container memory pgmajfault usage was 20 faults`,
+		`Maximum container memory rss usage was 10%, 734003200/7340032000 bytes`,
+		`Maximum crunch-run memory rss usage was \d+ bytes`,
+	} {
+		c.Check(logs, Matches, logLineStart+expected)
+	}
+}
+
 func (s *TestSuite) TestCommitNodeInfoBeforeStart(c *C) {
 	var collection_create, container_update arvadosclient.Dict
 	s.fullRunHelper(c, `{
@@ -2283,6 +2351,80 @@ func (s *TestSuite) TestCalculateCost(c *C) {
 	c.Check(logbuf.String(), Not(Matches), `(?ms).*changed to 2\.00 .* changed to 2\.00 .*`)
 }
 
+func (s *TestSuite) TestSIGUSR2CostUpdate(c *C) {
+	pid := os.Getpid()
+	now := time.Now()
+	pricesJSON, err := json.Marshal([]cloud.InstancePrice{
+		{StartTime: now.Add(-4 * time.Hour), Price: 2.4},
+		{StartTime: now.Add(-2 * time.Hour), Price: 2.6},
+	})
+	c.Assert(err, IsNil)
+
+	os.Setenv("InstanceType", `{"Price":2.2}`)
+	defer os.Unsetenv("InstanceType")
+	defer func(s string) { lockdir = s }(lockdir)
+	lockdir = c.MkDir()
+
+	// We can't use s.api.CalledWith because timing differences will yield
+	// different cost values across runs. getCostUpdate iterates over API
+	// calls until it finds one that sets the cost, then writes that value
+	// to the next index of costUpdates.
+	deadline := now.Add(time.Second)
+	costUpdates := make([]float64, 2)
+	costIndex := 0
+	apiIndex := 0
+	getCostUpdate := func() {
+		for ; time.Now().Before(deadline); time.Sleep(time.Second / 10) {
+			for apiIndex < len(s.api.Content) {
+				update := s.api.Content[apiIndex]
+				apiIndex++
+				var ok bool
+				var cost float64
+				if update, ok = update["container"].(arvadosclient.Dict); !ok {
+					continue
+				}
+				if cost, ok = update["cost"].(float64); !ok {
+					continue
+				}
+				c.Logf("API call #%d updates cost to %v", apiIndex-1, cost)
+				costUpdates[costIndex] = cost
+				costIndex++
+				return
+			}
+		}
+	}
+
+	s.fullRunHelper(c, `{
+		"command": ["true"],
+		"container_image": "`+arvadostest.DockerImage112PDH+`",
+		"cwd": ".",
+		"environment": {},
+		"mounts": {"/tmp": {"kind": "tmp"} },
+		"output_path": "/tmp",
+		"priority": 1,
+		"runtime_constraints": {},
+		"state": "Locked",
+		"uuid": "zzzzz-dz642-20230320101530a"
+	}`, nil, func() int {
+		s.runner.costStartTime = now.Add(-3 * time.Hour)
+		err := syscall.Kill(pid, syscall.SIGUSR2)
+		c.Check(err, IsNil, Commentf("error sending first SIGUSR2 to runner"))
+		getCostUpdate()
+
+		err = os.WriteFile(path.Join(lockdir, pricesfile), pricesJSON, 0o700)
+		c.Check(err, IsNil, Commentf("error writing JSON prices file"))
+		err = syscall.Kill(pid, syscall.SIGUSR2)
+		c.Check(err, IsNil, Commentf("error sending second SIGUSR2 to runner"))
+		getCostUpdate()
+
+		return 0
+	})
+	// Comparing with format strings makes it easy to ignore minor variations
+	// in cost across runs while keeping diagnostics pretty.
+	c.Check(fmt.Sprintf("%.3f", costUpdates[0]), Equals, "6.600")
+	c.Check(fmt.Sprintf("%.3f", costUpdates[1]), Equals, "7.600")
+}
+
 type FakeProcess struct {
 	cmdLine []string
 }