X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/b6da0bcc634615e8db9d4546a33b97bb69583afc..247fd765bc1fbf559572d6e5590893c9190f086a:/lib/crunchstat/crunchstat_test.go diff --git a/lib/crunchstat/crunchstat_test.go b/lib/crunchstat/crunchstat_test.go index 5e8e93de6c..88de12f076 100644 --- a/lib/crunchstat/crunchstat_test.go +++ b/lib/crunchstat/crunchstat_test.go @@ -6,8 +6,10 @@ package crunchstat import ( "bytes" - "log" + "errors" + "fmt" "os" + "path" "regexp" "strconv" "testing" @@ -17,17 +19,89 @@ import ( . "gopkg.in/check.v1" ) +const logMsgPrefix = `(?m)(.*\n)*.* msg="` +const GiB = int64(1024 * 1024 * 1024) + +type fakeStat struct { + cgroupRoot string + statName string + unit string + value int64 +} + +var fakeRSS = fakeStat{ + cgroupRoot: "testdata/fakestat", + statName: "mem rss", + unit: "bytes", + // Note this is the value of total_rss, not rss, because that's what should + // always be reported for thresholds and maxima. + value: 750 * 1024 * 1024, +} + func Test(t *testing.T) { TestingT(t) } -var _ = Suite(&suite{}) +var _ = Suite(&suite{ + logger: logrus.New(), +}) + +type suite struct { + cgroupRoot string + logbuf bytes.Buffer + logger *logrus.Logger +} + +func (s *suite) SetUpSuite(c *C) { + s.logger.Out = &s.logbuf +} + +func (s *suite) SetUpTest(c *C) { + s.cgroupRoot = "" + s.logbuf.Reset() +} + +func (s *suite) tempCgroup(c *C, sourceDir string) error { + tempDir := c.MkDir() + dirents, err := os.ReadDir(sourceDir) + if err != nil { + return err + } + for _, dirent := range dirents { + srcData, err := os.ReadFile(path.Join(sourceDir, dirent.Name())) + if err != nil { + return err + } + destPath := path.Join(tempDir, dirent.Name()) + err = os.WriteFile(destPath, srcData, 0o600) + if err != nil { + return err + } + } + s.cgroupRoot = tempDir + return nil +} -type suite struct{} +func (s *suite) addPidToCgroup(pid int) error { + if s.cgroupRoot == "" { + return errors.New("cgroup has not been set up for this test") + } + procsPath := path.Join(s.cgroupRoot, "cgroup.procs") + procsFile, err := os.OpenFile(procsPath, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0o600) + if err != nil { + return err + } + pidLine := strconv.Itoa(pid) + "\n" + _, err = procsFile.Write([]byte(pidLine)) + if err != nil { + procsFile.Close() + return err + } + return procsFile.Close() +} func (s *suite) TestReadAllOrWarnFail(c *C) { - var logger bytes.Buffer - rep := Reporter{Logger: log.New(&logger, "", 0)} + rep := Reporter{Logger: s.logger} // The special file /proc/self/mem can be opened for // reading, but reading from byte 0 returns an error. @@ -36,12 +110,11 @@ func (s *suite) TestReadAllOrWarnFail(c *C) { defer f.Close() _, err = rep.readAllOrWarn(f) c.Check(err, NotNil) - c.Check(logger.String(), Matches, "^warning: read /proc/self/mem: .*\n") + c.Check(s.logbuf.String(), Matches, ".* msg=\"warning: read /proc/self/mem: .*\n") } func (s *suite) TestReadAllOrWarnSuccess(c *C) { - var logbuf bytes.Buffer - rep := Reporter{Logger: log.New(&logbuf, "", 0)} + rep := Reporter{Logger: s.logger} f, err := os.Open("./crunchstat_test.go") c.Assert(err, IsNil) @@ -49,15 +122,12 @@ func (s *suite) TestReadAllOrWarnSuccess(c *C) { data, err := rep.readAllOrWarn(f) c.Check(err, IsNil) c.Check(string(data), Matches, "(?ms).*\npackage crunchstat\n.*") - c.Check(logbuf.String(), Equals, "") + c.Check(s.logbuf.String(), Equals, "") } func (s *suite) TestReportPIDs(c *C) { - var logbuf bytes.Buffer - logger := logrus.New() - logger.Out = &logbuf r := Reporter{ - Logger: logger, + Logger: s.logger, CgroupRoot: "/sys/fs/cgroup", PollPeriod: time.Second, } @@ -70,7 +140,7 @@ func (s *suite) TestReportPIDs(c *C) { c.Error("timed out") break } - if m := regexp.MustCompile(`(?ms).*procmem \d+ init (\d+) test_process.*`).FindSubmatch(logbuf.Bytes()); len(m) > 0 { + if m := regexp.MustCompile(`(?ms).*procmem \d+ init (\d+) test_process.*`).FindSubmatch(s.logbuf.Bytes()); len(m) > 0 { size, err := strconv.ParseInt(string(m[1]), 10, 64) c.Check(err, IsNil) // Expect >1 MiB and <100 MiB -- otherwise we @@ -81,5 +151,111 @@ func (s *suite) TestReportPIDs(c *C) { break } } - c.Logf("%s", logbuf.String()) + c.Logf("%s", s.logbuf.String()) +} + +func (s *suite) testRSSThresholds(c *C, rssPercentages []int64, alertCount int) { + c.Assert(alertCount <= len(rssPercentages), Equals, true) + rep := Reporter{ + CgroupRoot: fakeRSS.cgroupRoot, + Logger: s.logger, + MemThresholds: map[string][]Threshold{ + "rss": NewThresholdsFromPercentages(GiB, rssPercentages), + }, + PollPeriod: time.Second * 10, + ThresholdLogger: s.logger, + } + rep.Start() + rep.Stop() + logs := s.logbuf.String() + c.Logf("%s", logs) + + for index, expectPercentage := range rssPercentages[:alertCount] { + var logCheck Checker + if index < alertCount { + logCheck = Matches + } else { + logCheck = Not(Matches) + } + pattern := fmt.Sprintf(`%sContainer using over %d%% of memory \(rss %d/%d bytes\)"`, + logMsgPrefix, expectPercentage, fakeRSS.value, GiB) + c.Check(logs, logCheck, pattern) + } +} + +func (s *suite) TestZeroRSSThresholds(c *C) { + s.testRSSThresholds(c, []int64{}, 0) +} + +func (s *suite) TestOneRSSThresholdPassed(c *C) { + s.testRSSThresholds(c, []int64{55}, 1) +} + +func (s *suite) TestOneRSSThresholdNotPassed(c *C) { + s.testRSSThresholds(c, []int64{85}, 0) +} + +func (s *suite) TestMultipleRSSThresholdsNonePassed(c *C) { + s.testRSSThresholds(c, []int64{95, 97, 99}, 0) +} + +func (s *suite) TestMultipleRSSThresholdsSomePassed(c *C) { + s.testRSSThresholds(c, []int64{60, 70, 80, 90}, 2) +} + +func (s *suite) TestMultipleRSSThresholdsAllPassed(c *C) { + s.testRSSThresholds(c, []int64{1, 2, 3}, 3) +} + +func (s *suite) TestLogMaxima(c *C) { + err := s.tempCgroup(c, fakeRSS.cgroupRoot) + c.Assert(err, IsNil) + rep := Reporter{ + CgroupRoot: s.cgroupRoot, + Logger: s.logger, + PollPeriod: time.Second * 10, + TempDir: s.cgroupRoot, + } + rep.Start() + rep.Stop() + rep.LogMaxima(s.logger, map[string]int64{"rss": GiB}) + logs := s.logbuf.String() + c.Logf("%s", logs) + + expectRSS := fmt.Sprintf(`Maximum container memory rss usage was %d%%, %d/%d bytes`, + 100*fakeRSS.value/GiB, fakeRSS.value, GiB) + for _, expected := range []string{ + `Maximum disk usage was \d+%, \d+/\d+ bytes`, + `Maximum container memory cache usage was 73400320 bytes`, + `Maximum container memory swap usage was 320 bytes`, + `Maximum container memory pgmajfault usage was 20 faults`, + expectRSS, + } { + pattern := logMsgPrefix + expected + `"` + c.Check(logs, Matches, pattern) + } +} + +func (s *suite) TestLogProcessMemMax(c *C) { + err := s.tempCgroup(c, fakeRSS.cgroupRoot) + c.Assert(err, IsNil) + pid := os.Getpid() + err = s.addPidToCgroup(pid) + c.Assert(err, IsNil) + + rep := Reporter{ + CgroupRoot: s.cgroupRoot, + Logger: s.logger, + PollPeriod: time.Second * 10, + TempDir: s.cgroupRoot, + } + rep.ReportPID("test-run", pid) + rep.Start() + rep.Stop() + rep.LogProcessMemMax(s.logger) + logs := s.logbuf.String() + c.Logf("%s", logs) + + pattern := logMsgPrefix + `Maximum test-run memory rss usage was \d+ bytes"` + c.Check(logs, Matches, pattern) }