1 // Copyright (C) The Arvados Authors. All rights reserved.
3 // SPDX-License-Identifier: AGPL-3.0
18 "github.com/sirupsen/logrus"
// logMsgPrefix is a regexp fragment prepended to the expected-message
// patterns in the tests below. (?m) enables multi-line mode, (.*\n)*
// consumes any number of earlier lines, and the trailing part consumes the
// target line up to and including the opening quote of its msg field.
22 const logMsgPrefix = `(?m)(.*\n)*.* msg="`
// GiB is 2^30 (the number of bytes in one gibibyte) as an int64. The tests
// use it as the total against which rss percentages and maxima are computed.
23 const GiB = int64(1024 * 1024 * 1024)
25 type fakeStat struct {
// fakeRSS describes the canned cgroup fixture used by the tests: the
// directory it lives in and the rss figure that the expected log messages
// are built from.
32 var fakeRSS = fakeStat{
33 cgroupRoot: "testdata/fakestat",
36 // Note this is the value of total_rss, not rss, because that's what should
37 // always be reported for thresholds and maxima.
// 750 MiB, i.e. roughly 73% of the GiB total the tests compare against.
38 value: 750 * 1024 * 1024,
41 func Test(t *testing.T) {
// SetUpSuite redirects the suite logger's output into s.logbuf so that the
// tests can inspect logged text through s.logbuf.String().
55 func (s *suite) SetUpSuite(c *C) {
56 s.logger.Out = &s.logbuf
59 func (s *suite) SetUpTest(c *C) {
// tempCgroup copies every entry found directly in sourceDir into tempDir
// and then makes tempDir the suite's cgroup root (s.cgroupRoot), which
// addPidToCgroup later writes into.
// NOTE(review): tempDir is assigned on a line outside this excerpt --
// presumably a per-test temporary directory; confirm.
64 func (s *suite) tempCgroup(c *C, sourceDir string) error {
66 dirents, err := os.ReadDir(sourceDir)
70 for _, dirent := range dirents {
// Every entry is read with os.ReadFile, so entries are expected to be
// plain files rather than subdirectories.
71 srcData, err := os.ReadFile(path.Join(sourceDir, dirent.Name()))
75 destPath := path.Join(tempDir, dirent.Name())
// The copy is written with owner-only read/write permission.
76 err = os.WriteFile(destPath, srcData, 0o600)
81 s.cgroupRoot = tempDir
// addPidToCgroup appends pid, formatted as a decimal number followed by a
// newline, to the cgroup.procs file under s.cgroupRoot. It returns an error
// when s.cgroupRoot is empty, i.e. no cgroup directory has been set up yet
// (tempCgroup is what assigns it).
85 func (s *suite) addPidToCgroup(pid int) error {
86 if s.cgroupRoot == "" {
87 return errors.New("cgroup has not been set up for this test")
89 procsPath := path.Join(s.cgroupRoot, "cgroup.procs")
// Create the file if it is absent; otherwise add to the end of it.
90 procsFile, err := os.OpenFile(procsPath, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0o600)
94 pidLine := strconv.Itoa(pid) + "\n"
95 _, err = procsFile.Write([]byte(pidLine))
// Returning Close's result reports a failed close to the caller.
100 return procsFile.Close()
// TestReadAllOrWarnFail passes readAllOrWarn a file whose reads fail and
// checks that a warning naming that file ends up in the log buffer.
103 func (s *suite) TestReadAllOrWarnFail(c *C) {
104 rep := Reporter{Logger: s.logger}
106 // The special file /proc/self/mem can be opened for
107 // reading, but reading from byte 0 returns an error.
108 f, err := os.Open("/proc/self/mem")
111 _, err = rep.readAllOrWarn(f)
// The logged line must carry a msg field that begins with
// "warning: read /proc/self/mem: ".
113 c.Check(s.logbuf.String(), Matches, ".* msg=\"warning: read /proc/self/mem: .*\n")
// TestReadAllOrWarnSuccess reads ./crunchstat_test.go through readAllOrWarn
// and checks that the returned data contains the package clause and that
// nothing was written to the log buffer.
116 func (s *suite) TestReadAllOrWarnSuccess(c *C) {
117 rep := Reporter{Logger: s.logger}
119 f, err := os.Open("./crunchstat_test.go")
122 data, err := rep.readAllOrWarn(f)
124 c.Check(string(data), Matches, "(?ms).*\npackage crunchstat\n.*")
// A successful read must not produce any log output.
125 c.Check(s.logbuf.String(), Equals, "")
// TestReportPIDs registers three PIDs through r.ReportPID (PID 1, the test
// process itself, and one that is expected to be ignored) and then polls
// the log buffer, for up to 10 seconds, until a procmem line listing init
// followed by test_process appears. The number captured just before
// test_process is then sanity-checked for magnitude.
128 func (s *suite) TestReportPIDs(c *C) {
131 CgroupRoot: "/sys/fs/cgroup",
132 PollPeriod: time.Second,
135 r.ReportPID("init", 1)
136 r.ReportPID("test_process", os.Getpid())
137 r.ReportPID("nonexistent", 12345) // should be silently ignored/omitted
// Re-check the log buffer every millisecond until the deadline passes.
138 for deadline := time.Now().Add(10 * time.Second); ; time.Sleep(time.Millisecond) {
139 if time.Now().After(deadline) {
// NOTE(review): this regexp is compiled again on every poll iteration;
// compiling it once before the loop would avoid the repeated work.
143 if m := regexp.MustCompile(`(?ms).*procmem \d+ init (\d+) test_process.*`).FindSubmatch(s.logbuf.Bytes()); len(m) > 0 {
144 size, err := strconv.ParseInt(string(m[1]), 10, 64)
146 // Expect >1 MiB and <100 MiB -- otherwise we
147 // are probably misinterpreting /proc/N/stat
148 // or multiplying by the wrong page size.
// NOTE(review): the bounds checked below are decimal (10^6 and 10^8
// bytes), which differ slightly from the MiB figures named above.
149 c.Check(size > 1000000, Equals, true)
150 c.Check(size < 100000000, Equals, true)
154 c.Logf("%s", s.logbuf.String())
// testRSSThresholds is the shared body of the RSS threshold tests. It
// configures "rss" thresholds built from rssPercentages of a GiB total,
// with CgroupRoot set to the fakeRSS fixture, and then checks the log
// buffer for a "Container using over N% of memory" message for each of the
// first alertCount percentages.
//
// rssPercentages lists the threshold percentages to configure.
// alertCount is how many of them, counted from the start of the slice, are
// expected to produce an alert; it must not exceed len(rssPercentages).
157 func (s *suite) testRSSThresholds(c *C, rssPercentages []int64, alertCount int) {
158 c.Assert(alertCount <= len(rssPercentages), Equals, true)
160 CgroupRoot: fakeRSS.cgroupRoot,
162 MemThresholds: map[string][]Threshold{
163 "rss": NewThresholdsFromPercentages(GiB, rssPercentages),
165 PollPeriod: time.Second * 10,
166 ThresholdLogger: s.logger,
170 logs := s.logbuf.String()
// NOTE(review): the loop ranges over rssPercentages[:alertCount], so index
// is always < alertCount and the Not(Matches) assignment below is never
// reached. Thresholds that should NOT fire are therefore never checked.
// Ranging over the full rssPercentages slice looks like the intent --
// confirm.
173 for index, expectPercentage := range rssPercentages[:alertCount] {
175 if index < alertCount {
178 logCheck = Not(Matches)
// The expected message embeds the percentage, the fixture's rss value and
// the GiB total, prefixed by logMsgPrefix and closed by the msg quote.
180 pattern := fmt.Sprintf(`%sContainer using over %d%% of memory \(rss %d/%d bytes\)"`,
181 logMsgPrefix, expectPercentage, fakeRSS.value, GiB)
182 c.Check(logs, logCheck, pattern)
// TestZeroRSSThresholds runs the threshold check with no thresholds
// configured and no alerts expected.
186 func (s *suite) TestZeroRSSThresholds(c *C) {
187 s.testRSSThresholds(c, []int64{}, 0)
// TestOneRSSThresholdPassed configures a single 55% threshold, which is
// below the roughly 73% that fakeRSS.value is of GiB, and expects one alert.
190 func (s *suite) TestOneRSSThresholdPassed(c *C) {
191 s.testRSSThresholds(c, []int64{55}, 1)
// TestOneRSSThresholdNotPassed configures a single 85% threshold, which is
// above the roughly 73% that fakeRSS.value is of GiB, and expects no alert.
194 func (s *suite) TestOneRSSThresholdNotPassed(c *C) {
195 s.testRSSThresholds(c, []int64{85}, 0)
// TestMultipleRSSThresholdsNonePassed configures three thresholds that all
// lie above the fixture's rss usage and expects no alerts.
198 func (s *suite) TestMultipleRSSThresholdsNonePassed(c *C) {
199 s.testRSSThresholds(c, []int64{95, 97, 99}, 0)
// TestMultipleRSSThresholdsSomePassed configures four thresholds of which
// the first two (60% and 70%) lie below the fixture's roughly 73% rss usage
// and the last two (80% and 90%) lie above it, so two alerts are expected.
202 func (s *suite) TestMultipleRSSThresholdsSomePassed(c *C) {
203 s.testRSSThresholds(c, []int64{60, 70, 80, 90}, 2)
// TestMultipleRSSThresholdsAllPassed configures three very low thresholds,
// all below the fixture's rss usage, and expects an alert for each.
206 func (s *suite) TestMultipleRSSThresholdsAllPassed(c *C) {
207 s.testRSSThresholds(c, []int64{1, 2, 3}, 3)
// TestLogMaxima copies the fakeRSS fixture with tempCgroup, configures a
// reporter whose CgroupRoot and TempDir both point at that copy, calls
// LogMaxima with GiB as the "rss" total, and checks the log buffer for the
// expected "Maximum ..." messages.
210 func (s *suite) TestLogMaxima(c *C) {
211 err := s.tempCgroup(c, fakeRSS.cgroupRoot)
214 CgroupRoot: s.cgroupRoot,
216 PollPeriod: time.Second * 10,
217 TempDir: s.cgroupRoot,
221 rep.LogMaxima(s.logger, map[string]int64{"rss": GiB})
222 logs := s.logbuf.String()
// Expected rss message: the integer percentage of fakeRSS.value relative to
// GiB, followed by the two raw byte counts.
225 expectRSS := fmt.Sprintf(`Maximum container memory rss usage was %d%%, %d/%d bytes`,
226 100*fakeRSS.value/GiB, fakeRSS.value, GiB)
227 for _, expected := range []string{
228 `Maximum disk usage was \d+%, \d+/\d+ bytes`,
// NOTE(review): the literal figures below presumably mirror the values in
// the fixture files under testdata/fakestat -- confirm when changing either.
229 `Maximum container memory cache usage was 73400320 bytes`,
230 `Maximum container memory swap usage was 320 bytes`,
231 `Maximum container memory pgmajfault usage was 20 faults`,
// Each expectation is prefixed with logMsgPrefix and closed with the quote
// that ends the msg field.
234 pattern := logMsgPrefix + expected + `"`
235 c.Check(logs, Matches, pattern)
// TestLogProcessMemMax copies the fakeRSS fixture with tempCgroup, adds pid
// to the copy's cgroup.procs, registers pid with the reporter under the
// label "test-run", and checks that LogProcessMemMax logs a
// "Maximum test-run memory rss usage" message.
// NOTE(review): pid is assigned on a line outside this excerpt.
239 func (s *suite) TestLogProcessMemMax(c *C) {
240 err := s.tempCgroup(c, fakeRSS.cgroupRoot)
243 err = s.addPidToCgroup(pid)
247 CgroupRoot: s.cgroupRoot,
249 PollPeriod: time.Second * 10,
250 TempDir: s.cgroupRoot,
252 rep.ReportPID("test-run", pid)
255 rep.LogProcessMemMax(s.logger)
256 logs := s.logbuf.String()
// Only the shape of the message is checked; the byte count may be any
// non-negative integer.
259 pattern := logMsgPrefix + `Maximum test-run memory rss usage was \d+ bytes"`
260 c.Check(logs, Matches, pattern)