1 // Copyright (C) The Arvados Authors. All rights reserved.
3 // SPDX-License-Identifier: AGPL-3.0
18 "github.com/sirupsen/logrus"
22 const logMsgPrefix = `(?m)(.*\n)*.* msg="`
24 func Test(t *testing.T) {
28 var _ = Suite(&suite{})
30 type testdatasource struct {
35 func (s testdatasource) Pid() int {
38 func (s testdatasource) FS() fs.FS {
39 return os.DirFS(s.fspath)
42 // To generate a test case for a new OS target, build
43 // cmd/arvados-server and run
45 // arvados-server crunchstat -dump ./testdata/example1234 sleep 2
46 var testdata = map[string]testdatasource{
47 "debian11": {fspath: "testdata/debian11", pid: 4153022},
48 "debian12": {fspath: "testdata/debian12", pid: 1115883},
49 "ubuntu1804": {fspath: "testdata/ubuntu1804", pid: 2523},
50 "ubuntu2004": {fspath: "testdata/ubuntu2004", pid: 1360},
51 "ubuntu2204": {fspath: "testdata/ubuntu2204", pid: 1967},
57 debian12MemoryCurrent int64
60 func (s *suite) SetUpSuite(c *C) {
61 s.logger = logrus.New()
62 s.logger.Out = &s.logbuf
64 buf, err := os.ReadFile("testdata/debian12/sys/fs/cgroup/user.slice/user-1000.slice/session-4.scope/memory.current")
66 _, err = fmt.Sscanf(string(buf), "%d", &s.debian12MemoryCurrent)
70 func (s *suite) SetUpTest(c *C) {
74 // Report stats for the current (go test) process's cgroup, using the
75 // test host's real procfs/sysfs.
76 func (s *suite) TestReportCurrent(c *C) {
80 PollPeriod: time.Second,
84 checkPatterns := []string{
88 `(?ms).* [\d.]+ user [\d.]+ sys ` + fmt.Sprintf("%d", runtime.NumCPU()) + ` cpus -- .*`,
90 for deadline := time.Now().Add(4 * time.Second); !c.Failed(); time.Sleep(time.Millisecond) {
92 for _, pattern := range checkPatterns {
93 if m := regexp.MustCompile(pattern).FindSubmatch(s.logbuf.Bytes()); len(m) == 0 {
95 if time.Now().After(deadline) {
96 c.Errorf("timed out waiting for %s", pattern)
104 c.Logf("%s", s.logbuf.String())
107 // Report stats for the current (go test) process.
108 func (s *suite) TestReportPIDs(c *C) {
110 Pid: func() int { return 1 },
112 PollPeriod: time.Second,
116 r.ReportPID("init", 1)
117 r.ReportPID("test_process", os.Getpid())
118 r.ReportPID("nonexistent", 12345) // should be silently ignored/omitted
119 for deadline := time.Now().Add(10 * time.Second); ; time.Sleep(time.Millisecond) {
120 if time.Now().After(deadline) {
124 if m := regexp.MustCompile(`(?ms).*procmem \d+ init (\d+) test_process.*`).FindSubmatch(s.logbuf.Bytes()); len(m) > 0 {
125 size, err := strconv.ParseInt(string(m[1]), 10, 64)
127 // Expect >1 MiB and <100 MiB -- otherwise we
128 // are probably misinterpreting /proc/N/stat
129 // or multiplying by the wrong page size.
130 c.Check(size > 1000000, Equals, true)
131 c.Check(size < 100000000, Equals, true)
135 c.Logf("%s", s.logbuf.String())
138 func (s *suite) TestAllTestdata(c *C) {
139 for platform, datasource := range testdata {
141 c.Logf("=== %s", platform)
146 PollPeriod: time.Second,
147 ThresholdLogger: s.logger,
152 logs := s.logbuf.String()
154 c.Check(logs, Matches, `(?ms).* \d\d+ rss\\n.*`)
155 c.Check(logs, Matches, `(?ms).*blkio:\d+:\d+ \d+ write \d+ read\\n.*`)
156 c.Check(logs, Matches, `(?ms).*net:\S+ \d+ tx \d+ rx\\n.*`)
157 c.Check(logs, Matches, `(?ms).* [\d.]+ user [\d.]+ sys [2-9]\d* cpus.*`)
161 func (s *suite) testRSSThresholds(c *C, rssPercentages []int64, alertCount int) {
162 c.Assert(alertCount <= len(rssPercentages), Equals, true)
164 Pid: testdata["debian12"].Pid,
165 FS: testdata["debian12"].FS(),
167 MemThresholds: map[string][]Threshold{
168 "rss": NewThresholdsFromPercentages(s.debian12MemoryCurrent*3/2, rssPercentages),
170 PollPeriod: time.Second * 10,
171 ThresholdLogger: s.logger,
175 logs := s.logbuf.String()
178 for index, expectPercentage := range rssPercentages[:alertCount] {
180 if index < alertCount {
183 logCheck = Not(Matches)
185 pattern := fmt.Sprintf(`%sContainer using over %d%% of memory \(rss %d/%d bytes\)"`,
186 logMsgPrefix, expectPercentage, s.debian12MemoryCurrent, s.debian12MemoryCurrent*3/2)
187 c.Check(logs, logCheck, pattern)
191 func (s *suite) TestZeroRSSThresholds(c *C) {
192 s.testRSSThresholds(c, []int64{}, 0)
195 func (s *suite) TestOneRSSThresholdPassed(c *C) {
196 s.testRSSThresholds(c, []int64{55}, 1)
199 func (s *suite) TestOneRSSThresholdNotPassed(c *C) {
200 s.testRSSThresholds(c, []int64{85}, 0)
203 func (s *suite) TestMultipleRSSThresholdsNonePassed(c *C) {
204 s.testRSSThresholds(c, []int64{95, 97, 99}, 0)
207 func (s *suite) TestMultipleRSSThresholdsSomePassed(c *C) {
208 s.testRSSThresholds(c, []int64{45, 60, 75, 90}, 2)
211 func (s *suite) TestMultipleRSSThresholdsAllPassed(c *C) {
212 s.testRSSThresholds(c, []int64{1, 2, 3}, 3)
215 func (s *suite) TestLogMaxima(c *C) {
217 Pid: testdata["debian12"].Pid,
218 FS: testdata["debian12"].FS(),
220 PollPeriod: time.Second * 10,
225 rep.LogMaxima(s.logger, map[string]int64{"rss": s.debian12MemoryCurrent * 3 / 2})
226 logs := s.logbuf.String()
229 expectRSS := fmt.Sprintf(`Maximum container memory rss usage was %d%%, %d/%d bytes`,
230 66, s.debian12MemoryCurrent, s.debian12MemoryCurrent*3/2)
231 for _, expected := range []string{
232 `Maximum disk usage was \d+%, \d+/\d+ bytes`,
233 `Maximum container memory swap usage was \d\d+ bytes`,
234 `Maximum container memory pgmajfault usage was \d\d+ faults`,
237 pattern := logMsgPrefix + expected + `"`
238 c.Check(logs, Matches, pattern)
242 func (s *suite) TestLogProcessMemMax(c *C) {
246 PollPeriod: time.Second * 10,
248 rep.ReportPID("test-run", os.Getpid())
251 rep.LogProcessMemMax(s.logger)
252 logs := s.logbuf.String()
255 pattern := logMsgPrefix + `Maximum test-run memory rss usage was \d+ bytes"`
256 c.Check(logs, Matches, pattern)