From 0559d0dfc8406eadfc1aa71465179eaea2754f12 Mon Sep 17 00:00:00 2001
From: Brett Smith <brett.smith@curii.com>
Date: Wed, 1 Mar 2023 11:35:31 -0500
Subject: [PATCH] 19986: Log when a container uses nearly max RAM

This is meant to help users diagnose when their container likely failed
of OOM.

Arvados-DCO-1.1-Signed-off-by: Brett Smith <brett.smith@curii.com>
---
 .licenseignore                                |  3 +-
 lib/crunchrun/crunchrun.go                    | 12 ++-
 lib/crunchrun/crunchrun_test.go               | 40 ++++++++++
 lib/crunchrun/testdata/fakestat/cgroup.procs  |  0
 .../testdata/fakestat/cgroupid/cgroup.procs   |  0
 .../testdata/fakestat/cgroupid/memory.stat    |  1 +
 lib/crunchstat/crunchstat.go                  | 63 +++++++++++++++-
 lib/crunchstat/crunchstat_test.go             | 73 +++++++++++++++++++
 lib/crunchstat/testdata/fakestat/cgroup.procs |  0
 lib/crunchstat/testdata/fakestat/memory.stat  |  2 +
 10 files changed, 186 insertions(+), 8 deletions(-)
 create mode 100644 lib/crunchrun/testdata/fakestat/cgroup.procs
 create mode 100644 lib/crunchrun/testdata/fakestat/cgroupid/cgroup.procs
 create mode 100644 lib/crunchrun/testdata/fakestat/cgroupid/memory.stat
 create mode 100644 lib/crunchstat/testdata/fakestat/cgroup.procs
 create mode 100644 lib/crunchstat/testdata/fakestat/memory.stat

diff --git a/.licenseignore b/.licenseignore
index 6ddb5c009c..3d24c4ee3a 100644
--- a/.licenseignore
+++ b/.licenseignore
@@ -92,4 +92,5 @@ sdk/cwl/tests/wf/hello.txt
 sdk/cwl/tests/wf/indir1/hello2.txt
 sdk/cwl/tests/chipseq/data/Genomes/*
 CITATION.cff
-SECURITY.md
\ No newline at end of file
+SECURITY.md
+*/testdata/fakestat/*
diff --git a/lib/crunchrun/crunchrun.go b/lib/crunchrun/crunchrun.go
index 79d7f36740..3607cafaf0 100644
--- a/lib/crunchrun/crunchrun.go
+++ b/lib/crunchrun/crunchrun.go
@@ -764,12 +764,16 @@ func (runner *ContainerRunner) startCrunchstat() error {
 	}
 	runner.statLogger = NewThrottledLogger(w)
 	runner.statReporter = &crunchstat.Reporter{
-		CID:          runner.executor.CgroupID(),
-		Logger:       log.New(runner.statLogger, "", 0),
 		CgroupParent: runner.expectCgroupParent,
 		CgroupRoot:   runner.cgroupRoot,
-		PollPeriod:   runner.statInterval,
-		TempDir:      runner.parentTemp,
+		CID:          runner.executor.CgroupID(),
+		Logger:       log.New(runner.statLogger, "", 0),
+		MemThresholds: map[string][]crunchstat.Threshold{
+			"rss": crunchstat.NewThresholdsFromPercentages(runner.Container.RuntimeConstraints.RAM, []int64{90, 95, 99}),
+		},
+		PollPeriod:      runner.statInterval,
+		TempDir:         runner.parentTemp,
+		ThresholdLogger: runner.CrunchLog,
 	}
 	runner.statReporter.Start()
 	return nil
diff --git a/lib/crunchrun/crunchrun_test.go b/lib/crunchrun/crunchrun_test.go
index 4d127f19c2..46a71e5929 100644
--- a/lib/crunchrun/crunchrun_test.go
+++ b/lib/crunchrun/crunchrun_test.go
@@ -20,6 +20,7 @@ import (
 	"os/exec"
 	"regexp"
 	"runtime/pprof"
+	"strconv"
 	"strings"
 	"sync"
 	"sync/atomic"
@@ -43,6 +44,8 @@ func TestCrunchExec(t *testing.T) {
 	TestingT(t)
 }
 
+const logLineStart = `(?m)(.*\n)*\d{4}-\d\d-\d\dT\d\d:\d\d:\d\d\.\d+Z `
+
 var _ = Suite(&TestSuite{})
 
 type TestSuite struct {
@@ -985,6 +988,43 @@ func (s *TestSuite) TestLogVersionAndRuntime(c *C) {
 	c.Check(s.api.Logs["crunch-run"].String(), Matches, `(?ms).*Using container runtime: stub.*`)
 }
 
+func (s *TestSuite) testLogRSSThresholds(c *C, ram int, expected []int, notExpected int) {
+	s.runner.cgroupRoot = "testdata/fakestat"
+	s.fullRunHelper(c, `{
+		"command": ["true"],
+		"container_image": "`+arvadostest.DockerImage112PDH+`",
+		"cwd": ".",
+		"environment": {},
+		"mounts": {"/tmp": {"kind": "tmp"} },
+		"output_path": "/tmp",
+		"priority": 1,
+		"runtime_constraints": {"ram": `+strconv.Itoa(ram)+`},
+		"state": "Locked"
+	}`, nil, func() int { return 0 })
+	logs := s.api.Logs["crunch-run"].String()
+	pattern := logLineStart + `Container using over %d%% of memory \(rss 734003200/%d bytes\)`
+	var threshold int
+	for _, threshold = range expected {
+		c.Check(logs, Matches, fmt.Sprintf(pattern, threshold, ram))
+	}
+	if notExpected > threshold {
+		c.Check(logs, Not(Matches), fmt.Sprintf(pattern, notExpected, ram))
+	}
+}
+
+func (s *TestSuite) TestLogNoRSSThresholds(c *C) {
+	s.testLogRSSThresholds(c, 7340032000, []int{}, 90)
+}
+
+func (s *TestSuite) TestLogSomeRSSThresholds(c *C) {
+	onePercentRSS := 7340032
+	s.testLogRSSThresholds(c, 102*onePercentRSS, []int{90, 95}, 99)
+}
+
+func (s *TestSuite) TestLogAllRSSThresholds(c *C) {
+	s.testLogRSSThresholds(c, 734003299, []int{90, 95, 99}, 0)
+}
+
 func (s *TestSuite) TestCommitNodeInfoBeforeStart(c *C) {
 	var collection_create, container_update arvadosclient.Dict
 	s.fullRunHelper(c, `{
diff --git a/lib/crunchrun/testdata/fakestat/cgroup.procs b/lib/crunchrun/testdata/fakestat/cgroup.procs
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/lib/crunchrun/testdata/fakestat/cgroupid/cgroup.procs b/lib/crunchrun/testdata/fakestat/cgroupid/cgroup.procs
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/lib/crunchrun/testdata/fakestat/cgroupid/memory.stat b/lib/crunchrun/testdata/fakestat/cgroupid/memory.stat
new file mode 100644
index 0000000000..fff3333c4f
--- /dev/null
+++ b/lib/crunchrun/testdata/fakestat/cgroupid/memory.stat
@@ -0,0 +1 @@
+rss 734003200
diff --git a/lib/crunchstat/crunchstat.go b/lib/crunchstat/crunchstat.go
index 5d2059f7e0..952e3975c2 100644
--- a/lib/crunchstat/crunchstat.go
+++ b/lib/crunchstat/crunchstat.go
@@ -23,6 +23,10 @@ import (
 	"time"
 )
 
+type logPrinter interface {
+	Printf(fmt string, args ...interface{})
+}
+
 // A Reporter gathers statistics for a cgroup and writes them to a
 // log.Logger.
 type Reporter struct {
@@ -49,9 +53,15 @@ type Reporter struct {
 	TempDir string
 
 	// Where to write statistics. Must not be nil.
-	Logger interface {
-		Printf(fmt string, args ...interface{})
-	}
+	Logger logPrinter
+
+	// When stats cross thresholds configured in the fields below,
+	// they are reported to this logger.
+	ThresholdLogger logPrinter
+
+	// MemThresholds maps memory stat names to slices of thresholds.
+	// When the corresponding stat exceeds a threshold, that will be logged.
+	MemThresholds map[string][]Threshold
 
 	kernelPageSize      int64
 	reportedStatFile    map[string]string
@@ -68,6 +78,27 @@ type Reporter struct {
 	flushed chan struct{} // closed when we have made our last report
 }
 
+type Threshold struct {
+	percentage int64
+	threshold  int64
+	total      int64
+}
+
+func NewThresholdFromPercentage(total int64, percentage int64) Threshold {
+	return Threshold{
+		percentage: percentage,
+		threshold:  total * percentage / 100,
+		total:      total,
+	}
+}
+
+func NewThresholdsFromPercentages(total int64, percentages []int64) (thresholds []Threshold) {
+	for _, percentage := range percentages {
+		thresholds = append(thresholds, NewThresholdFromPercentage(total, percentage))
+	}
+	return
+}
+
 // Start starts monitoring in a new goroutine, and returns
 // immediately.
 //
@@ -264,6 +295,32 @@ func (r *Reporter) getMemSample() {
 		thisSample.memStat[stat] = val
 	}
 	r.lastMemSample = thisSample
+
+	if r.ThresholdLogger != nil {
+		for statName, thresholds := range r.MemThresholds {
+			statValue, ok := thisSample.memStat["total_"+statName]
+			if !ok {
+				statValue, ok = thisSample.memStat[statName]
+				if !ok {
+					continue
+				}
+			}
+			var index int
+			var statThreshold Threshold
+			for index, statThreshold = range thresholds {
+				if statValue < statThreshold.threshold {
+					break
+				} else if statThreshold.percentage > 0 {
+					r.ThresholdLogger.Printf("Container using over %d%% of memory (%s %d/%d bytes)",
+						statThreshold.percentage, statName, statValue, statThreshold.total)
+				} else {
+					r.ThresholdLogger.Printf("Container using over %d of memory (%s %s bytes)",
+						statThreshold.threshold, statName, statValue)
+				}
+			}
+			r.MemThresholds[statName] = thresholds[index:]
+		}
+	}
 }
 
 func (r *Reporter) reportMemSample() {
diff --git a/lib/crunchstat/crunchstat_test.go b/lib/crunchstat/crunchstat_test.go
index b4498a135a..d98fe10647 100644
--- a/lib/crunchstat/crunchstat_test.go
+++ b/lib/crunchstat/crunchstat_test.go
@@ -6,6 +6,7 @@ package crunchstat
 
 import (
 	"bytes"
+	"fmt"
 	"os"
 	"regexp"
 	"strconv"
@@ -16,6 +17,25 @@ import (
 	. "gopkg.in/check.v1"
 )
 
+const logMsgPrefix = `(?m)(.*\n)*.* msg="`
+const GiB = int64(1024 * 1024 * 1024)
+
+type fakeStat struct {
+	cgroupRoot string
+	statName   string
+	unit       string
+	value      int64
+}
+
+var fakeRSS = fakeStat{
+	cgroupRoot: "testdata/fakestat",
+	statName:   "mem rss",
+	unit:       "bytes",
+	// Note this is the value of total_rss, not rss, because that's what should
+	// always be reported for thresholds and maxima.
+	value: 750 * 1024 * 1024,
+}
+
 func Test(t *testing.T) {
 	TestingT(t)
 }
@@ -90,3 +110,56 @@ func (s *suite) TestReportPIDs(c *C) {
 	}
 	c.Logf("%s", s.logbuf.String())
 }
+
+func (s *suite) testRSSThresholds(c *C, rssPercentages []int64, alertCount int) {
+	c.Assert(alertCount <= len(rssPercentages), Equals, true)
+	rep := Reporter{
+		CgroupRoot: fakeRSS.cgroupRoot,
+		Logger:     s.logger,
+		MemThresholds: map[string][]Threshold{
+			"rss": NewThresholdsFromPercentages(GiB, rssPercentages),
+		},
+		PollPeriod:      time.Second * 10,
+		ThresholdLogger: s.logger,
+	}
+	rep.Start()
+	rep.Stop()
+	logs := s.logbuf.String()
+	c.Logf("%s", logs)
+
+	for index, expectPercentage := range rssPercentages[:alertCount] {
+		var logCheck Checker
+		if index < alertCount {
+			logCheck = Matches
+		} else {
+			logCheck = Not(Matches)
+		}
+		pattern := fmt.Sprintf(`%sContainer using over %d%% of memory \(rss %d/%d bytes\)"`,
+			logMsgPrefix, expectPercentage, fakeRSS.value, GiB)
+		c.Check(logs, logCheck, pattern)
+	}
+}
+
+func (s *suite) TestZeroRSSThresholds(c *C) {
+	s.testRSSThresholds(c, []int64{}, 0)
+}
+
+func (s *suite) TestOneRSSThresholdPassed(c *C) {
+	s.testRSSThresholds(c, []int64{55}, 1)
+}
+
+func (s *suite) TestOneRSSThresholdNotPassed(c *C) {
+	s.testRSSThresholds(c, []int64{85}, 0)
+}
+
+func (s *suite) TestMultipleRSSThresholdsNonePassed(c *C) {
+	s.testRSSThresholds(c, []int64{95, 97, 99}, 0)
+}
+
+func (s *suite) TestMultipleRSSThresholdsSomePassed(c *C) {
+	s.testRSSThresholds(c, []int64{60, 70, 80, 90}, 2)
+}
+
+func (s *suite) TestMultipleRSSThresholdsAllPassed(c *C) {
+	s.testRSSThresholds(c, []int64{1, 2, 3}, 3)
+}
diff --git a/lib/crunchstat/testdata/fakestat/cgroup.procs b/lib/crunchstat/testdata/fakestat/cgroup.procs
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/lib/crunchstat/testdata/fakestat/memory.stat b/lib/crunchstat/testdata/fakestat/memory.stat
new file mode 100644
index 0000000000..0540eea230
--- /dev/null
+++ b/lib/crunchstat/testdata/fakestat/memory.stat
@@ -0,0 +1,2 @@
+rss 990
+total_rss 786432000
-- 
2.30.2