19986: Log when a container uses nearly max RAM
[arvados.git] / lib / crunchrun / crunchrun.go
index 57eed84bacb11a9759e3e29b73f5b45e84d2be37..3607cafaf0149bc9763c9fc5679ec6ebb6d0a2e1 100644 (file)
@@ -764,12 +764,16 @@ func (runner *ContainerRunner) startCrunchstat() error {
        }
        runner.statLogger = NewThrottledLogger(w)
        runner.statReporter = &crunchstat.Reporter{
-               CID:          runner.executor.CgroupID(),
-               Logger:       log.New(runner.statLogger, "", 0),
                CgroupParent: runner.expectCgroupParent,
                CgroupRoot:   runner.cgroupRoot,
-               PollPeriod:   runner.statInterval,
-               TempDir:      runner.parentTemp,
+               CID:          runner.executor.CgroupID(),
+               Logger:       log.New(runner.statLogger, "", 0),
+               MemThresholds: map[string][]crunchstat.Threshold{
+                       "rss": crunchstat.NewThresholdsFromPercentages(runner.Container.RuntimeConstraints.RAM, []int64{90, 95, 99}),
+               },
+               PollPeriod:      runner.statInterval,
+               TempDir:         runner.parentTemp,
+               ThresholdLogger: runner.CrunchLog,
        }
        runner.statReporter.Start()
        return nil
@@ -1291,7 +1295,7 @@ func (runner *ContainerRunner) checkSpotInterruptionNotices() {
                failures = 0
                if metadata != lastmetadata {
                        lastmetadata = metadata
-                       text := fmt.Sprintf("Cloud provider indicates instance action %q scheduled for time %q", metadata.Action, metadata.Time.UTC().Format(time.RFC3339))
+                       text := fmt.Sprintf("Cloud provider scheduled instance %s at %s", metadata.Action, metadata.Time.UTC().Format(time.RFC3339))
                        runner.CrunchLog.Printf("%s", text)
                        runner.updateRuntimeStatus(arvadosclient.Dict{
                                "warning":          "preemption notice",