X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/1071f4f96fcb2084424c4b29dd5915880c650254..6d03fdac5674dc88eff821f5e8ac70642f39a895:/lib/crunchrun/crunchrun_test.go

diff --git a/lib/crunchrun/crunchrun_test.go b/lib/crunchrun/crunchrun_test.go
index 4d127f19c2..9c4fe20bc1 100644
--- a/lib/crunchrun/crunchrun_test.go
+++ b/lib/crunchrun/crunchrun_test.go
@@ -6,6 +6,7 @@ package crunchrun
 
 import (
 	"bytes"
+	"context"
 	"crypto/md5"
 	"encoding/json"
 	"errors"
@@ -18,8 +19,10 @@ import (
 	"net/http/httptest"
 	"os"
 	"os/exec"
+	"path"
 	"regexp"
 	"runtime/pprof"
+	"strconv"
 	"strings"
 	"sync"
 	"sync/atomic"
@@ -33,7 +36,6 @@ import (
 	"git.arvados.org/arvados.git/sdk/go/arvadosclient"
 	"git.arvados.org/arvados.git/sdk/go/arvadostest"
 	"git.arvados.org/arvados.git/sdk/go/manifest"
-	"golang.org/x/net/context"
 
 	. "gopkg.in/check.v1"
 )
@@ -43,6 +45,8 @@ func TestCrunchExec(t *testing.T) {
 	TestingT(t)
 }
 
+const logLineStart = `(?m)(.*\n)*\d{4}-\d\d-\d\dT\d\d:\d\d:\d\d\.\d+Z ` // regexp prefix: skips any earlier lines, then matches a timestamp like 2006-01-02T15:04:05.123Z plus one space
+
 var _ = Suite(&TestSuite{})
 
 type TestSuite struct {
@@ -985,6 +989,70 @@ func (s *TestSuite) TestLogVersionAndRuntime(c *C) {
 	c.Check(s.api.Logs["crunch-run"].String(), Matches, `(?ms).*Using container runtime: stub.*`)
 }
 
+func (s *TestSuite) testLogRSSThresholds(c *C, ram int, expected []int, notExpected int) { // helper: runs a container with a ram constraint of `ram` bytes, then checks which "over N% of memory" warnings appear in the crunch-run log
+	s.runner.cgroupRoot = "testdata/fakestat"
+	s.fullRunHelper(c, `{
+		"command": ["true"],
+		"container_image": "`+arvadostest.DockerImage112PDH+`",
+		"cwd": ".",
+		"environment": {},
+		"mounts": {"/tmp": {"kind": "tmp"} },
+		"output_path": "/tmp",
+		"priority": 1,
+		"runtime_constraints": {"ram": `+strconv.Itoa(ram)+`},
+		"state": "Locked"
+	}`, nil, func() int { return 0 })
+	logs := s.api.Logs["crunch-run"].String()
+	pattern := logLineStart + `Container using over %d%% of memory \(rss 734003200/%d bytes\)` // %d slots: threshold percentage, then ram; 734003200 is presumably the rss reported by testdata/fakestat -- TODO confirm
+	var threshold int
+	for _, threshold = range expected { // assigns the outer variable, so it keeps the last expected value (or 0 if none) after the loop
+		c.Check(logs, Matches, fmt.Sprintf(pattern, threshold, ram))
+	}
+	if notExpected > threshold { // absence is only asserted for a threshold above the last expected one
+		c.Check(logs, Not(Matches), fmt.Sprintf(pattern, notExpected, ram))
+	}
+}
+
+func (s *TestSuite) TestLogNoRSSThresholds(c *C) { // 734003200 (the rss in the helper's pattern) is 10% of this ram value: no warning is expected and the 90% one must be absent
+	s.testLogRSSThresholds(c, 7340032000, []int{}, 90)
+}
+
+func (s *TestSuite) TestLogSomeRSSThresholds(c *C) { // expects the 90% and 95% warnings but not the 99% one
+	onePercentRSS := 7340032
+	s.testLogRSSThresholds(c, 102*onePercentRSS, []int{90, 95}, 99) // ram is 102% of the 734003200-byte rss, so rss is roughly 98% of ram
+}
+
+func (s *TestSuite) TestLogAllRSSThresholds(c *C) { // ram is only 99 bytes above the 734003200-byte rss, so all three warnings are expected; notExpected 0 means no absence check runs
+	s.testLogRSSThresholds(c, 734003299, []int{90, 95, 99}, 0)
+}
+
+func (s *TestSuite) TestLogMaximaAfterRun(c *C) { // checks that the crunch-run log contains each "Maximum ... usage" summary line after a container run
+	s.runner.cgroupRoot = "testdata/fakestat"
+	s.runner.parentTemp = c.MkDir() // presumably the directory whose disk usage gets reported -- TODO confirm
+	s.fullRunHelper(c, `{
+        "command": ["true"],
+        "container_image": "`+arvadostest.DockerImage112PDH+`",
+        "cwd": ".",
+        "environment": {},
+        "mounts": {"/tmp": {"kind": "tmp"} },
+        "output_path": "/tmp",
+        "priority": 1,
+        "runtime_constraints": {"ram": 7340032000},
+        "state": "Locked"
+    }`, nil, func() int { return 0 })
+	logs := s.api.Logs["crunch-run"].String()
+	for _, expected := range []string{ // regexp fragments; the literal numbers presumably come from testdata/fakestat -- TODO confirm
+		`Maximum disk usage was \d+%, \d+/\d+ bytes`,
+		`Maximum container memory cache usage was 73400320 bytes`,
+		`Maximum container memory swap usage was 320 bytes`,
+		`Maximum container memory pgmajfault usage was 20 faults`,
+		`Maximum container memory rss usage was 10%, 734003200/7340032000 bytes`,
+		`Maximum crunch-run memory rss usage was \d+ bytes`,
+	} {
+		c.Check(logs, Matches, logLineStart+expected) // each fragment must directly follow a log timestamp prefix
+	}
+}
+
 func (s *TestSuite) TestCommitNodeInfoBeforeStart(c *C) {
 	var collection_create, container_update arvadosclient.Dict
 	s.fullRunHelper(c, `{
@@ -2283,6 +2351,80 @@ func (s *TestSuite) TestCalculateCost(c *C) {
 	c.Check(logbuf.String(), Not(Matches), `(?ms).*changed to 2\.00 .* changed to 2\.00 .*`)
 }
 
+func (s *TestSuite) TestSIGUSR2CostUpdate(c *C) { // sends SIGUSR2 to this process twice during a run and checks the cost value found in the API calls after each signal
+	pid := os.Getpid()
+	now := time.Now()
+	pricesJSON, err := json.Marshal([]cloud.InstancePrice{ // price history that is written to the prices file before the second signal
+		{StartTime: now.Add(-4 * time.Hour), Price: 2.4},
+		{StartTime: now.Add(-2 * time.Hour), Price: 2.6},
+	})
+	c.Assert(err, IsNil)
+
+	os.Setenv("InstanceType", `{"Price":2.2}`) // presumably the hourly price used while no prices file exists -- TODO confirm
+	defer os.Unsetenv("InstanceType")
+	defer func(s string) { lockdir = s }(lockdir) // puts the previous lockdir value back when the test returns
+	lockdir = c.MkDir()
+
+	// We can't use s.api.CalledWith because timing differences will yield
+	// different cost values across runs. getCostUpdate iterates over API
+	// calls until it finds one that sets the cost, then writes that value
+	// to the next index of costUpdates.
+	deadline := now.Add(time.Second) // one deadline, measured from the start of the test and shared by both getCostUpdate calls
+	costUpdates := make([]float64, 2)
+	costIndex := 0
+	apiIndex := 0
+	getCostUpdate := func() {
+		for ; time.Now().Before(deadline); time.Sleep(time.Second / 10) { // poll every 100ms until the deadline passes
+			for apiIndex < len(s.api.Content) {
+				update := s.api.Content[apiIndex] // apiIndex persists across calls, so entries already examined are not revisited
+				apiIndex++
+				var ok bool
+				var cost float64
+				if update, ok = update["container"].(arvadosclient.Dict); !ok { // skip API calls that carry no "container" dict
+					continue
+				}
+				if cost, ok = update["cost"].(float64); !ok { // skip container updates that do not set a float64 "cost"
+					continue
+				}
+				c.Logf("API call #%d updates cost to %v", apiIndex-1, cost)
+				costUpdates[costIndex] = cost
+				costIndex++
+				return // one cost value is recorded per call
+			}
+		}
+	}
+
+	s.fullRunHelper(c, `{
+		"command": ["true"],
+		"container_image": "`+arvadostest.DockerImage112PDH+`",
+		"cwd": ".",
+		"environment": {},
+		"mounts": {"/tmp": {"kind": "tmp"} },
+		"output_path": "/tmp",
+		"priority": 1,
+		"runtime_constraints": {},
+		"state": "Locked",
+		"uuid": "zzzzz-dz642-20230320101530a"
+	}`, nil, func() int {
+		s.runner.costStartTime = now.Add(-3 * time.Hour) // make the run appear to have started three hours ago
+		err := syscall.Kill(pid, syscall.SIGUSR2)
+		c.Check(err, IsNil, Commentf("error sending first SIGUSR2 to runner"))
+		getCostUpdate()
+
+		err = os.WriteFile(path.Join(lockdir, pricesfile), pricesJSON, 0o700) // the price history is only in place for the second signal
+		c.Check(err, IsNil, Commentf("error writing JSON prices file"))
+		err = syscall.Kill(pid, syscall.SIGUSR2)
+		c.Check(err, IsNil, Commentf("error sending second SIGUSR2 to runner"))
+		getCostUpdate()
+
+		return 0
+	})
+	// Comparing with format strings makes it easy to ignore minor variations
+	// in cost across runs while keeping diagnostics pretty.
+	c.Check(fmt.Sprintf("%.3f", costUpdates[0]), Equals, "6.600") // presumably 3h at 2.2/h (InstanceType price) -- TODO confirm
+	c.Check(fmt.Sprintf("%.3f", costUpdates[1]), Equals, "7.600") // presumably 1h at 2.4/h plus 2h at 2.6/h (prices file) -- TODO confirm
+}
+
 type FakeProcess struct {
 	cmdLine []string
 }