19563: Report process size in bytes, not pages. 19563-log-cr-mem
author Tom Clegg <tom@curii.com>
Tue, 25 Oct 2022 18:00:38 +0000 (14:00 -0400)
committer Tom Clegg <tom@curii.com>
Tue, 25 Oct 2022 18:00:38 +0000 (14:00 -0400)
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom@curii.com>

lib/crunchstat/crunchstat.go
lib/crunchstat/crunchstat_test.go

index 443d2202cecb5420ad174ae56ca41009cd7bea1b..3a473cab8715c49eec14d5e0565b61daf9d71a5e 100644 (file)
@@ -14,6 +14,7 @@ import (
        "io"
        "io/ioutil"
        "os"
+       "regexp"
        "sort"
        "strconv"
        "strings"
@@ -52,6 +53,7 @@ type Reporter struct {
                Printf(fmt string, args ...interface{})
        }
 
+       kernelPageSize      int64
        reportedStatFile    map[string]string
        lastNetSample       map[string]ioSample
        lastDiskIOSample    map[string]ioSample
@@ -274,6 +276,32 @@ func (r *Reporter) doMemoryStats() {
        }
        r.Logger.Printf("mem%s\n", outstat.String())
 
+       if r.kernelPageSize == 0 {
+               // assign "don't try again" value in case we give up
+               // and return without assigning the real value
+               r.kernelPageSize = -1
+               buf, err := os.ReadFile("/proc/self/smaps")
+               if err != nil {
+                       r.Logger.Printf("error reading /proc/self/smaps: %s", err)
+                       return
+               }
+               m := regexp.MustCompile(`\nKernelPageSize:\s*(\d+) kB\n`).FindSubmatch(buf)
+               if len(m) != 2 {
+                       r.Logger.Printf("error parsing /proc/self/smaps: KernelPageSize not found")
+                       return
+               }
+               size, err := strconv.ParseInt(string(m[1]), 10, 64)
+               if err != nil {
+                       r.Logger.Printf("error parsing /proc/self/smaps: KernelPageSize %q: %s", m[1], err)
+                       return
+               }
+               r.kernelPageSize = size * 1024
+       } else if r.kernelPageSize < 0 {
+               // already failed to determine page size, don't keep
+               // trying/logging
+               return
+       }
+
        r.reportPIDsMu.Lock()
        defer r.reportPIDsMu.Unlock()
        procnames := make([]string, 0, len(r.reportPIDs))
@@ -303,11 +331,11 @@ func (r *Reporter) doMemoryStats() {
                // rss is the 24th field in .../stat, and fields[0]
                // here is the last char ')' of the 2nd field, so
                // rss is fields[22]
-               rss, err := strconv.Atoi(string(fields[22]))
+               rss, err := strconv.ParseInt(string(fields[22]), 10, 64)
                if err != nil {
                        continue
                }
-               procmem += fmt.Sprintf(" %d %s", rss, procname)
+               procmem += fmt.Sprintf(" %d %s", rss*r.kernelPageSize, procname)
        }
        if procmem != "" {
                r.Logger.Printf("procmem%s\n", procmem)
index 922aa369ba79a443778f558a0a816e37efb10d7c..5e8e93de6cfae9ce3f51c7b191e515ff8e7d9955 100644 (file)
@@ -9,6 +9,7 @@ import (
        "log"
        "os"
        "regexp"
+       "strconv"
        "testing"
        "time"
 
@@ -69,7 +70,14 @@ func (s *suite) TestReportPIDs(c *C) {
                        c.Error("timed out")
                        break
                }
-               if regexp.MustCompile(`(?ms).*procmem \d+ init \d+ test_process.*`).MatchString(logbuf.String()) {
+               if m := regexp.MustCompile(`(?ms).*procmem \d+ init (\d+) test_process.*`).FindSubmatch(logbuf.Bytes()); len(m) > 0 {
+                       size, err := strconv.ParseInt(string(m[1]), 10, 64)
+                       c.Check(err, IsNil)
+                       // Expect >1 MiB and <100 MiB -- otherwise we
+                       // are probably misinterpreting /proc/N/stat
+                       // or multiplying by the wrong page size.
+                       c.Check(size > 1000000, Equals, true)
+                       c.Check(size < 100000000, Equals, true)
                        break
                }
        }