X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/c63c699aa9948f6a672536ba08e664471fb0d654..3d8da9e64df7c0dd0302b7d2f4d188a9ede9099c:/lib/crunchstat/crunchstat.go diff --git a/lib/crunchstat/crunchstat.go b/lib/crunchstat/crunchstat.go index e0ef058835..056ef0d185 100644 --- a/lib/crunchstat/crunchstat.go +++ b/lib/crunchstat/crunchstat.go @@ -1,3 +1,7 @@ +// Copyright (C) The Arvados Authors. All rights reserved. +// +// SPDX-License-Identifier: AGPL-3.0 + // Package crunchstat reports resource usage (CPU, memory, disk, // network) for a cgroup. package crunchstat @@ -40,7 +44,7 @@ type Reporter struct { // Where cgroup accounting files live on this system, e.g., // "/sys/fs/cgroup". - CgroupRoot string + CgroupRoot string // Parent cgroup, e.g., "docker". CgroupParent string @@ -56,7 +60,8 @@ type Reporter struct { lastDiskSample map[string]ioSample lastCPUSample cpuSample - done chan struct{} + done chan struct{} // closed when we should stop reporting + flushed chan struct{} // closed when we have made our last report } // Start starts monitoring in a new goroutine, and returns @@ -72,6 +77,7 @@ type Reporter struct { // Callers should not modify public data fields after calling Start. func (r *Reporter) Start() { r.done = make(chan struct{}) + r.flushed = make(chan struct{}) go r.run() } @@ -81,12 +87,13 @@ func (r *Reporter) Start() { // Nothing will be logged after Stop returns. func (r *Reporter) Stop() { close(r.done) + <-r.flushed } func (r *Reporter) readAllOrWarn(in io.Reader) ([]byte, error) { content, err := ioutil.ReadAll(in) if err != nil { - r.Logger.Print(err) + r.Logger.Printf("warning: %v", err) } return content, err } @@ -162,7 +169,7 @@ func (r *Reporter) getContainerNetStats() (io.Reader, error) { statsFilename := fmt.Sprintf("/proc/%s/net/dev", taskPid) stats, err := ioutil.ReadFile(statsFilename) if err != nil { - r.Logger.Print(err) + r.Logger.Printf("notice: %v", err) continue } return strings.NewReader(string(stats)), nil @@ -312,6 +319,9 @@ func (r *Reporter) getCPUCount() int64 { } defer cpusetFile.Close() b, err := r.readAllOrWarn(cpusetFile) + if err != nil { + return 0 + } sp := strings.Split(string(b), ",") cpus := int64(0) for _, v := range sp { @@ -337,12 +347,16 @@ func (r *Reporter) doCPUStats() { return } - nextSample := cpuSample{true, time.Now(), 0, 0, r.getCPUCount()} var userTicks, sysTicks int64 fmt.Sscanf(string(b), "user %d\nsystem %d", &userTicks, &sysTicks) userHz := float64(C.sysconf(C._SC_CLK_TCK)) - nextSample.user = float64(userTicks) / userHz - nextSample.sys = float64(sysTicks) / userHz + nextSample := cpuSample{ + hasData: true, + sampleTime: time.Now(), + user: float64(userTicks) / userHz, + sys: float64(sysTicks) / userHz, + cpus: r.getCPUCount(), + } delta := "" if r.lastCPUSample.hasData { @@ -356,9 +370,11 @@ func (r *Reporter) doCPUStats() { r.lastCPUSample = nextSample } -// Report stats periodically until r.done indicates someone called -// Stop. +// Report stats periodically until we learn (via r.done) that someone +// called Stop. func (r *Reporter) run() { + defer close(r.flushed) + r.reportedStatFile = make(map[string]string) if !r.waitForCIDFile() || !r.waitForCgroup() { @@ -383,7 +399,7 @@ func (r *Reporter) run() { } // If CID is empty, wait for it to appear in CIDFile. Return true if -// we get it before r.done indicates someone called Stop. +// we get it before we learn (via r.done) that someone called Stop. func (r *Reporter) waitForCIDFile() bool { if r.CID != "" || r.CIDFile == "" { return true @@ -400,7 +416,7 @@ func (r *Reporter) waitForCIDFile() bool { select { case <-ticker.C: case <-r.done: - r.Logger.Printf("CID never appeared in %+q: %v", r.CIDFile, err) + r.Logger.Printf("warning: CID never appeared in %+q: %v", r.CIDFile, err) return false } } @@ -423,9 +439,9 @@ func (r *Reporter) waitForCgroup() bool { select { case <-ticker.C: case <-warningTimer: - r.Logger.Printf("cgroup stats files have not appeared after %v (config error?) -- still waiting...", r.PollPeriod) + r.Logger.Printf("warning: cgroup stats files have not appeared after %v (config error?) -- still waiting...", r.PollPeriod) case <-r.done: - r.Logger.Printf("cgroup stats files never appeared for %v", r.CID) + r.Logger.Printf("warning: cgroup stats files never appeared for %v", r.CID) return false } }