1 // Copyright (C) The Arvados Authors. All rights reserved.
3 // SPDX-License-Identifier: AGPL-3.0
5 // Package crunchstat reports resource usage (CPU, memory, disk,
6 // network) for a cgroup.
28 // crunchstat collects all memory statistics, but only reports these.
// Reporting code in this file looks each name up with a "total_"
// prefix first and falls back to the bare name.
29 var memoryStats = [...]string{"cache", "swap", "pgmajfault", "rss"}
// logPrinter is the logging interface this file writes reports to; it
// needs a Printf-style method.
31 type logPrinter interface {
32 Printf(fmt string, args ...interface{})
35 // A Reporter gathers statistics for a cgroup and writes them to a
37 type Reporter struct {
38 // Func that returns the pid of a process inside the desired
39 // cgroup. Reporter will call Pid periodically until it
40 // returns a positive number, then start reporting stats for
41 // the cgroup that process belongs to.
43 // Pid is used when cgroups v2 is available. For cgroups v1,
47 // Interval between samples. Must be positive.
// (run() substitutes 10 seconds if the value is below 1.)
48 PollPeriod time.Duration
50 // Temporary directory, will be monitored for available, used
54 // Where to write statistics. Must not be nil.
57 // When stats cross thresholds configured in the fields below,
58 // they are reported to this logger.
59 ThresholdLogger logPrinter
61 // MemThresholds maps memory stat names to slices of thresholds.
62 // When the corresponding stat exceeds a threshold, that will be logged.
// getMemSample rewrites entries of this map as thresholds are
// reported. NOTE(review): its loop stops at the first threshold above
// the current value, so each slice is presumably expected in
// ascending order -- confirm.
63 MemThresholds map[string][]Threshold
65 // Filesystem to read /proc entries and cgroup stats from.
66 // Non-nil for testing, nil for real root filesystem.
69 // Enable debug messages.
72 // available cgroup hierarchies
74 cpusetCpus string // v1,v2 (via /proc/$PID/cpuset)
75 cpuacctStat string // v1 (via /proc/$PID/cgroup => cpuacct)
77 ioServiceBytes string // v1 (via /proc/$PID/cgroup => blkio)
79 memoryStat string // v1 and v2 (but v2 is missing some entries)
80 memoryCurrent string // v2
81 memorySwapCurrent string // v2
82 netDev string // /proc/$PID/net/dev
// Most recent (last*) and peak (max*) samples. The three maps are
// allocated in run().
86 lastNetSample map[string]ioSample
87 lastDiskIOSample map[string]ioSample
88 lastCPUSample cpuSample
89 lastDiskSpaceSample diskSpaceSample
90 lastMemSample memSample
91 maxDiskSpaceSample diskSpaceSample
92 maxMemSample map[memoryKey]int64
94 // process returned by Pid(), whose cgroup stats we are
98 // individual processes whose memory size we are reporting
// Maps a caller-chosen name to a pid; reportPIDsMu guards it (see
// ReportPID and doProcmemStats).
99 reportPIDs map[string]int
100 reportPIDsMu sync.Mutex
// Signalling channels, created in Start.
102 done chan struct{} // closed when we should stop reporting
103 ready chan struct{} // have pid and stat files
104 flushed chan struct{} // closed when we have made our last report
// Threshold is a reporting threshold for a memory statistic. Code
// elsewhere in this file reads its percentage, threshold, and total
// fields: threshold is the absolute value compared against the stat,
// while percentage and total are used in the log message.
107 type Threshold struct {
// NewThresholdFromPercentage returns a Threshold whose absolute
// threshold is the given percentage of total, computed with integer
// arithmetic (total * percentage / 100, so the result is truncated).
113 func NewThresholdFromPercentage(total int64, percentage int64) Threshold {
115 percentage: percentage,
116 threshold: total * percentage / 100,
// NewThresholdsFromPercentages builds one Threshold per entry of
// percentages, in the same order, each via NewThresholdFromPercentage
// with the same total. Nothing is appended when percentages is empty,
// leaving the named result nil.
121 func NewThresholdsFromPercentages(total int64, percentages []int64) (thresholds []Threshold) {
122 for _, percentage := range percentages {
123 thresholds = append(thresholds, NewThresholdFromPercentage(total, percentage))
128 // memoryKey is a key into Reporter.maxMemSample.
129 // Initialize it with just statName to get the host/cgroup maximum.
130 // Initialize it with all fields to get that process' maximum.
// A positional literal elsewhere in this file supplies the fields in
// the order (pid, process name, stat name).
131 type memoryKey struct {
137 // Start starts monitoring in a new goroutine, and returns
140 // The monitoring goroutine waits for a non-empty CIDFile to appear
141 // (unless CID is non-empty). Then it waits for the accounting files
142 // to appear for the monitored container. Then it collects and reports
143 // statistics until Stop is called.
145 // Callers should not call Start more than once.
147 // Callers should not modify public data fields after calling Start.
148 func (r *Reporter) Start() {
// NOTE(review): the paragraph above mentions CIDFile and CID, but the
// visible Reporter field comments describe a Pid func instead -- that
// paragraph may be stale; confirm against the full struct.
//
// Create the three unbuffered signalling channels; per their field
// comments they are signalled by being closed.
149 r.done = make(chan struct{})
150 r.ready = make(chan struct{})
151 r.flushed = make(chan struct{})
158 // ReportPID starts reporting stats for a specified process.
// The pid is stored under name in r.reportPIDs, so a later call with
// the same name replaces the earlier pid. The map is created on first
// use.
159 func (r *Reporter) ReportPID(name string, pid int) {
// Hold the mutex for the whole update; doProcmemStats takes the same
// mutex while reading the map.
160 r.reportPIDsMu.Lock()
161 defer r.reportPIDsMu.Unlock()
162 if r.reportPIDs == nil {
163 r.reportPIDs = map[string]int{name: pid}
165 r.reportPIDs[name] = pid
169 // Stop reporting. Do not call more than once, or before calling
172 // Nothing will be logged after Stop returns unless you call a Log* method.
173 func (r *Reporter) Stop() {
// NOTE(review): body not visible in this excerpt. The channel comments
// on Reporter (done: "closed when we should stop reporting", flushed:
// "closed when we have made our last report") suggest Stop closes done
// and then waits for flushed -- confirm.
// v1keys is a set of strings, represented as a map to bool.
// NOTE(review): entries and uses are not visible in this excerpt;
// presumably these are the cgroups v1 controller names that
// cgroupMounts records -- confirm.
178 var v1keys = map[string]bool{
185 // Find cgroup hierarchies in /proc/mounts, e.g.,
188 // "blkio": "/sys/fs/cgroup/blkio",
189 // "unified": "/sys/fs/cgroup/unified",
191 func (r *Reporter) cgroupMounts() map[string]string {
// The mount table is read through r.FS, hence the path without a
// leading slash.
192 procmounts, err := fs.ReadFile(r.FS, "proc/mounts")
194 r.Logger.Printf("error reading /proc/mounts: %s", err)
197 mounts := map[string]string{}
198 for _, line := range bytes.Split(procmounts, []byte{'\n'}) {
// Split into at most 6 space-separated fields. Below, fields[1] is
// used as the mount point, fields[2] as the filesystem type, and
// fields[3] as the comma-separated mount options.
199 fields := bytes.SplitN(line, []byte{' '}, 6)
200 if len(fields) != 6 {
203 switch string(fields[2]) {
205 // cgroup /sys/fs/cgroup/unified cgroup2 rw,nosuid,nodev,noexec,relatime 0 0
206 mounts["unified"] = string(fields[1])
208 // cgroup /sys/fs/cgroup/blkio cgroup rw,nosuid,nodev,noexec,relatime,blkio 0 0
// For a v1 mount the controller name appears among the mount options,
// so each option is a candidate key for this mount point.
209 options := bytes.Split(fields[3], []byte{','})
210 for _, option := range options {
211 option := string(option)
213 mounts[option] = string(fields[1])
222 // generate map of cgroup controller => path for r.pid.
224 // the "unified" controller represents cgroups v2.
// mounts is the controller => mount point map produced by
// cgroupMounts; each returned path is a mount point with the
// process's cgroup path appended.
225 func (r *Reporter) cgroupPaths(mounts map[string]string) map[string]string {
226 if len(mounts) == 0 {
229 procdir := fmt.Sprintf("proc/%d", r.pid)
230 buf, err := fs.ReadFile(r.FS, procdir+"/cgroup")
232 r.Logger.Printf("error reading cgroup file: %s", err)
235 paths := map[string]string{}
236 for _, line := range bytes.Split(buf, []byte{'\n'}) {
237 // The entry for cgroup v2 is always in the format
239 // https://docs.kernel.org/admin-guide/cgroup-v2.html
240 if bytes.HasPrefix(line, []byte("0::/")) && mounts["unified"] != "" {
// line[3:] drops the "0::" prefix and keeps the leading "/".
241 paths["unified"] = mounts["unified"] + string(line[3:])
244 // cgroups v1 entries look like
245 // "6:cpu,cpuacct:/user.slice"
246 fields := bytes.SplitN(line, []byte{':'}, 3)
247 if len(fields) != 3 {
// fields[1] may name several controllers sharing one hierarchy; only
// controllers with a known mount point get a path.
250 for _, key := range bytes.Split(fields[1], []byte{','}) {
252 if mounts[key] != "" {
253 paths[key] = mounts[key] + string(fields[2])
257 // In unified mode, /proc/$PID/cgroup doesn't have a cpuset
258 // entry, but we still need it -- there's no cpuset.cpus file
259 // in the cgroup2 subtree indicated by the 0::$PATH entry. We
260 // have to get the right path from /proc/$PID/cpuset.
261 if _, found := paths["cpuset"]; !found && mounts["unified"] != "" {
// NOTE(review): the read error is discarded. If the read fails, buf is
// empty and the cpuset path becomes the unified mount point itself --
// confirm that fallback is intended.
262 buf, _ := fs.ReadFile(r.FS, procdir+"/cpuset")
263 cpusetPath := string(bytes.TrimRight(buf, "\n"))
264 paths["cpuset"] = mounts["unified"] + cpusetPath
// findStatFiles locates the stat files to sample for r.pid's cgroups
// and records their paths (relative to r.FS) in r.statFiles, logging a
// notice for each file chosen. Candidates are tried in table order;
// once a destination field has been filled, the done map marks it so
// later candidates for the same field are not used.
269 func (r *Reporter) findStatFiles() {
270 mounts := r.cgroupMounts()
271 paths := r.cgroupPaths(mounts)
// Keyed by pointer to the destination field, so two table rows that
// target the same field share one entry.
272 done := map[*string]bool{}
273 for _, try := range []struct {
278 {&r.statFiles.cpusetCpus, "cpuset", "cpuset.cpus.effective"},
279 {&r.statFiles.cpusetCpus, "cpuset", "cpuset.cpus"},
280 {&r.statFiles.cpuacctStat, "cpuacct", "cpuacct.stat"},
281 {&r.statFiles.cpuStat, "unified", "cpu.stat"},
282 // blkio.throttle.io_service_bytes must precede
283 // blkio.io_service_bytes -- on ubuntu1804, the latter
284 // is present but reports 0
285 {&r.statFiles.ioServiceBytes, "blkio", "blkio.throttle.io_service_bytes"},
286 {&r.statFiles.ioServiceBytes, "blkio", "blkio.io_service_bytes"},
287 {&r.statFiles.ioStat, "unified", "io.stat"},
288 {&r.statFiles.memoryStat, "unified", "memory.stat"},
289 {&r.statFiles.memoryStat, "memory", "memory.stat"},
290 {&r.statFiles.memoryCurrent, "unified", "memory.current"},
291 {&r.statFiles.memorySwapCurrent, "unified", "memory.swap.current"},
293 startpath, ok := paths[try.pathkey]
294 if !ok || done[try.statFile] {
297 // /proc/$PID/cgroup says cgroup path is
298 // /exa/mple/exa/mple, however, sometimes the file we
299 // need is not under that path, it's only available in
300 // a parent cgroup's dir. So we start at
301 // /sys/fs/cgroup/unified/exa/mple/exa/mple/ and walk
302 // up to /sys/fs/cgroup/unified/ until we find the
305 // This might mean our reported stats include more
306 // cgroups in the cgroup tree, but it's the best we
// Each iteration strips the last path component. The walk ends when
// path is empty or "/", or when a path other than startpath no longer
// has the mount point as a prefix.
308 for path := startpath; path != "" && path != "/" && (path == startpath || strings.HasPrefix(path, mounts[try.pathkey])); path, _ = filepath.Split(strings.TrimRight(path, "/")) {
// r.FS paths are unrooted, so the leading "/" is trimmed.
309 target := strings.TrimLeft(filepath.Join(path, try.file), "/")
310 buf, err := fs.ReadFile(r.FS, target)
// An unreadable, empty, or newline-only file is not accepted as a
// stat source.
311 if err != nil || len(buf) == 0 || bytes.Equal(buf, []byte{'\n'}) {
313 if os.IsNotExist(err) {
317 r.Logger.Printf("skip /%s: %s", target, err)
321 *try.statFile = target
322 done[try.statFile] = true
323 r.Logger.Printf("notice: reading stats from /%s", target)
// Network counters come from the monitored process's own net/dev
// file, used only if it can be read and is non-empty.
328 netdev := fmt.Sprintf("proc/%d/net/dev", r.pid)
329 if buf, err := fs.ReadFile(r.FS, netdev); err == nil && len(buf) > 0 {
330 r.statFiles.netDev = netdev
331 r.Logger.Printf("using /%s", netdev)
// reportMemoryMax writes one "Maximum ... memory ... usage" line to
// logger for the given source and stat name, either as a percentage of
// limit together with value/limit, or as the bare value.
// NOTE(review): the condition selecting between the two forms and the
// assignment of units are not visible in this excerpt.
335 func (r *Reporter) reportMemoryMax(logger logPrinter, source, statName string, value, limit int64) {
// Integer division: the percentage is truncated.
344 percentage := 100 * value / limit
345 logger.Printf("Maximum %s memory %s usage was %d%%, %d/%d %s",
346 source, statName, percentage, value, limit, units)
348 logger.Printf("Maximum %s memory %s usage was %d %s",
349 source, statName, value, units)
// LogMaxima writes summary lines to logger: total CPU usage (if a CPU
// sample was taken), total disk I/O per device, maximum disk usage (if
// a nonzero total was recorded), maximum container memory usage for
// the stats named in memoryStats, and total network I/O per interface.
//
// memLimits maps a stat name to its limit; a missing entry yields 0.
// Devices and interfaces are logged in map iteration order, which Go
// does not define.
353 func (r *Reporter) LogMaxima(logger logPrinter, memLimits map[string]int64) {
354 if r.lastCPUSample.hasData {
355 logger.Printf("Total CPU usage was %f user and %f sys on %d CPUs",
356 r.lastCPUSample.user, r.lastCPUSample.sys, r.lastCPUSample.cpus)
// The last sample's counters are reported as the totals.
358 for disk, sample := range r.lastDiskIOSample {
359 logger.Printf("Total disk I/O on %s was %d bytes written and %d bytes read",
360 disk, sample.txBytes, sample.rxBytes)
362 if r.maxDiskSpaceSample.total > 0 {
363 percentage := 100 * r.maxDiskSpaceSample.used / r.maxDiskSpaceSample.total
364 logger.Printf("Maximum disk usage was %d%%, %d/%d bytes",
365 percentage, r.maxDiskSpaceSample.used, r.maxDiskSpaceSample.total)
367 for _, statName := range memoryStats {
// Look up the "total_"-prefixed maximum as well as the bare name.
368 value, ok := r.maxMemSample[memoryKey{statName: "total_" + statName}]
370 value, ok = r.maxMemSample[memoryKey{statName: statName}]
373 r.reportMemoryMax(logger, "container", statName, value, memLimits[statName])
376 for ifname, sample := range r.lastNetSample {
377 logger.Printf("Total network I/O on %s was %d bytes written and %d bytes read",
378 ifname, sample.txBytes, sample.rxBytes)
// LogProcessMemMax reports, via reportMemoryMax, the maximum recorded
// value of r.maxMemSample entries that belong to a named process. The
// process name is passed as the source and 0 as the limit.
382 func (r *Reporter) LogProcessMemMax(logger logPrinter) {
383 for memKey, value := range r.maxMemSample {
// Entries with an empty processName are the host/cgroup maxima (see
// memoryKey); presumably they are skipped here -- the guard body is
// not visible in this excerpt.
384 if memKey.processName == "" {
387 r.reportMemoryMax(logger, memKey.processName, memKey.statName, value, 0)
// readAllOrWarn reads everything from in. The visible lines show a
// warning being written to r.Logger, presumably when the read fails.
// NOTE(review): ioutil.ReadAll has been deprecated since Go 1.16 in
// favor of io.ReadAll; switching also requires checking whether the
// io/ioutil import is still used elsewhere in the file.
391 func (r *Reporter) readAllOrWarn(in io.Reader) ([]byte, error) {
392 content, err := ioutil.ReadAll(in)
394 r.Logger.Printf("warning: %v", err)
// ioSample is one reading of a pair of byte counters (txBytes and
// rxBytes) taken at sampleTime. It is used for both disk and network
// I/O; callers subtract a previous sample to report the change over an
// interval.
399 type ioSample struct {
// doBlkIOStats samples per-device block I/O byte counts and logs one
// "blkio:" line per device, appending the change since the previous
// sample when one exists. It reads the io.stat file when one was
// found, otherwise the io_service_bytes file, and stores the new
// samples in r.lastDiskIOSample.
405 func (r *Reporter) doBlkIOStats() {
406 var sampleTime = time.Now()
407 newSamples := make(map[string]ioSample)
409 if r.statFiles.ioStat != "" {
410 statfile, err := fs.ReadFile(r.FS, r.statFiles.ioStat)
414 for _, line := range bytes.Split(statfile, []byte{'\n'}) {
415 // 254:16 rbytes=72163328 wbytes=117370880 rios=3811 wios=3906 dbytes=0 dios=0
416 words := bytes.Split(line, []byte{' '})
// -1 marks a counter that has not been seen on this line yet.
420 thisSample := ioSample{sampleTime, -1, -1}
421 for _, kv := range words[1:] {
422 if bytes.HasPrefix(kv, []byte("rbytes=")) {
// kv[7:] is the text after the 7-character "rbytes=" / "wbytes="
// prefix.
423 fmt.Sscanf(string(kv[7:]), "%d", &thisSample.rxBytes)
424 } else if bytes.HasPrefix(kv, []byte("wbytes=")) {
425 fmt.Sscanf(string(kv[7:]), "%d", &thisSample.txBytes)
// Keep the device only if both counters were parsed; words[0] is the
// device identifier.
428 if thisSample.rxBytes >= 0 && thisSample.txBytes >= 0 {
429 newSamples[string(words[0])] = thisSample
432 } else if r.statFiles.ioServiceBytes != "" {
433 statfile, err := fs.ReadFile(r.FS, r.statFiles.ioServiceBytes)
437 for _, line := range bytes.Split(statfile, []byte{'\n'}) {
438 var device, op string
// Each usable line has the form "<device> <op> <value>".
440 if _, err := fmt.Sscanf(string(line), "%s %s %d", &device, &op, &val); err != nil {
443 var thisSample ioSample
// Read and write counts for a device arrive on separate lines, so
// accumulate them into one sample per device.
445 if thisSample, ok = newSamples[device]; !ok {
446 thisSample = ioSample{sampleTime, -1, -1}
450 thisSample.rxBytes = val
452 thisSample.txBytes = val
454 newSamples[device] = thisSample
458 for dev, sample := range newSamples {
// A sample still holding -1 is incomplete.
459 if sample.txBytes < 0 || sample.rxBytes < 0 {
463 if prev, ok := r.lastDiskIOSample[dev]; ok {
464 delta = fmt.Sprintf(" -- interval %.4f seconds %d write %d read",
465 sample.sampleTime.Sub(prev.sampleTime).Seconds(),
466 sample.txBytes-prev.txBytes,
467 sample.rxBytes-prev.rxBytes)
469 r.Logger.Printf("blkio:%s %d write %d read%s\n", dev, sample.txBytes, sample.rxBytes, delta)
470 r.lastDiskIOSample[dev] = sample
// memSample holds the memory statistics read at one point in time.
474 type memSample struct {
// memStat maps a stat name to its value.
476 memStat map[string]int64
// getMemSample reads the current memory statistics into a new
// memSample, stores it in r.lastMemSample, raises the per-stat maxima
// in r.maxMemSample, and -- when r.ThresholdLogger is set -- logs each
// configured threshold that the current value has reached, then drops
// the reported thresholds from r.MemThresholds.
479 func (r *Reporter) getMemSample() {
480 thisSample := memSample{time.Now(), make(map[string]int64)}
482 // memory.stat contains "pgmajfault" in cgroups v1 and v2. It
483 // also contains "rss", "swap", and "cache" in cgroups v1.
484 c, err := r.FS.Open(r.statFiles.memoryStat)
489 b := bufio.NewScanner(c)
// Each usable line has the form "<name> <value>". Text() already
// returns a string, so no conversion is needed.
493 if _, err := fmt.Sscanf(b.Text(), "%s %d", &stat, &val); err != nil {
496 thisSample.memStat[stat] = val
499 // In cgroups v2, we need to read "memory.current" and
500 // "memory.swap.current" as well.
501 for stat, fnm := range map[string]string{
502 // memory.current includes cache. We don't get
503 // separate rss/cache values, so we call
504 // memory usage "rss" for compatibility, and
506 "rss": r.statFiles.memoryCurrent,
507 "swap": r.statFiles.memorySwapCurrent,
512 buf, err := fs.ReadFile(r.FS, fnm)
517 _, err = fmt.Sscanf(string(buf), "%d", &val)
521 thisSample.memStat[stat] = val
// Track the largest value seen for every stat, not only the reported
// ones.
523 for stat, val := range thisSample.memStat {
524 maxKey := memoryKey{statName: stat}
525 if val > r.maxMemSample[maxKey] {
526 r.maxMemSample[maxKey] = val
529 r.lastMemSample = thisSample
531 if r.ThresholdLogger != nil {
532 for statName, thresholds := range r.MemThresholds {
// Look the stat up with the "total_" prefix as well as under its bare
// name.
533 statValue, ok := thisSample.memStat["total_"+statName]
535 statValue, ok = thisSample.memStat[statName]
541 var statThreshold Threshold
542 for index, statThreshold = range thresholds {
543 if statValue < statThreshold.threshold {
545 } else if statThreshold.percentage > 0 {
546 r.ThresholdLogger.Printf("Container using over %d%% of memory (%s %d/%d bytes)",
547 statThreshold.percentage, statName, statValue, statThreshold.total)
// statValue is an int64, so it must be formatted with %d; %s would
// print "%!s(int64=...)" instead of the number.
549 r.ThresholdLogger.Printf("Container using over %d of memory (%s %d bytes)",
550 statThreshold.threshold, statName, statValue)
// NOTE(review): if the loop above runs to completion, index is the
// last element's index, so the final threshold stays in the slice --
// confirm whether it is then re-reported on every later sample.
553 r.MemThresholds[statName] = thresholds[index:]
// reportMemSample logs the most recent memory sample as a single "mem"
// line. For each name in memoryStats that is present in the sample it
// writes the value followed by the name, in memoryStats order.
558 func (r *Reporter) reportMemSample() {
559 var outstat bytes.Buffer
560 for _, key := range memoryStats {
561 // Use "total_X" stats (entire hierarchy) if enabled,
562 // otherwise just the single cgroup -- see
563 // https://www.kernel.org/doc/Documentation/cgroup-v1/memory.txt
564 if val, ok := r.lastMemSample.memStat["total_"+key]; ok {
// The value is labelled with the bare name even when it came from the
// "total_" entry.
565 fmt.Fprintf(&outstat, " %d %s", val, key)
566 } else if val, ok := r.lastMemSample.memStat[key]; ok {
567 fmt.Fprintf(&outstat, " %d %s", val, key)
570 r.Logger.Printf("mem%s\n", outstat.String())
// doProcmemStats logs a "procmem" line listing, for each process
// registered with ReportPID, its rss from /proc/<pid>/stat multiplied
// by the kernel page size, and raises the matching per-process maxima
// in r.maxMemSample. The page size is read from /proc/self/smaps the
// first time through; r.kernelPageSize is 0 before the first attempt
// and negative after a failed one.
573 func (r *Reporter) doProcmemStats() {
574 if r.kernelPageSize == 0 {
575 // assign "don't try again" value in case we give up
576 // and return without assigning the real value
577 r.kernelPageSize = -1
578 buf, err := fs.ReadFile(r.FS, "proc/self/smaps")
580 r.Logger.Printf("error reading /proc/self/smaps: %s", err)
// Compiled here, not at package scope; this branch only runs while
// kernelPageSize is still 0.
583 m := regexp.MustCompile(`\nKernelPageSize:\s*(\d+) kB\n`).FindSubmatch(buf)
585 r.Logger.Printf("error parsing /proc/self/smaps: KernelPageSize not found")
588 size, err := strconv.ParseInt(string(m[1]), 10, 64)
590 r.Logger.Printf("error parsing /proc/self/smaps: KernelPageSize %q: %s", m[1], err)
// smaps reports the size in kB; store it in bytes.
593 r.kernelPageSize = size * 1024
594 } else if r.kernelPageSize < 0 {
595 // already failed to determine page size, don't keep
// Hold the mutex for the rest of the function so ReportPID cannot
// modify the map while it is being read.
600 r.reportPIDsMu.Lock()
601 defer r.reportPIDsMu.Unlock()
// Sort the names so the log line lists processes in a stable order.
602 procnames := make([]string, 0, len(r.reportPIDs))
603 for name := range r.reportPIDs {
604 procnames = append(procnames, name)
606 sort.Strings(procnames)
608 for _, procname := range procnames {
609 pid := r.reportPIDs[procname]
610 buf, err := fs.ReadFile(r.FS, fmt.Sprintf("proc/%d/stat", pid))
614 // If the executable name contains a ')' char,
615 // /proc/$pid/stat will look like '1234 (exec name)) S
616 // 123 ...' -- the last ')' is the end of the 2nd
618 paren := bytes.LastIndexByte(buf, ')')
622 fields := bytes.SplitN(buf[paren:], []byte{' '}, 24)
623 if len(fields) < 24 {
626 // rss is the 24th field in .../stat, and fields[0]
627 // here is the last char ')' of the 2nd field, so
629 rss, err := strconv.ParseInt(string(fields[22]), 10, 64)
// rss is a page count; convert to bytes.
633 value := rss * r.kernelPageSize
634 procmem += fmt.Sprintf(" %d %s", value, procname)
// The key includes the pid, so a name re-registered with a different
// pid gets its own maximum.
635 maxKey := memoryKey{pid, procname, "rss"}
636 if value > r.maxMemSample[maxKey] {
637 r.maxMemSample[maxKey] = value
641 r.Logger.Printf("procmem%s\n", procmem)
// doNetworkStats parses the net/dev file recorded in
// r.statFiles.netDev and logs one "net:" line per interface other than
// "lo", appending the change since the previous sample when one
// exists. Each new sample replaces the interface's entry in
// r.lastNetSample. Nothing is sampled when no net/dev file was found.
645 func (r *Reporter) doNetworkStats() {
646 if r.statFiles.netDev == "" {
649 sampleTime := time.Now()
650 stats, err := r.FS.Open(r.statFiles.netDev)
655 scanner := bufio.NewScanner(stats)
659 words := strings.Fields(scanner.Text())
// A data line is expected to have 17 whitespace-separated words: the
// interface name plus 16 counters. Other lines are not parsed.
660 if len(words) != 17 {
661 // Skip lines with wrong format
664 ifName = strings.TrimRight(words[0], ":")
665 if ifName == "lo" || ifName == "" {
666 // Skip loopback interface and lines with wrong format
// words[9] is taken as transmitted bytes and words[1] as received
// bytes.
669 if tx, err = strconv.ParseInt(words[9], 10, 64); err != nil {
672 if rx, err = strconv.ParseInt(words[1], 10, 64); err != nil {
675 nextSample := ioSample{}
676 nextSample.sampleTime = sampleTime
677 nextSample.txBytes = tx
678 nextSample.rxBytes = rx
680 if prev, ok := r.lastNetSample[ifName]; ok {
681 interval := nextSample.sampleTime.Sub(prev.sampleTime).Seconds()
682 delta = fmt.Sprintf(" -- interval %.4f seconds %d tx %d rx",
687 r.Logger.Printf("net:%s %d tx %d rx%s\n", ifName, tx, rx, delta)
688 r.lastNetSample[ifName] = nextSample
// diskSpaceSample is one reading of filesystem space. Code elsewhere
// in this file uses its hasData, sampleTime, total, used, and
// available fields; the three sizes are block counts multiplied by the
// block size (see doDiskSpaceStats).
692 type diskSpaceSample struct {
// doDiskSpaceStats calls statfs on r.TempDir and logs a "statfs" line
// with available, used, and total bytes, appending the change in used
// space since the previous sample when one exists. It stores the
// sample as r.lastDiskSpaceSample and also as r.maxDiskSpaceSample
// when its used value is the largest seen so far.
700 func (r *Reporter) doDiskSpaceStats() {
701 s := syscall.Statfs_t{}
702 err := syscall.Statfs(r.TempDir, &s)
// statfs reports counts of blocks; multiply by the block size to get
// bytes.
706 bs := uint64(s.Bsize)
707 nextSample := diskSpaceSample{
709 sampleTime: time.Now(),
710 total: s.Blocks * bs,
// used counts every block that is not free; available uses Bavail
// instead of Bfree.
711 used: (s.Blocks - s.Bfree) * bs,
712 available: s.Bavail * bs,
714 if nextSample.used > r.maxDiskSpaceSample.used {
715 r.maxDiskSpaceSample = nextSample
719 if r.lastDiskSpaceSample.hasData {
720 prev := r.lastDiskSpaceSample
721 interval := nextSample.sampleTime.Sub(prev.sampleTime).Seconds()
722 delta = fmt.Sprintf(" -- interval %.4f seconds %d used",
// The unsigned difference is converted to int64 so that a decrease in
// usage prints as a negative number.
724 int64(nextSample.used-prev.used))
726 r.Logger.Printf("statfs %d available %d used %d total%s\n",
727 nextSample.available, nextSample.used, nextSample.total, delta)
728 r.lastDiskSpaceSample = nextSample
// cpuSample is one reading of CPU usage. Code elsewhere in this file
// uses its sampleTime, user, sys (both in seconds), and cpus fields in
// addition to hasData.
731 type cpuSample struct {
732 hasData bool // to distinguish the zero value from real data
739 // Return the number of CPUs available in the container. Return 0 if
740 // we can't figure out the real number of CPUs.
741 func (r *Reporter) getCPUCount() int64 {
742 buf, err := fs.ReadFile(r.FS, r.statFiles.cpusetCpus)
// The file content is treated as a comma-separated list whose entries
// are parsed as "min-max" ranges.
747 for _, v := range bytes.Split(buf, []byte{','}) {
// n is the number of values Sscanf managed to parse from this entry;
// the scan error itself is ignored.
749 n, _ := fmt.Sscanf(string(v), "%d-%d", &min, &max)
// An inclusive range min-max contributes max-min+1 CPUs.
751 cpus += (max - min) + 1
// doCPUStats samples cumulative user and system CPU time and logs a
// "cpu" line, appending the change since the previous sample when one
// exists, then stores the sample in r.lastCPUSample. It reads the
// cpu.stat file when one was found, otherwise cpuacct.stat.
759 func (r *Reporter) doCPUStats() {
760 var nextSample cpuSample
761 if r.statFiles.cpuStat != "" {
763 f, err := r.FS.Open(r.statFiles.cpuStat)
768 nextSample = cpuSample{
770 sampleTime: time.Now(),
771 cpus: r.getCPUCount(),
// Read "<name> <value>" pairs; a line that does not yield both items
// fails the check below.
776 n, err := fmt.Fscanf(f, "%s %d\n", &stat, &val)
777 if err != nil || n != 2 {
// The *_usec values are in microseconds; convert to seconds.
780 if stat == "user_usec" {
781 nextSample.user = float64(val) / 1000000
782 } else if stat == "system_usec" {
783 nextSample.sys = float64(val) / 1000000
786 } else if r.statFiles.cpuacctStat != "" {
788 b, err := fs.ReadFile(r.FS, r.statFiles.cpuacctStat)
793 var userTicks, sysTicks int64
// NOTE(review): the Sscanf result is ignored, so a file that does not
// match this layout leaves both tick counts at 0.
794 fmt.Sscanf(string(b), "user %d\nsystem %d", &userTicks, &sysTicks)
// Ticks are converted to seconds with a fixed rate of 100 per second.
// NOTE(review): confirm the hard-coded value matches USER_HZ on the
// target systems.
795 userHz := float64(100)
796 nextSample = cpuSample{
798 sampleTime: time.Now(),
799 user: float64(userTicks) / userHz,
800 sys: float64(sysTicks) / userHz,
801 cpus: r.getCPUCount(),
806 if r.lastCPUSample.hasData {
807 delta = fmt.Sprintf(" -- interval %.4f seconds %.4f user %.4f sys",
808 nextSample.sampleTime.Sub(r.lastCPUSample.sampleTime).Seconds(),
809 nextSample.user-r.lastCPUSample.user,
810 nextSample.sys-r.lastCPUSample.sys)
812 r.Logger.Printf("cpu %.4f user %.4f sys %d cpus%s\n",
813 nextSample.user, nextSample.sys, nextSample.cpus, delta)
814 r.lastCPUSample = nextSample
// NOTE(review): the body of doAllStats is not visible in this excerpt;
// from its name it presumably runs the individual do*Stats samplers in
// sequence -- confirm.
817 func (r *Reporter) doAllStats() {
826 // Report stats periodically until we learn (via r.done) that someone
828 func (r *Reporter) run() {
// flushed is closed on every return path from run.
829 defer close(r.flushed)
// Allocate the sample maps before any sampler writes to them.
831 r.maxMemSample = make(map[memoryKey]int64)
839 r.lastNetSample = make(map[string]ioSample)
840 r.lastDiskIOSample = make(map[string]ioSample)
842 if len(r.TempDir) == 0 {
843 // Temporary dir not provided, try to get it from the environment.
844 r.TempDir = os.Getenv("TMPDIR")
846 if len(r.TempDir) > 0 {
847 r.Logger.Printf("notice: monitoring temp dir %s\n", r.TempDir)
// A PollPeriod below 1 (including the zero value) is replaced with a
// 10-second default.
853 if r.PollPeriod < 1 {
854 r.PollPeriod = time.Second * 10
// Two tickers: memTicker fires every second regardless of PollPeriod;
// mainTicker fires once per PollPeriod.
// NOTE(review): what each ticker triggers is not visible in this
// excerpt.
857 memTicker := time.NewTicker(time.Second)
858 mainTicker := time.NewTicker(r.PollPeriod)
871 // Wait for Pid() to return a real pid. Return true if this succeeds
872 // before Stop is called.
873 func (r *Reporter) waitForPid() bool {
// Ticks every 100 ms while waiting.
874 ticker := time.NewTicker(100 * time.Millisecond)
// Fires once, after one PollPeriod; the "still waiting" warning below
// reports that duration.
876 warningTimer := time.After(r.PollPeriod)
885 r.Logger.Printf("warning: Pid() did not return a process ID after %v (config error?) -- still waiting...", r.PollPeriod)
887 r.Logger.Printf("warning: Pid() never returned a process ID")
// dumpSourceFiles copies the files this Reporter reads its statistics
// from into destdir, preserving their relative paths: the monitored
// process's cgroup and cpuset files, the stat files recorded in
// r.statFiles, and the stat file of every process registered with
// ReportPID. It returns an error if the reporter was never ready, if a
// copy fails, or if the monitored process no longer exists once
// copying is done.
894 func (r *Reporter) dumpSourceFiles(destdir string) error {
897 return errors.New("reporter was never ready")
901 fmt.Sprintf("proc/%d/cgroup", r.pid),
902 fmt.Sprintf("proc/%d/cpuset", r.pid),
905 r.statFiles.cpusetCpus,
906 r.statFiles.cpuacctStat,
908 r.statFiles.ioServiceBytes,
910 r.statFiles.memoryStat,
911 r.statFiles.memoryCurrent,
912 r.statFiles.memorySwapCurrent,
915 for _, path := range todo {
919 err := r.createParentsAndCopyFile(destdir, path)
// Keep the mutex held while r.reportPIDs is iterated below, so a
// concurrent ReportPID cannot modify the map mid-iteration. The unlock
// is deferred; nothing called later in this function takes the mutex.
924 r.reportPIDsMu.Lock()
925 defer r.reportPIDsMu.Unlock()
926 for _, pid := range r.reportPIDs {
927 path := fmt.Sprintf("proc/%d/stat", pid)
928 err := r.createParentsAndCopyFile(destdir, path)
// Signal 0 only checks whether the process can be signalled. This
// queries the real OS process table, not r.FS.
933 if proc, err := os.FindProcess(r.pid); err != nil || proc.Signal(syscall.Signal(0)) != nil {
934 return fmt.Errorf("process %d no longer exists, snapshot is probably broken", r.pid)
// createParentsAndCopyFile reads path from r.FS and writes its content
// to the same relative path under destdir on the real filesystem,
// creating any missing parent directories first, and logs the copy
// with its size.
// NOTE(review): a not-exist error from the read is handled separately
// from other errors; presumably a missing source is not an error --
// that branch body is not visible in this excerpt.
939 func (r *Reporter) createParentsAndCopyFile(destdir, path string) error {
940 buf, err := fs.ReadFile(r.FS, path)
941 if os.IsNotExist(err) {
943 } else if err != nil {
946 if parent, _ := filepath.Split(path); parent != "" {
// Directories are requested with mode 0777 (subject to the umask).
947 err = os.MkdirAll(destdir+"/"+parent, 0777)
949 return fmt.Errorf("mkdir %s: %s", destdir+"/"+parent, err)
952 destfile := destdir + "/" + path
953 r.Logger.Printf("copy %s to %s -- size %d", path, destfile, len(buf))
// The file is also written with mode 0777 (subject to the umask).
954 return os.WriteFile(destfile, buf, 0777)