X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/2c6c1cb11153849e87496bb8d1f5ff24f439a6a4..64437af24f74ec3f48b1a5694a5805d52dfc4967:/services/crunch/crunchstat/src/arvados.org/crunchstat/crunchstat.go

diff --git a/services/crunch/crunchstat/src/arvados.org/crunchstat/crunchstat.go b/services/crunch/crunchstat/src/arvados.org/crunchstat/crunchstat.go
index f11b93e52e..d61871da64 100644
--- a/services/crunch/crunchstat/src/arvados.org/crunchstat/crunchstat.go
+++ b/services/crunch/crunchstat/src/arvados.org/crunchstat/crunchstat.go
@@ -42,7 +42,36 @@ func OutputChannel(stdout chan string, stderr chan string) {
 	}
 }
 
-func PollCgroupStats(cgroup_path string, stderr chan string, poll int64) {
+// FindStat returns the path of the cgroup file "<statgroup>.<stat>"
+// (for example "cpuacct.stat"), or "" if it exists in none of the
+// locations probed. Candidates are tried in this order and the first
+// one that os.Stat accepts wins:
+//
+//	<cgroup_root>/<statgroup>/<cgroup_parent>/<container_id>/<statgroup>.<stat>
+//	<cgroup_root>/<cgroup_parent>/<container_id>/<statgroup>.<stat>
+//	<cgroup_root>/<statgroup>/<statgroup>.<stat>
+//	<cgroup_root>/<statgroup>.<stat>
+func FindStat(cgroup_root string, cgroup_parent string, container_id string, statgroup string, stat string) string {
+	candidates := []string{
+		fmt.Sprintf("%s/%s/%s/%s/%s.%s", cgroup_root, statgroup, cgroup_parent, container_id, statgroup, stat),
+		fmt.Sprintf("%s/%s/%s/%s.%s", cgroup_root, cgroup_parent, container_id, statgroup, stat),
+		fmt.Sprintf("%s/%s/%s.%s", cgroup_root, statgroup, statgroup, stat),
+		fmt.Sprintf("%s/%s.%s", cgroup_root, statgroup, stat),
+	}
+	for _, path := range candidates {
+		if _, err := os.Stat(path); err == nil {
+			return path
+		}
+	}
+	return ""
+}
+
+// PollCgroupStats resolves the cpuacct.stat, blkio.io_service_bytes,
+// cpuset.cpus and memory.stat files for the container with FindStat,
+// logs each path found to stderr, and then samples them in a loop,
+// sleeping poll milliseconds between samples. A stat whose file was
+// not found is skipped.
+func PollCgroupStats(cgroup_root string, cgroup_parent string, container_id string, stderr chan string, poll int64) {
 	//var last_usage int64 = 0
 	var last_user int64 = 0
 	var last_sys int64 = 0
@@ -57,13 +86,26 @@ func PollCgroupStats(cgroup_path string, stderr chan string, poll int64) {
 
 	disk := make(map[string]*Disk)
 
-	//cpuacct_usage := fmt.Sprintf("%s/cpuacct.usage", cgroup_path)
-	cpuacct_stat := fmt.Sprintf("%s/cpuacct.stat", cgroup_path)
-	blkio_io_service_bytes := fmt.Sprintf("%s/blkio.io_service_bytes", cgroup_path)
-	cpuset_cpus := fmt.Sprintf("%s/cpuset.cpus", cgroup_path)
-	memory_stat := fmt.Sprintf("%s/memory.stat", cgroup_path)
+	//cpuacct_usage := FindStat(cgroup_root, cgroup_parent, container_id, "cpuacct", "usage")
+	cpuacct_stat := FindStat(cgroup_root, cgroup_parent, container_id, "cpuacct", "stat")
+	blkio_io_service_bytes := FindStat(cgroup_root, cgroup_parent, container_id, "blkio", "io_service_bytes")
+	cpuset_cpus := FindStat(cgroup_root, cgroup_parent, container_id, "cpuset", "cpus")
+	memory_stat := FindStat(cgroup_root, cgroup_parent, container_id, "memory", "stat")
+
+	if cpuacct_stat != "" {
+		stderr <- fmt.Sprintf("crunchstat: reading stats from %s", cpuacct_stat)
+	}
+	if blkio_io_service_bytes != "" {
+		stderr <- fmt.Sprintf("crunchstat: reading stats from %s", blkio_io_service_bytes)
+	}
+	if cpuset_cpus != "" {
+		stderr <- fmt.Sprintf("crunchstat: reading stats from %s", cpuset_cpus)
+	}
+	if memory_stat != "" {
+		stderr <- fmt.Sprintf("crunchstat: reading stats from %s", memory_stat)
+	}
 
-	var ellapsed int64 = poll
+	var elapsed int64 = poll
 
 	for {
 		/*{
@@ -79,7 +121,7 @@ func PollCgroupStats(cgroup_path string, stderr chan string, poll int64) {
 			c.Close()
 		}*/
 		var cpus int64 = 0
-		{
+		if cpuset_cpus != "" {
 			c, _ := os.Open(cpuset_cpus)
 			b, _ := ioutil.ReadAll(c)
 			sp := strings.Split(string(b), ",")
@@ -103,7 +145,7 @@ func PollCgroupStats(cgroup_path string, stderr chan string, poll int64) {
 		if cpus == 0 {
 			cpus = 1
 		}
-		{
+		if cpuacct_stat != "" {
 			c, _ := os.Open(cpuacct_stat)
 			b, _ := ioutil.ReadAll(c)
 			var next_user int64
@@ -119,11 +161,11 @@ func PollCgroupStats(cgroup_path string, stderr chan string, poll int64) {
 				// time is in milliseconds, we need to boost
 				// that to 1000 jiffies per second, then boost
 				// it by another 100x to get a percentage, then
-				// finally divide by the actual ellapsed time
+				// finally divide by the actual elapsed time
 				// and the number of cpus to get average load
 				// over the polling period.
-				user_pct := (user_diff * 10 * 100) / (ellapsed * cpus)
-				sys_pct := (sys_diff * 10 * 100) / (ellapsed * cpus)
+				user_pct := (user_diff * 10 * 100) / (elapsed * cpus)
+				sys_pct := (sys_diff * 10 * 100) / (elapsed * cpus)
 
 				stderr <- fmt.Sprintf("crunchstat: cpuacct.stat user %v", user_pct)
 				stderr <- fmt.Sprintf("crunchstat: cpuacct.stat sys %v", sys_pct)
@@ -135,7 +177,7 @@ func PollCgroupStats(cgroup_path string, stderr chan string, poll int64) {
 			last_user = next_user
 			last_sys = next_sys
 		}
-		{
+		if blkio_io_service_bytes != "" {
 			c, _ := os.Open(blkio_io_service_bytes)
 			b := bufio.NewScanner(c)
 			var device, op string
@@ -148,14 +190,14 @@ func PollCgroupStats(cgroup_path string, stderr chan string, poll int64) {
 					if op == "Read" {
 						disk[device].last_read = disk[device].next_read
 						disk[device].next_read = next
-						if disk[device].last_read > 0 {
+						if disk[device].last_read > 0 && (disk[device].next_read != disk[device].last_read) {
 							stderr <- fmt.Sprintf("crunchstat: blkio.io_service_bytes %s read %v", device, disk[device].next_read-disk[device].last_read)
 						}
 					}
 					if op == "Write" {
 						disk[device].last_write = disk[device].next_write
 						disk[device].next_write = next
-						if disk[device].last_write > 0 {
+						if disk[device].last_write > 0 && (disk[device].next_write != disk[device].last_write) {
 							stderr <- fmt.Sprintf("crunchstat: blkio.io_service_bytes %s write %v", device, disk[device].next_write-disk[device].last_write)
 						}
 					}
@@ -164,7 +206,7 @@ func PollCgroupStats(cgroup_path string, stderr chan string, poll int64) {
 			c.Close()
 		}
 
-		{
+		if memory_stat != "" {
 			c, _ := os.Open(memory_stat)
 			b := bufio.NewScanner(c)
 			var stat string
@@ -182,22 +224,22 @@ func PollCgroupStats(cgroup_path string, stderr chan string, poll int64) {
 		bedtime := time.Now()
 		time.Sleep(time.Duration(poll) * time.Millisecond)
 		morning := time.Now()
-		ellapsed = morning.Sub(bedtime).Nanoseconds() / int64(time.Millisecond)
+		elapsed = morning.Sub(bedtime).Nanoseconds() / int64(time.Millisecond)
 	}
 }
 
 func main() {
 
 	var (
-		cgroup_path    string
+		cgroup_root    string
 		cgroup_parent  string
 		cgroup_cidfile string
 		wait           int64
 		poll           int64
 	)
 
-	flag.StringVar(&cgroup_path, "cgroup-path", "", "Direct path to cgroup")
-	flag.StringVar(&cgroup_parent, "cgroup-parent", "", "Path to parent cgroup")
+	flag.StringVar(&cgroup_root, "cgroup-root", "", "Root of cgroup tree")
+	flag.StringVar(&cgroup_parent, "cgroup-parent", "", "Name of container parent under cgroup")
 	flag.StringVar(&cgroup_cidfile, "cgroup-cid", "", "Path to container id file")
 	flag.Int64Var(&wait, "wait", 5, "Maximum time (in seconds) to wait for cid file to show up")
 	flag.Int64Var(&poll, "poll", 1000, "Polling frequency, in milliseconds")
@@ -206,8 +248,8 @@ func main() {
 
 	logger := log.New(os.Stderr, "crunchstat: ", 0)
 
-	if cgroup_path == "" && cgroup_cidfile == "" {
-		logger.Fatal("Must provide either -cgroup-path or -cgroup-cid")
+	if cgroup_root == "" {
+		logger.Fatal("Must provide -cgroup-root")
 	}
 
 	// Make output channel
@@ -260,6 +302,7 @@ func main() {
 	}
 
 	// Read the cid file
+	var container_id string
 	if cgroup_cidfile != "" {
 		// wait up to 'wait' seconds for the cid file to appear
 		var i time.Duration
@@ -268,26 +311,19 @@ func main() {
 			if err == nil {
 				cid, err2 := ioutil.ReadAll(f)
 				if err2 == nil && len(cid) > 0 {
-					cgroup_path = string(cid)
+					container_id = string(cid)
 					f.Close()
 					break
 				}
 			}
 			time.Sleep(100 * time.Millisecond)
 		}
-		if cgroup_path == "" {
+		if container_id == "" {
 			logger.Printf("Could not read cid file %s", cgroup_cidfile)
 		}
 	}
 
-	// add the parent prefix
-	if cgroup_parent != "" {
-		cgroup_path = fmt.Sprintf("%s/%s", cgroup_parent, cgroup_path)
-	}
-
-	logger.Print("Using cgroup ", cgroup_path)
-
-	go PollCgroupStats(cgroup_path, stderr_chan, poll)
+	go PollCgroupStats(cgroup_root, cgroup_parent, container_id, stderr_chan, poll)
 
 	// Wait for each of stdout and stderr to drain
 	<-finish_chan