From 557b0b0e61a16c098785f66e5f10b0b72973d822 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Thu, 12 Jun 2014 14:49:22 -0400 Subject: [PATCH] Expanded search path for cgroup stats, changed command line interface a bit. Also adjusted polling interval to every 10 seconds instead of every 1 second. refs #2882 --- sdk/cli/bin/crunch-job | 6 +-- .../src/arvados.org/crunchstat/crunchstat.go | 49 ++++++++++--------- 2 files changed, 29 insertions(+), 26 deletions(-) diff --git a/sdk/cli/bin/crunch-job b/sdk/cli/bin/crunch-job index e54b31208d..5da8c78dda 100755 --- a/sdk/cli/bin/crunch-job +++ b/sdk/cli/bin/crunch-job @@ -641,8 +641,8 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++) $command .= "&& exec arv-mount --allow-other $ENV{TASK_KEEPMOUNT} --exec "; if ($docker_image) { - $command .= "crunchstat -cgroup-parent=/sys/fs/cgroup/lxc -cgroup-cid=$ENV{TASK_WORK}/docker.cid -poll=1000 "; - $command .= "$docker_bin run -i -a stdin -a stdout -a stderr -cidfile=$ENV{TASK_WORK}/docker.cid "; + $command .= "crunchstat -cgroup-root=/sys/fs/cgroup -cgroup-parent=docker -cgroup-cid=$ENV{TASK_WORK}/docker.cid -poll=10000 "; + $command .= "$docker_bin run -i -a stdin -a stdout -a stderr --cidfile=$ENV{TASK_WORK}/docker.cid "; # Dynamically configure the container to use the host system as its # DNS server. Get the host's global addresses from the ip command, # and turn them into docker --dns options using gawk. @@ -661,7 +661,7 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++) } $command .= "\Q$docker_image\E "; } else { - $command .= "crunchstat -cgroup-path=/sys/fs/cgroup " + $command .= "crunchstat -cgroup-root=/sys/fs/cgroup -poll=10000 " } $command .= "stdbuf -o0 -e0 "; $command .= "$ENV{CRUNCH_SRC}/crunch_scripts/" . 
$Job->{"script"}; diff --git a/services/crunch/crunchstat/src/arvados.org/crunchstat/crunchstat.go b/services/crunch/crunchstat/src/arvados.org/crunchstat/crunchstat.go index c3479bfe84..f8d27ec01b 100644 --- a/services/crunch/crunchstat/src/arvados.org/crunchstat/crunchstat.go +++ b/services/crunch/crunchstat/src/arvados.org/crunchstat/crunchstat.go @@ -42,19 +42,28 @@ func OutputChannel(stdout chan string, stderr chan string) { } } -func FindStat(cgroup_path string, statgroup string, stat string) string { - path := fmt.Sprintf("%s/%s.%s", cgroup_path, statgroup, stat) +func FindStat(cgroup_root string, cgroup_parent string, container_id string, statgroup string, stat string) string { + var path string + path = fmt.Sprintf("%s/%s/%s/%s/%s.%s", cgroup_root, statgroup, cgroup_parent, container_id, statgroup, stat) if _, err := os.Stat(path); err == nil { return path } - path = fmt.Sprintf("%s/%s/%s.%s", cgroup_path, statgroup, statgroup, stat) + path = fmt.Sprintf("%s/%s/%s/%s.%s", cgroup_root, cgroup_parent, container_id, statgroup, stat) + if _, err := os.Stat(path); err == nil { + return path + } + path = fmt.Sprintf("%s/%s/%s.%s", cgroup_root, statgroup, statgroup, stat) + if _, err := os.Stat(path); err == nil { + return path + } + path = fmt.Sprintf("%s/%s.%s", cgroup_root, statgroup, stat) if _, err := os.Stat(path); err == nil { return path } return "" } -func PollCgroupStats(cgroup_path string, stderr chan string, poll int64) { +func PollCgroupStats(cgroup_root string, cgroup_parent string, container_id string, stderr chan string, poll int64) { //var last_usage int64 = 0 var last_user int64 = 0 var last_sys int64 = 0 @@ -70,10 +79,10 @@ func PollCgroupStats(cgroup_path string, stderr chan string, poll int64) { disk := make(map[string]*Disk) //cpuacct_usage := FindStat(cgroup_path, "cpuacct", "usage") - cpuacct_stat := FindStat(cgroup_path, "cpuacct", "stat") - blkio_io_service_bytes := FindStat(cgroup_path, "blkio", "io_service_bytes") - cpuset_cpus := 
FindStat(cgroup_path, "cpuset", "cpus") - memory_stat := FindStat(cgroup_path, "memory", "stat") + cpuacct_stat := FindStat(cgroup_root, cgroup_parent, container_id, "cpuacct", "stat") + blkio_io_service_bytes := FindStat(cgroup_root, cgroup_parent, container_id, "blkio", "io_service_bytes") + cpuset_cpus := FindStat(cgroup_root, cgroup_parent, container_id, "cpuset", "cpus") + memory_stat := FindStat(cgroup_root, cgroup_parent, container_id, "memory", "stat") if cpuacct_stat != "" { stderr <- fmt.Sprintf("crunchstat: reading stats from %s", cpuacct_stat) @@ -214,15 +223,15 @@ func PollCgroupStats(cgroup_path string, stderr chan string, poll int64) { func main() { var ( - cgroup_path string + cgroup_root string cgroup_parent string cgroup_cidfile string wait int64 poll int64 ) - flag.StringVar(&cgroup_path, "cgroup-path", "", "Direct path to cgroup") - flag.StringVar(&cgroup_parent, "cgroup-parent", "", "Path to parent cgroup") + flag.StringVar(&cgroup_root, "cgroup-root", "", "Root of cgroup tree") + flag.StringVar(&cgroup_parent, "cgroup-parent", "", "Name of container parent under cgroup") flag.StringVar(&cgroup_cidfile, "cgroup-cid", "", "Path to container id file") flag.Int64Var(&wait, "wait", 5, "Maximum time (in seconds) to wait for cid file to show up") flag.Int64Var(&poll, "poll", 1000, "Polling frequency, in milliseconds") @@ -231,8 +240,8 @@ func main() { logger := log.New(os.Stderr, "crunchstat: ", 0) - if cgroup_path == "" && cgroup_cidfile == "" { - logger.Fatal("Must provide either -cgroup-path or -cgroup-cid") + if cgroup_root == "" { + logger.Fatal("Must provide either -cgroup-root") } // Make output channel @@ -285,6 +294,7 @@ func main() { } // Read the cid file + var container_id string if cgroup_cidfile != "" { // wait up to 'wait' seconds for the cid file to appear var i time.Duration @@ -293,26 +303,19 @@ func main() { if err == nil { cid, err2 := ioutil.ReadAll(f) if err2 == nil && len(cid) > 0 { - cgroup_path = string(cid) + container_id = 
string(cid) f.Close() break } } time.Sleep(100 * time.Millisecond) } - if cgroup_path == "" { + if cgroup_root == "" { logger.Printf("Could not read cid file %s", cgroup_cidfile) } } - // add the parent prefix - if cgroup_parent != "" { - cgroup_path = fmt.Sprintf("%s/%s", cgroup_parent, cgroup_path) - } - - logger.Print("Using cgroup ", cgroup_path) - - go PollCgroupStats(cgroup_path, stderr_chan, poll) + go PollCgroupStats(cgroup_root, cgroup_parent, container_id, stderr_chan, poll) // Wait for each of stdout and stderr to drain <-finish_chan -- 2.30.2