From 5df1299bef79cbae96aecfd710a8d2cc088e4ecd Mon Sep 17 00:00:00 2001
From: Tom Clegg
Date: Tue, 2 Aug 2016 09:42:07 -0400
Subject: [PATCH] 9406: Add -cgroup-parent-subsystem argument.

This accommodates setups where the scheduling system (e.g., SLURM)
uses cgroups to impose resource limits on crunch-run itself, and those
limits are meant to include resources used by the container itself.

Normally, when docker creates a container, it creates new cgroups
inside the "docker" cgroup, which circumvents any resource limits set
on crunch-run's own cgroup.

If a subsystem is specified (e.g., -cgroup-parent-subsystem=memory),
crunch-run will look in /proc/self/cgroup to find the current cgroup
for the given subsystem, and instruct docker to use that cgroup as the
parent of the new container's cgroup instead.
---
 services/crunch-run/cgroup.go      | 29 +++++++++++++++++++++++++++
 services/crunch-run/cgroup_test.go | 17 ++++++++++++++++
 services/crunch-run/crunchrun.go   | 32 ++++++++++++++++++++----------
 3 files changed, 68 insertions(+), 10 deletions(-)
 create mode 100644 services/crunch-run/cgroup.go
 create mode 100644 services/crunch-run/cgroup_test.go

diff --git a/services/crunch-run/cgroup.go b/services/crunch-run/cgroup.go
new file mode 100644
index 0000000000..78123e4aee
--- /dev/null
+++ b/services/crunch-run/cgroup.go
@@ -0,0 +1,29 @@
+package main
+
+import (
+	"bytes"
+	"io/ioutil"
+	"log"
+)
+
+// findCgroup returns the current process's cgroup for the given subsystem; it exits via log.Fatal if /proc/self/cgroup is unreadable or has no matching entry.
+func findCgroup(subsystem string) string {
+	subsys := []byte(subsystem)
+	cgroups, err := ioutil.ReadFile("/proc/self/cgroup")
+	if err != nil {
+		log.Fatal(err)
+	}
+	for _, line := range bytes.Split(cgroups, []byte("\n")) {
+		toks := bytes.SplitN(line, []byte(":"), 3) // hierarchy-ID:controller-list:path; split in 3 so a ":" inside the path is kept
+		if len(toks) < 3 {
+			continue
+		}
+		for _, s := range bytes.Split(toks[1], []byte(",")) { // one line may list several subsystems
+			if bytes.Equal(s, subsys) {
+				return string(toks[2])
+			}
+		}
+	}
+	log.Fatalf("subsystem %q not found in /proc/self/cgroup", subsystem)
+	return ""
+}
diff --git a/services/crunch-run/cgroup_test.go b/services/crunch-run/cgroup_test.go
new file mode 100644
index 0000000000..bb188367b8
--- /dev/null
+++ b/services/crunch-run/cgroup_test.go
@@ -0,0 +1,17 @@
+package main
+
+import (
+	. "gopkg.in/check.v1"
+)
+
+type CgroupSuite struct{}
+
+var _ = Suite(&CgroupSuite{})
+
+func (s *CgroupSuite) TestFindCgroup(c *C) {
+	for _, s := range []string{"devices", "cpu", "cpuset"} {
+		g := findCgroup(s)
+		c.Check(g, Not(Equals), "")
+		c.Logf("cgroup(%q) == %q", s, g)
+	}
+}
diff --git a/services/crunch-run/crunchrun.go b/services/crunch-run/crunchrun.go
index 32d524abca..40e9fc11ea 100644
--- a/services/crunch-run/crunchrun.go
+++ b/services/crunch-run/crunchrun.go
@@ -93,11 +93,12 @@ type ContainerRunner struct {
 	ArvMountExit   chan error
 	finalState     string
 
-	statLogger   io.WriteCloser
-	statReporter *crunchstat.Reporter
-	statInterval time.Duration
-	cgroupRoot   string
-	cgroupParent string
+	statLogger         io.WriteCloser
+	statReporter       *crunchstat.Reporter
+	statInterval       time.Duration
+	cgroupRoot         string
+	expectCgroupParent string
+	setCgroupParent    string
 }
 
 // SetupSignals sets up signal handling to gracefully terminate the underlying
@@ -393,7 +394,7 @@ func (runner *ContainerRunner) StartCrunchstat() {
 	runner.statReporter = &crunchstat.Reporter{
 		CID:          runner.ContainerID,
 		Logger:       log.New(runner.statLogger, "", 0),
-		CgroupParent: runner.cgroupParent,
+		CgroupParent: runner.expectCgroupParent,
 		CgroupRoot:   runner.cgroupRoot,
 		PollPeriod:   runner.statInterval,
 	}
@@ -480,8 +481,13 @@ func (runner *ContainerRunner) CreateContainer() error {
 		return fmt.Errorf("While creating container: %v", err)
 	}
 
-	runner.HostConfig = dockerclient.HostConfig{Binds: runner.Binds,
-		LogConfig: dockerclient.LogConfig{Type: "none"}}
+	runner.HostConfig = dockerclient.HostConfig{
+		Binds:        runner.Binds,
+		CgroupParent: runner.setCgroupParent,
+		LogConfig: dockerclient.LogConfig{
+			Type: "none",
+		},
+	}
 
 	return runner.AttachStreams()
 }
@@ -823,7 +829,8 @@ func NewContainerRunner(api IArvadosClient,
 func main() {
 	statInterval := flag.Duration("crunchstat-interval", 10*time.Second, "sampling period for periodic resource usage reporting")
 	cgroupRoot := flag.String("cgroup-root", "/sys/fs/cgroup", "path to sysfs cgroup tree")
-	cgroupParent := flag.String("cgroup-parent", "docker", "name of container's parent cgroup")
+	cgroupParent := flag.String("cgroup-parent", "docker", "name of container's parent cgroup (ignored if -cgroup-parent-subsystem is used)")
+	cgroupParentSubsystem := flag.String("cgroup-parent-subsystem", "", "use current cgroup for given subsystem as parent cgroup for container")
 	flag.Parse()
 
 	containerId := flag.Arg(0)
@@ -850,7 +857,12 @@ func main() {
 	cr := NewContainerRunner(api, kc, docker, containerId)
 	cr.statInterval = *statInterval
 	cr.cgroupRoot = *cgroupRoot
-	cr.cgroupParent = *cgroupParent
+	cr.expectCgroupParent = *cgroupParent
+	if *cgroupParentSubsystem != "" {
+		p := findCgroup(*cgroupParentSubsystem)
+		cr.setCgroupParent = p
+		cr.expectCgroupParent = p
+	}
 
 	err = cr.Run()
 	if err != nil {
-- 
2.30.2