9406: Add -cgroup-parent-subsystem argument.
author Tom Clegg <tom@curoverse.com>
Tue, 2 Aug 2016 13:42:07 +0000 (09:42 -0400)
committer Tom Clegg <tom@curoverse.com>
Tue, 2 Aug 2016 13:42:07 +0000 (09:42 -0400)
This accommodates setups where the scheduling system (e.g., SLURM)
uses cgroups to impose resource limits on crunch-run itself, and those
limits are meant to include resources used by the container itself.

Normally, when docker creates a container, it creates new cgroups
inside the "docker" cgroup, which circumvents any resource limits set
on crunch-run's own cgroup.

If a subsystem is specified (e.g., -cgroup-parent-subsystem=memory),
crunch-run will look in /proc/self/cgroup to find the current cgroup
for the given subsystem, and instruct docker to use that cgroup as the
parent of the new container's cgroup instead.

services/crunch-run/cgroup.go [new file with mode: 0644]
services/crunch-run/cgroup_test.go [new file with mode: 0644]
services/crunch-run/crunchrun.go

diff --git a/services/crunch-run/cgroup.go b/services/crunch-run/cgroup.go
new file mode 100644 (file)
index 0000000..78123e4
--- /dev/null
@@ -0,0 +1,29 @@
+package main
+
+import (
+       "bytes"
+       "io/ioutil"
+       "log"
+)
+
+// Return the current process's cgroup for the given subsystem.
+func findCgroup(subsystem string) string {
+       subsys := []byte(subsystem)
+       cgroups, err := ioutil.ReadFile("/proc/self/cgroup")
+       if err != nil {
+               log.Fatal(err)
+       }
+       for _, line := range bytes.Split(cgroups, []byte("\n")) {
+               toks := bytes.SplitN(line, []byte(":"), 4)
+               if len(toks) < 3 {
+                       continue
+               }
+               for _, s := range bytes.Split(toks[1], []byte(",")) {
+                       if bytes.Compare(s, subsys) == 0 {
+                               return string(toks[2])
+                       }
+               }
+       }
+       log.Fatalf("subsystem %q not found in /proc/self/cgroup", subsystem)
+       return ""
+}
diff --git a/services/crunch-run/cgroup_test.go b/services/crunch-run/cgroup_test.go
new file mode 100644 (file)
index 0000000..bb18836
--- /dev/null
@@ -0,0 +1,17 @@
+package main
+
+import (
+       . "gopkg.in/check.v1"
+)
+
+// CgroupSuite holds the gocheck tests for findCgroup.
+type CgroupSuite struct{}
+
+// Register the suite with gocheck so its Test* methods are run.
+var _ = Suite(&CgroupSuite{})
+
+func (s *CgroupSuite) TestFindCgroup(c *C) {
+       for _, s := range []string{"devices", "cpu", "cpuset"} {
+               g := findCgroup(s)
+               c.Check(g, Not(Equals), "")
+               c.Logf("cgroup(%q) == %q", s, g)
+       }
+}
diff --git a/services/crunch-run/crunchrun.go b/services/crunch-run/crunchrun.go
index 32d524abca2f59689e56efe59b526d9da8f37181..40e9fc11eac528965f978f4b0472249759b42cbe 100644 (file)
@@ -93,11 +93,12 @@ type ContainerRunner struct {
        ArvMountExit   chan error
        finalState     string
 
-       statLogger   io.WriteCloser
-       statReporter *crunchstat.Reporter
-       statInterval time.Duration
-       cgroupRoot   string
-       cgroupParent string
+       statLogger         io.WriteCloser
+       statReporter       *crunchstat.Reporter
+       statInterval       time.Duration
+       cgroupRoot         string
+       expectCgroupParent string
+       setCgroupParent    string
 }
 
 // SetupSignals sets up signal handling to gracefully terminate the underlying
@@ -393,7 +394,7 @@ func (runner *ContainerRunner) StartCrunchstat() {
        runner.statReporter = &crunchstat.Reporter{
                CID:          runner.ContainerID,
                Logger:       log.New(runner.statLogger, "", 0),
-               CgroupParent: runner.cgroupParent,
+               CgroupParent: runner.expectCgroupParent,
                CgroupRoot:   runner.cgroupRoot,
                PollPeriod:   runner.statInterval,
        }
@@ -480,8 +481,13 @@ func (runner *ContainerRunner) CreateContainer() error {
                return fmt.Errorf("While creating container: %v", err)
        }
 
-       runner.HostConfig = dockerclient.HostConfig{Binds: runner.Binds,
-               LogConfig: dockerclient.LogConfig{Type: "none"}}
+       runner.HostConfig = dockerclient.HostConfig{
+               Binds:        runner.Binds,
+               CgroupParent: runner.setCgroupParent,
+               LogConfig: dockerclient.LogConfig{
+                       Type: "none",
+               },
+       }
 
        return runner.AttachStreams()
 }
@@ -823,7 +829,8 @@ func NewContainerRunner(api IArvadosClient,
 func main() {
        statInterval := flag.Duration("crunchstat-interval", 10*time.Second, "sampling period for periodic resource usage reporting")
        cgroupRoot := flag.String("cgroup-root", "/sys/fs/cgroup", "path to sysfs cgroup tree")
-       cgroupParent := flag.String("cgroup-parent", "docker", "name of container's parent cgroup")
+       cgroupParent := flag.String("cgroup-parent", "docker", "name of container's parent cgroup (ignored if -cgroup-parent-subsystem is used)")
+       cgroupParentSubsystem := flag.String("cgroup-parent-subsystem", "", "use current cgroup for given subsystem as parent cgroup for container")
        flag.Parse()
 
        containerId := flag.Arg(0)
@@ -850,7 +857,12 @@ func main() {
        cr := NewContainerRunner(api, kc, docker, containerId)
        cr.statInterval = *statInterval
        cr.cgroupRoot = *cgroupRoot
-       cr.cgroupParent = *cgroupParent
+       cr.expectCgroupParent = *cgroupParent
+       if *cgroupParentSubsystem != "" {
+               p := findCgroup(*cgroupParentSubsystem)
+               cr.setCgroupParent = p
+               cr.expectCgroupParent = p
+       }
 
        err = cr.Run()
        if err != nil {