Merge branch 'master' into 9514-only-delete-old-container-logs
[arvados.git] / services / crunch-run / crunchrun.go
index 32d524abca2f59689e56efe59b526d9da8f37181..0b4eb2bcbf4934f62a179b3037ad395155f7aeb4 100644 (file)
@@ -18,6 +18,7 @@ import (
        "os/exec"
        "os/signal"
        "path"
+       "path/filepath"
        "strings"
        "sync"
        "syscall"
@@ -97,7 +98,19 @@ type ContainerRunner struct {
        statReporter *crunchstat.Reporter
        statInterval time.Duration
        cgroupRoot   string
-       cgroupParent string
+       // What we expect the container's cgroup parent to be.
+       expectCgroupParent string
+       // What we tell docker to use as the container's cgroup
+       // parent. Note: Ideally we would use the same field for both
+       // expectCgroupParent and setCgroupParent, and just make it
+       // default to "docker". However, when using docker < 1.10 with
+       // systemd, specifying a non-empty cgroup parent (even the
+       // default value "docker") hits a docker bug
+       // (https://github.com/docker/docker/issues/17126). Using two
+       // separate fields makes it possible to use the "expect cgroup
+       // parent to be X" feature even on sites where the "specify
+       // cgroup parent" feature breaks.
+       setCgroupParent string
 }
 
 // SetupSignals sets up signal handling to gracefully terminate the underlying
@@ -255,7 +268,8 @@ func (runner *ContainerRunner) SetupMounts() (err error) {
                        }
                }
 
-               if mnt.Kind == "collection" {
+               switch {
+               case mnt.Kind == "collection":
                        var src string
                        if mnt.UUID != "" && mnt.PortableDataHash != "" {
                                return fmt.Errorf("Cannot specify both 'uuid' and 'portable_data_hash' for a collection mount")
@@ -286,25 +300,47 @@ func (runner *ContainerRunner) SetupMounts() (err error) {
                                runner.Binds = append(runner.Binds, fmt.Sprintf("%s:%s:ro", src, bind))
                        }
                        collectionPaths = append(collectionPaths, src)
-               } else if mnt.Kind == "tmp" {
-                       if bind == runner.Container.OutputPath {
-                               runner.HostOutputDir, err = runner.MkTempDir("", "")
-                               if err != nil {
-                                       return fmt.Errorf("While creating mount temp dir: %v", err)
-                               }
-                               st, staterr := os.Stat(runner.HostOutputDir)
-                               if staterr != nil {
-                                       return fmt.Errorf("While Stat on temp dir: %v", staterr)
-                               }
-                               err = os.Chmod(runner.HostOutputDir, st.Mode()|os.ModeSetgid|0777)
-                               if staterr != nil {
-                                       return fmt.Errorf("While Chmod temp dir: %v", err)
-                               }
-                               runner.CleanupTempDir = append(runner.CleanupTempDir, runner.HostOutputDir)
-                               runner.Binds = append(runner.Binds, fmt.Sprintf("%s:%s", runner.HostOutputDir, bind))
-                       } else {
-                               runner.Binds = append(runner.Binds, bind)
+
+               case mnt.Kind == "tmp" && bind == runner.Container.OutputPath:
+                       runner.HostOutputDir, err = runner.MkTempDir("", "")
+                       if err != nil {
+                               return fmt.Errorf("While creating mount temp dir: %v", err)
+                       }
+                       st, staterr := os.Stat(runner.HostOutputDir)
+                       if staterr != nil {
+                               return fmt.Errorf("While Stat on temp dir: %v", staterr)
+                       }
+                       err = os.Chmod(runner.HostOutputDir, st.Mode()|os.ModeSetgid|0777)
+                       if staterr != nil {
+                               return fmt.Errorf("While Chmod temp dir: %v", err)
                        }
+                       runner.CleanupTempDir = append(runner.CleanupTempDir, runner.HostOutputDir)
+                       runner.Binds = append(runner.Binds, fmt.Sprintf("%s:%s", runner.HostOutputDir, bind))
+
+               case mnt.Kind == "tmp":
+                       runner.Binds = append(runner.Binds, bind)
+
+               case mnt.Kind == "json":
+                       jsondata, err := json.Marshal(mnt.Content)
+                       if err != nil {
+                               return fmt.Errorf("encoding json data: %v", err)
+                       }
+                       // Create a tempdir with a single file
+                       // (instead of just a tempfile): this way we
+                       // can ensure the file is world-readable
+                       // inside the container, without having to
+                       // make it world-readable on the docker host.
+                       tmpdir, err := runner.MkTempDir("", "")
+                       if err != nil {
+                               return fmt.Errorf("creating temp dir: %v", err)
+                       }
+                       runner.CleanupTempDir = append(runner.CleanupTempDir, tmpdir)
+                       tmpfn := filepath.Join(tmpdir, "mountdata.json")
+                       err = ioutil.WriteFile(tmpfn, jsondata, 0644)
+                       if err != nil {
+                               return fmt.Errorf("writing temp file: %v", err)
+                       }
+                       runner.Binds = append(runner.Binds, fmt.Sprintf("%s:%s:ro", tmpfn, bind))
                }
        }
 
@@ -393,7 +429,7 @@ func (runner *ContainerRunner) StartCrunchstat() {
        runner.statReporter = &crunchstat.Reporter{
                CID:          runner.ContainerID,
                Logger:       log.New(runner.statLogger, "", 0),
-               CgroupParent: runner.cgroupParent,
+               CgroupParent: runner.expectCgroupParent,
                CgroupRoot:   runner.cgroupRoot,
                PollPeriod:   runner.statInterval,
        }
@@ -480,8 +516,13 @@ func (runner *ContainerRunner) CreateContainer() error {
                return fmt.Errorf("While creating container: %v", err)
        }
 
-       runner.HostConfig = dockerclient.HostConfig{Binds: runner.Binds,
-               LogConfig: dockerclient.LogConfig{Type: "none"}}
+       runner.HostConfig = dockerclient.HostConfig{
+               Binds:        runner.Binds,
+               CgroupParent: runner.setCgroupParent,
+               LogConfig: dockerclient.LogConfig{
+                       Type: "none",
+               },
+       }
 
        return runner.AttachStreams()
 }
@@ -823,7 +864,8 @@ func NewContainerRunner(api IArvadosClient,
 func main() {
        statInterval := flag.Duration("crunchstat-interval", 10*time.Second, "sampling period for periodic resource usage reporting")
        cgroupRoot := flag.String("cgroup-root", "/sys/fs/cgroup", "path to sysfs cgroup tree")
-       cgroupParent := flag.String("cgroup-parent", "docker", "name of container's parent cgroup")
+       cgroupParent := flag.String("cgroup-parent", "docker", "name of container's parent cgroup (ignored if -cgroup-parent-subsystem is used)")
+       cgroupParentSubsystem := flag.String("cgroup-parent-subsystem", "", "use current cgroup for given subsystem as parent cgroup for container")
        flag.Parse()
 
        containerId := flag.Arg(0)
@@ -850,7 +892,12 @@ func main() {
        cr := NewContainerRunner(api, kc, docker, containerId)
        cr.statInterval = *statInterval
        cr.cgroupRoot = *cgroupRoot
-       cr.cgroupParent = *cgroupParent
+       cr.expectCgroupParent = *cgroupParent
+       if *cgroupParentSubsystem != "" {
+               p := findCgroup(*cgroupParentSubsystem)
+               cr.setCgroupParent = p
+               cr.expectCgroupParent = p
+       }
 
        err = cr.Run()
        if err != nil {