13111: Merge branch 'master' into 12308-go-fuse
[arvados.git] / services / crunch-run / crunchrun.go
index 852d5a27b168270d46cad855755cb5019a732f72..a9f1c25d37fa1714868d371c245dc7aa8d041543 100644 (file)
@@ -103,21 +103,23 @@ type ContainerRunner struct {
        LogsPDH       *string
        RunArvMount
        MkTempDir
-       ArvMount       *exec.Cmd
-       ArvMountPoint  string
-       HostOutputDir  string
-       CleanupTempDir []string
-       Binds          []string
-       Volumes        map[string]struct{}
-       OutputPDH      *string
-       SigChan        chan os.Signal
-       ArvMountExit   chan error
-       finalState     string
-
-       statLogger   io.WriteCloser
-       statReporter *crunchstat.Reporter
-       statInterval time.Duration
-       cgroupRoot   string
+       ArvMount      *exec.Cmd
+       ArvMountPoint string
+       HostOutputDir string
+       Binds         []string
+       Volumes       map[string]struct{}
+       OutputPDH     *string
+       SigChan       chan os.Signal
+       ArvMountExit  chan error
+       finalState    string
+       parentTemp    string
+
+       statLogger       io.WriteCloser
+       statReporter     *crunchstat.Reporter
+       hoststatLogger   io.WriteCloser
+       hoststatReporter *crunchstat.Reporter
+       statInterval     time.Duration
+       cgroupRoot       string
        // What we expect the container's cgroup parent to be.
        expectCgroupParent string
        // What we tell docker to use as the container's cgroup
@@ -325,7 +327,7 @@ func (runner *ContainerRunner) ArvMountCmd(arvMountCmd []string, token string) (
 
 func (runner *ContainerRunner) SetupArvMountPoint(prefix string) (err error) {
        if runner.ArvMountPoint == "" {
-               runner.ArvMountPoint, err = runner.MkTempDir("", prefix)
+               runner.ArvMountPoint, err = runner.MkTempDir(runner.parentTemp, prefix)
        }
        return
 }
@@ -336,7 +338,7 @@ func copyfile(src string, dst string) (err error) {
                return
        }
 
-       os.MkdirAll(path.Dir(dst), 0770)
+       os.MkdirAll(path.Dir(dst), 0777)
 
        dstfile, err := os.Create(dst)
        if err != nil {
@@ -488,7 +490,7 @@ func (runner *ContainerRunner) SetupMounts() (err error) {
 
                case mnt.Kind == "tmp":
                        var tmpdir string
-                       tmpdir, err = runner.MkTempDir("", "")
+                       tmpdir, err = runner.MkTempDir(runner.parentTemp, "tmp")
                        if err != nil {
                                return fmt.Errorf("While creating mount temp dir: %v", err)
                        }
@@ -500,7 +502,6 @@ func (runner *ContainerRunner) SetupMounts() (err error) {
                        if staterr != nil {
                                return fmt.Errorf("While Chmod temp dir: %v", err)
                        }
-                       runner.CleanupTempDir = append(runner.CleanupTempDir, tmpdir)
                        runner.Binds = append(runner.Binds, fmt.Sprintf("%s:%s", tmpdir, bind))
                        if bind == runner.Container.OutputPath {
                                runner.HostOutputDir = tmpdir
@@ -516,11 +517,10 @@ func (runner *ContainerRunner) SetupMounts() (err error) {
                        // can ensure the file is world-readable
                        // inside the container, without having to
                        // make it world-readable on the docker host.
-                       tmpdir, err := runner.MkTempDir("", "")
+                       tmpdir, err := runner.MkTempDir(runner.parentTemp, "json")
                        if err != nil {
                                return fmt.Errorf("creating temp dir: %v", err)
                        }
-                       runner.CleanupTempDir = append(runner.CleanupTempDir, tmpdir)
                        tmpfn := filepath.Join(tmpdir, "mountdata.json")
                        err = ioutil.WriteFile(tmpfn, jsondata, 0644)
                        if err != nil {
@@ -529,11 +529,10 @@ func (runner *ContainerRunner) SetupMounts() (err error) {
                        runner.Binds = append(runner.Binds, fmt.Sprintf("%s:%s:ro", tmpfn, bind))
 
                case mnt.Kind == "git_tree":
-                       tmpdir, err := runner.MkTempDir("", "")
+                       tmpdir, err := runner.MkTempDir(runner.parentTemp, "git_tree")
                        if err != nil {
                                return fmt.Errorf("creating temp dir: %v", err)
                        }
-                       runner.CleanupTempDir = append(runner.CleanupTempDir, tmpdir)
                        err = gitMount(mnt).extractTree(runner.ArvClient, tmpdir, token)
                        if err != nil {
                                return err
@@ -576,21 +575,40 @@ func (runner *ContainerRunner) SetupMounts() (err error) {
        }
 
        for _, cp := range copyFiles {
-               dir, err := os.Stat(cp.src)
-               if err == nil {
-                       if dir.IsDir() {
-                               err = filepath.Walk(cp.src, func(walkpath string, walkinfo os.FileInfo, walkerr error) error {
-                                       if walkinfo.Mode().IsRegular() {
-                                               return copyfile(walkpath, path.Join(cp.bind, walkpath[len(cp.src):]))
+               st, err := os.Stat(cp.src)
+               if err != nil {
+                       return fmt.Errorf("While staging writable file from %q to %q: %v", cp.src, cp.bind, err)
+               }
+               if st.IsDir() {
+                       err = filepath.Walk(cp.src, func(walkpath string, walkinfo os.FileInfo, walkerr error) error {
+                               if walkerr != nil {
+                                       return walkerr
+                               }
+                               target := path.Join(cp.bind, walkpath[len(cp.src):])
+                               if walkinfo.Mode().IsRegular() {
+                                       copyerr := copyfile(walkpath, target)
+                                       if copyerr != nil {
+                                               return copyerr
                                        }
-                                       return nil
-                               })
-                       } else {
-                               err = copyfile(cp.src, cp.bind)
+                                       return os.Chmod(target, walkinfo.Mode()|0777)
+                               } else if walkinfo.Mode().IsDir() {
+                                       mkerr := os.MkdirAll(target, 0777)
+                                       if mkerr != nil {
+                                               return mkerr
+                                       }
+                                       return os.Chmod(target, walkinfo.Mode()|os.ModeSetgid|0777)
+                               } else {
+                                       return fmt.Errorf("Source %q is not a regular file or directory", cp.src)
+                               }
+                       })
+               } else if st.Mode().IsRegular() {
+                       err = copyfile(cp.src, cp.bind)
+                       if err == nil {
+                               err = os.Chmod(cp.bind, st.Mode()|0777)
                        }
                }
                if err != nil {
-                       return fmt.Errorf("While staging writable files: %v", err)
+                       return fmt.Errorf("While staging writable file from %q to %q: %v", cp.src, cp.bind, err)
                }
        }
 
@@ -600,52 +618,74 @@ func (runner *ContainerRunner) SetupMounts() (err error) {
 func (runner *ContainerRunner) ProcessDockerAttach(containerReader io.Reader) {
        // Handle docker log protocol
        // https://docs.docker.com/engine/reference/api/docker_remote_api_v1.15/#attach-to-a-container
+       defer close(runner.loggingDone)
 
        header := make([]byte, 8)
-       for {
-               _, readerr := io.ReadAtLeast(containerReader, header, 8)
-
-               if readerr == nil {
-                       readsize := int64(header[7]) | (int64(header[6]) << 8) | (int64(header[5]) << 16) | (int64(header[4]) << 24)
-                       if header[0] == 1 {
-                               // stdout
-                               _, readerr = io.CopyN(runner.Stdout, containerReader, readsize)
-                       } else {
-                               // stderr
-                               _, readerr = io.CopyN(runner.Stderr, containerReader, readsize)
+       var err error
+       for err == nil {
+               _, err = io.ReadAtLeast(containerReader, header, 8)
+               if err != nil {
+                       if err == io.EOF {
+                               err = nil
                        }
+                       break
                }
+               readsize := int64(header[7]) | (int64(header[6]) << 8) | (int64(header[5]) << 16) | (int64(header[4]) << 24)
+               if header[0] == 1 {
+                       // stdout
+                       _, err = io.CopyN(runner.Stdout, containerReader, readsize)
+               } else {
+                       // stderr
+                       _, err = io.CopyN(runner.Stderr, containerReader, readsize)
+               }
+       }
 
-               if readerr != nil {
-                       if readerr != io.EOF {
-                               runner.CrunchLog.Printf("While reading docker logs: %v", readerr)
-                       }
-
-                       closeerr := runner.Stdout.Close()
-                       if closeerr != nil {
-                               runner.CrunchLog.Printf("While closing stdout logs: %v", closeerr)
-                       }
+       if err != nil {
+               runner.CrunchLog.Printf("error reading docker logs: %v", err)
+       }
 
-                       closeerr = runner.Stderr.Close()
-                       if closeerr != nil {
-                               runner.CrunchLog.Printf("While closing stderr logs: %v", closeerr)
-                       }
+       err = runner.Stdout.Close()
+       if err != nil {
+               runner.CrunchLog.Printf("error closing stdout logs: %v", err)
+       }
 
-                       if runner.statReporter != nil {
-                               runner.statReporter.Stop()
-                               closeerr = runner.statLogger.Close()
-                               if closeerr != nil {
-                                       runner.CrunchLog.Printf("While closing crunchstat logs: %v", closeerr)
-                               }
-                       }
+       err = runner.Stderr.Close()
+       if err != nil {
+               runner.CrunchLog.Printf("error closing stderr logs: %v", err)
+       }
 
-                       close(runner.loggingDone)
-                       return
+       if runner.statReporter != nil {
+               runner.statReporter.Stop()
+               err = runner.statLogger.Close()
+               if err != nil {
+                       runner.CrunchLog.Printf("error closing crunchstat logs: %v", err)
                }
        }
 }
 
-func (runner *ContainerRunner) StartCrunchstat() {
+func (runner *ContainerRunner) stopHoststat() error {
+       if runner.hoststatReporter == nil {
+               return nil
+       }
+       runner.hoststatReporter.Stop()
+       err := runner.hoststatLogger.Close()
+       if err != nil {
+               return fmt.Errorf("error closing hoststat logs: %v", err)
+       }
+       return nil
+}
+
+func (runner *ContainerRunner) startHoststat() {
+       runner.hoststatLogger = NewThrottledLogger(runner.NewLogWriter("hoststat"))
+       runner.hoststatReporter = &crunchstat.Reporter{
+               Logger:     log.New(runner.hoststatLogger, "", 0),
+               CgroupRoot: runner.cgroupRoot,
+               PollPeriod: runner.statInterval,
+       }
+       runner.hoststatReporter.Start()
+}
+
+func (runner *ContainerRunner) startCrunchstat() {
        runner.statLogger = NewThrottledLogger(runner.NewLogWriter("crunchstat"))
        runner.statReporter = &crunchstat.Reporter{
                CID:          runner.ContainerID,
@@ -1396,10 +1436,8 @@ func (runner *ContainerRunner) CleanupDirs() {
                }
        }
 
-       for _, tmpdir := range runner.CleanupTempDir {
-               if rmerr := os.RemoveAll(tmpdir); rmerr != nil {
-                       runner.CrunchLog.Printf("While cleaning up temporary directory %s: %v", tmpdir, rmerr)
-               }
+       if rmerr := os.RemoveAll(runner.parentTemp); rmerr != nil {
+               runner.CrunchLog.Printf("While cleaning up temporary directory %s: %v", runner.parentTemp, rmerr)
        }
 }
 
@@ -1569,6 +1607,7 @@ func (runner *ContainerRunner) Run() (err error) {
                }
 
                checkErr(runner.CaptureOutput())
+               checkErr(runner.stopHoststat())
                checkErr(runner.CommitLogs())
                checkErr(runner.UpdateContainerFinal())
        }()
@@ -1577,9 +1616,8 @@ func (runner *ContainerRunner) Run() (err error) {
        if err != nil {
                return
        }
-
-       // setup signal handling
        runner.setupSignals()
+       runner.startHoststat()
 
        // check for and/or load image
        err = runner.LoadImage()
@@ -1628,7 +1666,7 @@ func (runner *ContainerRunner) Run() (err error) {
        }
        runner.finalState = "Cancelled"
 
-       runner.StartCrunchstat()
+       runner.startCrunchstat()
 
        err = runner.StartContainer()
        if err != nil {
@@ -1738,6 +1776,12 @@ func main() {
                os.Exit(1)
        }
 
+       parentTemp, tmperr := cr.MkTempDir("", "crunch-run."+containerId+".")
+       if tmperr != nil {
+               log.Fatalf("%s: %v", containerId, tmperr)
+       }
+
+       cr.parentTemp = parentTemp
        cr.statInterval = *statInterval
        cr.cgroupRoot = *cgroupRoot
        cr.expectCgroupParent = *cgroupParent