10181: Add Size method to arvados.CollectionFileSystem.
[arvados.git] / services / crunch-run / crunchrun.go
index 5996a7cd6b47e6df011f2f1459c6f374778614b8..730194b8251f9397828fa660d1e1c15d6ae5f63e 100644 (file)
@@ -190,7 +190,7 @@ func (runner *ContainerRunner) stop(sig os.Signal) {
 var errorBlacklist = []string{
        "(?ms).*[Cc]annot connect to the Docker daemon.*",
        "(?ms).*oci runtime error.*starting container process.*container init.*mounting.*to rootfs.*no such file or directory.*",
-       "(?ms).*grpc: the connection is unavailable",
+       "(?ms).*grpc: the connection is unavailable.*",
 }
 var brokenNodeHook *string = flag.String("broken-node-hook", "", "Script to run if node is detected to be broken (for example, Docker daemon is not running)")
 
@@ -742,6 +742,7 @@ func (runner *ContainerRunner) startCrunchstat() error {
                CgroupParent: runner.expectCgroupParent,
                CgroupRoot:   runner.cgroupRoot,
                PollPeriod:   runner.statInterval,
+               TempDir:      runner.parentTemp,
        }
        runner.statReporter.Start()
        return nil
@@ -1122,9 +1123,7 @@ func (runner *ContainerRunner) WaitFinish() error {
        }
 
        containerdGone := make(chan error)
-       defer func() {
-               close(containerdGone)
-       }()
+       defer close(containerdGone)
        if runner.checkContainerd > 0 {
                go func() {
                        ticker := time.NewTicker(time.Duration(runner.checkContainerd))
@@ -1137,7 +1136,8 @@ func (runner *ContainerRunner) WaitFinish() error {
                                                return
                                        }
                                case <-containerdGone:
-                                       break
+                                       // Channel closed, quit goroutine
+                                       return
                                }
                        }
                }()
@@ -1428,11 +1428,11 @@ func (runner *ContainerRunner) Run() (err error) {
                // hasn't already been assigned when Run() returns,
                // this cleanup func will cause Run() to return the
                // first non-nil error that is passed to checkErr().
-               checkErr := func(e error) {
+               checkErr := func(errorIn string, e error) {
                        if e == nil {
                                return
                        }
-                       runner.CrunchLog.Print(e)
+                       runner.CrunchLog.Printf("error in %s: %v", errorIn, e)
                        if err == nil {
                                err = e
                        }
@@ -1443,7 +1443,7 @@ func (runner *ContainerRunner) Run() (err error) {
                }
 
                // Log the error encountered in Run(), if any
-               checkErr(err)
+               checkErr("Run", err)
 
                if runner.finalState == "Queued" {
                        runner.UpdateContainerFinal()
@@ -1456,10 +1456,10 @@ func (runner *ContainerRunner) Run() (err error) {
                        // capture partial output and write logs
                }
 
-               checkErr(runner.CaptureOutput())
-               checkErr(runner.stopHoststat())
-               checkErr(runner.CommitLogs())
-               checkErr(runner.UpdateContainerFinal())
+               checkErr("CaptureOutput", runner.CaptureOutput())
+               checkErr("stopHoststat", runner.stopHoststat())
+               checkErr("CommitLogs", runner.CommitLogs())
+               checkErr("UpdateContainerFinal", runner.UpdateContainerFinal())
        }()
 
        err = runner.fetchContainerRecord()