10181: Add Size method to arvados.CollectionFileSystem.
[arvados.git] / services / crunch-run / crunchrun.go
index fc206a91c3eea15bb61f4caafe691e76c08d9fcf..730194b8251f9397828fa660d1e1c15d6ae5f63e 100644 (file)
@@ -190,6 +190,7 @@ func (runner *ContainerRunner) stop(sig os.Signal) {
 var errorBlacklist = []string{
        "(?ms).*[Cc]annot connect to the Docker daemon.*",
        "(?ms).*oci runtime error.*starting container process.*container init.*mounting.*to rootfs.*no such file or directory.*",
+       "(?ms).*grpc: the connection is unavailable.*",
 }
 var brokenNodeHook *string = flag.String("broken-node-hook", "", "Script to run if node is detected to be broken (for example, Docker daemon is not running)")
 
@@ -741,6 +742,7 @@ func (runner *ContainerRunner) startCrunchstat() error {
                CgroupParent: runner.expectCgroupParent,
                CgroupRoot:   runner.cgroupRoot,
                PollPeriod:   runner.statInterval,
+               TempDir:      runner.parentTemp,
        }
        runner.statReporter.Start()
        return nil
@@ -1121,9 +1123,7 @@ func (runner *ContainerRunner) WaitFinish() error {
        }
 
        containerdGone := make(chan error)
-       defer func() {
-               close(containerdGone)
-       }()
+       defer close(containerdGone)
        if runner.checkContainerd > 0 {
                go func() {
                        ticker := time.NewTicker(time.Duration(runner.checkContainerd))
@@ -1136,7 +1136,8 @@ func (runner *ContainerRunner) WaitFinish() error {
                                                return
                                        }
                                case <-containerdGone:
-                                       break
+                                       // Channel closed, quit goroutine
+                                       return
                                }
                        }
                }()
@@ -1427,11 +1428,11 @@ func (runner *ContainerRunner) Run() (err error) {
                // hasn't already been assigned when Run() returns,
                // this cleanup func will cause Run() to return the
                // first non-nil error that is passed to checkErr().
-               checkErr := func(e error) {
+               checkErr := func(errorIn string, e error) {
                        if e == nil {
                                return
                        }
-                       runner.CrunchLog.Print(e)
+                       runner.CrunchLog.Printf("error in %s: %v", errorIn, e)
                        if err == nil {
                                err = e
                        }
@@ -1442,7 +1443,7 @@ func (runner *ContainerRunner) Run() (err error) {
                }
 
                // Log the error encountered in Run(), if any
-               checkErr(err)
+               checkErr("Run", err)
 
                if runner.finalState == "Queued" {
                        runner.UpdateContainerFinal()
@@ -1455,10 +1456,10 @@ func (runner *ContainerRunner) Run() (err error) {
                        // capture partial output and write logs
                }
 
-               checkErr(runner.CaptureOutput())
-               checkErr(runner.stopHoststat())
-               checkErr(runner.CommitLogs())
-               checkErr(runner.UpdateContainerFinal())
+               checkErr("CaptureOutput", runner.CaptureOutput())
+               checkErr("stopHoststat", runner.stopHoststat())
+               checkErr("CommitLogs", runner.CommitLogs())
+               checkErr("UpdateContainerFinal", runner.UpdateContainerFinal())
        }()
 
        err = runner.fetchContainerRecord()
@@ -1649,7 +1650,7 @@ func main() {
        `)
        memprofile := flag.String("memprofile", "", "write memory profile to `file` after running container")
        getVersion := flag.Bool("version", false, "Print version information and exit.")
-       checkContainerd := flag.Duration("check-containerd", 60*time.Second, "Periodic check if (docker-)containerd is running, period in seconds.")
+       checkContainerd := flag.Duration("check-containerd", 60*time.Second, "Periodic check if (docker-)containerd is running (use 0s to disable).")
        flag.Parse()
 
        // Print version information if requested