10777: Close and flush logs right away instead of waiting for next tick.
[arvados.git] / services / crunch-run / crunchrun.go
index 5f4de1409f3e4acaa3943a5511f1dc239fd887bb..47099673e2b2133c92e6205401be508e162b94a1 100644 (file)
@@ -5,12 +5,6 @@ import (
        "errors"
        "flag"
        "fmt"
-       "git.curoverse.com/arvados.git/lib/crunchstat"
-       "git.curoverse.com/arvados.git/sdk/go/arvados"
-       "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
-       "git.curoverse.com/arvados.git/sdk/go/keepclient"
-       "git.curoverse.com/arvados.git/sdk/go/manifest"
-       "github.com/curoverse/dockerclient"
        "io"
        "io/ioutil"
        "log"
@@ -24,6 +18,13 @@ import (
        "sync"
        "syscall"
        "time"
+
+       "git.curoverse.com/arvados.git/lib/crunchstat"
+       "git.curoverse.com/arvados.git/sdk/go/arvados"
+       "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
+       "git.curoverse.com/arvados.git/sdk/go/keepclient"
+       "git.curoverse.com/arvados.git/sdk/go/manifest"
+       "github.com/curoverse/dockerclient"
 )
 
 // IArvadosClient is the minimal Arvados API methods used by crunch-run.
@@ -95,7 +96,6 @@ type ContainerRunner struct {
        SigChan        chan os.Signal
        ArvMountExit   chan error
        finalState     string
-       trashLifetime  time.Duration
 
        statLogger   io.WriteCloser
        statReporter *crunchstat.Reporter
@@ -124,20 +124,29 @@ func (runner *ContainerRunner) SetupSignals() {
        signal.Notify(runner.SigChan, syscall.SIGINT)
        signal.Notify(runner.SigChan, syscall.SIGQUIT)
 
-       go func(sig <-chan os.Signal) {
-               for range sig {
-                       if !runner.Cancelled {
-                               runner.CancelLock.Lock()
-                               runner.Cancelled = true
-                               if runner.ContainerID != "" {
-                                       runner.Docker.StopContainer(runner.ContainerID, 10)
-                               }
-                               runner.CancelLock.Unlock()
-                       }
-               }
+       go func(sig chan os.Signal) {
+               <-sig
+               runner.stop()
+               signal.Stop(sig)
        }(runner.SigChan)
 }
 
+// stop the underlying Docker container.
+func (runner *ContainerRunner) stop() {
+       runner.CancelLock.Lock()
+       defer runner.CancelLock.Unlock()
+       if runner.Cancelled {
+               return
+       }
+       runner.Cancelled = true
+       if runner.ContainerID != "" {
+               err := runner.Docker.StopContainer(runner.ContainerID, 10)
+               if err != nil {
+                       log.Printf("StopContainer failed: %s", err)
+               }
+       }
+}
+
 // LoadImage determines the docker image id from the container record and
 // checks if it is available in the local Docker image store.  If not, it loads
 // the image from Keep.
@@ -601,12 +610,22 @@ func (runner *ContainerRunner) StartContainer() error {
 func (runner *ContainerRunner) WaitFinish() error {
        runner.CrunchLog.Print("Waiting for container to finish")
 
-       result := runner.Docker.Wait(runner.ContainerID)
-       wr := <-result
-       if wr.Error != nil {
-               return fmt.Errorf("While waiting for container to finish: %v", wr.Error)
+       waitDocker := runner.Docker.Wait(runner.ContainerID)
+       waitMount := runner.ArvMountExit
+       for waitDocker != nil {
+               select {
+               case err := <-waitMount:
+                       runner.CrunchLog.Printf("arv-mount exited before container finished: %v", err)
+                       waitMount = nil
+                       runner.stop()
+               case wr := <-waitDocker:
+                       if wr.Error != nil {
+                               return fmt.Errorf("While waiting for container to finish: %v", wr.Error)
+                       }
+                       runner.ExitCode = &wr.ExitCode
+                       waitDocker = nil
+               }
        }
-       runner.ExitCode = &wr.ExitCode
 
        // wait for stdout/stderr to complete
        <-runner.loggingDone
@@ -708,7 +727,7 @@ func (runner *ContainerRunner) CaptureOutput() error {
        err = runner.ArvClient.Create("collections",
                arvadosclient.Dict{
                        "collection": arvadosclient.Dict{
-                               "trash_at":      time.Now().Add(runner.trashLifetime).Format(time.RFC3339),
+                               "is_trashed":    true,
                                "name":          "output for " + runner.Container.UUID,
                                "manifest_text": manifestText}},
                &response)
@@ -760,14 +779,6 @@ func (runner *ContainerRunner) getCollectionManifestForPath(mnt arvados.Mount, b
        return extracted.Text, nil
 }
 
-func (runner *ContainerRunner) loadDiscoveryVars() {
-       tl, err := runner.ArvClient.Discovery("defaultTrashLifetime")
-       if err != nil {
-               log.Fatalf("getting defaultTrashLifetime from discovery document: %s", err)
-       }
-       runner.trashLifetime = time.Duration(tl.(float64)) * time.Second
-}
-
 func (runner *ContainerRunner) CleanupDirs() {
        if runner.ArvMount != nil {
                umount := exec.Command("fusermount", "-z", "-u", runner.ArvMountPoint)
@@ -820,7 +831,7 @@ func (runner *ContainerRunner) CommitLogs() error {
        err = runner.ArvClient.Create("collections",
                arvadosclient.Dict{
                        "collection": arvadosclient.Dict{
-                               "trash_at":      time.Now().Add(runner.trashLifetime).Format(time.RFC3339),
+                               "is_trashed":    true,
                                "name":          "logs for " + runner.Container.UUID,
                                "manifest_text": mt}},
                &response)
@@ -1015,7 +1026,6 @@ func NewContainerRunner(api IArvadosClient,
        cr.Container.UUID = containerUUID
        cr.CrunchLog = NewThrottledLogger(cr.NewLogWriter("crunch-run"))
        cr.CrunchLog.Immediate = log.New(os.Stderr, containerUUID+" ", 0)
-       cr.loadDiscoveryVars()
        return cr
 }