19886: Save log collection when crunch-run starts a container
[arvados.git] / lib / crunchrun / crunchrun.go
index ee9115d8d809903be17cbaa10dc4010d1b7d87dc..d322a3ac3c03f1eb178b9b2e6919de20cf30a390 100644 (file)
@@ -142,6 +142,7 @@ type ContainerRunner struct {
        parentTemp    string
        costStartTime time.Time
 
+       keepstore        *exec.Cmd
        keepstoreLogger  io.WriteCloser
        keepstoreLogbuf  *bufThenWrite
        statLogger       io.WriteCloser
@@ -427,8 +428,14 @@ func (runner *ContainerRunner) SetupMounts() (map[string]bindmount, error) {
                arvMountCmd = append(arvMountCmd, "--allow-other")
        }
 
-       if runner.Container.RuntimeConstraints.KeepCacheRAM > 0 {
-               arvMountCmd = append(arvMountCmd, "--file-cache", fmt.Sprintf("%d", runner.Container.RuntimeConstraints.KeepCacheRAM))
+       if runner.Container.RuntimeConstraints.KeepCacheDisk > 0 {
+               keepcachedir, err := runner.MkTempDir(runner.parentTemp, "keepcache")
+               if err != nil {
+                       return nil, fmt.Errorf("while creating keep cache temp dir: %v", err)
+               }
+               arvMountCmd = append(arvMountCmd, "--disk-cache", "--disk-cache-dir", keepcachedir, "--file-cache", fmt.Sprintf("%d", runner.Container.RuntimeConstraints.KeepCacheDisk))
+       } else if runner.Container.RuntimeConstraints.KeepCacheRAM > 0 {
+               arvMountCmd = append(arvMountCmd, "--ram-cache", "--file-cache", fmt.Sprintf("%d", runner.Container.RuntimeConstraints.KeepCacheRAM))
        }
 
        collectionPaths := []string{}
@@ -660,6 +667,9 @@ func (runner *ContainerRunner) SetupMounts() (map[string]bindmount, error) {
        if err != nil {
                return nil, fmt.Errorf("while trying to start arv-mount: %v", err)
        }
+       if runner.hoststatReporter != nil && runner.ArvMount != nil {
+               runner.hoststatReporter.ReportPID("arv-mount", runner.ArvMount.Process.Pid)
+       }
 
        for _, p := range collectionPaths {
                _, err = os.Stat(p)
@@ -733,6 +743,7 @@ func (runner *ContainerRunner) startHoststat() error {
                PollPeriod: runner.statInterval,
        }
        runner.hoststatReporter.Start()
+       runner.hoststatReporter.ReportPID("crunch-run", os.Getpid())
        return nil
 }
 
@@ -1423,7 +1434,11 @@ func (runner *ContainerRunner) UpdateContainerRunning() error {
                return ErrCancelled
        }
        return runner.DispatcherArvClient.Update("containers", runner.Container.UUID,
-               arvadosclient.Dict{"container": arvadosclient.Dict{"state": "Running", "gateway_address": runner.gateway.Address}}, nil)
+               arvadosclient.Dict{"container": arvadosclient.Dict{
+                       "state":           "Running",
+                       "gateway_address": runner.gateway.Address,
+                       "log":             runner.logUUID,
+               }}, nil)
 }
 
 // ContainerToken returns the api_token the container (and any
@@ -1569,6 +1584,9 @@ func (runner *ContainerRunner) Run() (err error) {
        if err != nil {
                return
        }
+       if runner.keepstore != nil {
+               runner.hoststatReporter.ReportPID("keepstore", runner.keepstore.Process.Pid)
+       }
 
        // set up FUSE mount and binds
        bindmounts, err = runner.SetupMounts()
@@ -1611,6 +1629,10 @@ func (runner *ContainerRunner) Run() (err error) {
                return
        }
 
+       _, err = runner.saveLogCollection(false)
+       if err != nil {
+               runner.CrunchLog.Printf("Error committing initial log collection: %v", err)
+       }
        err = runner.UpdateContainerRunning()
        if err != nil {
                return
@@ -1853,6 +1875,7 @@ func (command) RunCommand(prog string, args []string, stdin io.Reader, stdout, s
                return 1
        }
 
+       cr.keepstore = keepstore
        if keepstore == nil {
                // Log explanation (if any) for why we're not running
                // a local keepstore.