14328: Remove containerd watchdog.
author Tom Clegg <tclegg@veritasgenetics.com>
Thu, 8 Nov 2018 22:05:22 +0000 (17:05 -0500)
committer Tom Clegg <tclegg@veritasgenetics.com>
Thu, 8 Nov 2018 22:05:22 +0000 (17:05 -0500)
No longer needed, now that we periodically ask docker-inspect to check
on the specific container we're running.

Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg@veritasgenetics.com>

services/crunch-run/crunchrun.go
services/crunch-run/crunchrun_test.go
vendor/vendor.json

index 1deb74031667d7ade04968344d3b262b3ccf1dd1..36d8394c7fcd67f111095004e03d8a3f68fea74a 100644 (file)
@@ -32,7 +32,6 @@ import (
        "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
        "git.curoverse.com/arvados.git/sdk/go/keepclient"
        "git.curoverse.com/arvados.git/sdk/go/manifest"
-       "github.com/shirou/gopsutil/process"
        "golang.org/x/net/context"
 
        dockertypes "github.com/docker/docker/api/types"
@@ -127,8 +126,6 @@ type ContainerRunner struct {
        finalState      string
        parentTemp      string
 
-       ListProcesses func() ([]PsProcess, error)
-
        statLogger       io.WriteCloser
        statReporter     *crunchstat.Reporter
        hoststatLogger   io.WriteCloser
@@ -153,10 +150,9 @@ type ContainerRunner struct {
        cCancelled bool // StopContainer() invoked
        cRemoved   bool // docker confirmed the container no longer exists
 
-       enableNetwork   string // one of "default" or "always"
-       networkMode     string // passed through to HostConfig.NetworkMode
-       arvMountLog     *ThrottledLogger
-       checkContainerd time.Duration
+       enableNetwork string // one of "default" or "always"
+       networkMode   string // passed through to HostConfig.NetworkMode
+       arvMountLog   *ThrottledLogger
 
        containerWatchdogInterval time.Duration
 }
@@ -1108,27 +1104,6 @@ func (runner *ContainerRunner) StartContainer() error {
        return nil
 }
 
-// checkContainerd checks if "containerd" is present in the process list.
-func (runner *ContainerRunner) CheckContainerd() error {
-       if runner.checkContainerd == 0 {
-               return nil
-       }
-       p, _ := runner.ListProcesses()
-       for _, i := range p {
-               e, _ := i.CmdlineSlice()
-               if len(e) > 0 {
-                       if strings.Index(e[0], "containerd") > -1 {
-                               return nil
-                       }
-               }
-       }
-
-       // Not found
-       runner.runBrokenNodeHook()
-       runner.stop(nil)
-       return fmt.Errorf("'containerd' not found in process list.")
-}
-
 // WaitFinish waits for the container to terminate, capture the exit code, and
 // close the stdout/stderr logging.
 func (runner *ContainerRunner) WaitFinish() error {
@@ -1167,27 +1142,6 @@ func (runner *ContainerRunner) WaitFinish() error {
                }
        }()
 
-       containerdGone := make(chan error)
-       defer close(containerdGone)
-       if runner.checkContainerd > 0 {
-               go func() {
-                       ticker := time.NewTicker(time.Duration(runner.checkContainerd))
-                       defer ticker.Stop()
-                       for {
-                               select {
-                               case <-ticker.C:
-                                       if ck := runner.CheckContainerd(); ck != nil {
-                                               containerdGone <- ck
-                                               return
-                                       }
-                               case <-containerdGone:
-                                       // Channel closed, quit goroutine
-                                       return
-                               }
-                       }
-               }()
-       }
-
        for {
                select {
                case waitBody := <-waitOk:
@@ -1216,9 +1170,6 @@ func (runner *ContainerRunner) WaitFinish() error {
 
                case <-containerGone:
                        return errors.New("docker client never returned status")
-
-               case err := <-containerdGone:
-                       return err
                }
        }
 }
@@ -1604,12 +1555,6 @@ func (runner *ContainerRunner) Run() (err error) {
                return
        }
 
-       // Sanity check that containerd is running.
-       err = runner.CheckContainerd()
-       if err != nil {
-               return
-       }
-
        // check for and/or load image
        err = runner.LoadImage()
        if err != nil {
@@ -1729,17 +1674,6 @@ func NewContainerRunner(client *arvados.Client, api IArvadosClient, kc IKeepClie
        cr.NewLogWriter = cr.NewArvLogWriter
        cr.RunArvMount = cr.ArvMountCmd
        cr.MkTempDir = ioutil.TempDir
-       cr.ListProcesses = func() ([]PsProcess, error) {
-               pr, err := process.Processes()
-               if err != nil {
-                       return nil, err
-               }
-               ps := make([]PsProcess, len(pr))
-               for i, j := range pr {
-                       ps[i] = j
-               }
-               return ps, nil
-       }
        cr.MkArvClient = func(token string) (IArvadosClient, IKeepClient, error) {
                cl, err := arvadosclient.MakeArvadosClient()
                if err != nil {
@@ -1787,7 +1721,7 @@ func main() {
        `)
        memprofile := flag.String("memprofile", "", "write memory profile to `file` after running container")
        getVersion := flag.Bool("version", false, "Print version information and exit.")
-       checkContainerd := flag.Duration("check-containerd", 60*time.Second, "Periodic check if (docker-)containerd is running (use 0s to disable).")
+       flag.Duration("check-containerd", 0, "Ignored. Exists for compatibility with older versions.")
        flag.Parse()
 
        // Print version information if requested
@@ -1843,7 +1777,6 @@ func main() {
        cr.expectCgroupParent = *cgroupParent
        cr.enableNetwork = *enableNetwork
        cr.networkMode = *networkMode
-       cr.checkContainerd = *checkContainerd
        if *cgroupParentSubsystem != "" {
                p := findCgroup(*cgroupParentSubsystem)
                cr.setCgroupParent = p
index 0df048cc8b95000fbb214dd88cabd83c6b9f71d1..89ba9e010bdaef451779c641e3a59d37e6812cba 100644 (file)
@@ -2148,41 +2148,3 @@ type FakeProcess struct {
 func (fp FakeProcess) CmdlineSlice() ([]string, error) {
        return fp.cmdLine, nil
 }
-
-func (s *TestSuite) helpCheckContainerd(c *C, lp func() ([]PsProcess, error)) error {
-       kc := &KeepTestClient{}
-       defer kc.Close()
-       cr, err := NewContainerRunner(s.client, &ArvTestClient{callraw: true}, kc, s.docker, "zzzzz-zzzzz-zzzzzzzzzzzzzzz")
-       cr.checkContainerd = time.Duration(100 * time.Millisecond)
-       c.Assert(err, IsNil)
-       cr.ListProcesses = lp
-
-       s.docker.fn = func(t *TestDockerClient) {
-               time.Sleep(1 * time.Second)
-               t.logWriter.Close()
-       }
-
-       err = cr.CreateContainer()
-       c.Check(err, IsNil)
-
-       err = cr.StartContainer()
-       c.Check(err, IsNil)
-
-       err = cr.WaitFinish()
-       return err
-
-}
-
-func (s *TestSuite) TestCheckContainerdPresent(c *C) {
-       err := s.helpCheckContainerd(c, func() ([]PsProcess, error) {
-               return []PsProcess{FakeProcess{[]string{"docker-containerd"}}}, nil
-       })
-       c.Check(err, IsNil)
-}
-
-func (s *TestSuite) TestCheckContainerdMissing(c *C) {
-       err := s.helpCheckContainerd(c, func() ([]PsProcess, error) {
-               return []PsProcess{FakeProcess{[]string{"abc"}}}, nil
-       })
-       c.Check(err, ErrorMatches, `'containerd' not found in process list.`)
-}
index 9abb9bb15e0ae0824533c812f1302d93cf270722..a6b4c17cf0540020e189c82138b21cf22acc805d 100644 (file)
                        "revision": "d682213848ed68c0a260ca37d6dd5ace8423f5ba",
                        "revisionTime": "2017-12-05T20:32:29Z"
                },
-               {
-                       "checksumSHA1": "st4vb0GmDeoKbsfxdpNZ2MPl76M=",
-                       "path": "github.com/StackExchange/wmi",
-                       "revision": "cdffdb33acae0e14efff2628f9bae377b597840e",
-                       "revisionTime": "2018-04-12T20:51:11Z"
-               },
                {
                        "checksumSHA1": "spyv5/YFBjYyZLZa1U2LBfDR8PM=",
                        "path": "github.com/beorn7/perks/quantile",
                        "revision": "0ca9ea5df5451ffdf184b4428c902747c2c11cd7",
                        "revisionTime": "2017-03-27T23:54:44Z"
                },
-               {
-                       "checksumSHA1": "Kqv7bA4oJG0nPwQvGWDwGGaKONo=",
-                       "path": "github.com/go-ole/go-ole",
-                       "revision": "7a0fa49edf48165190530c675167e2f319a05268",
-                       "revisionTime": "2018-06-25T08:58:08Z"
-               },
-               {
-                       "checksumSHA1": "PArleDBtadu2qO4hJwHR8a3IOTA=",
-                       "path": "github.com/go-ole/go-ole/oleutil",
-                       "revision": "7a0fa49edf48165190530c675167e2f319a05268",
-                       "revisionTime": "2018-06-25T08:58:08Z"
-               },
                {
                        "checksumSHA1": "8UEp6v0Dczw/SlasE0DivB0mAHA=",
                        "path": "github.com/gogo/protobuf/jsonpb",
                        "revision": "1744e2970ca51c86172c8190fadad617561ed6e7",
                        "revisionTime": "2017-11-10T11:01:46Z"
                },
-               {
-                       "checksumSHA1": "q14d3C3xvWevU3dSv4P5K0+OSD0=",
-                       "path": "github.com/shirou/gopsutil/cpu",
-                       "revision": "63728fcf6b24475ecfea044e22242447666c2f52",
-                       "revisionTime": "2018-07-05T13:28:12Z"
-               },
-               {
-                       "checksumSHA1": "LZ9GloiGLTISmQ4dalK2XspH6Wo=",
-                       "path": "github.com/shirou/gopsutil/host",
-                       "revision": "63728fcf6b24475ecfea044e22242447666c2f52",
-                       "revisionTime": "2018-07-05T13:28:12Z"
-               },
-               {
-                       "checksumSHA1": "cyoqI0gryzjxGTkaAfyUqMiuUR0=",
-                       "path": "github.com/shirou/gopsutil/internal/common",
-                       "revision": "63728fcf6b24475ecfea044e22242447666c2f52",
-                       "revisionTime": "2018-07-05T13:28:12Z"
-               },
-               {
-                       "checksumSHA1": "vEQLjAO5T5K9zXblEMYdoaBZzj0=",
-                       "path": "github.com/shirou/gopsutil/mem",
-                       "revision": "63728fcf6b24475ecfea044e22242447666c2f52",
-                       "revisionTime": "2018-07-05T13:28:12Z"
-               },
-               {
-                       "checksumSHA1": "KMWFRa0DVpabo9d8euB4RYjUBQE=",
-                       "path": "github.com/shirou/gopsutil/net",
-                       "revision": "63728fcf6b24475ecfea044e22242447666c2f52",
-                       "revisionTime": "2018-07-05T13:28:12Z"
-               },
-               {
-                       "checksumSHA1": "fbO7c1gv1kSvWKOb/+5HUWFkBaA=",
-                       "path": "github.com/shirou/gopsutil/process",
-                       "revision": "63728fcf6b24475ecfea044e22242447666c2f52",
-                       "revisionTime": "2018-07-05T13:28:12Z"
-               },
-               {
-                       "checksumSHA1": "Nve7SpDmjsv6+rhkXAkfg/UQx94=",
-                       "path": "github.com/shirou/w32",
-                       "revision": "bb4de0191aa41b5507caa14b0650cdbddcd9280b",
-                       "revisionTime": "2016-09-30T03:27:40Z"
-               },
                {
                        "checksumSHA1": "8QeSG127zQqbA+YfkO1WkKx/iUI=",
                        "path": "github.com/src-d/gcfg",