21123: Check container status in integration tests.
[arvados.git] / lib / controller / integration_test.go
index 44be17c77ffb1d99e4fbeafcbe2a01dd30a82b60..53e6a90b8f2fee1d18237c157ccef0474b703227 100644 (file)
@@ -11,6 +11,7 @@ import (
        "encoding/json"
        "fmt"
        "io"
+       "io/fs"
        "io/ioutil"
        "math"
        "net"
@@ -71,6 +72,20 @@ func (s *IntegrationSuite) SetUpSuite(c *check.C) {
       Insecure: true
     SystemLogs:
       Format: text
+    API:
+      MaxConcurrentRequests: 128
+    Containers:
+      CloudVMs:
+        Enable: true
+        Driver: loopback
+        BootProbeCommand: "rm -f /var/lock/crunch-run-broken"
+        ProbeInterval: 1s
+        PollInterval: 5s
+        SyncInterval: 10s
+        TimeoutIdle: 1s
+        TimeoutBooting: 2s
+      RuntimeEngine: singularity
+      CrunchRunArgumentsList: ["--broken-node-hook", "true"]
     RemoteClusters:
       z1111:
         Host: ` + hostport["z1111"] + `
@@ -486,6 +501,7 @@ func (s *IntegrationSuite) TestCreateContainerRequestWithFedToken(c *check.C) {
        req.Header.Set("Authorization", "OAuth2 "+ac2.AuthToken)
        resp, err = arvados.InsecureHTTPClient.Do(req)
        c.Assert(err, check.IsNil)
+       defer resp.Body.Close()
        err = json.NewDecoder(resp.Body).Decode(&cr)
        c.Check(err, check.IsNil)
        c.Check(cr.UUID, check.Matches, "z2222-.*")
@@ -523,8 +539,10 @@ func (s *IntegrationSuite) TestCreateContainerRequestWithBadToken(c *check.C) {
                c.Assert(err, check.IsNil)
                req.Header.Set("Content-Type", "application/json")
                resp, err := ac1.Do(req)
-               c.Assert(err, check.IsNil)
-               c.Assert(resp.StatusCode, check.Equals, tt.expectedCode)
+               if c.Check(err, check.IsNil) {
+                       c.Assert(resp.StatusCode, check.Equals, tt.expectedCode)
+                       resp.Body.Close()
+               }
        }
 }
 
@@ -592,9 +610,11 @@ func (s *IntegrationSuite) TestRequestIDHeader(c *check.C) {
                        var jresp httpserver.ErrorResponse
                        err := json.NewDecoder(resp.Body).Decode(&jresp)
                        c.Check(err, check.IsNil)
-                       c.Assert(jresp.Errors, check.HasLen, 1)
-                       c.Check(jresp.Errors[0], check.Matches, `.*\(`+respHdr+`\).*`)
+                       if c.Check(jresp.Errors, check.HasLen, 1) {
+                               c.Check(jresp.Errors[0], check.Matches, `.*\(`+respHdr+`\).*`)
+                       }
                }
+               resp.Body.Close()
        }
 }
 
@@ -951,8 +971,8 @@ func (s *IntegrationSuite) TestSetupUserWithVM(c *check.C) {
                        "hostname": "example",
                },
                })
+       c.Assert(err, check.IsNil)
        c.Check(outVM.UUID[0:5], check.Equals, "z3333")
-       c.Check(err, check.IsNil)
 
        // Make sure z3333 user list is up to date
        _, err = conn3.UserList(rootctx3, arvados.ListOptions{Limit: 1000})
@@ -1111,3 +1131,171 @@ func (s *IntegrationSuite) TestForwardRuntimeTokenToLoginCluster(c *check.C) {
        c.Check(err, check.ErrorMatches, `request failed: .* 401 Unauthorized: cannot use a locally issued token to forward a request to our login cluster \(z1111\)`)
        c.Check(err, check.Not(check.ErrorMatches), `(?ms).*127\.0\.0\.11.*`)
 }
+
+func (s *IntegrationSuite) TestRunTrivialContainer(c *check.C) {
+       outcoll, _ := s.runContainer(c, "z1111", "", map[string]interface{}{
+               "command":             []string{"sh", "-c", "touch \"/out/hello world\" /out/ohai"},
+               "container_image":     "busybox:uclibc",
+               "cwd":                 "/tmp",
+               "environment":         map[string]string{},
+               "mounts":              map[string]arvados.Mount{"/out": {Kind: "tmp", Capacity: 10000}},
+               "output_path":         "/out",
+               "runtime_constraints": arvados.RuntimeConstraints{RAM: 100000000, VCPUs: 1, KeepCacheRAM: 1 << 26},
+               "priority":            1,
+               "state":               arvados.ContainerRequestStateCommitted,
+       }, 0)
+       c.Check(outcoll.ManifestText, check.Matches, `\. d41d8.* 0:0:hello\\040world 0:0:ohai\n`)
+       c.Check(outcoll.PortableDataHash, check.Equals, "8fa5dee9231a724d7cf377c5a2f4907c+65")
+}
+
+func (s *IntegrationSuite) TestContainerInputOnDifferentCluster(c *check.C) {
+       conn := s.super.Conn("z1111")
+       rootctx, _, _ := s.super.RootClients("z1111")
+       userctx, ac, _, _ := s.super.UserClients("z1111", rootctx, c, conn, s.oidcprovider.AuthEmail, true)
+       z1coll, err := conn.CollectionCreate(userctx, arvados.CreateOptions{Attrs: map[string]interface{}{
+               "manifest_text": ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:ocelot\n",
+       }})
+       c.Assert(err, check.IsNil)
+
+       outcoll, logcfs := s.runContainer(c, "z2222", ac.AuthToken, map[string]interface{}{
+               "command":         []string{"ls", "/in"},
+               "container_image": "busybox:uclibc",
+               "cwd":             "/tmp",
+               "environment":     map[string]string{},
+               "mounts": map[string]arvados.Mount{
+                       "/in":  {Kind: "collection", PortableDataHash: z1coll.PortableDataHash},
+                       "/out": {Kind: "tmp", Capacity: 10000},
+               },
+               "output_path":         "/out",
+               "runtime_constraints": arvados.RuntimeConstraints{RAM: 100000000, VCPUs: 1, KeepCacheRAM: 1 << 26},
+               "priority":            1,
+               "state":               arvados.ContainerRequestStateCommitted,
+               "container_count_max": 1,
+       }, -1)
+       if outcoll.UUID == "" {
+               arvmountlog, err := fs.ReadFile(arvados.FS(logcfs), "/arv-mount.txt")
+               c.Check(err, check.IsNil)
+               c.Check(string(arvmountlog), check.Matches, `(?ms).*cannot use a locally issued token to forward a request to our login cluster \(z1111\).*`)
+               c.Skip("this use case is not supported yet")
+       }
+       stdout, err := fs.ReadFile(arvados.FS(logcfs), "/stdout.txt")
+       c.Check(err, check.IsNil)
+       c.Check(string(stdout), check.Equals, "ocelot\n")
+}
+
+// runContainer loads the busybox image into Keep on the given
+// cluster, submits ctrSpec as a container request, and polls until
+// the request is final and the reported container status is Complete
+// or Cancelled. It then logs the contents of the request's log
+// collection, checks the CORS headers of the webdav log endpoint,
+// and returns the output collection and the log collection
+// filesystem.
+//
+// If token is empty, a token is obtained for the suite's OIDC test
+// user. If expectExitCode >= 0, the container is expected to
+// complete with that exit code, the sequence of observed container
+// statuses is checked, and outcoll is loaded; otherwise outcoll is
+// returned as its zero value.
+//
+// The test is aborted if, one minute after polling starts, a poll
+// finds no container state change (or fails to fetch the container).
+func (s *IntegrationSuite) runContainer(c *check.C, clusterID string, token string, ctrSpec map[string]interface{}, expectExitCode int) (outcoll arvados.Collection, logcfs arvados.CollectionFileSystem) {
+       conn := s.super.Conn(clusterID)
+       rootctx, _, _ := s.super.RootClients(clusterID)
+       if token == "" {
+               _, ac, _, _ := s.super.UserClients(clusterID, rootctx, c, conn, s.oidcprovider.AuthEmail, true)
+               token = ac.AuthToken
+       }
+       _, ac, kc := s.super.ClientsWithToken(clusterID, token)
+
+       c.Log("[docker load]")
+       out, err := exec.Command("docker", "load", "--input", arvadostest.BusyboxDockerImage(c)).CombinedOutput()
+       c.Logf("[docker load output] %s", out)
+       c.Check(err, check.IsNil)
+
+       c.Log("[arv-keepdocker]")
+       akd := exec.Command("arv-keepdocker", "--no-resume", "busybox:uclibc")
+       akd.Env = append(os.Environ(), "ARVADOS_API_HOST="+ac.APIHost, "ARVADOS_API_HOST_INSECURE=1", "ARVADOS_API_TOKEN="+ac.AuthToken)
+       out, err = akd.CombinedOutput()
+       c.Logf("[arv-keepdocker output]\n%s", out)
+       c.Check(err, check.IsNil)
+
+       var cr arvados.ContainerRequest
+       err = ac.RequestAndDecode(&cr, "POST", "/arvados/v1/container_requests", nil, map[string]interface{}{
+               "container_request": ctrSpec,
+       })
+       c.Assert(err, check.IsNil)
+
+       // showlogs fetches the given log collection, writes the
+       // content of each file to the test log (skipping entries
+       // under "/log for container..."), and returns the
+       // collection filesystem.
+       showlogs := func(collectionID string) arvados.CollectionFileSystem {
+               var logcoll arvados.Collection
+               // Use a local err so the enclosing function's err is
+               // not modified from inside the closure.
+               err := ac.RequestAndDecode(&logcoll, "GET", "/arvados/v1/collections/"+collectionID, nil, nil)
+               c.Assert(err, check.IsNil)
+               cfs, err := logcoll.FileSystem(ac, kc)
+               c.Assert(err, check.IsNil)
+               err = fs.WalkDir(arvados.FS(cfs), "/", func(path string, d fs.DirEntry, err error) error {
+                       if err != nil {
+                               // d can be nil when err is set, so
+                               // bail out before touching it.
+                               return err
+                       }
+                       if d.IsDir() || strings.HasPrefix(path, "/log for container") {
+                               return nil
+                       }
+                       f, err := cfs.Open(path)
+                       c.Assert(err, check.IsNil)
+                       defer f.Close()
+                       buf, err := ioutil.ReadAll(f)
+                       c.Assert(err, check.IsNil)
+                       c.Logf("=== %s\n%s\n", path, buf)
+                       return nil
+               })
+               c.Check(err, check.IsNil)
+               return cfs
+       }
+
+       // checkwebdavlogs sends a CORS preflight (OPTIONS) request to
+       // the container request's webdav log endpoint and checks the
+       // status code and Access-Control-Allow-Origin header.
+       checkwebdavlogs := func(cr arvados.ContainerRequest) {
+               req, err := http.NewRequest("OPTIONS", "https://"+ac.APIHost+"/arvados/v1/container_requests/"+cr.UUID+"/log/"+cr.ContainerUUID+"/", nil)
+               c.Assert(err, check.IsNil)
+               req.Header.Set("Origin", "http://example.example")
+               resp, err := ac.Do(req)
+               c.Assert(err, check.IsNil)
+               defer resp.Body.Close()
+               c.Check(resp.StatusCode, check.Equals, http.StatusOK)
+               // Check for duplicate headers -- must use Header[], not Header.Get()
+               c.Check(resp.Header["Access-Control-Allow-Origin"], check.DeepEquals, []string{"*"})
+       }
+
+       var ctr arvados.Container
+       var lastState arvados.ContainerState
+       var status, lastStatus arvados.ContainerStatus
+       var allStatus string
+       // checkstatus fetches the container_status of the request
+       // and, if it differs from the previous one, logs it and
+       // appends it to allStatus.
+       checkstatus := func() {
+               err := ac.RequestAndDecode(&status, "GET", "/arvados/v1/container_requests/"+cr.UUID+"/container_status", nil, nil)
+               c.Assert(err, check.IsNil)
+               if status != lastStatus {
+                       c.Logf("container status: %s, %s", status.State, status.SchedulingStatus)
+                       allStatus += fmt.Sprintf("%s, %s\n", status.State, status.SchedulingStatus)
+                       lastStatus = status
+               }
+       }
+       deadline := time.Now().Add(time.Minute)
+       for cr.State != arvados.ContainerRequestStateFinal || (lastStatus.State != arvados.ContainerStateComplete && lastStatus.State != arvados.ContainerStateCancelled) {
+               err = ac.RequestAndDecode(&cr, "GET", "/arvados/v1/container_requests/"+cr.UUID, nil, nil)
+               c.Assert(err, check.IsNil)
+               checkstatus()
+               err = ac.RequestAndDecode(&ctr, "GET", "/arvados/v1/containers/"+cr.ContainerUUID, nil, nil)
+               if err != nil {
+                       c.Logf("error getting container state: %s", err)
+               } else if ctr.State != lastState {
+                       c.Logf("container state changed to %q", ctr.State)
+                       lastState = ctr.State
+                       // Poll again right away after a state change.
+                       continue
+               }
+               // No progress on this poll (either no state change
+               // or the container could not be fetched): enforce
+               // the deadline and pause, so a persistent error
+               // cannot turn into an unbounded busy loop.
+               if time.Now().After(deadline) {
+                       c.Errorf("timed out, container request state is %q, container state is %q", cr.State, ctr.State)
+                       if ctr.Log == "" {
+                               c.Logf("=== NO LOG COLLECTION saved for container")
+                       } else {
+                               showlogs(ctr.Log)
+                       }
+                       c.FailNow()
+               }
+               time.Sleep(time.Second / 2)
+       }
+       checkstatus()
+       c.Logf("cr.CumulativeCost == %f", cr.CumulativeCost)
+       c.Check(cr.CumulativeCost, check.Not(check.Equals), 0.0)
+       if expectExitCode >= 0 {
+               c.Check(ctr.State, check.Equals, arvados.ContainerStateComplete)
+               c.Check(ctr.ExitCode, check.Equals, expectExitCode)
+               err = ac.RequestAndDecode(&outcoll, "GET", "/arvados/v1/collections/"+cr.OutputUUID, nil, nil)
+               c.Assert(err, check.IsNil)
+               c.Check(allStatus, check.Matches, `Queued, waiting for dispatch\n`+
+                       `(Queued, waiting.*\n)*`+
+                       `(Locked, waiting for dispatch\n)?`+
+                       `(Locked, waiting for new instance to be ready\n)?`+
+                       `(Locked, preparing runtime environment\n)?`+
+                       `(Running, \n)?`+
+                       `Complete, \n`)
+       }
+       logcfs = showlogs(cr.LogUUID)
+       checkwebdavlogs(cr)
+       return outcoll, logcfs
+}