Merge branch '20690-remove-wb1-from-installer'. Closes #20690
diff --git a/lib/controller/handler_test.go b/lib/controller/handler_test.go
index 39c2b1c68e5c82921e10bc9125d54e17846a8fed..0c50a6c4be59b85fc23140d93c4346e1f8e8a9ae 100644
--- a/lib/controller/handler_test.go
+++ b/lib/controller/handler_test.go
@@ -16,9 +16,11 @@ import (
        "net/url"
        "os"
        "strings"
+       "sync"
        "testing"
        "time"
 
+       "git.arvados.org/arvados.git/lib/controller/rpc"
        "git.arvados.org/arvados.git/sdk/go/arvados"
        "git.arvados.org/arvados.git/sdk/go/arvadostest"
        "git.arvados.org/arvados.git/sdk/go/auth"
@@ -36,11 +38,12 @@ func Test(t *testing.T) {
 var _ = check.Suite(&HandlerSuite{})
 
 type HandlerSuite struct {
-       cluster *arvados.Cluster
-       handler *Handler
-       logbuf  *bytes.Buffer
-       ctx     context.Context
-       cancel  context.CancelFunc
+       cluster  *arvados.Cluster
+       handler  *Handler
+       railsSpy *arvadostest.Proxy
+       logbuf   *bytes.Buffer
+       ctx      context.Context
+       cancel   context.CancelFunc
 }
 
 func (s *HandlerSuite) SetUpTest(c *check.C) {
@@ -54,6 +57,8 @@ func (s *HandlerSuite) SetUpTest(c *check.C) {
        s.cluster.API.RequestTimeout = arvados.Duration(5 * time.Minute)
        s.cluster.TLS.Insecure = true
        arvadostest.SetServiceURL(&s.cluster.Services.RailsAPI, "https://"+os.Getenv("ARVADOS_TEST_API_HOST"))
+       s.railsSpy = arvadostest.NewProxy(c, s.cluster.Services.RailsAPI)
+       arvadostest.SetServiceURL(&s.cluster.Services.RailsAPI, s.railsSpy.URL.String())
        arvadostest.SetServiceURL(&s.cluster.Services.Controller, "http://localhost:/")
        s.handler = newHandler(s.ctx, s.cluster, "", prometheus.NewRegistry()).(*Handler)
 }
@@ -92,6 +97,204 @@ func (s *HandlerSuite) TestConfigExport(c *check.C) {
        }
 }
 
+func (s *HandlerSuite) TestDiscoveryDocCache(c *check.C) {
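+       // countRailsReqs reports how many discovery-doc requests
+       // have reached the RailsAPI spy so far.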
+       countRailsReqs := func() int {
+               n := 0
+               for _, req := range s.railsSpy.RequestDumps {
+                       if bytes.Contains(req, []byte("/discovery/v1/apis/arvados/v1/rest")) {
+                               n++
+                       }
+               }
+               return n
+       }
+       getDD := func() int {
+               req := httptest.NewRequest(http.MethodGet, "/discovery/v1/apis/arvados/v1/rest", nil)
+               resp := httptest.NewRecorder()
+               s.handler.ServeHTTP(resp, req)
+               if resp.Code == http.StatusOK {
+                       var dd arvados.DiscoveryDocument
+                       err := json.Unmarshal(resp.Body.Bytes(), &dd)
+                       c.Check(err, check.IsNil)
+                       c.Check(dd.Schemas["Collection"].UUIDPrefix, check.Equals, "4zz18")
+               }
+               return resp.Code
+       }
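+       // getDDConcurrently issues n requests in parallel, checks
+       // each response code against expectCode (checkArgs pass
+       // through to c.Check as comment arguments), and returns a
+       // WaitGroup that is done once all checks have run.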
+       getDDConcurrently := func(n int, expectCode int, checkArgs ...interface{}) *sync.WaitGroup {
+               var wg sync.WaitGroup
+               for i := 0; i < n; i++ {
+                       wg.Add(1)
+                       go func() {
+                               defer wg.Done()
+                               c.Check(getDD(), check.Equals, append([]interface{}{expectCode}, checkArgs...)...)
+                       }()
+               }
+               return &wg
+       }
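+       // clearCache empties every cache entry, simulating a cold
+       // start where the discovery doc has never been fetched.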
+       clearCache := func() {
+               for _, ent := range s.handler.cache {
+                       ent.refreshLock.Lock()
+                       ent.mtx.Lock()
+                       ent.body, ent.header, ent.refreshAfter = nil, nil, time.Time{}
+                       ent.mtx.Unlock()
+                       ent.refreshLock.Unlock()
+               }
+       }
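+       // waitPendingUpdates acquires (and releases on return) every
+       // entry's locks, so it blocks until any background refresh in
+       // progress has finished.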
+       waitPendingUpdates := func() {
+               for _, ent := range s.handler.cache {
+                       ent.refreshLock.Lock()
+                       defer ent.refreshLock.Unlock()
+                       ent.mtx.Lock()
+                       defer ent.mtx.Unlock()
+               }
+       }
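+       // refreshNow backdates refreshAfter so the next request is
+       // served from cache but triggers a background refresh.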
+       refreshNow := func() {
+               waitPendingUpdates()
+               for _, ent := range s.handler.cache {
+                       ent.refreshAfter = time.Now()
+               }
+       }
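+       // expireNow backdates expireAfter so the cached data is
+       // treated as expired and the cache behaves as if
+       // uninitialized.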
+       expireNow := func() {
+               waitPendingUpdates()
+               for _, ent := range s.handler.cache {
+                       ent.expireAfter = time.Now()
+               }
+       }
+
+       // Easy path: first req fetches, subsequent reqs use cache.
+       c.Check(countRailsReqs(), check.Equals, 0)
+       c.Check(getDD(), check.Equals, http.StatusOK)
+       c.Check(countRailsReqs(), check.Equals, 1)
+       c.Check(getDD(), check.Equals, http.StatusOK)
+       c.Check(countRailsReqs(), check.Equals, 1)
+       c.Check(getDD(), check.Equals, http.StatusOK)
+       c.Check(countRailsReqs(), check.Equals, 1)
+
+       // To guarantee we have concurrent requests, we set up
+       // railsSpy to hold up the Handler's outgoing requests until
+       // we send to (or close) holdReqs.
+       holdReqs := make(chan struct{})
+       s.railsSpy.Director = func(*http.Request) {
+               <-holdReqs
+       }
+
+       // Race at startup: first req fetches, other concurrent reqs
+       // wait for the initial fetch to complete, then all return.
+       clearCache()
+       reqsBefore := countRailsReqs()
+       wg := getDDConcurrently(5, http.StatusOK, check.Commentf("race at startup"))
+       close(holdReqs)
+       wg.Wait()
+       c.Check(countRailsReqs(), check.Equals, reqsBefore+1)
+
+       // Race after the refresh deadline passes: concurrent reqs
+       // return the cached data but initiate a new fetch in the
+       // background.
+       refreshNow()
+       holdReqs = make(chan struct{})
+       wg = getDDConcurrently(5, http.StatusOK, check.Commentf("race after expiry"))
+       reqsBefore = countRailsReqs()
+       close(holdReqs)
+       wg.Wait()
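+       // Allow up to 1s for the background refresh to reach the
+       // spy before checking the request count.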
+       for deadline := time.Now().Add(time.Second); time.Now().Before(deadline) && countRailsReqs() < reqsBefore+1; {
+               time.Sleep(time.Second / 100)
+       }
+       c.Check(countRailsReqs(), check.Equals, reqsBefore+1)
+
+       // Configure railsSpy to return an error or bad content
+       // depending on flags, still holding each outgoing request
+       // until holdReqs is closed.
+       var wantError, wantBadContent bool
+       s.railsSpy.Director = func(req *http.Request) {
+               <-holdReqs
+               if wantError {
+                       req.Method = "MAKE-COFFEE"
+               } else if wantBadContent {
+                       req.URL.Path = "/_health/ping"
+                       req.Header.Set("Authorization", "Bearer "+arvadostest.ManagementToken)
+               }
+       }
+
+       // Error at startup (empty cache) => caller gets error, and we
+       // make an upstream attempt for each incoming request because
+       // we have nothing better to return
+       clearCache()
+       wantError, wantBadContent = true, false
+       reqsBefore = countRailsReqs()
+       holdReqs = make(chan struct{})
+       wg = getDDConcurrently(5, http.StatusBadGateway, check.Commentf("error at startup"))
+       close(holdReqs)
+       wg.Wait()
+       c.Check(countRailsReqs(), check.Equals, reqsBefore+5)
+
+       // Response status is OK but body is not a discovery document
+       wantError, wantBadContent = false, true
+       reqsBefore = countRailsReqs()
+       c.Check(getDD(), check.Equals, http.StatusBadGateway)
+       c.Check(countRailsReqs(), check.Equals, reqsBefore+1)
+
+       // Error condition clears => caller gets OK, cache is warmed
+       // up
+       wantError, wantBadContent = false, false
+       reqsBefore = countRailsReqs()
+       getDDConcurrently(5, http.StatusOK, check.Commentf("success after errors at startup")).Wait()
+       c.Check(countRailsReqs(), check.Equals, reqsBefore+1)
+
+       // Error with warm cache => caller gets OK (with no attempt to
+       // re-fetch)
+       wantError, wantBadContent = true, false
+       reqsBefore = countRailsReqs()
+       getDDConcurrently(5, http.StatusOK, check.Commentf("error with warm cache")).Wait()
+       c.Check(countRailsReqs(), check.Equals, reqsBefore)
+
+       // Error with stale cache => caller gets OK with stale data
+       // while the re-fetch is attempted in the background
+       refreshNow()
+       wantError, wantBadContent = true, false
+       reqsBefore = countRailsReqs()
+       holdReqs = make(chan struct{})
+       getDDConcurrently(5, http.StatusOK, check.Commentf("error with stale cache")).Wait()
+       close(holdReqs)
+       // Only one attempt to re-fetch (holdReqs ensured the first
+       // update took long enough for the last incoming request to
+       // arrive)
+       c.Check(countRailsReqs(), check.Equals, reqsBefore+1)
+
+       refreshNow()
+       wantError, wantBadContent = false, false
+       reqsBefore = countRailsReqs()
+       holdReqs = make(chan struct{})
+       getDDConcurrently(5, http.StatusOK, check.Commentf("refresh cache after error condition clears")).Wait()
+       close(holdReqs)
+       waitPendingUpdates()
+       c.Check(countRailsReqs(), check.Equals, reqsBefore+1)
+
+       // Make sure expireAfter is getting set
+       waitPendingUpdates()
+       exp := time.Until(s.handler.cache["/discovery/v1/apis/arvados/v1/rest"].expireAfter)
+       c.Check(exp > cacheTTL, check.Equals, true)
+       c.Check(exp < cacheExpire, check.Equals, true)
+
+       // After the cache *expires* it behaves as if uninitialized:
+       // each incoming request does a new upstream request until one
+       // succeeds.
+       //
+       // First check failure after expiry:
+       expireNow()
+       wantError, wantBadContent = true, false
+       reqsBefore = countRailsReqs()
+       holdReqs = make(chan struct{})
+       wg = getDDConcurrently(5, http.StatusBadGateway, check.Commentf("error after expiry"))
+       close(holdReqs)
+       wg.Wait()
+       c.Check(countRailsReqs(), check.Equals, reqsBefore+5)
+
+       // Success after expiry:
+       wantError, wantBadContent = false, false
+       reqsBefore = countRailsReqs()
+       holdReqs = make(chan struct{})
+       wg = getDDConcurrently(5, http.StatusOK, check.Commentf("success after expiry"))
+       close(holdReqs)
+       wg.Wait()
+       c.Check(countRailsReqs(), check.Equals, reqsBefore+1)
+}
+
 func (s *HandlerSuite) TestVocabularyExport(c *check.C) {
        voc := `{
                "strict_tags": false,
@@ -209,7 +412,7 @@ func (s *HandlerSuite) TestProxyDiscoveryDoc(c *check.C) {
 // etc.
 func (s *HandlerSuite) TestRequestCancel(c *check.C) {
        ctx, cancel := context.WithCancel(context.Background())
-       req := httptest.NewRequest("GET", "/discovery/v1/apis/arvados/v1/rest", nil).WithContext(ctx)
+       req := httptest.NewRequest("GET", "/static/login_failure", nil).WithContext(ctx)
        resp := httptest.NewRecorder()
        cancel()
        s.handler.ServeHTTP(resp, req)
@@ -271,18 +474,20 @@ func (s *HandlerSuite) TestProxyNotFound(c *check.C) {
 }
 
 func (s *HandlerSuite) TestLogoutGoogle(c *check.C) {
+       s.cluster.Services.Workbench2.ExternalURL = arvados.URL{Scheme: "https", Host: "wb2.example", Path: "/"}
        s.cluster.Login.Google.Enable = true
        s.cluster.Login.Google.ClientID = "test"
-       req := httptest.NewRequest("GET", "https://0.0.0.0:1/logout?return_to=https://example.com/foo", nil)
+       req := httptest.NewRequest("GET", "https://0.0.0.0:1/logout?return_to=https://wb2.example/", nil)
        resp := httptest.NewRecorder()
        s.handler.ServeHTTP(resp, req)
        if !c.Check(resp.Code, check.Equals, http.StatusFound) {
                c.Log(resp.Body.String())
        }
-       c.Check(resp.Header().Get("Location"), check.Equals, "https://example.com/foo")
+       c.Check(resp.Header().Get("Location"), check.Equals, "https://wb2.example/")
 }
 
 func (s *HandlerSuite) TestValidateV1APIToken(c *check.C) {
+       c.Assert(s.handler.CheckHealth(), check.IsNil)
        req := httptest.NewRequest("GET", "/arvados/v1/users/current", nil)
        user, ok, err := s.handler.validateAPItoken(req, arvadostest.ActiveToken)
        c.Assert(err, check.IsNil)
@@ -294,6 +499,7 @@ func (s *HandlerSuite) TestValidateV1APIToken(c *check.C) {
 }
 
 func (s *HandlerSuite) TestValidateV2APIToken(c *check.C) {
+       c.Assert(s.handler.CheckHealth(), check.IsNil)
        req := httptest.NewRequest("GET", "/arvados/v1/users/current", nil)
        user, ok, err := s.handler.validateAPItoken(req, arvadostest.ActiveTokenV2)
        c.Assert(err, check.IsNil)
@@ -336,6 +542,7 @@ func (s *HandlerSuite) TestLogTokenUUID(c *check.C) {
 }
 
 func (s *HandlerSuite) TestCreateAPIToken(c *check.C) {
+       c.Assert(s.handler.CheckHealth(), check.IsNil)
        req := httptest.NewRequest("GET", "/arvados/v1/users/current", nil)
        auth, err := s.handler.createAPItoken(req, arvadostest.ActiveUserUUID, nil)
        c.Assert(err, check.IsNil)
@@ -432,7 +639,7 @@ func (s *HandlerSuite) TestGetObjects(c *check.C) {
        testCases := map[string]map[string]bool{
                "api_clients/" + arvadostest.TrustedWorkbenchAPIClientUUID:     nil,
                "api_client_authorizations/" + auth.UUID:                       {"href": true, "modified_by_client_uuid": true, "modified_by_user_uuid": true},
-               "authorized_keys/" + arvadostest.AdminAuthorizedKeysUUID:       nil,
+               "authorized_keys/" + arvadostest.AdminAuthorizedKeysUUID:       {"href": true},
                "collections/" + arvadostest.CollectionWithUniqueWordsUUID:     {"href": true},
                "containers/" + arvadostest.RunningContainerUUID:               nil,
                "container_requests/" + arvadostest.QueuedContainerRequestUUID: nil,
@@ -476,7 +683,7 @@ func (s *HandlerSuite) TestTrashSweep(c *check.C) {
        coll, err := s.handler.federation.CollectionCreate(ctx, arvados.CreateOptions{Attrs: map[string]interface{}{"name": "test trash sweep"}, EnsureUniqueName: true})
        c.Assert(err, check.IsNil)
        defer s.handler.federation.CollectionDelete(ctx, arvados.DeleteOptions{UUID: coll.UUID})
-       db, err := s.handler.db(s.ctx)
+       db, err := s.handler.dbConnector.GetDB(s.ctx)
        c.Assert(err, check.IsNil)
        _, err = db.ExecContext(s.ctx, `update collections set trash_at = $1, delete_at = $2 where uuid = $3`, time.Now().UTC().Add(time.Second/10), time.Now().UTC().Add(time.Hour), coll.UUID)
        c.Assert(err, check.IsNil)
@@ -494,3 +701,123 @@ func (s *HandlerSuite) TestTrashSweep(c *check.C) {
                time.Sleep(time.Second / 10)
        }
 }
+
+func (s *HandlerSuite) TestContainerLogSweep(c *check.C) {
+       s.cluster.SystemRootToken = arvadostest.SystemRootToken
+       s.cluster.Containers.Logging.SweepInterval = arvados.Duration(time.Second / 10)
+       c.Assert(s.handler.CheckHealth(), check.IsNil)
+       ctx := auth.NewContext(s.ctx, &auth.Credentials{Tokens: []string{arvadostest.ActiveTokenV2}})
+       logentry, err := s.handler.federation.LogCreate(ctx, arvados.CreateOptions{Attrs: map[string]interface{}{
+               "object_uuid": arvadostest.CompletedContainerUUID,
+               "event_type":  "stderr",
+               "properties": map[string]interface{}{
+                       "text": "test trash sweep\n",
+               },
+       }})
+       c.Assert(err, check.IsNil)
+       defer s.handler.federation.LogDelete(ctx, arvados.DeleteOptions{UUID: logentry.UUID})
+       deadline := time.Now().Add(5 * time.Second)
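+       // Poll until the sweeper has deleted the log entry, or fail
+       // at the deadline.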
+       for {
+               if time.Now().After(deadline) {
+                       c.Log("timed out")
+                       c.FailNow()
+               }
+               logentries, err := s.handler.federation.LogList(ctx, arvados.ListOptions{Filters: []arvados.Filter{{"uuid", "=", logentry.UUID}}, Limit: -1})
+               c.Assert(err, check.IsNil)
+               if len(logentries.Items) == 0 {
+                       break
+               }
+               time.Sleep(time.Second / 10)
+       }
+}
+
+func (s *HandlerSuite) TestLogActivity(c *check.C) {
+       s.cluster.SystemRootToken = arvadostest.SystemRootToken
+       s.cluster.Users.ActivityLoggingPeriod = arvados.Duration(24 * time.Hour)
+       c.Assert(s.handler.CheckHealth(), check.IsNil)
+
+       testServer := newServerFromIntegrationTestEnv(c)
+       testServer.Server.Handler = httpserver.AddRequestIDs(httpserver.LogRequests(s.handler))
+       c.Assert(testServer.Start(), check.IsNil)
+       defer testServer.Close()
+
+       u, _ := url.Parse("http://" + testServer.Addr)
+       client := rpc.NewConn(s.cluster.ClusterID, u, true, rpc.PassthroughTokenProvider)
+
+       starttime := time.Now()
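+       // Issue several requests per user; activity should be logged
+       // at most once per user within ActivityLoggingPeriod.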
+       for i := 0; i < 4; i++ {
+               for _, token := range []string{
+                       arvadostest.ActiveTokenV2,
+                       arvadostest.ActiveToken,
+                       arvadostest.SpectatorToken,
+               } {
+                       ctx := auth.NewContext(s.ctx, &auth.Credentials{Tokens: []string{token}})
+                       _, err := client.CollectionList(ctx, arvados.ListOptions{})
+                       c.Assert(err, check.IsNil)
+               }
+       }
+       db, err := s.handler.dbConnector.GetDB(s.ctx)
+       c.Assert(err, check.IsNil)
+       for _, userUUID := range []string{arvadostest.ActiveUserUUID, arvadostest.SpectatorUserUUID} {
+               var rows int
+               err = db.QueryRowContext(s.ctx, `select count(uuid) from logs where object_uuid = $1 and event_at > $2`, userUUID, starttime.UTC()).Scan(&rows)
+               c.Assert(err, check.IsNil)
+               c.Check(rows, check.Equals, 1, check.Commentf("expect 1 row for user uuid %s", userUUID))
+       }
+}
+
+func (s *HandlerSuite) TestLogLimiting(c *check.C) {
+       s.handler.Cluster.API.MaxConcurrentRequests = 2
+       s.handler.Cluster.API.LogCreateRequestFraction = 0.5
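+       // With MaxConcurrentRequests=2 and fraction 0.5, a single
+       // in-flight log create should saturate the limitLogCreate
+       // channel.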
+
+       // The request body can only be read once, so build a fresh
+       // request for each ServeHTTP call below.
+       newLogReq := func() *http.Request {
+               req := httptest.NewRequest("POST", "/arvados/v1/logs", strings.NewReader(`{
+                       "log": {
+                          "event_type": "test"
+                       }
+               }`))
+               req.Header.Set("Authorization", "Bearer "+arvadostest.ActiveToken)
+               return req
+       }
+
+       // Log create succeeds
+       for i := 0; i < 2; i++ {
+               resp := httptest.NewRecorder()
+               s.handler.ServeHTTP(resp, newLogReq())
+               c.Check(resp.Code, check.Equals, http.StatusOK)
+               var lg arvados.Log
+               err := json.Unmarshal(resp.Body.Bytes(), &lg)
+               c.Check(err, check.IsNil)
+               c.Check(lg.UUID, check.Matches, "zzzzz-57u5n-.*")
+       }
+
+       // Pretend there's a log create in flight
+       s.handler.limitLogCreate <- struct{}{}
+
+       // Log create should be rejected now
+       resp := httptest.NewRecorder()
+       s.handler.ServeHTTP(resp, newLogReq())
+       c.Check(resp.Code, check.Equals, http.StatusServiceUnavailable)
+
+       // Other requests still succeed
+       req := httptest.NewRequest("GET", "/arvados/v1/users/current", nil)
+       req.Header.Set("Authorization", "Bearer "+arvadostest.ActiveToken)
+       resp = httptest.NewRecorder()
+       s.handler.ServeHTTP(resp, req)
+       c.Check(resp.Code, check.Equals, http.StatusOK)
+       var u arvados.User
+       err := json.Unmarshal(resp.Body.Bytes(), &u)
+       c.Check(err, check.IsNil)
+       c.Check(u.UUID, check.Equals, arvadostest.ActiveUserUUID)
+
+       // log create still fails
+       resp = httptest.NewRecorder()
+       s.handler.ServeHTTP(resp, newLogReq())
+       c.Check(resp.Code, check.Equals, http.StatusServiceUnavailable)
+
+       // Pretend in-flight log is done
+       <-s.handler.limitLogCreate
+
+       // log create succeeds again
+       resp = httptest.NewRecorder()
+       s.handler.ServeHTTP(resp, newLogReq())
+       c.Check(resp.Code, check.Equals, http.StatusOK)
+}