Merge branch '21666-provision-test-improvement'
[arvados.git] / services / keep-web / handler.go
index 3cdaf5d2b51c5e2d663fd8e53dc29739ffe76551..b9250efec76b8b45f599c30dc8961cbc3e279474 100644 (file)
@@ -18,6 +18,7 @@ import (
        "strconv"
        "strings"
        "sync"
+       "time"
 
        "git.arvados.org/arvados.git/lib/cmd"
        "git.arvados.org/arvados.git/lib/webdavfs"
@@ -26,15 +27,18 @@ import (
        "git.arvados.org/arvados.git/sdk/go/auth"
        "git.arvados.org/arvados.git/sdk/go/ctxlog"
        "git.arvados.org/arvados.git/sdk/go/httpserver"
-       "git.arvados.org/arvados.git/sdk/go/keepclient"
        "github.com/sirupsen/logrus"
        "golang.org/x/net/webdav"
 )
 
 type handler struct {
-       Cache     cache
-       Cluster   *arvados.Cluster
-       setupOnce sync.Once
+       Cache   cache
+       Cluster *arvados.Cluster // cluster config; ServeHTTP reads Collections.WebDAVOutputBuffer from it
+       metrics *metrics         // ServeHTTP serves the WebDAV handler through metrics.track
+
+       lockMtx    sync.Mutex               // held by collectionLock while it reads or updates lock and lockTidied
+       lock       map[string]*sync.RWMutex // per-collection locks keyed by collection ID; created lazily by collectionLock
+       lockTidied time.Time                // when collectionLock last deleted unused entries from lock
 }
 
 var urlPDHDecoder = strings.NewReplacer(" ", "+", "-", "+")
@@ -55,10 +59,6 @@ func parseCollectionIDFromURL(s string) string {
        return ""
 }
 
-func (h *handler) setup() {
-       keepclient.DefaultBlockCache.MaxBlocks = h.Cluster.Collections.WebDAVCache.MaxBlockEntries
-}
-
 func (h *handler) serveStatus(w http.ResponseWriter, r *http.Request) {
        json.NewEncoder(w).Encode(struct{ Version string }{cmd.Version.String()}) // writes a JSON object with a single "Version" field; the Encode error is not checked
 }
@@ -174,13 +174,16 @@ func (h *handler) Done() <-chan struct{} {
 
 // ServeHTTP implements http.Handler.
 func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
-       h.setupOnce.Do(h.setup)
-
        if xfp := r.Header.Get("X-Forwarded-Proto"); xfp != "" && xfp != "http" {
                r.URL.Scheme = xfp
        }
 
-       w := httpserver.WrapResponseWriter(wOrig)
+       wbuffer := newWriteBuffer(wOrig, int(h.Cluster.Collections.WebDAVOutputBuffer))
+       defer wbuffer.Close()
+       w := httpserver.WrapResponseWriter(responseWriter{
+               Writer:         wbuffer,
+               ResponseWriter: wOrig,
+       })
 
        if r.Method == "OPTIONS" && ServeCORSPreflight(w, r.Header) {
                return
@@ -288,12 +291,18 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
                reqTokens = auth.CredentialsFromRequest(r).Tokens
        }
 
-       formToken := r.FormValue("api_token")
+       r.ParseForm()
        origin := r.Header.Get("Origin")
        cors := origin != "" && !strings.HasSuffix(origin, "://"+r.Host)
        safeAjax := cors && (r.Method == http.MethodGet || r.Method == http.MethodHead)
-       safeAttachment := attachment && r.URL.Query().Get("api_token") == ""
-       if formToken == "" {
+       // Important distinction: safeAttachment checks whether api_token exists
+       // as a query parameter. haveFormTokens checks whether api_token exists
+       // as request form data *or* a query parameter. Different checks are
+       // necessary because both the request disposition and the location of
+       // the API token affect whether or not the request needs to be
+       // redirected. The different branch comments below explain further.
+       safeAttachment := attachment && !r.URL.Query().Has("api_token")
+       if formTokens, haveFormTokens := r.Form["api_token"]; !haveFormTokens {
                // No token to use or redact.
        } else if safeAjax || safeAttachment {
                // If this is a cross-origin request, the URL won't
@@ -308,7 +317,9 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
                // form?" problem, so provided the token isn't
                // embedded in the URL, there's no reason to do
                // redirect-with-cookie in this case either.
-               reqTokens = append(reqTokens, formToken)
+               for _, tok := range formTokens {
+                       reqTokens = append(reqTokens, tok)
+               }
        } else if browserMethod[r.Method] {
                // If this is a page view, and the client provided a
                // token via query string or POST body, we must put
@@ -406,13 +417,16 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
                        // collection id is outside scope of supplied
                        // token
                        tokenScopeProblem = true
+                       sess.Release()
                        continue
                } else if os.IsNotExist(err) {
                        // collection does not exist or is not
                        // readable using this token
+                       sess.Release()
                        continue
                } else if err != nil {
                        http.Error(w, err.Error(), http.StatusInternalServerError)
+                       sess.Release()
                        return
                }
                defer f.Close()
@@ -420,6 +434,22 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
                collectionDir, sessionFS, session, tokenUser = f, fs, sess, user
                break
        }
+
+       // releaseSession() is equivalent to session.Release() except
+       // that it's a no-op if (1) session is nil, or (2) it has
+       // already been called.
+       //
+       // This way, we can do a defer call here to ensure it gets
+       // called in all code paths, and also call it inline (see
+       // below) in the cases where we want to release the lock
+       // before returning.
+       releaseSession := func() {}
+       if session != nil {
+               var releaseSessionOnce sync.Once
+               releaseSession = func() { releaseSessionOnce.Do(func() { session.Release() }) }
+       }
+       defer releaseSession()
+
        if forceReload && collectionDir != nil {
                err := collectionDir.Sync()
                if err != nil {
@@ -507,6 +537,7 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
        if r.Method == http.MethodGet || r.Method == http.MethodHead {
                targetfnm := fsprefix + strings.Join(pathParts[stripParts:], "/")
                if fi, err := sessionFS.Stat(targetfnm); err == nil && fi.IsDir() {
+                       releaseSession() // because we won't be writing anything
                        if !strings.HasSuffix(r.URL.Path, "/") {
                                h.seeOtherWithCookie(w, r, r.URL.Path+"/", credentialsOK)
                        } else {
@@ -530,7 +561,11 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
        }
        h.logUploadOrDownload(r, session.arvadosclient, sessionFS, fsprefix+strings.Join(targetPath, "/"), nil, tokenUser)
 
-       if writeMethod[r.Method] {
+       writing := writeMethod[r.Method]
+       locker := h.collectionLock(collectionID, writing)
+       defer locker.Unlock()
+
+       if writing {
                // Save the collection only if/when all
                // webdav->filesystem operations succeed --
                // and send a 500 error if the modified
@@ -572,6 +607,15 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
                                collectionDir.Splice(snap)
                                return nil
                        }}
+       } else {
+               // When writing, we need to block session renewal
+               // until we're finished, in order to guarantee the
+               // effect of the write is visible in future responses.
+               // But if we're not writing, we can release the lock
+               // early.  This enables us to keep renewing sessions
+               // and processing more requests even if a slow client
+               // takes a long time to download a large file.
+               releaseSession()
        }
        if r.Method == http.MethodGet {
                applyContentDispositionHdr(w, r, basename, attachment)
@@ -579,7 +623,7 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
        if webdavPrefix == "" {
                webdavPrefix = "/" + strings.Join(pathParts[:stripParts], "/")
        }
-       wh := webdav.Handler{
+       wh := &webdav.Handler{
                Prefix: webdavPrefix,
                FileSystem: &webdavfs.FS{
                        FileSystem:    sessionFS,
@@ -594,7 +638,7 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
                        }
                },
        }
-       wh.ServeHTTP(w, r)
+       h.metrics.track(wh, w, r)
        if r.Method == http.MethodGet && w.WroteStatus() == http.StatusOK {
                wrote := int64(w.WroteBodyBytes())
                fnm := strings.Join(pathParts[stripParts:], "/")
@@ -763,7 +807,7 @@ func applyContentDispositionHdr(w http.ResponseWriter, r *http.Request, filename
 }
 
 func (h *handler) seeOtherWithCookie(w http.ResponseWriter, r *http.Request, location string, credentialsOK bool) {
-       if formToken := r.FormValue("api_token"); formToken != "" {
+       if formTokens, haveFormTokens := r.Form["api_token"]; haveFormTokens {
                if !credentialsOK {
                        // It is not safe to copy the provided token
                        // into a cookie unless the current vhost
@@ -784,13 +828,19 @@ func (h *handler) seeOtherWithCookie(w http.ResponseWriter, r *http.Request, loc
                // bar, and in the case of a POST request to avoid
                // raising warnings when the user refreshes the
                // resulting page.
-               http.SetCookie(w, &http.Cookie{
-                       Name:     "arvados_api_token",
-                       Value:    auth.EncodeTokenCookie([]byte(formToken)),
-                       Path:     "/",
-                       HttpOnly: true,
-                       SameSite: http.SameSiteLaxMode,
-               })
+               for _, tok := range formTokens {
+                       if tok == "" {
+                               continue
+                       }
+                       http.SetCookie(w, &http.Cookie{
+                               Name:     "arvados_api_token",
+                               Value:    auth.EncodeTokenCookie([]byte(tok)),
+                               Path:     "/",
+                               HttpOnly: true,
+                               SameSite: http.SameSiteLaxMode,
+                       })
+                       break
+               }
        }
 
        // Propagate query parameters (except api_token) from
@@ -942,6 +992,41 @@ func (h *handler) determineCollection(fs arvados.CustomFileSystem, path string)
        return nil, ""
 }
 
+var lockTidyInterval = time.Minute * 10 // minimum time between collectionLock's sweeps of unused per-collection locks
+
+// collectionLock locks the specified collection: exclusively if writing is true, otherwise shared for reading. The lock is created on first use.
+// It blocks until the lock is acquired. Caller must call Unlock() on the returned Locker when the operation is finished.
+// NOTE(review): h.lockMtx stays held while blocking, so one waiting caller also stalls collectionLock calls for other collections — confirm intended.
+func (h *handler) collectionLock(collectionID string, writing bool) sync.Locker {
+       h.lockMtx.Lock() // serializes all map access and lock acquisition below
+       defer h.lockMtx.Unlock()
+       if time.Since(h.lockTidied) > lockTidyInterval { // also true on the first call, when lockTidied is still the zero time
+               // Periodically delete all locks that aren't in use. TryLock succeeds only if no reader or writer holds the lock, and nobody can be waiting for it because waiters hold lockMtx.
+               h.lockTidied = time.Now()
+               for id, locker := range h.lock {
+                       if locker.TryLock() {
+                               locker.Unlock()
+                               delete(h.lock, id) // deleting the current key during range is permitted for Go maps
+                       }
+               }
+       }
+       locker := h.lock[collectionID]
+       if locker == nil {
+               locker = new(sync.RWMutex)
+               if h.lock == nil { // the map is allocated lazily, on first use
+                       h.lock = map[string]*sync.RWMutex{}
+               }
+               h.lock[collectionID] = locker
+       }
+       if writing {
+               locker.Lock()
+               return locker // Unlock() on the RWMutex releases the exclusive lock
+       } else {
+               locker.RLock()
+               return locker.RLocker() // Unlock() on this Locker calls RUnlock, so callers use Unlock() in both modes
+       }
+}
+
 func ServeCORSPreflight(w http.ResponseWriter, header http.Header) bool {
        method := header.Get("Access-Control-Request-Method")
        if method == "" {