X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/a8be45a3ff557e879c8ad2d749b3cfbd6d9510fe..57893063e6a832fc2738037fe906a213a8743aa4:/services/keep-web/handler.go diff --git a/services/keep-web/handler.go b/services/keep-web/handler.go index 0e964e4632..d318b5cdd4 100644 --- a/services/keep-web/handler.go +++ b/services/keep-web/handler.go @@ -18,29 +18,33 @@ import ( "strconv" "strings" "sync" + "time" "git.arvados.org/arvados.git/lib/cmd" + "git.arvados.org/arvados.git/lib/webdavfs" "git.arvados.org/arvados.git/sdk/go/arvados" "git.arvados.org/arvados.git/sdk/go/arvadosclient" "git.arvados.org/arvados.git/sdk/go/auth" "git.arvados.org/arvados.git/sdk/go/ctxlog" "git.arvados.org/arvados.git/sdk/go/httpserver" - "git.arvados.org/arvados.git/sdk/go/keepclient" "github.com/sirupsen/logrus" "golang.org/x/net/webdav" ) type handler struct { - Cache cache - Cluster *arvados.Cluster - setupOnce sync.Once - webdavLS webdav.LockSystem + Cache cache + Cluster *arvados.Cluster + metrics *metrics + + lockMtx sync.Mutex + lock map[string]*sync.RWMutex + lockTidied time.Time } var urlPDHDecoder = strings.NewReplacer(" ", "+", "-", "+") var notFoundMessage = "Not Found" -var unauthorizedMessage = "401 Unauthorized\r\n\r\nA valid Arvados token must be provided to access this resource.\r\n" +var unauthorizedMessage = "401 Unauthorized\n\nA valid Arvados token must be provided to access this resource." // parseCollectionIDFromURL returns a UUID or PDH if s is a UUID or a // PDH (even if it is a PDH with "+" replaced by " " or "-"); @@ -55,18 +59,14 @@ func parseCollectionIDFromURL(s string) string { return "" } -func (h *handler) setup() { - keepclient.DefaultBlockCache.MaxBlocks = h.Cluster.Collections.WebDAVCache.MaxBlockEntries - - // Even though we don't accept LOCK requests, every webdav - // handler must have a non-nil LockSystem. 
- h.webdavLS = &noLockSystem{} -} - func (h *handler) serveStatus(w http.ResponseWriter, r *http.Request) { json.NewEncoder(w).Encode(struct{ Version string }{cmd.Version.String()}) } +type errorWithHTTPStatus interface { + HTTPStatus() int +} + // updateOnSuccess wraps httpserver.ResponseWriter. If the handler // sends an HTTP header indicating success, updateOnSuccess first // calls the provided update func. If the update func fails, an error @@ -97,8 +97,7 @@ func (uos *updateOnSuccess) WriteHeader(code int) { if code >= 200 && code < 400 { if uos.err = uos.update(); uos.err != nil { code := http.StatusInternalServerError - var he interface{ HTTPStatus() int } - if errors.As(uos.err, &he) { + if he := errorWithHTTPStatus(nil); errors.As(uos.err, &he) { code = he.HTTPStatus() } uos.logger.WithError(uos.err).Errorf("update() returned %T error, changing response to HTTP %d", uos.err, code) @@ -114,7 +113,7 @@ var ( corsAllowHeadersHeader = strings.Join([]string{ "Authorization", "Content-Type", "Range", // WebDAV request headers: - "Depth", "Destination", "If", "Lock-Token", "Overwrite", "Timeout", + "Depth", "Destination", "If", "Lock-Token", "Overwrite", "Timeout", "Cache-Control", }, ", ") writeMethod = map[string]bool{ "COPY": true, @@ -175,23 +174,18 @@ func (h *handler) Done() <-chan struct{} { // ServeHTTP implements http.Handler. 
func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) { - h.setupOnce.Do(h.setup) - if xfp := r.Header.Get("X-Forwarded-Proto"); xfp != "" && xfp != "http" { r.URL.Scheme = xfp } - w := httpserver.WrapResponseWriter(wOrig) + wbuffer := newWriteBuffer(wOrig, int(h.Cluster.Collections.WebDAVOutputBuffer)) + defer wbuffer.Close() + w := httpserver.WrapResponseWriter(responseWriter{ + Writer: wbuffer, + ResponseWriter: wOrig, + }) - if method := r.Header.Get("Access-Control-Request-Method"); method != "" && r.Method == "OPTIONS" { - if !browserMethod[method] && !webdavMethod[method] { - w.WriteHeader(http.StatusMethodNotAllowed) - return - } - w.Header().Set("Access-Control-Allow-Headers", corsAllowHeadersHeader) - w.Header().Set("Access-Control-Allow-Methods", "COPY, DELETE, GET, LOCK, MKCOL, MOVE, OPTIONS, POST, PROPFIND, PROPPATCH, PUT, RMCOL, UNLOCK") - w.Header().Set("Access-Control-Allow-Origin", "*") - w.Header().Set("Access-Control-Max-Age", "86400") + if r.Method == "OPTIONS" && ServeCORSPreflight(w, r.Header) { return } @@ -214,7 +208,26 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) { return } - pathParts := strings.Split(r.URL.Path[1:], "/") + webdavPrefix := "" + arvPath := r.URL.Path + if prefix := r.Header.Get("X-Webdav-Prefix"); prefix != "" { + // Enable a proxy (e.g., container log handler in + // controller) to satisfy a request for path + // "/foo/bar/baz.txt" using content from + // "//abc123-4.internal/bar/baz.txt", by adding a + // request header "X-Webdav-Prefix: /foo" + if !strings.HasPrefix(arvPath, prefix) { + http.Error(w, "X-Webdav-Prefix header is not a prefix of the requested path", http.StatusBadRequest) + return + } + arvPath = r.URL.Path[len(prefix):] + if arvPath == "" { + arvPath = "/" + } + w.Header().Set("Vary", "X-Webdav-Prefix, "+w.Header().Get("Vary")) + webdavPrefix = prefix + } + pathParts := strings.Split(arvPath[1:], "/") var stripParts int var collectionID string @@ -278,12 
+291,18 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) { reqTokens = auth.CredentialsFromRequest(r).Tokens } - formToken := r.FormValue("api_token") + r.ParseForm() origin := r.Header.Get("Origin") cors := origin != "" && !strings.HasSuffix(origin, "://"+r.Host) safeAjax := cors && (r.Method == http.MethodGet || r.Method == http.MethodHead) - safeAttachment := attachment && r.URL.Query().Get("api_token") == "" - if formToken == "" { + // Important distinction: safeAttachment checks whether api_token exists + // as a query parameter. haveFormTokens checks whether api_token exists + // as request form data *or* a query parameter. Different checks are + // necessary because both the request disposition and the location of + // the API token affect whether or not the request needs to be + // redirected. The different branch comments below explain further. + safeAttachment := attachment && !r.URL.Query().Has("api_token") + if formTokens, haveFormTokens := r.Form["api_token"]; !haveFormTokens { // No token to use or redact. } else if safeAjax || safeAttachment { // If this is a cross-origin request, the URL won't @@ -298,7 +317,9 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) { // form?" problem, so provided the token isn't // embedded in the URL, there's no reason to do // redirect-with-cookie in this case either. 
- reqTokens = append(reqTokens, formToken) + for _, tok := range formTokens { + reqTokens = append(reqTokens, tok) + } } else if browserMethod[r.Method] { // If this is a page view, and the client provided a // token via query string or POST body, we must put @@ -326,7 +347,7 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) { fsprefix := "" if useSiteFS { if writeMethod[r.Method] { - http.Error(w, errReadOnly.Error(), http.StatusMethodNotAllowed) + http.Error(w, webdavfs.ErrReadOnly.Error(), http.StatusMethodNotAllowed) return } if len(reqTokens) == 0 { @@ -342,6 +363,10 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) { fsprefix = "by_id/" + collectionID + "/" } + if src := r.Header.Get("X-Webdav-Source"); strings.HasPrefix(src, "/") && !strings.Contains(src, "//") && !strings.Contains(src, "/../") { + fsprefix += src[1:] + } + if tokens == nil { tokens = reqTokens if h.Cluster.Users.AnonymousUserToken != "" { @@ -349,15 +374,6 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) { } } - if tokens == nil { - if !credentialsOK { - http.Error(w, fmt.Sprintf("Authorization tokens are not accepted here: %v, and no anonymous user token is configured.", reasonNotAcceptingCredentials), http.StatusUnauthorized) - } else { - http.Error(w, fmt.Sprintf("No authorization token in request, and no anonymous user token is configured."), http.StatusUnauthorized) - } - return - } - if len(targetPath) > 0 && targetPath[0] == "_" { // If a collection has a directory called "t=foo" or // "_", it can be served at @@ -374,14 +390,15 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) { dirOpenMode = os.O_RDWR } - validToken := make(map[string]bool) + var tokenValid bool + var tokenScopeProblem bool var token string var tokenUser *arvados.User var sessionFS arvados.CustomFileSystem var session *cachedSession var collectionDir arvados.File for _, token = range tokens { - var statusErr 
interface{ HTTPStatus() int } + var statusErr errorWithHTTPStatus fs, sess, user, err := h.Cache.GetSession(token) if errors.As(err, &statusErr) && statusErr.HTTPStatus() == http.StatusUnauthorized { // bad token @@ -390,19 +407,26 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) { http.Error(w, "cache error: "+err.Error(), http.StatusInternalServerError) return } + if token != h.Cluster.Users.AnonymousUserToken { + tokenValid = true + } f, err := fs.OpenFile(fsprefix, dirOpenMode, 0) - if errors.As(err, &statusErr) && statusErr.HTTPStatus() == http.StatusForbidden { - // collection id is outside token scope - validToken[token] = true + if errors.As(err, &statusErr) && + statusErr.HTTPStatus() == http.StatusForbidden && + token != h.Cluster.Users.AnonymousUserToken { + // collection id is outside scope of supplied + // token + tokenScopeProblem = true + sess.Release() continue - } - validToken[token] = true - if os.IsNotExist(err) { + } else if os.IsNotExist(err) { // collection does not exist or is not // readable using this token + sess.Release() continue } else if err != nil { http.Error(w, err.Error(), http.StatusInternalServerError) + sess.Release() return } defer f.Close() @@ -410,12 +434,27 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) { collectionDir, sessionFS, session, tokenUser = f, fs, sess, user break } - if forceReload { + + // releaseSession() is equivalent to session.Release() except + // that it's a no-op if (1) session is nil, or (2) it has + // already been called. + // + // This way, we can do a defer call here to ensure it gets + // called in all code paths, and also call it inline (see + // below) in the cases where we want to release the lock + // before returning. 
+ releaseSession := func() {} + if session != nil { + var releaseSessionOnce sync.Once + releaseSession = func() { releaseSessionOnce.Do(func() { session.Release() }) } + } + defer releaseSession() + + if forceReload && collectionDir != nil { err := collectionDir.Sync() if err != nil { - var statusErr interface{ HTTPStatus() int } - if errors.As(err, &statusErr) { - http.Error(w, err.Error(), statusErr.HTTPStatus()) + if he := errorWithHTTPStatus(nil); errors.As(err, &he) { + http.Error(w, err.Error(), he.HTTPStatus()) } else { http.Error(w, err.Error(), http.StatusInternalServerError) } @@ -423,22 +462,27 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) { } } if session == nil { - if pathToken || !credentialsOK { - // Either the URL is a "secret sharing link" - // that didn't work out (and asking the client - // for additional credentials would just be - // confusing), or we don't even accept - // credentials at this path. + if pathToken { + // The URL is a "secret sharing link" that + // didn't work out. Asking the client for + // additional credentials would just be + // confusing. http.Error(w, notFoundMessage, http.StatusNotFound) return } - for _, t := range reqTokens { - if validToken[t] { - // The client provided valid token(s), - // but the collection was not found. - http.Error(w, notFoundMessage, http.StatusNotFound) - return - } + if tokenValid { + // The client provided valid token(s), but the + // collection was not found. + http.Error(w, notFoundMessage, http.StatusNotFound) + return + } + if tokenScopeProblem { + // The client provided a valid token but + // fetching a collection returned 403, which + // means the token scope doesn't permit + // fetching that collection. + http.Error(w, notFoundMessage, http.StatusForbidden) + return } // The client's token was invalid (e.g., expired), or // the client didn't even provide one. 
Redirect to @@ -475,16 +519,25 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) { target.RawQuery = redirkey + "=" + callback w.Header().Add("Location", target.String()) w.WriteHeader(http.StatusSeeOther) - } else { - w.Header().Add("WWW-Authenticate", "Basic realm=\"collections\"") - http.Error(w, unauthorizedMessage, http.StatusUnauthorized) + return } + if !credentialsOK { + http.Error(w, fmt.Sprintf("Authorization tokens are not accepted here: %v, and no anonymous user token is configured.", reasonNotAcceptingCredentials), http.StatusUnauthorized) + return + } + // If none of the above cases apply, suggest the + // user-agent (which is either a non-browser agent + // like wget, or a browser that can't redirect through + // a login flow) prompt the user for credentials. + w.Header().Add("WWW-Authenticate", "Basic realm=\"collections\"") + http.Error(w, unauthorizedMessage, http.StatusUnauthorized) return } if r.Method == http.MethodGet || r.Method == http.MethodHead { targetfnm := fsprefix + strings.Join(pathParts[stripParts:], "/") if fi, err := sessionFS.Stat(targetfnm); err == nil && fi.IsDir() { + releaseSession() // because we won't be writing anything if !strings.HasSuffix(r.URL.Path, "/") { h.seeOtherWithCookie(w, r, r.URL.Path+"/", credentialsOK) } else { @@ -499,7 +552,7 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) { basename = targetPath[len(targetPath)-1] } if arvadosclient.PDHMatch(collectionID) && writeMethod[r.Method] { - http.Error(w, errReadOnly.Error(), http.StatusMethodNotAllowed) + http.Error(w, webdavfs.ErrReadOnly.Error(), http.StatusMethodNotAllowed) return } if !h.userPermittedToUploadOrDownload(r.Method, tokenUser) { @@ -508,7 +561,11 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) { } h.logUploadOrDownload(r, session.arvadosclient, sessionFS, fsprefix+strings.Join(targetPath, "/"), nil, tokenUser) - if writeMethod[r.Method] { + writing := 
writeMethod[r.Method] + locker := h.collectionLock(collectionID, writing) + defer locker.Unlock() + + if writing { // Save the collection only if/when all // webdav->filesystem operations succeed -- // and send a 500 error if the modified @@ -550,26 +607,38 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) { collectionDir.Splice(snap) return nil }} + } else { + // When writing, we need to block session renewal + // until we're finished, in order to guarantee the + // effect of the write is visible in future responses. + // But if we're not writing, we can release the lock + // early. This enables us to keep renewing sessions + // and processing more requests even if a slow client + // takes a long time to download a large file. + releaseSession() } if r.Method == http.MethodGet { applyContentDispositionHdr(w, r, basename, attachment) } - wh := webdav.Handler{ - Prefix: "/" + strings.Join(pathParts[:stripParts], "/"), - FileSystem: &webdavFS{ - collfs: sessionFS, - prefix: fsprefix, - writing: writeMethod[r.Method], - alwaysReadEOF: r.Method == "PROPFIND", + if webdavPrefix == "" { + webdavPrefix = "/" + strings.Join(pathParts[:stripParts], "/") + } + wh := &webdav.Handler{ + Prefix: webdavPrefix, + FileSystem: &webdavfs.FS{ + FileSystem: sessionFS, + Prefix: fsprefix, + Writing: writeMethod[r.Method], + AlwaysReadEOF: r.Method == "PROPFIND", }, - LockSystem: h.webdavLS, + LockSystem: webdavfs.NoLockSystem, Logger: func(r *http.Request, err error) { - if err != nil { + if err != nil && !os.IsNotExist(err) { ctxlog.FromContext(r.Context()).WithError(err).Error("error reported by webdav handler") } }, } - wh.ServeHTTP(w, r) + h.metrics.track(wh, w, r) if r.Method == http.MethodGet && w.WroteStatus() == http.StatusOK { wrote := int64(w.WroteBodyBytes()) fnm := strings.Join(pathParts[stripParts:], "/") @@ -738,7 +807,7 @@ func applyContentDispositionHdr(w http.ResponseWriter, r *http.Request, filename } func (h *handler) seeOtherWithCookie(w 
http.ResponseWriter, r *http.Request, location string, credentialsOK bool) { - if formToken := r.FormValue("api_token"); formToken != "" { + if formTokens, haveFormTokens := r.Form["api_token"]; haveFormTokens { if !credentialsOK { // It is not safe to copy the provided token // into a cookie unless the current vhost @@ -759,13 +828,19 @@ func (h *handler) seeOtherWithCookie(w http.ResponseWriter, r *http.Request, loc // bar, and in the case of a POST request to avoid // raising warnings when the user refreshes the // resulting page. - http.SetCookie(w, &http.Cookie{ - Name: "arvados_api_token", - Value: auth.EncodeTokenCookie([]byte(formToken)), - Path: "/", - HttpOnly: true, - SameSite: http.SameSiteLaxMode, - }) + for _, tok := range formTokens { + if tok == "" { + continue + } + http.SetCookie(w, &http.Cookie{ + Name: "arvados_api_token", + Value: auth.EncodeTokenCookie([]byte(tok)), + Path: "/", + HttpOnly: true, + SameSite: http.SameSiteLaxMode, + }) + break + } } // Propagate query parameters (except api_token) from @@ -916,3 +991,54 @@ func (h *handler) determineCollection(fs arvados.CustomFileSystem, path string) } return nil, "" } + +var lockTidyInterval = time.Minute * 10 + +// Lock the specified collection for reading or writing. Caller must +// call Unlock() on the returned Locker when the operation is +// finished. +func (h *handler) collectionLock(collectionID string, writing bool) sync.Locker { + h.lockMtx.Lock() + defer h.lockMtx.Unlock() + if time.Since(h.lockTidied) > lockTidyInterval { + // Periodically delete all locks that aren't in use. 
+ h.lockTidied = time.Now() + for id, locker := range h.lock { + if locker.TryLock() { + locker.Unlock() + delete(h.lock, id) + } + } + } + locker := h.lock[collectionID] + if locker == nil { + locker = new(sync.RWMutex) + if h.lock == nil { + h.lock = map[string]*sync.RWMutex{} + } + h.lock[collectionID] = locker + } + if writing { + locker.Lock() + return locker + } else { + locker.RLock() + return locker.RLocker() + } +} + +func ServeCORSPreflight(w http.ResponseWriter, header http.Header) bool { + method := header.Get("Access-Control-Request-Method") + if method == "" { + return false + } + if !browserMethod[method] && !webdavMethod[method] { + w.WriteHeader(http.StatusMethodNotAllowed) + return true + } + w.Header().Set("Access-Control-Allow-Headers", corsAllowHeadersHeader) + w.Header().Set("Access-Control-Allow-Methods", "COPY, DELETE, GET, LOCK, MKCOL, MOVE, OPTIONS, POST, PROPFIND, PROPPATCH, PUT, RMCOL, UNLOCK") + w.Header().Set("Access-Control-Allow-Origin", "*") + w.Header().Set("Access-Control-Max-Age", "86400") + return true +}