Merge branch '15928-fs-deadlock'
[arvados.git] / services / keep-web / handler.go
index 63e2f37a80b4e04a244ad1f32de9af03ba834287..bbbbd8f97bfce63a0e22da02cc5fbe32a1e5794e 100644 (file)
@@ -6,7 +6,6 @@ package main
 
 import (
        "encoding/json"
-       "fmt"
        "html"
        "html/template"
        "io"
@@ -19,18 +18,20 @@ import (
        "strings"
        "sync"
 
-       "git.curoverse.com/arvados.git/sdk/go/arvados"
-       "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
-       "git.curoverse.com/arvados.git/sdk/go/auth"
-       "git.curoverse.com/arvados.git/sdk/go/health"
-       "git.curoverse.com/arvados.git/sdk/go/httpserver"
-       "git.curoverse.com/arvados.git/sdk/go/keepclient"
-       log "github.com/Sirupsen/logrus"
+       "git.arvados.org/arvados.git/sdk/go/arvados"
+       "git.arvados.org/arvados.git/sdk/go/arvadosclient"
+       "git.arvados.org/arvados.git/sdk/go/auth"
+       "git.arvados.org/arvados.git/sdk/go/ctxlog"
+       "git.arvados.org/arvados.git/sdk/go/health"
+       "git.arvados.org/arvados.git/sdk/go/httpserver"
+       "git.arvados.org/arvados.git/sdk/go/keepclient"
+       "github.com/sirupsen/logrus"
        "golang.org/x/net/webdav"
 )
 
 type handler struct {
        Config        *Config
+       MetricsAPI    http.Handler
        clientPool    *arvadosclient.ClientPool
        setupOnce     sync.Once
        healthHandler http.Handler
@@ -78,9 +79,10 @@ func (h *handler) setup() {
        h.clientPool = arvadosclient.MakeClientPool()
 
        keepclient.RefreshServiceDiscoveryOnSIGHUP()
+       keepclient.DefaultBlockCache.MaxBlocks = h.Config.cluster.Collections.WebDAVCache.MaxBlockEntries
 
        h.healthHandler = &health.Handler{
-               Token:  h.Config.ManagementToken,
+               Token:  h.Config.cluster.ManagementToken,
                Prefix: "/_health/",
        }
 
@@ -90,14 +92,7 @@ func (h *handler) setup() {
 }
 
 func (h *handler) serveStatus(w http.ResponseWriter, r *http.Request) {
-       status := struct {
-               cacheStats
-               Version string
-       }{
-               cacheStats: h.Config.Cache.Stats(),
-               Version:    version,
-       }
-       json.NewEncoder(w).Encode(status)
+       json.NewEncoder(w).Encode(struct{ Version string }{version})
 }
 
 // updateOnSuccess wraps httpserver.ResponseWriter. If the handler
@@ -107,6 +102,7 @@ func (h *handler) serveStatus(w http.ResponseWriter, r *http.Request) {
 // are ignored (all response writes return the update error).
 type updateOnSuccess struct {
        httpserver.ResponseWriter
+       logger     logrus.FieldLogger
        update     func() error
        sentHeader bool
        err        error
@@ -131,7 +127,7 @@ func (uos *updateOnSuccess) WriteHeader(code int) {
                                if err, ok := uos.err.(*arvados.TransactionError); ok {
                                        code = err.StatusCode
                                }
-                               log.Printf("update() changes response to HTTP %d: %T %q", code, uos.err, uos.err)
+                               uos.logger.WithError(uos.err).Errorf("update() returned error type %T, changing response to HTTP %d", uos.err, code)
                                http.Error(uos.ResponseWriter, uos.err.Error(), code)
                                return
                        }
@@ -141,23 +137,34 @@ func (uos *updateOnSuccess) WriteHeader(code int) {
 }
 
 var (
+       corsAllowHeadersHeader = strings.Join([]string{
+               "Authorization", "Content-Type", "Range",
+               // WebDAV request headers:
+               "Depth", "Destination", "If", "Lock-Token", "Overwrite", "Timeout",
+       }, ", ")
        writeMethod = map[string]bool{
-               "COPY":   true,
-               "DELETE": true,
-               "MKCOL":  true,
-               "MOVE":   true,
-               "PUT":    true,
-               "RMCOL":  true,
+               "COPY":      true,
+               "DELETE":    true,
+               "LOCK":      true,
+               "MKCOL":     true,
+               "MOVE":      true,
+               "PROPPATCH": true,
+               "PUT":       true,
+               "RMCOL":     true,
+               "UNLOCK":    true,
        }
        webdavMethod = map[string]bool{
-               "COPY":     true,
-               "DELETE":   true,
-               "MKCOL":    true,
-               "MOVE":     true,
-               "OPTIONS":  true,
-               "PROPFIND": true,
-               "PUT":      true,
-               "RMCOL":    true,
+               "COPY":      true,
+               "DELETE":    true,
+               "LOCK":      true,
+               "MKCOL":     true,
+               "MOVE":      true,
+               "OPTIONS":   true,
+               "PROPFIND":  true,
+               "PROPPATCH": true,
+               "PUT":       true,
+               "RMCOL":     true,
+               "UNLOCK":    true,
        }
        browserMethod = map[string]bool{
                "GET":  true,
@@ -176,28 +183,15 @@ var (
 func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
        h.setupOnce.Do(h.setup)
 
-       var statusCode = 0
-       var statusText string
-
        remoteAddr := r.RemoteAddr
        if xff := r.Header.Get("X-Forwarded-For"); xff != "" {
                remoteAddr = xff + "," + remoteAddr
        }
+       if xfp := r.Header.Get("X-Forwarded-Proto"); xfp != "" && xfp != "http" {
+               r.URL.Scheme = xfp
+       }
 
        w := httpserver.WrapResponseWriter(wOrig)
-       defer func() {
-               if statusCode == 0 {
-                       statusCode = w.WroteStatus()
-               } else if w.WroteStatus() == 0 {
-                       w.WriteHeader(statusCode)
-               } else if w.WroteStatus() != statusCode {
-                       log.WithField("RequestID", r.Header.Get("X-Request-Id")).Warn(
-                               fmt.Sprintf("Our status changed from %d to %d after we sent headers", w.WroteStatus(), statusCode))
-               }
-               if statusText == "" {
-                       statusText = http.StatusText(statusCode)
-               }
-       }()
 
        if strings.HasPrefix(r.URL.Path, "/_health/") && r.Method == "GET" {
                h.healthHandler.ServeHTTP(w, r)
@@ -206,19 +200,18 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
 
        if method := r.Header.Get("Access-Control-Request-Method"); method != "" && r.Method == "OPTIONS" {
                if !browserMethod[method] && !webdavMethod[method] {
-                       statusCode = http.StatusMethodNotAllowed
+                       w.WriteHeader(http.StatusMethodNotAllowed)
                        return
                }
-               w.Header().Set("Access-Control-Allow-Headers", "Authorization, Content-Type, Range")
-               w.Header().Set("Access-Control-Allow-Methods", "COPY, DELETE, GET, MKCOL, MOVE, OPTIONS, POST, PROPFIND, PUT, RMCOL")
+               w.Header().Set("Access-Control-Allow-Headers", corsAllowHeadersHeader)
+               w.Header().Set("Access-Control-Allow-Methods", "COPY, DELETE, GET, LOCK, MKCOL, MOVE, OPTIONS, POST, PROPFIND, PROPPATCH, PUT, RMCOL, UNLOCK")
                w.Header().Set("Access-Control-Allow-Origin", "*")
                w.Header().Set("Access-Control-Max-Age", "86400")
-               statusCode = http.StatusOK
                return
        }
 
        if !browserMethod[r.Method] && !webdavMethod[r.Method] {
-               statusCode, statusText = http.StatusMethodNotAllowed, r.Method
+               w.WriteHeader(http.StatusMethodNotAllowed)
                return
        }
 
@@ -241,9 +234,9 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
        var pathToken bool
        var attachment bool
        var useSiteFS bool
-       credentialsOK := h.Config.TrustAllContent
+       credentialsOK := h.Config.cluster.Collections.TrustAllContent
 
-       if r.Host != "" && r.Host == h.Config.AttachmentOnlyHost {
+       if r.Host != "" && r.Host == h.Config.cluster.Services.WebDAVDownload.ExternalURL.Host {
                credentialsOK = true
                attachment = true
        } else if r.FormValue("disposition") == "attachment" {
@@ -256,6 +249,9 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
        } else if r.URL.Path == "/status.json" {
                h.serveStatus(w, r)
                return
+       } else if strings.HasPrefix(r.URL.Path, "/metrics") {
+               h.MetricsAPI.ServeHTTP(w, r)
+               return
        } else if siteFSDir[pathParts[0]] {
                useSiteFS = true
        } else if len(pathParts) >= 1 && strings.HasPrefix(pathParts[0], "c=") {
@@ -272,13 +268,16 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
                } else {
                        // /collections/ID/PATH...
                        collectionID = parseCollectionIDFromURL(pathParts[1])
-                       tokens = h.Config.AnonymousTokens
                        stripParts = 2
+                       // This path is only meant to work for public
+                       // data. Tokens provided with the request are
+                       // ignored.
+                       credentialsOK = false
                }
        }
 
        if collectionID == "" && !useSiteFS {
-               statusCode = http.StatusNotFound
+               w.WriteHeader(http.StatusNotFound)
                return
        }
 
@@ -287,6 +286,10 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
                forceReload = true
        }
 
+       if credentialsOK {
+               reqTokens = auth.CredentialsFromRequest(r).Tokens
+       }
+
        formToken := r.FormValue("api_token")
        if formToken != "" && r.Header.Get("Origin") != "" && attachment && r.URL.Query().Get("api_token") == "" {
                // The client provided an explicit token in the POST
@@ -302,7 +305,7 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
                //
                // * The token isn't embedded in the URL, so we don't
                //   need to worry about bookmarks and copy/paste.
-               tokens = append(tokens, formToken)
+               reqTokens = append(reqTokens, formToken)
        } else if formToken != "" && browserMethod[r.Method] {
                // The client provided an explicit token in the query
                // string, or a form in POST body. We must put the
@@ -313,6 +316,11 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
                return
        }
 
+       if useSiteFS {
+               h.serveSiteFS(w, r, reqTokens, credentialsOK, attachment)
+               return
+       }
+
        targetPath := pathParts[stripParts:]
        if tokens == nil && len(targetPath) > 0 && strings.HasPrefix(targetPath[0], "t=") {
                // http://ID.example/t=TOKEN/PATH...
@@ -328,15 +336,7 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
        }
 
        if tokens == nil {
-               if credentialsOK {
-                       reqTokens = auth.NewCredentialsFromHTTPRequest(r).Tokens
-               }
-               tokens = append(reqTokens, h.Config.AnonymousTokens...)
-       }
-
-       if useSiteFS {
-               h.serveSiteFS(w, r, tokens, credentialsOK, attachment)
-               return
+               tokens = append(reqTokens, h.Config.cluster.Users.AnonymousUserToken)
        }
 
        if len(targetPath) > 0 && targetPath[0] == "_" {
@@ -352,7 +352,7 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
 
        arv := h.clientPool.Get()
        if arv == nil {
-               statusCode, statusText = http.StatusInternalServerError, "Pool failed: "+h.clientPool.Err().Error()
+               http.Error(w, "client pool error: "+h.clientPool.Err().Error(), http.StatusInternalServerError)
                return
        }
        defer h.clientPool.Put(arv)
@@ -376,7 +376,7 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
                        }
                }
                // Something more serious is wrong
-               statusCode, statusText = http.StatusInternalServerError, err.Error()
+               http.Error(w, "cache error: "+err.Error(), http.StatusInternalServerError)
                return
        }
        if collection == nil {
@@ -386,14 +386,14 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
                        // for additional credentials would just be
                        // confusing), or we don't even accept
                        // credentials at this path.
-                       statusCode = http.StatusNotFound
+                       w.WriteHeader(http.StatusNotFound)
                        return
                }
                for _, t := range reqTokens {
                        if tokenResult[t] == 404 {
                                // The client provided valid token(s), but the
                                // collection was not found.
-                               statusCode = http.StatusNotFound
+                               w.WriteHeader(http.StatusNotFound)
                                return
                        }
                }
@@ -407,15 +407,16 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
                // data that has been deleted.  Allow a referrer to
                // provide this context somehow?
                w.Header().Add("WWW-Authenticate", "Basic realm=\"collections\"")
-               statusCode = http.StatusUnauthorized
+               w.WriteHeader(http.StatusUnauthorized)
                return
        }
 
        kc, err := keepclient.MakeKeepClient(arv)
        if err != nil {
-               statusCode, statusText = http.StatusInternalServerError, err.Error()
+               http.Error(w, "error setting up keep client: "+err.Error(), http.StatusInternalServerError)
                return
        }
+       kc.RequestID = r.Header.Get("X-Request-Id")
 
        var basename string
        if len(targetPath) > 0 {
@@ -431,14 +432,14 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
 
        fs, err := collection.FileSystem(client, kc)
        if err != nil {
-               statusCode, statusText = http.StatusInternalServerError, err.Error()
+               http.Error(w, "error creating collection filesystem: "+err.Error(), http.StatusInternalServerError)
                return
        }
 
        writefs, writeOK := fs.(arvados.CollectionFileSystem)
        targetIsPDH := arvadosclient.PDHMatch(collectionID)
        if (targetIsPDH || !writeOK) && writeMethod[r.Method] {
-               statusCode, statusText = http.StatusMethodNotAllowed, errReadOnly.Error()
+               http.Error(w, errReadOnly.Error(), http.StatusMethodNotAllowed)
                return
        }
 
@@ -450,6 +451,7 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
                        // collection can't be saved.
                        w = &updateOnSuccess{
                                ResponseWriter: w,
+                               logger:         ctxlog.FromContext(r.Context()),
                                update: func() error {
                                        return h.Config.Cache.Update(client, *collection, writefs)
                                }}
@@ -464,7 +466,7 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
                        LockSystem: h.webdavLS,
                        Logger: func(_ *http.Request, err error) {
                                if err != nil {
-                                       log.Printf("error from webdav handler: %q", err)
+                                       ctxlog.FromContext(r.Context()).WithError(err).Error("error reported by webdav handler")
                                }
                        },
                }
@@ -475,13 +477,13 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
        openPath := "/" + strings.Join(targetPath, "/")
        if f, err := fs.Open(openPath); os.IsNotExist(err) {
                // Requested non-existent path
-               statusCode = http.StatusNotFound
+               w.WriteHeader(http.StatusNotFound)
        } else if err != nil {
                // Some other (unexpected) error
-               statusCode, statusText = http.StatusInternalServerError, err.Error()
+               http.Error(w, "open: "+err.Error(), http.StatusInternalServerError)
        } else if stat, err := f.Stat(); err != nil {
                // Can't get Size/IsDir (shouldn't happen with a collectionFS!)
-               statusCode, statusText = http.StatusInternalServerError, err.Error()
+               http.Error(w, "stat: "+err.Error(), http.StatusInternalServerError)
        } else if stat.IsDir() && !strings.HasSuffix(r.URL.Path, "/") {
                // If client requests ".../dirname", redirect to
                // ".../dirname/". This way, relative links in the
@@ -492,14 +494,14 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
                h.serveDirectory(w, r, collection.Name, fs, openPath, true)
        } else {
                http.ServeContent(w, r, basename, stat.ModTime(), f)
-               if r.Header.Get("Range") == "" && int64(w.WroteBodyBytes()) != stat.Size() {
+               if wrote := int64(w.WroteBodyBytes()); wrote != stat.Size() && r.Header.Get("Range") == "" {
                        // If we wrote fewer bytes than expected, it's
                        // too late to change the real response code
                        // or send an error message to the client, but
                        // at least we can try to put some useful
                        // debugging info in the logs.
                        n, err := f.Read(make([]byte, 1024))
-                       statusCode, statusText = http.StatusInternalServerError, fmt.Sprintf("f.Size()==%d but only wrote %d bytes; read(1024) returns %d, %s", stat.Size(), w.WroteBodyBytes(), n, err)
+                       ctxlog.FromContext(r.Context()).Errorf("stat.Size()==%d but only wrote %d bytes; read(1024) returns %d, %s", stat.Size(), wrote, n, err)
 
                }
        }
@@ -525,9 +527,10 @@ func (h *handler) serveSiteFS(w http.ResponseWriter, r *http.Request, tokens []s
 
        kc, err := keepclient.MakeKeepClient(arv)
        if err != nil {
-               http.Error(w, err.Error(), http.StatusInternalServerError)
+               http.Error(w, "error setting up keep client: "+err.Error(), http.StatusInternalServerError)
                return
        }
+       kc.RequestID = r.Header.Get("X-Request-Id")
        client := (&arvados.Client{
                APIHost:   arv.ApiServer,
                AuthToken: arv.ApiToken,
@@ -565,7 +568,7 @@ func (h *handler) serveSiteFS(w http.ResponseWriter, r *http.Request, tokens []s
                LockSystem: h.webdavLS,
                Logger: func(_ *http.Request, err error) {
                        if err != nil {
-                               log.Printf("error from webdav handler: %q", err)
+                               ctxlog.FromContext(r.Context()).WithError(err).Error("error reported by webdav handler")
                        }
                },
        }
@@ -614,9 +617,9 @@ the entire directory tree with wget, try:</P>
 <UL>
 {{range .Files}}
 {{if .IsDir }}
-  <LI>{{" " | printf "%15s  " | nbsp}}<A href="{{.Name}}/">{{.Name}}/</A></LI>
+  <LI>{{" " | printf "%15s  " | nbsp}}<A href="{{print "./" .Name}}/">{{.Name}}/</A></LI>
 {{else}}
-  <LI>{{.Size | printf "%15d  " | nbsp}}<A href="{{.Name}}">{{.Name}}</A></LI>
+  <LI>{{.Size | printf "%15d  " | nbsp}}<A href="{{print "./" .Name}}">{{.Name}}</A></LI>
 {{end}}
 {{end}}
 </UL>
@@ -679,7 +682,7 @@ func (h *handler) serveDirectory(w http.ResponseWriter, r *http.Request, collect
                return nil
        }
        if err := walk(""); err != nil {
-               http.Error(w, err.Error(), http.StatusInternalServerError)
+               http.Error(w, "error getting directory listing: "+err.Error(), http.StatusInternalServerError)
                return
        }
 
@@ -690,7 +693,7 @@ func (h *handler) serveDirectory(w http.ResponseWriter, r *http.Request, collect
        }
        tmpl, err := template.New("dir").Funcs(funcs).Parse(dirListingTemplate)
        if err != nil {
-               http.Error(w, err.Error(), http.StatusInternalServerError)
+               http.Error(w, "error parsing template: "+err.Error(), http.StatusInternalServerError)
                return
        }
        sort.Slice(files, func(i, j int) bool {
@@ -730,7 +733,7 @@ func (h *handler) seeOtherWithCookie(w http.ResponseWriter, r *http.Request, loc
                        // into a cookie unless the current vhost
                        // (origin) serves only a single collection or
                        // we are in TrustAllContent mode.
-                       w.WriteHeader(http.StatusBadRequest)
+                       http.Error(w, "cannot serve inline content at this URL (possible configuration error; see https://doc.arvados.org/install/install-keep-web.html#dns)", http.StatusBadRequest)
                        return
                }
 
@@ -762,12 +765,13 @@ func (h *handler) seeOtherWithCookie(w http.ResponseWriter, r *http.Request, loc
        if location != "" {
                newu, err := u.Parse(location)
                if err != nil {
-                       w.WriteHeader(http.StatusInternalServerError)
+                       http.Error(w, "error resolving redirect target: "+err.Error(), http.StatusInternalServerError)
                        return
                }
                u = newu
        }
        redir := (&url.URL{
+               Scheme:   r.URL.Scheme,
                Host:     r.Host,
                Path:     u.Path,
                RawQuery: redirQuery.Encode(),