12167: Merge branch 'master'
[arvados.git] / services / keep-web / handler.go
index 16df210fe83cc12096549f5609fafeecdcc09340..517ec1a2a26e96967ad50bec925a65b1f6149f6a 100644 (file)
@@ -13,6 +13,7 @@ import (
        "net/http"
        "net/url"
        "os"
+       "path/filepath"
        "sort"
        "strconv"
        "strings"
@@ -24,6 +25,8 @@ import (
        "git.curoverse.com/arvados.git/sdk/go/health"
        "git.curoverse.com/arvados.git/sdk/go/httpserver"
        "git.curoverse.com/arvados.git/sdk/go/keepclient"
+       log "github.com/Sirupsen/logrus"
+       "golang.org/x/net/webdav"
 )
 
 type handler struct {
@@ -31,6 +34,7 @@ type handler struct {
        clientPool    *arvadosclient.ClientPool
        setupOnce     sync.Once
        healthHandler http.Handler
+       webdavLS      webdav.LockSystem
 }
 
 // parseCollectionIDFromDNSName returns a UUID or PDH if s begins with
@@ -79,17 +83,95 @@ func (h *handler) setup() {
                Token:  h.Config.ManagementToken,
                Prefix: "/_health/",
        }
+
+       // Even though we don't accept LOCK requests, every webdav
+       // handler must have a non-nil LockSystem.
+       h.webdavLS = &noLockSystem{}
 }
 
 func (h *handler) serveStatus(w http.ResponseWriter, r *http.Request) {
        status := struct {
                cacheStats
+               Version string
        }{
                cacheStats: h.Config.Cache.Stats(),
+               Version:    version,
        }
        json.NewEncoder(w).Encode(status)
 }
 
+// updateOnSuccess wraps httpserver.ResponseWriter. If the handler
+// sends an HTTP header indicating success, updateOnSuccess first
+// calls the provided update func. If the update func fails, a 500
+// response is sent, and the status code and body sent by the handler
+// are ignored (all response writes return the update error).
+type updateOnSuccess struct {
+       httpserver.ResponseWriter
+       update     func() error
+       sentHeader bool
+       err        error
+}
+
+func (uos *updateOnSuccess) Write(p []byte) (int, error) {
+       if !uos.sentHeader {
+               uos.WriteHeader(http.StatusOK)
+       }
+       if uos.err != nil {
+               return 0, uos.err
+       }
+       return uos.ResponseWriter.Write(p)
+}
+
+func (uos *updateOnSuccess) WriteHeader(code int) {
+       if !uos.sentHeader {
+               uos.sentHeader = true
+               if code >= 200 && code < 400 {
+                       if uos.err = uos.update(); uos.err != nil {
+                               code := http.StatusInternalServerError
+                               if err, ok := uos.err.(*arvados.TransactionError); ok {
+                                       code = err.StatusCode
+                               }
+                               log.Printf("update() changes response to HTTP %d: %T %q", code, uos.err, uos.err)
+                               http.Error(uos.ResponseWriter, uos.err.Error(), code)
+                               return
+                       }
+               }
+       }
+       uos.ResponseWriter.WriteHeader(code)
+}
+
+var (
+       writeMethod = map[string]bool{
+               "COPY":   true,
+               "DELETE": true,
+               "MKCOL":  true,
+               "MOVE":   true,
+               "PUT":    true,
+               "RMCOL":  true,
+       }
+       webdavMethod = map[string]bool{
+               "COPY":     true,
+               "DELETE":   true,
+               "MKCOL":    true,
+               "MOVE":     true,
+               "OPTIONS":  true,
+               "PROPFIND": true,
+               "PUT":      true,
+               "RMCOL":    true,
+       }
+       browserMethod = map[string]bool{
+               "GET":  true,
+               "HEAD": true,
+               "POST": true,
+       }
+       // top-level dirs to serve with siteFS
+       siteFSDir = map[string]bool{
+               "":      true, // root directory
+               "by_id": true,
+               "users": true,
+       }
+)
+
 // ServeHTTP implements http.Handler.
 func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
        h.setupOnce.Do(h.setup)
@@ -109,13 +191,12 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
                } else if w.WroteStatus() == 0 {
                        w.WriteHeader(statusCode)
                } else if w.WroteStatus() != statusCode {
-                       httpserver.Log(r.RemoteAddr, "WARNING",
+                       log.WithField("RequestID", r.Header.Get("X-Request-Id")).Warn(
                                fmt.Sprintf("Our status changed from %d to %d after we sent headers", w.WroteStatus(), statusCode))
                }
                if statusText == "" {
                        statusText = http.StatusText(statusCode)
                }
-               httpserver.Log(remoteAddr, statusCode, statusText, w.WroteBodyBytes(), r.Method, r.Host, r.URL.Path, r.URL.RawQuery)
        }()
 
        if strings.HasPrefix(r.URL.Path, "/_health/") && r.Method == "GET" {
@@ -123,21 +204,20 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
                return
        }
 
-       if r.Method == "OPTIONS" {
-               method := r.Header.Get("Access-Control-Request-Method")
-               if method != "GET" && method != "POST" {
+       if method := r.Header.Get("Access-Control-Request-Method"); method != "" && r.Method == "OPTIONS" {
+               if !browserMethod[method] && !webdavMethod[method] {
                        statusCode = http.StatusMethodNotAllowed
                        return
                }
-               w.Header().Set("Access-Control-Allow-Headers", "Range")
-               w.Header().Set("Access-Control-Allow-Methods", "GET, POST")
+               w.Header().Set("Access-Control-Allow-Headers", "Authorization, Content-Type, Range")
+               w.Header().Set("Access-Control-Allow-Methods", "COPY, DELETE, GET, MKCOL, MOVE, OPTIONS, POST, PROPFIND, PUT, RMCOL")
                w.Header().Set("Access-Control-Allow-Origin", "*")
                w.Header().Set("Access-Control-Max-Age", "86400")
                statusCode = http.StatusOK
                return
        }
 
-       if r.Method != "GET" && r.Method != "POST" {
+       if !browserMethod[r.Method] && !webdavMethod[r.Method] {
                statusCode, statusText = http.StatusMethodNotAllowed, r.Method
                return
        }
@@ -149,23 +229,18 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
                // SSL certificates. See
                // http://www.w3.org/TR/cors/#user-credentials).
                w.Header().Set("Access-Control-Allow-Origin", "*")
+               w.Header().Set("Access-Control-Expose-Headers", "Content-Range")
        }
 
-       arv := h.clientPool.Get()
-       if arv == nil {
-               statusCode, statusText = http.StatusInternalServerError, "Pool failed: "+h.clientPool.Err().Error()
-               return
-       }
-       defer h.clientPool.Put(arv)
-
        pathParts := strings.Split(r.URL.Path[1:], "/")
 
        var stripParts int
-       var targetID string
+       var collectionID string
        var tokens []string
        var reqTokens []string
        var pathToken bool
        var attachment bool
+       var useSiteFS bool
        credentialsOK := h.Config.TrustAllContent
 
        if r.Host != "" && r.Host == h.Config.AttachmentOnlyHost {
@@ -175,36 +250,43 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
                attachment = true
        }
 
-       if targetID = parseCollectionIDFromDNSName(r.Host); targetID != "" {
+       if collectionID = parseCollectionIDFromDNSName(r.Host); collectionID != "" {
                // http://ID.collections.example/PATH...
                credentialsOK = true
        } else if r.URL.Path == "/status.json" {
                h.serveStatus(w, r)
                return
+       } else if siteFSDir[pathParts[0]] {
+               useSiteFS = true
        } else if len(pathParts) >= 1 && strings.HasPrefix(pathParts[0], "c=") {
                // /c=ID[/PATH...]
-               targetID = parseCollectionIDFromURL(pathParts[0][2:])
+               collectionID = parseCollectionIDFromURL(pathParts[0][2:])
                stripParts = 1
        } else if len(pathParts) >= 2 && pathParts[0] == "collections" {
                if len(pathParts) >= 4 && pathParts[1] == "download" {
                        // /collections/download/ID/TOKEN/PATH...
-                       targetID = parseCollectionIDFromURL(pathParts[2])
+                       collectionID = parseCollectionIDFromURL(pathParts[2])
                        tokens = []string{pathParts[3]}
                        stripParts = 4
                        pathToken = true
                } else {
                        // /collections/ID/PATH...
-                       targetID = parseCollectionIDFromURL(pathParts[1])
+                       collectionID = parseCollectionIDFromURL(pathParts[1])
                        tokens = h.Config.AnonymousTokens
                        stripParts = 2
                }
        }
 
-       if targetID == "" {
+       if collectionID == "" && !useSiteFS {
                statusCode = http.StatusNotFound
                return
        }
 
+       forceReload := false
+       if cc := r.Header.Get("Cache-Control"); strings.Contains(cc, "no-cache") || strings.Contains(cc, "must-revalidate") {
+               forceReload = true
+       }
+
        formToken := r.FormValue("api_token")
        if formToken != "" && r.Header.Get("Origin") != "" && attachment && r.URL.Query().Get("api_token") == "" {
                // The client provided an explicit token in the POST
@@ -221,7 +303,7 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
                // * The token isn't embedded in the URL, so we don't
                //   need to worry about bookmarks and copy/paste.
                tokens = append(tokens, formToken)
-       } else if formToken != "" {
+       } else if formToken != "" && browserMethod[r.Method] {
                // The client provided an explicit token in the query
                // string, or a form in POST body. We must put the
                // token in an HttpOnly cookie, and redirect to the
@@ -231,6 +313,14 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
                return
        }
 
+       if useSiteFS {
+               if tokens == nil {
+                       tokens = auth.NewCredentialsFromHTTPRequest(r).Tokens
+               }
+               h.serveSiteFS(w, r, tokens, credentialsOK, attachment)
+               return
+       }
+
        targetPath := pathParts[stripParts:]
        if tokens == nil && len(targetPath) > 0 && strings.HasPrefix(targetPath[0], "t=") {
                // http://ID.example/t=TOKEN/PATH...
@@ -263,16 +353,18 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
                stripParts++
        }
 
-       forceReload := false
-       if cc := r.Header.Get("Cache-Control"); strings.Contains(cc, "no-cache") || strings.Contains(cc, "must-revalidate") {
-               forceReload = true
+       arv := h.clientPool.Get()
+       if arv == nil {
+               statusCode, statusText = http.StatusInternalServerError, "Pool failed: "+h.clientPool.Err().Error()
+               return
        }
+       defer h.clientPool.Put(arv)
 
        var collection *arvados.Collection
        tokenResult := make(map[string]int)
        for _, arv.ApiToken = range tokens {
                var err error
-               collection, err = h.Config.Cache.Get(arv, targetID, forceReload)
+               collection, err = h.Config.Cache.Get(arv, collectionID, forceReload)
                if err == nil {
                        // Success
                        break
@@ -327,15 +419,63 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
                statusCode, statusText = http.StatusInternalServerError, err.Error()
                return
        }
+       kc.RequestID = r.Header.Get("X-Request-Id")
 
-       basename := targetPath[len(targetPath)-1]
+       var basename string
+       if len(targetPath) > 0 {
+               basename = targetPath[len(targetPath)-1]
+       }
        applyContentDispositionHdr(w, r, basename, attachment)
 
-       fs := collection.FileSystem(&arvados.Client{
+       client := (&arvados.Client{
                APIHost:   arv.ApiServer,
                AuthToken: arv.ApiToken,
                Insecure:  arv.ApiInsecure,
-       }, kc)
+       }).WithRequestID(r.Header.Get("X-Request-Id"))
+
+       fs, err := collection.FileSystem(client, kc)
+       if err != nil {
+               statusCode, statusText = http.StatusInternalServerError, err.Error()
+               return
+       }
+
+       writefs, writeOK := fs.(arvados.CollectionFileSystem)
+       targetIsPDH := arvadosclient.PDHMatch(collectionID)
+       if (targetIsPDH || !writeOK) && writeMethod[r.Method] {
+               statusCode, statusText = http.StatusMethodNotAllowed, errReadOnly.Error()
+               return
+       }
+
+       if webdavMethod[r.Method] {
+               if writeMethod[r.Method] {
+                       // Save the collection only if/when all
+                       // webdav->filesystem operations succeed --
+                       // and send a 500 error if the modified
+                       // collection can't be saved.
+                       w = &updateOnSuccess{
+                               ResponseWriter: w,
+                               update: func() error {
+                                       return h.Config.Cache.Update(client, *collection, writefs)
+                               }}
+               }
+               h := webdav.Handler{
+                       Prefix: "/" + strings.Join(pathParts[:stripParts], "/"),
+                       FileSystem: &webdavFS{
+                               collfs:        fs,
+                               writing:       writeMethod[r.Method],
+                               alwaysReadEOF: r.Method == "PROPFIND",
+                       },
+                       LockSystem: h.webdavLS,
+                       Logger: func(_ *http.Request, err error) {
+                               if err != nil {
+                                       log.Printf("error from webdav handler: %q", err)
+                               }
+                       },
+               }
+               h.ServeHTTP(w, r)
+               return
+       }
+
        openPath := "/" + strings.Join(targetPath, "/")
        if f, err := fs.Open(openPath); os.IsNotExist(err) {
                // Requested non-existent path
@@ -351,9 +491,9 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
                // ".../dirname/". This way, relative links in the
                // listing for "dirname" can always be "fnm", never
                // "dirname/fnm".
-               h.seeOtherWithCookie(w, r, basename+"/", credentialsOK)
+               h.seeOtherWithCookie(w, r, r.URL.Path+"/", credentialsOK)
        } else if stat.IsDir() {
-               h.serveDirectory(w, r, collection.Name, fs, openPath, stripParts)
+               h.serveDirectory(w, r, collection.Name, fs, openPath, true)
        } else {
                http.ServeContent(w, r, basename, stat.ModTime(), f)
                if r.Header.Get("Range") == "" && int64(w.WroteBodyBytes()) != stat.Size() {
@@ -369,10 +509,78 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
        }
 }
 
+func (h *handler) serveSiteFS(w http.ResponseWriter, r *http.Request, tokens []string, credentialsOK, attachment bool) {
+       if len(tokens) == 0 {
+               w.Header().Add("WWW-Authenticate", "Basic realm=\"collections\"")
+               http.Error(w, http.StatusText(http.StatusUnauthorized), http.StatusUnauthorized)
+               return
+       }
+       if writeMethod[r.Method] {
+               http.Error(w, errReadOnly.Error(), http.StatusMethodNotAllowed)
+               return
+       }
+       arv := h.clientPool.Get()
+       if arv == nil {
+               http.Error(w, "Pool failed: "+h.clientPool.Err().Error(), http.StatusInternalServerError)
+               return
+       }
+       defer h.clientPool.Put(arv)
+       arv.ApiToken = tokens[0]
+
+       kc, err := keepclient.MakeKeepClient(arv)
+       if err != nil {
+               http.Error(w, err.Error(), http.StatusInternalServerError)
+               return
+       }
+       kc.RequestID = r.Header.Get("X-Request-Id")
+       client := (&arvados.Client{
+               APIHost:   arv.ApiServer,
+               AuthToken: arv.ApiToken,
+               Insecure:  arv.ApiInsecure,
+       }).WithRequestID(r.Header.Get("X-Request-Id"))
+       fs := client.SiteFileSystem(kc)
+       f, err := fs.Open(r.URL.Path)
+       if os.IsNotExist(err) {
+               http.Error(w, err.Error(), http.StatusNotFound)
+               return
+       } else if err != nil {
+               http.Error(w, err.Error(), http.StatusInternalServerError)
+               return
+       }
+       defer f.Close()
+       if fi, err := f.Stat(); err == nil && fi.IsDir() && r.Method == "GET" {
+               if !strings.HasSuffix(r.URL.Path, "/") {
+                       h.seeOtherWithCookie(w, r, r.URL.Path+"/", credentialsOK)
+               } else {
+                       h.serveDirectory(w, r, fi.Name(), fs, r.URL.Path, false)
+               }
+               return
+       }
+       if r.Method == "GET" {
+               _, basename := filepath.Split(r.URL.Path)
+               applyContentDispositionHdr(w, r, basename, attachment)
+       }
+       wh := webdav.Handler{
+               Prefix: "/",
+               FileSystem: &webdavFS{
+                       collfs:        fs,
+                       writing:       writeMethod[r.Method],
+                       alwaysReadEOF: r.Method == "PROPFIND",
+               },
+               LockSystem: h.webdavLS,
+               Logger: func(_ *http.Request, err error) {
+                       if err != nil {
+                               log.Printf("error from webdav handler: %q", err)
+                       }
+               },
+       }
+       wh.ServeHTTP(w, r)
+}
+
 var dirListingTemplate = `<!DOCTYPE HTML>
 <HTML><HEAD>
   <META name="robots" content="NOINDEX">
-  <TITLE>{{ .Collection.Name }}</TITLE>
+  <TITLE>{{ .CollectionName }}</TITLE>
   <STYLE type="text/css">
     body {
       margin: 1.5em;
@@ -396,19 +604,26 @@ var dirListingTemplate = `<!DOCTYPE HTML>
   </STYLE>
 </HEAD>
 <BODY>
+
 <H1>{{ .CollectionName }}</H1>
 
 <P>This collection of data files is being shared with you through
 Arvados.  You can download individual files listed below.  To download
-the entire collection with wget, try:</P>
+the entire directory tree with wget, try:</P>
 
-<PRE>$ wget --mirror --no-parent --no-host --cut-dirs={{ .StripParts }} https://{{ .Request.Host }}{{ .Request.URL }}</PRE>
+<PRE>$ wget --mirror --no-parent --no-host --cut-dirs={{ .StripParts }} https://{{ .Request.Host }}{{ .Request.URL.Path }}</PRE>
 
 <H2>File Listing</H2>
 
 {{if .Files}}
 <UL>
-{{range .Files}}  <LI>{{.Size | printf "%15d  " | nbsp}}<A href="{{.Name}}">{{.Name}}</A></LI>{{end}}
+{{range .Files}}
+{{if .IsDir }}
+  <LI>{{" " | printf "%15s  " | nbsp}}<A href="{{print "./" .Name}}/">{{.Name}}/</A></LI>
+{{else}}
+  <LI>{{.Size | printf "%15d  " | nbsp}}<A href="{{print "./" .Name}}">{{.Name}}</A></LI>
+{{end}}
+{{end}}
 </UL>
 {{else}}
 <P>(No files; this collection is empty.)</P>
@@ -428,11 +643,12 @@ the entire collection with wget, try:</P>
 `
 
 type fileListEnt struct {
-       Name string
-       Size int64
+       Name  string
+       Size  int64
+       IsDir bool
 }
 
-func (h *handler) serveDirectory(w http.ResponseWriter, r *http.Request, collectionName string, fs http.FileSystem, base string, stripParts int) {
+func (h *handler) serveDirectory(w http.ResponseWriter, r *http.Request, collectionName string, fs http.FileSystem, base string, recurse bool) {
        var files []fileListEnt
        var walk func(string) error
        if !strings.HasSuffix(base, "/") {
@@ -452,15 +668,16 @@ func (h *handler) serveDirectory(w http.ResponseWriter, r *http.Request, collect
                        return err
                }
                for _, ent := range ents {
-                       if ent.IsDir() {
+                       if recurse && ent.IsDir() {
                                err = walk(path + ent.Name() + "/")
                                if err != nil {
                                        return err
                                }
                        } else {
                                files = append(files, fileListEnt{
-                                       Name: path + ent.Name(),
-                                       Size: ent.Size(),
+                                       Name:  path + ent.Name(),
+                                       Size:  ent.Size(),
+                                       IsDir: ent.IsDir(),
                                })
                        }
                }
@@ -489,7 +706,7 @@ func (h *handler) serveDirectory(w http.ResponseWriter, r *http.Request, collect
                "CollectionName": collectionName,
                "Files":          files,
                "Request":        r,
-               "StripParts":     stripParts,
+               "StripParts":     strings.Count(strings.TrimRight(r.URL.Path, "/"), "/"),
        })
 }
 
@@ -512,16 +729,16 @@ func applyContentDispositionHdr(w http.ResponseWriter, r *http.Request, filename
 }
 
 func (h *handler) seeOtherWithCookie(w http.ResponseWriter, r *http.Request, location string, credentialsOK bool) {
-       if !credentialsOK {
-               // It is not safe to copy the provided token
-               // into a cookie unless the current vhost
-               // (origin) serves only a single collection or
-               // we are in TrustAllContent mode.
-               w.WriteHeader(http.StatusBadRequest)
-               return
-       }
-
        if formToken := r.FormValue("api_token"); formToken != "" {
+               if !credentialsOK {
+                       // It is not safe to copy the provided token
+                       // into a cookie unless the current vhost
+                       // (origin) serves only a single collection or
+                       // we are in TrustAllContent mode.
+                       w.WriteHeader(http.StatusBadRequest)
+                       return
+               }
+
                // The HttpOnly flag is necessary to prevent
                // JavaScript code (included in, or loaded by, a page
                // in the collection being served) from employing the
@@ -533,7 +750,6 @@ func (h *handler) seeOtherWithCookie(w http.ResponseWriter, r *http.Request, loc
                // bar, and in the case of a POST request to avoid
                // raising warnings when the user refreshes the
                // resulting page.
-
                http.SetCookie(w, &http.Cookie{
                        Name:     "arvados_api_token",
                        Value:    auth.EncodeTokenCookie([]byte(formToken)),