10666: Merge branch 'master' into 10666-report-version
[arvados.git] / services / keep-web / handler.go
index 5e3e4afdb41f7a703b9bc1f978d203642d1aaea3..a1476d3a8eb1b62fad8ea519702ec290e7f3472c 100644 (file)
@@ -1,28 +1,39 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
 package main
 
 import (
+       "encoding/json"
        "fmt"
        "html"
+       "html/template"
        "io"
+       "log"
        "net/http"
        "net/url"
        "os"
-       "path"
+       "sort"
        "strconv"
        "strings"
        "sync"
-       "time"
 
+       "git.curoverse.com/arvados.git/sdk/go/arvados"
        "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
        "git.curoverse.com/arvados.git/sdk/go/auth"
+       "git.curoverse.com/arvados.git/sdk/go/health"
        "git.curoverse.com/arvados.git/sdk/go/httpserver"
        "git.curoverse.com/arvados.git/sdk/go/keepclient"
+       "golang.org/x/net/webdav"
 )
 
 type handler struct {
-       Config     *Config
-       clientPool *arvadosclient.ClientPool
-       setupOnce  sync.Once
+       // Config supplies the settings this handler reads
+       // (ManagementToken, AnonymousTokens, Cache).
+       Config        *Config
+       // clientPool is created by setup(); ServeHTTP obtains an
+       // API client from it while handling a request.
+       clientPool    *arvadosclient.ClientPool
+       // setupOnce makes ServeHTTP run setup() only once.
+       setupOnce     sync.Once
+       // healthHandler serves GET requests whose path starts
+       // with "/_health/".
+       healthHandler http.Handler
+       // webdavLS is the LockSystem passed to each
+       // webdav.Handler; setup() installs a noLockSystem.
+       webdavLS      webdav.LockSystem
 }
 
 // parseCollectionIDFromDNSName returns a UUID or PDH if s begins with
@@ -64,8 +75,96 @@ func parseCollectionIDFromURL(s string) string {
 
+// setup initializes the handler's client pool, health handler and
+// webdav lock system. ServeHTTP invokes it through h.setupOnce.
 func (h *handler) setup() {
        h.clientPool = arvadosclient.MakeClientPool()
+
+       // NOTE(review): presumably makes keepclient re-read the
+       // Keep service list when the process receives SIGHUP --
+       // confirm against the keepclient package.
+       keepclient.RefreshServiceDiscoveryOnSIGHUP()
+
+       // The health handler is configured with the management
+       // token and the same "/_health/" prefix that ServeHTTP
+       // uses to route requests to it.
+       h.healthHandler = &health.Handler{
+               Token:  h.Config.ManagementToken,
+               Prefix: "/_health/",
+       }
+
+       // Even though we don't accept LOCK requests, every webdav
+       // handler must have a non-nil LockSystem.
+       h.webdavLS = &noLockSystem{}
 }
 
+func (h *handler) serveStatus(w http.ResponseWriter, r *http.Request) {
+       status := struct {
+               cacheStats
+               Version string
+       }{
+               cacheStats: h.Config.Cache.Stats(),
+               Version:    version,
+       }
+       json.NewEncoder(w).Encode(status)
+}
+
+// updateOnSuccess wraps httpserver.ResponseWriter. If the handler
+// sends an HTTP header indicating success, updateOnSuccess first
+// calls the provided update func. If the update func fails, a 500
+// response is sent, and the status code and body sent by the handler
+// are ignored (all response writes return the update error).
+type updateOnSuccess struct {
+       httpserver.ResponseWriter
+       // update is called at most once, from the first
+       // WriteHeader call, and only if that call's status code
+       // is in the range 200-399.
+       update     func() error
+       // sentHeader records that WriteHeader has been called
+       // (explicitly, or implicitly by Write).
+       sentHeader bool
+       // err holds the error returned by update, if any.
+       err        error
+}
+
+func (uos *updateOnSuccess) Write(p []byte) (int, error) {
+       if uos.err != nil {
+               return 0, uos.err
+       }
+       if !uos.sentHeader {
+               uos.WriteHeader(http.StatusOK)
+       }
+       return uos.ResponseWriter.Write(p)
+}
+
+func (uos *updateOnSuccess) WriteHeader(code int) {
+       if !uos.sentHeader {
+               uos.sentHeader = true
+               if code >= 200 && code < 400 {
+                       if uos.err = uos.update(); uos.err != nil {
+                               code := http.StatusInternalServerError
+                               if err, ok := uos.err.(*arvados.TransactionError); ok {
+                                       code = err.StatusCode
+                               }
+                               log.Printf("update() changes response to HTTP %d: %T %q", code, uos.err, uos.err)
+                               http.Error(uos.ResponseWriter, uos.err.Error(), code)
+                               return
+                       }
+               }
+       }
+       uos.ResponseWriter.WriteHeader(code)
+}
+
+// writeMethod is the set of request methods treated as writes:
+// ServeHTTP rejects them for portable-data-hash targets and wraps
+// the response in updateOnSuccess for them.
+var writeMethod = map[string]bool{
+       "COPY":   true,
+       "DELETE": true,
+       "MKCOL":  true,
+       "MOVE":   true,
+       "PUT":    true,
+       "RMCOL":  true,
+}
+
+// webdavMethod is the set of request methods that ServeHTTP hands
+// to a webdav.Handler.
+var webdavMethod = map[string]bool{
+       "COPY":     true,
+       "DELETE":   true,
+       "MKCOL":    true,
+       "MOVE":     true,
+       "OPTIONS":  true,
+       "PROPFIND": true,
+       "PUT":      true,
+       "RMCOL":    true,
+}
+
+// browserMethod is the set of request methods for which a token
+// supplied in the query string or form is moved into a cookie by
+// way of a redirect.
+var browserMethod = map[string]bool{
+       "GET":  true,
+       "HEAD": true,
+       "POST": true,
+}
+
 // ServeHTTP implements http.Handler.
 func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
        h.setupOnce.Do(h.setup)
@@ -94,21 +193,25 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
                httpserver.Log(remoteAddr, statusCode, statusText, w.WroteBodyBytes(), r.Method, r.Host, r.URL.Path, r.URL.RawQuery)
        }()
 
-       if r.Method == "OPTIONS" {
-               method := r.Header.Get("Access-Control-Request-Method")
-               if method != "GET" && method != "POST" {
+       if strings.HasPrefix(r.URL.Path, "/_health/") && r.Method == "GET" {
+               h.healthHandler.ServeHTTP(w, r)
+               return
+       }
+
+       if method := r.Header.Get("Access-Control-Request-Method"); method != "" && r.Method == "OPTIONS" {
+               if !browserMethod[method] && !webdavMethod[method] {
                        statusCode = http.StatusMethodNotAllowed
                        return
                }
-               w.Header().Set("Access-Control-Allow-Headers", "Range")
-               w.Header().Set("Access-Control-Allow-Methods", "GET, POST")
+               w.Header().Set("Access-Control-Allow-Headers", "Authorization, Content-Type, Range")
+               w.Header().Set("Access-Control-Allow-Methods", "COPY, DELETE, GET, MKCOL, MOVE, OPTIONS, POST, PROPFIND, PUT, RMCOL")
                w.Header().Set("Access-Control-Allow-Origin", "*")
                w.Header().Set("Access-Control-Max-Age", "86400")
                statusCode = http.StatusOK
                return
        }
 
-       if r.Method != "GET" && r.Method != "POST" {
+       if !browserMethod[r.Method] && !webdavMethod[r.Method] {
                statusCode, statusText = http.StatusMethodNotAllowed, r.Method
                return
        }
@@ -120,6 +223,7 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
                // SSL certificates. See
                // http://www.w3.org/TR/cors/#user-credentials).
                w.Header().Set("Access-Control-Allow-Origin", "*")
+               w.Header().Set("Access-Control-Expose-Headers", "Content-Range")
        }
 
        arv := h.clientPool.Get()
@@ -131,8 +235,8 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
 
        pathParts := strings.Split(r.URL.Path[1:], "/")
 
+       var stripParts int
        var targetID string
-       var targetPath []string
        var tokens []string
        var reqTokens []string
        var pathToken bool
@@ -149,23 +253,25 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
        if targetID = parseCollectionIDFromDNSName(r.Host); targetID != "" {
                // http://ID.collections.example/PATH...
                credentialsOK = true
-               targetPath = pathParts
-       } else if len(pathParts) >= 2 && strings.HasPrefix(pathParts[0], "c=") {
-               // /c=ID/PATH...
+       } else if r.URL.Path == "/status.json" {
+               h.serveStatus(w, r)
+               return
+       } else if len(pathParts) >= 1 && strings.HasPrefix(pathParts[0], "c=") {
+               // /c=ID[/PATH...]
                targetID = parseCollectionIDFromURL(pathParts[0][2:])
-               targetPath = pathParts[1:]
-       } else if len(pathParts) >= 3 && pathParts[0] == "collections" {
-               if len(pathParts) >= 5 && pathParts[1] == "download" {
+               stripParts = 1
+       } else if len(pathParts) >= 2 && pathParts[0] == "collections" {
+               if len(pathParts) >= 4 && pathParts[1] == "download" {
                        // /collections/download/ID/TOKEN/PATH...
                        targetID = parseCollectionIDFromURL(pathParts[2])
                        tokens = []string{pathParts[3]}
-                       targetPath = pathParts[4:]
+                       stripParts = 4
                        pathToken = true
                } else {
                        // /collections/ID/PATH...
                        targetID = parseCollectionIDFromURL(pathParts[1])
                        tokens = h.Config.AnonymousTokens
-                       targetPath = pathParts[2:]
+                       stripParts = 2
                }
        }
 
@@ -190,62 +296,18 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
                // * The token isn't embedded in the URL, so we don't
                //   need to worry about bookmarks and copy/paste.
                tokens = append(tokens, formToken)
-       } else if formToken != "" {
+       } else if formToken != "" && browserMethod[r.Method] {
                // The client provided an explicit token in the query
                // string, or a form in POST body. We must put the
                // token in an HttpOnly cookie, and redirect to the
                // same URL with the query param redacted and method =
                // GET.
-
-               if !credentialsOK {
-                       // It is not safe to copy the provided token
-                       // into a cookie unless the current vhost
-                       // (origin) serves only a single collection or
-                       // we are in TrustAllContent mode.
-                       statusCode = http.StatusBadRequest
-                       return
-               }
-
-               // The HttpOnly flag is necessary to prevent
-               // JavaScript code (included in, or loaded by, a page
-               // in the collection being served) from employing the
-               // user's token beyond reading other files in the same
-               // domain, i.e., same collection.
-               //
-               // The 303 redirect is necessary in the case of a GET
-               // request to avoid exposing the token in the Location
-               // bar, and in the case of a POST request to avoid
-               // raising warnings when the user refreshes the
-               // resulting page.
-
-               http.SetCookie(w, &http.Cookie{
-                       Name:     "arvados_api_token",
-                       Value:    auth.EncodeTokenCookie([]byte(formToken)),
-                       Path:     "/",
-                       HttpOnly: true,
-               })
-
-               // Propagate query parameters (except api_token) from
-               // the original request.
-               redirQuery := r.URL.Query()
-               redirQuery.Del("api_token")
-
-               redir := (&url.URL{
-                       Host:     r.Host,
-                       Path:     r.URL.Path,
-                       RawQuery: redirQuery.Encode(),
-               }).String()
-
-               w.Header().Add("Location", redir)
-               statusCode, statusText = http.StatusSeeOther, redir
-               w.WriteHeader(statusCode)
-               io.WriteString(w, `<A href="`)
-               io.WriteString(w, html.EscapeString(redir))
-               io.WriteString(w, `">Continue</A>`)
+               h.seeOtherWithCookie(w, r, "", credentialsOK)
                return
        }
 
-       if tokens == nil && strings.HasPrefix(targetPath[0], "t=") {
+       targetPath := pathParts[stripParts:]
+       if tokens == nil && len(targetPath) > 0 && strings.HasPrefix(targetPath[0], "t=") {
                // http://ID.example/t=TOKEN/PATH...
                // /c=ID/t=TOKEN/PATH...
                //
@@ -255,6 +317,7 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
                tokens = []string{targetPath[0][2:]}
                pathToken = true
                targetPath = targetPath[1:]
+               stripParts++
        }
 
        if tokens == nil {
@@ -272,16 +335,21 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
                // //collections.example/t=foo/ won't work because
                // t=foo will be interpreted as a token "foo".
                targetPath = targetPath[1:]
+               stripParts++
        }
 
+       forceReload := false
+       if cc := r.Header.Get("Cache-Control"); strings.Contains(cc, "no-cache") || strings.Contains(cc, "must-revalidate") {
+               forceReload = true
+       }
+
+       var collection *arvados.Collection
        tokenResult := make(map[string]int)
-       collection := make(map[string]interface{})
-       found := false
        for _, arv.ApiToken = range tokens {
-               err := arv.Get("collections", targetID, nil, &collection)
+               var err error
+               collection, err = h.Config.Cache.Get(arv, targetID, forceReload)
                if err == nil {
                        // Success
-                       found = true
                        break
                }
                if srvErr, ok := err.(arvadosclient.APIServerError); ok {
@@ -297,7 +365,7 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
                statusCode, statusText = http.StatusInternalServerError, err.Error()
                return
        }
-       if !found {
+       if collection == nil {
                if pathToken || !credentialsOK {
                        // Either the URL is a "secret sharing link"
                        // that didn't work out (and asking the client
@@ -329,37 +397,219 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
                return
        }
 
-       filename := strings.Join(targetPath, "/")
        kc, err := keepclient.MakeKeepClient(arv)
        if err != nil {
                statusCode, statusText = http.StatusInternalServerError, err.Error()
                return
        }
-       if client, ok := kc.Client.(*http.Client); ok && client.Transport != nil {
-               // Workaround for https://dev.arvados.org/issues/9005
-               if t, ok := client.Transport.(*http.Transport); ok {
-                       defer t.CloseIdleConnections()
+
+       var basename string
+       if len(targetPath) > 0 {
+               basename = targetPath[len(targetPath)-1]
+       }
+       applyContentDispositionHdr(w, r, basename, attachment)
+
+       client := &arvados.Client{
+               APIHost:   arv.ApiServer,
+               AuthToken: arv.ApiToken,
+               Insecure:  arv.ApiInsecure,
+       }
+       fs, err := collection.FileSystem(client, kc)
+       if err != nil {
+               statusCode, statusText = http.StatusInternalServerError, err.Error()
+               return
+       }
+
+       targetIsPDH := arvadosclient.PDHMatch(targetID)
+       if targetIsPDH && writeMethod[r.Method] {
+               statusCode, statusText = http.StatusMethodNotAllowed, errReadOnly.Error()
+               return
+       }
+
+       if webdavMethod[r.Method] {
+               if writeMethod[r.Method] {
+                       // Save the collection only if/when all
+                       // webdav->filesystem operations succeed --
+                       // and send a 500 error if the modified
+                       // collection can't be saved.
+                       w = &updateOnSuccess{
+                               ResponseWriter: w,
+                               update: func() error {
+                                       return h.Config.Cache.Update(client, *collection, fs)
+                               }}
+               }
+               h := webdav.Handler{
+                       Prefix: "/" + strings.Join(pathParts[:stripParts], "/"),
+                       FileSystem: &webdavFS{
+                               collfs:  fs,
+                               writing: writeMethod[r.Method],
+                       },
+                       LockSystem: h.webdavLS,
+                       Logger: func(_ *http.Request, err error) {
+                               if err != nil {
+                                       log.Printf("error from webdav handler: %q", err)
+                               }
+                       },
                }
+               h.ServeHTTP(w, r)
+               return
        }
-       rdr, err := kc.CollectionFileReader(collection, filename)
-       if os.IsNotExist(err) {
+
+       openPath := "/" + strings.Join(targetPath, "/")
+       if f, err := fs.Open(openPath); os.IsNotExist(err) {
+               // Requested non-existent path
                statusCode = http.StatusNotFound
-               return
        } else if err != nil {
-               statusCode, statusText = http.StatusBadGateway, err.Error()
-               return
+               // Some other (unexpected) error
+               statusCode, statusText = http.StatusInternalServerError, err.Error()
+       } else if stat, err := f.Stat(); err != nil {
+               // Can't get Size/IsDir (shouldn't happen with a collectionFS!)
+               statusCode, statusText = http.StatusInternalServerError, err.Error()
+       } else if stat.IsDir() && !strings.HasSuffix(r.URL.Path, "/") {
+               // If client requests ".../dirname", redirect to
+               // ".../dirname/". This way, relative links in the
+               // listing for "dirname" can always be "fnm", never
+               // "dirname/fnm".
+               h.seeOtherWithCookie(w, r, r.URL.Path+"/", credentialsOK)
+       } else if stat.IsDir() {
+               h.serveDirectory(w, r, collection.Name, fs, openPath, stripParts)
+       } else {
+               http.ServeContent(w, r, basename, stat.ModTime(), f)
+               if r.Header.Get("Range") == "" && int64(w.WroteBodyBytes()) != stat.Size() {
+                       // If we wrote fewer bytes than expected, it's
+                       // too late to change the real response code
+                       // or send an error message to the client, but
+                       // at least we can try to put some useful
+                       // debugging info in the logs.
+                       n, err := f.Read(make([]byte, 1024))
+                       statusCode, statusText = http.StatusInternalServerError, fmt.Sprintf("f.Size()==%d but only wrote %d bytes; read(1024) returns %d, %s", stat.Size(), w.WroteBodyBytes(), n, err)
+
+               }
        }
-       defer rdr.Close()
+}
 
-       basename := path.Base(filename)
-       applyContentDispositionHdr(w, r, basename, attachment)
+var dirListingTemplate = `<!DOCTYPE HTML>
+<HTML><HEAD>
+  <META name="robots" content="NOINDEX">
+  <TITLE>{{ .Collection.Name }}</TITLE>
+  <STYLE type="text/css">
+    body {
+      margin: 1.5em;
+    }
+    pre {
+      background-color: #D9EDF7;
+      border-radius: .25em;
+      padding: .75em;
+      overflow: auto;
+    }
+    .footer p {
+      font-size: 82%;
+    }
+    ul {
+      padding: 0;
+    }
+    ul li {
+      font-family: monospace;
+      list-style: none;
+    }
+  </STYLE>
+</HEAD>
+<BODY>
+<H1>{{ .CollectionName }}</H1>
+
+<P>This collection of data files is being shared with you through
+Arvados.  You can download individual files listed below.  To download
+the entire collection with wget, try:</P>
+
+<PRE>$ wget --mirror --no-parent --no-host --cut-dirs={{ .StripParts }} https://{{ .Request.Host }}{{ .Request.URL }}</PRE>
+
+<H2>File Listing</H2>
+
+{{if .Files}}
+<UL>
+{{range .Files}}  <LI>{{.Size | printf "%15d  " | nbsp}}<A href="{{.Name}}">{{.Name}}</A></LI>{{end}}
+</UL>
+{{else}}
+<P>(No files; this collection is empty.)</P>
+{{end}}
+
+<HR noshade>
+<DIV class="footer">
+  <P>
+    About Arvados:
+    Arvados is a free and open source software bioinformatics platform.
+    To learn more, visit arvados.org.
+    Arvados is not responsible for the files listed on this page.
+  </P>
+</DIV>
+
+</BODY>
+`
+
+// fileListEnt is one row of a directory listing page.
+type fileListEnt struct {
+       // Name is the file's slash-separated path relative to the
+       // directory being listed.
+       Name string
+       // Size is the value reported by the file's os.FileInfo.
+       Size int64
+}
+
+// serveDirectory writes an HTML page listing every regular file
+// found by recursively walking the directory base in fs.
+//
+// collectionName is shown as the page title and heading. stripParts
+// is the number of leading URL path components that precede the
+// collection content; it is only used to build the suggested wget
+// command. If the walk or template parsing fails, a 500 response is
+// sent. Once the 200 header has been written, a template execution
+// error can only be logged.
+func (h *handler) serveDirectory(w http.ResponseWriter, r *http.Request, collectionName string, fs http.FileSystem, base string, stripParts int) {
+       var files []fileListEnt
+       var walk func(string) error
+       if !strings.HasSuffix(base, "/") {
+               base = base + "/"
+       }
+       // walk appends the files under base+path to files; path is
+       // either "" or a relative directory name ending in "/".
+       walk = func(path string) error {
+               dirname := base + path
+               if dirname != "/" {
+                       dirname = strings.TrimSuffix(dirname, "/")
+               }
+               d, err := fs.Open(dirname)
+               if err != nil {
+                       return err
+               }
+               ents, err := d.Readdir(-1)
+               // Release the directory handle before recursing, so
+               // a deep tree doesn't keep one handle open per
+               // level. The handle was only read from, so a Close
+               // error carries no useful information here.
+               d.Close()
+               if err != nil {
+                       return err
+               }
+               for _, ent := range ents {
+                       if ent.IsDir() {
+                               err = walk(path + ent.Name() + "/")
+                               if err != nil {
+                                       return err
+                               }
+                       } else {
+                               files = append(files, fileListEnt{
+                                       Name: path + ent.Name(),
+                                       Size: ent.Size(),
+                               })
+                       }
+               }
+               return nil
+       }
+       if err := walk(""); err != nil {
+               http.Error(w, err.Error(), http.StatusInternalServerError)
+               return
+       }
 
-       modstr, _ := collection["modified_at"].(string)
-       modtime, err := time.Parse(time.RFC3339Nano, modstr)
+       funcs := template.FuncMap{
+               "nbsp": func(s string) template.HTML {
+                       return template.HTML(strings.Replace(s, " ", "&nbsp;", -1))
+               },
+       }
+       tmpl, err := template.New("dir").Funcs(funcs).Parse(dirListingTemplate)
        if err != nil {
-               modtime = time.Now()
+               http.Error(w, err.Error(), http.StatusInternalServerError)
+               return
        }
-       http.ServeContent(w, r, basename, modtime, rdr)
+       sort.Slice(files, func(i, j int) bool {
+               return files[i].Name < files[j].Name
+       })
+       w.WriteHeader(http.StatusOK)
+       err = tmpl.Execute(w, map[string]interface{}{
+               "CollectionName": collectionName,
+               "Files":          files,
+               "Request":        r,
+               "StripParts":     stripParts,
+       })
+       if err != nil {
+               // The 200 header is already on the wire, so the
+               // best we can do is record why the page is cut off.
+               log.Printf("error executing directory listing template: %q", err)
+       }
 }
 
 func applyContentDispositionHdr(w http.ResponseWriter, r *http.Request, filename string, isAttachment bool) {
@@ -379,3 +629,60 @@ func applyContentDispositionHdr(w http.ResponseWriter, r *http.Request, filename
                w.Header().Set("Content-Disposition", disposition)
        }
 }
+
+func (h *handler) seeOtherWithCookie(w http.ResponseWriter, r *http.Request, location string, credentialsOK bool) {
+       if formToken := r.FormValue("api_token"); formToken != "" {
+               if !credentialsOK {
+                       // It is not safe to copy the provided token
+                       // into a cookie unless the current vhost
+                       // (origin) serves only a single collection or
+                       // we are in TrustAllContent mode.
+                       w.WriteHeader(http.StatusBadRequest)
+                       return
+               }
+
+               // The HttpOnly flag is necessary to prevent
+               // JavaScript code (included in, or loaded by, a page
+               // in the collection being served) from employing the
+               // user's token beyond reading other files in the same
+               // domain, i.e., same collection.
+               //
+               // The 303 redirect is necessary in the case of a GET
+               // request to avoid exposing the token in the Location
+               // bar, and in the case of a POST request to avoid
+               // raising warnings when the user refreshes the
+               // resulting page.
+               http.SetCookie(w, &http.Cookie{
+                       Name:     "arvados_api_token",
+                       Value:    auth.EncodeTokenCookie([]byte(formToken)),
+                       Path:     "/",
+                       HttpOnly: true,
+               })
+       }
+
+       // Propagate query parameters (except api_token) from
+       // the original request.
+       redirQuery := r.URL.Query()
+       redirQuery.Del("api_token")
+
+       u := r.URL
+       if location != "" {
+               newu, err := u.Parse(location)
+               if err != nil {
+                       w.WriteHeader(http.StatusInternalServerError)
+                       return
+               }
+               u = newu
+       }
+       redir := (&url.URL{
+               Host:     r.Host,
+               Path:     u.Path,
+               RawQuery: redirQuery.Encode(),
+       }).String()
+
+       w.Header().Add("Location", redir)
+       w.WriteHeader(http.StatusSeeOther)
+       io.WriteString(w, `<A href="`)
+       io.WriteString(w, html.EscapeString(redir))
+       io.WriteString(w, `">Continue</A>`)
+}