Arvados-DCO-1.1-Signed-off-by: Radhika Chippada <radhika@curoverse.com>
[arvados.git] / services / keep-web / handler.go
index 11d0d96b298de5e4369474418f9f78583634510e..769c2f59ce9a79effcb2e094fd166978941bfa3a 100644 (file)
@@ -1,18 +1,24 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
 package main
 
 import (
+       "encoding/json"
        "fmt"
        "html"
+       "html/template"
        "io"
-       "mime"
        "net/http"
        "net/url"
        "os"
-       "regexp"
+       "sort"
        "strconv"
        "strings"
        "sync"
 
+       "git.curoverse.com/arvados.git/sdk/go/arvados"
        "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
        "git.curoverse.com/arvados.git/sdk/go/auth"
        "git.curoverse.com/arvados.git/sdk/go/httpserver"
@@ -64,6 +70,16 @@ func parseCollectionIDFromURL(s string) string {
 
 func (h *handler) setup() {
        h.clientPool = arvadosclient.MakeClientPool()
+       keepclient.RefreshServiceDiscoveryOnSIGHUP() // NOTE(review): presumably re-runs Keep service discovery on SIGHUP; confirm in keepclient
+}
+
+// serveStatus responds with the current cache statistics encoded as JSON.
+func (h *handler) serveStatus(w http.ResponseWriter, r *http.Request) {
+       w.Header().Set("Content-Type", "application/json")
+       status := struct{ cacheStats }{h.Config.Cache.Stats()}
+       if err := json.NewEncoder(w).Encode(status); err != nil {
+               http.Error(w, err.Error(), http.StatusInternalServerError)
+       }
 }
 
 // ServeHTTP implements http.Handler.
@@ -94,6 +110,20 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
                httpserver.Log(remoteAddr, statusCode, statusText, w.WroteBodyBytes(), r.Method, r.Host, r.URL.Path, r.URL.RawQuery)
        }()
 
+       if r.Method == "OPTIONS" {
+               method := r.Header.Get("Access-Control-Request-Method")
+               if method != "GET" && method != "POST" {
+                       statusCode = http.StatusMethodNotAllowed
+                       return
+               }
+               w.Header().Set("Access-Control-Allow-Headers", "Range")
+               w.Header().Set("Access-Control-Allow-Methods", "GET, POST")
+               w.Header().Set("Access-Control-Allow-Origin", "*")
+               w.Header().Set("Access-Control-Max-Age", "86400")
+               statusCode = http.StatusOK
+               return
+       }
+
        if r.Method != "GET" && r.Method != "POST" {
                statusCode, statusText = http.StatusMethodNotAllowed, r.Method
                return
@@ -117,8 +147,8 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
 
        pathParts := strings.Split(r.URL.Path[1:], "/")
 
+       var stripParts int
        var targetID string
-       var targetPath []string
        var tokens []string
        var reqTokens []string
        var pathToken bool
@@ -135,25 +165,29 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
        if targetID = parseCollectionIDFromDNSName(r.Host); targetID != "" {
                // http://ID.collections.example/PATH...
                credentialsOK = true
-               targetPath = pathParts
-       } else if len(pathParts) >= 2 && strings.HasPrefix(pathParts[0], "c=") {
-               // /c=ID/PATH...
+       } else if r.URL.Path == "/status.json" {
+               h.serveStatus(w, r)
+               return
+       } else if len(pathParts) >= 1 && strings.HasPrefix(pathParts[0], "c=") {
+               // /c=ID[/PATH...]
                targetID = parseCollectionIDFromURL(pathParts[0][2:])
-               targetPath = pathParts[1:]
-       } else if len(pathParts) >= 3 && pathParts[0] == "collections" {
-               if len(pathParts) >= 5 && pathParts[1] == "download" {
+               stripParts = 1
+       } else if len(pathParts) >= 2 && pathParts[0] == "collections" {
+               if len(pathParts) >= 4 && pathParts[1] == "download" {
                        // /collections/download/ID/TOKEN/PATH...
-                       targetID = pathParts[2]
+                       targetID = parseCollectionIDFromURL(pathParts[2])
                        tokens = []string{pathParts[3]}
-                       targetPath = pathParts[4:]
+                       stripParts = 4
                        pathToken = true
                } else {
                        // /collections/ID/PATH...
-                       targetID = pathParts[1]
+                       targetID = parseCollectionIDFromURL(pathParts[1])
                        tokens = h.Config.AnonymousTokens
-                       targetPath = pathParts[2:]
+                       stripParts = 2
                }
-       } else {
+       }
+
+       if targetID == "" {
                statusCode = http.StatusNotFound
                return
        }
@@ -180,56 +214,12 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
                // token in an HttpOnly cookie, and redirect to the
                // same URL with the query param redacted and method =
                // GET.
-
-               if !credentialsOK {
-                       // It is not safe to copy the provided token
-                       // into a cookie unless the current vhost
-                       // (origin) serves only a single collection or
-                       // we are in TrustAllContent mode.
-                       statusCode = http.StatusBadRequest
-                       return
-               }
-
-               // The HttpOnly flag is necessary to prevent
-               // JavaScript code (included in, or loaded by, a page
-               // in the collection being served) from employing the
-               // user's token beyond reading other files in the same
-               // domain, i.e., same collection.
-               //
-               // The 303 redirect is necessary in the case of a GET
-               // request to avoid exposing the token in the Location
-               // bar, and in the case of a POST request to avoid
-               // raising warnings when the user refreshes the
-               // resulting page.
-
-               http.SetCookie(w, &http.Cookie{
-                       Name:     "arvados_api_token",
-                       Value:    auth.EncodeTokenCookie([]byte(formToken)),
-                       Path:     "/",
-                       HttpOnly: true,
-               })
-
-               // Propagate query parameters (except api_token) from
-               // the original request.
-               redirQuery := r.URL.Query()
-               redirQuery.Del("api_token")
-
-               redir := (&url.URL{
-                       Host:     r.Host,
-                       Path:     r.URL.Path,
-                       RawQuery: redirQuery.Encode(),
-               }).String()
-
-               w.Header().Add("Location", redir)
-               statusCode, statusText = http.StatusSeeOther, redir
-               w.WriteHeader(statusCode)
-               io.WriteString(w, `<A href="`)
-               io.WriteString(w, html.EscapeString(redir))
-               io.WriteString(w, `">Continue</A>`)
+               h.seeOtherWithCookie(w, r, "", credentialsOK)
                return
        }
 
-       if tokens == nil && strings.HasPrefix(targetPath[0], "t=") {
+       targetPath := pathParts[stripParts:]
+       if tokens == nil && len(targetPath) > 0 && strings.HasPrefix(targetPath[0], "t=") {
                // http://ID.example/t=TOKEN/PATH...
                // /c=ID/t=TOKEN/PATH...
                //
@@ -239,6 +229,7 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
                tokens = []string{targetPath[0][2:]}
                pathToken = true
                targetPath = targetPath[1:]
+               stripParts++
        }
 
        if tokens == nil {
@@ -256,16 +247,21 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
                // //collections.example/t=foo/ won't work because
                // t=foo will be interpreted as a token "foo".
                targetPath = targetPath[1:]
+               stripParts++
+       }
+
+       forceReload := false
+       if cc := r.Header.Get("Cache-Control"); strings.Contains(cc, "no-cache") || strings.Contains(cc, "must-revalidate") {
+               forceReload = true
        }
 
+       var collection *arvados.Collection
        tokenResult := make(map[string]int)
-       collection := make(map[string]interface{})
-       found := false
        for _, arv.ApiToken = range tokens {
-               err := arv.Get("collections", targetID, nil, &collection)
+               var err error
+               collection, err = h.Config.Cache.Get(arv, targetID, forceReload)
                if err == nil {
                        // Success
-                       found = true
                        break
                }
                if srvErr, ok := err.(arvadosclient.APIServerError); ok {
@@ -281,7 +277,7 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
                statusCode, statusText = http.StatusInternalServerError, err.Error()
                return
        }
-       if !found {
+       if collection == nil {
                if pathToken || !credentialsOK {
                        // Either the URL is a "secret sharing link"
                        // that didn't work out (and asking the client
@@ -313,73 +309,175 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
                return
        }
 
-       filename := strings.Join(targetPath, "/")
        kc, err := keepclient.MakeKeepClient(arv)
        if err != nil {
                statusCode, statusText = http.StatusInternalServerError, err.Error()
                return
        }
-       if kc.Client != nil && kc.Client.Transport != nil {
-               // Workaround for https://dev.arvados.org/issues/9005
-               if t, ok := kc.Client.Transport.(*http.Transport); ok {
-                       defer t.CloseIdleConnections()
-               }
-       }
-       rdr, err := kc.CollectionFileReader(collection, filename)
-       if os.IsNotExist(err) {
+
+       basename := targetPath[len(targetPath)-1]
+       applyContentDispositionHdr(w, r, basename, attachment)
+
+       fs := collection.FileSystem(&arvados.Client{
+               APIHost:   arv.ApiServer,
+               AuthToken: arv.ApiToken,
+               Insecure:  arv.ApiInsecure,
+       }, kc)
+       openPath := "/" + strings.Join(targetPath, "/")
+       if f, err := fs.Open(openPath); os.IsNotExist(err) {
+               // Requested non-existent path
                statusCode = http.StatusNotFound
-               return
        } else if err != nil {
-               statusCode, statusText = http.StatusBadGateway, err.Error()
-               return
-       }
-       defer rdr.Close()
+               // Some other (unexpected) error
+               statusCode, statusText = http.StatusInternalServerError, err.Error()
+       } else if stat, err := f.Stat(); err != nil {
+               // Can't get Size/IsDir (shouldn't happen with a collectionFS!)
+               statusCode, statusText = http.StatusInternalServerError, err.Error()
+       } else if stat.IsDir() && !strings.HasSuffix(r.URL.Path, "/") {
+               // If client requests ".../dirname", redirect to
+               // ".../dirname/". This way, relative links in the
+               // listing for "dirname" can always be "fnm", never
+               // "dirname/fnm".
+               h.seeOtherWithCookie(w, r, basename+"/", credentialsOK)
+       } else if stat.IsDir() {
+               h.serveDirectory(w, r, collection.Name, fs, openPath, stripParts)
+       } else {
+               http.ServeContent(w, r, basename, stat.ModTime(), f)
+               if r.Header.Get("Range") == "" && int64(w.WroteBodyBytes()) != stat.Size() {
+                       // If we wrote fewer bytes than expected, it's
+                       // too late to change the real response code
+                       // or send an error message to the client, but
+                       // at least we can try to put some useful
+                       // debugging info in the logs.
+                       n, err := f.Read(make([]byte, 1024))
+                       statusCode, statusText = http.StatusInternalServerError, fmt.Sprintf("f.Size()==%d but only wrote %d bytes; read(1024) returns %d, %s", stat.Size(), w.WroteBodyBytes(), n, err)
 
-       basenamePos := strings.LastIndex(filename, "/")
-       if basenamePos < 0 {
-               basenamePos = 0
-       }
-       extPos := strings.LastIndex(filename, ".")
-       if extPos > basenamePos {
-               // Now extPos is safely >= 0.
-               if t := mime.TypeByExtension(filename[extPos:]); t != "" {
-                       w.Header().Set("Content-Type", t)
                }
        }
-       if rdr, ok := rdr.(keepclient.ReadCloserWithLen); ok {
-               w.Header().Set("Content-Length", fmt.Sprintf("%d", rdr.Len()))
-       }
-
-       applyContentDispositionHdr(w, r, filename[basenamePos:], attachment)
-       rangeRdr, statusCode := applyRangeHdr(w, r, rdr)
+}
 
-       w.WriteHeader(statusCode)
-       _, err = io.Copy(w, rangeRdr)
-       if err != nil {
-               statusCode, statusText = http.StatusBadGateway, err.Error()
-       }
+var dirListingTemplate = `<!DOCTYPE HTML>
+<HTML><HEAD>
+  <META name="robots" content="NOINDEX">
+  <TITLE>{{ .CollectionName }}</TITLE>
+  <STYLE type="text/css">
+    body {
+      margin: 1.5em;
+    }
+    pre {
+      background-color: #D9EDF7;
+      border-radius: .25em;
+      padding: .75em;
+      overflow: auto;
+    }
+    .footer p {
+      font-size: 82%;
+    }
+    ul {
+      padding: 0;
+    }
+    ul li {
+      font-family: monospace;
+      list-style: none;
+    }
+  </STYLE>
+</HEAD>
+<BODY>
+<H1>{{ .CollectionName }}</H1>
+
+<P>This collection of data files is being shared with you through
+Arvados.  You can download individual files listed below.  To download
+the entire collection with wget, try:</P>
+
+<PRE>$ wget --mirror --no-parent --no-host --cut-dirs={{ .StripParts }} https://{{ .Request.Host }}{{ .Request.URL }}</PRE>
+
+<H2>File Listing</H2>
+
+{{if .Files}}
+<UL>
+{{range .Files}}  <LI>{{.Size | printf "%15d  " | nbsp}}<A href="{{.Name}}">{{.Name}}</A></LI>{{end}}
+</UL>
+{{else}}
+<P>(No files; this collection is empty.)</P>
+{{end}}
+
+<HR noshade>
+<DIV class="footer">
+  <P>
+    About Arvados:
+    Arvados is a free and open source software bioinformatics platform.
+    To learn more, visit arvados.org.
+    Arvados is not responsible for the files listed on this page.
+  </P>
+</DIV>
+
+</BODY>
+` // executed by serveDirectory with keys CollectionName, Files, Request, StripParts
+
+type fileListEnt struct {
+       Name string // file path relative to the directory being listed
+       Size int64  // value of the directory entry's Size()
 }
 
-var rangeRe = regexp.MustCompile(`^bytes=0-([0-9]*)$`)
+// serveDirectory writes an HTML listing of all files found under base in fs, recursing into subdirectories.
+func (h *handler) serveDirectory(w http.ResponseWriter, r *http.Request, collectionName string, fs http.FileSystem, base string, stripParts int) {
+       var files []fileListEnt
+       var walk func(string) error
+       if !strings.HasSuffix(base, "/") {
+               base = base + "/"
+       }
+       walk = func(path string) error {
+               dirname := base + path
+               if dirname != "/" {
+                       dirname = strings.TrimSuffix(dirname, "/")
+               }
+               d, err := fs.Open(dirname)
+               if err != nil {
+                       return err
+               }
+               defer d.Close() // every opened http.File must be closed
+               ents, err := d.Readdir(-1)
+               if err != nil {
+                       return err
+               }
+               for _, ent := range ents {
+                       if ent.IsDir() {
+                               err = walk(path + ent.Name() + "/")
+                               if err != nil {
+                                       return err
+                               }
+                       } else {
+                               files = append(files, fileListEnt{
+                                       Name: path + ent.Name(),
+                                       Size: ent.Size(),
+                               })
+                       }
+               }
+               return nil
+       }
+       if err := walk(""); err != nil {
+               http.Error(w, err.Error(), http.StatusInternalServerError)
+               return
+       }
 
-func applyRangeHdr(w http.ResponseWriter, r *http.Request, rdr keepclient.ReadCloserWithLen) (io.Reader, int) {
-       w.Header().Set("Accept-Ranges", "bytes")
-       hdr := r.Header.Get("Range")
-       fields := rangeRe.FindStringSubmatch(hdr)
-       if fields == nil {
-               return rdr, http.StatusOK
+       funcs := template.FuncMap{
+               "nbsp": func(s string) template.HTML {
+                       return template.HTML(strings.Replace(s, " ", "&nbsp;", -1))
+               },
        }
-       rangeEnd, err := strconv.ParseInt(fields[1], 10, 64)
+       tmpl, err := template.New("dir").Funcs(funcs).Parse(dirListingTemplate)
        if err != nil {
-               // Empty or too big for int64 == send entire content
-               return rdr, http.StatusOK
-       }
-       if uint64(rangeEnd) >= rdr.Len() {
-               return rdr, http.StatusOK
+               http.Error(w, err.Error(), http.StatusInternalServerError)
+               return
        }
-       w.Header().Set("Content-Length", fmt.Sprintf("%d", rangeEnd+1))
-       w.Header().Set("Content-Range", fmt.Sprintf("bytes %d-%d/%d", 0, rangeEnd, rdr.Len()))
-       return &io.LimitedReader{R: rdr, N: rangeEnd + 1}, http.StatusPartialContent
+       sort.Slice(files, func(i, j int) bool { return files[i].Name < files[j].Name })
+       w.WriteHeader(http.StatusOK)
+       tmpl.Execute(w, map[string]interface{}{
+               "CollectionName": collectionName,
+               "Files":          files,
+               "Request":        r,
+               "StripParts":     stripParts,
+       })
 }
 
 func applyContentDispositionHdr(w http.ResponseWriter, r *http.Request, filename string, isAttachment bool) {
@@ -399,3 +497,61 @@ func applyContentDispositionHdr(w http.ResponseWriter, r *http.Request, filename
                w.Header().Set("Content-Disposition", disposition)
        }
 }
+
+// seeOtherWithCookie sends a 303 redirect to location (resolved against
+// the request URL), moving any api_token form value into a cookie.
+func (h *handler) seeOtherWithCookie(w http.ResponseWriter, r *http.Request, location string, credentialsOK bool) {
+       if formToken := r.FormValue("api_token"); formToken != "" {
+               if !credentialsOK {
+                       // It is not safe to copy the provided token
+                       // into a cookie unless the current vhost
+                       // (origin) serves only a single collection or
+                       // we are in TrustAllContent mode. Redirects that
+                       // set no cookie are not subject to this check.
+                       w.WriteHeader(http.StatusBadRequest)
+                       return
+               }
+               // The HttpOnly flag is necessary to prevent
+               // JavaScript code (included in, or loaded by, a page
+               // in the collection being served) from employing the
+               // user's token beyond reading other files in the same
+               // domain, i.e., same collection.
+               //
+               // The 303 redirect is necessary in the case of a GET
+               // request to avoid exposing the token in the Location
+               // bar, and in the case of a POST request to avoid
+               // raising warnings when the user refreshes the
+               // resulting page.
+               http.SetCookie(w, &http.Cookie{
+                       Name:     "arvados_api_token",
+                       Value:    auth.EncodeTokenCookie([]byte(formToken)),
+                       Path:     "/",
+                       HttpOnly: true,
+               })
+       }
+
+       // Propagate query parameters (except api_token) from
+       // the original request.
+       redirQuery := r.URL.Query()
+       redirQuery.Del("api_token")
+       u := r.URL
+       if location != "" {
+               newu, err := u.Parse(location)
+               if err != nil {
+                       w.WriteHeader(http.StatusInternalServerError)
+                       return
+               }
+               u = newu
+       }
+       redir := (&url.URL{
+               Host:     r.Host,
+               Path:     u.Path,
+               RawQuery: redirQuery.Encode(),
+       }).String()
+
+       w.Header().Add("Location", redir)
+       w.WriteHeader(http.StatusSeeOther)
+       io.WriteString(w, `<A href="`)
+       io.WriteString(w, html.EscapeString(redir))
+       io.WriteString(w, `">Continue</A>`)
+}