5824: Use vhosts in curl integration tests. Add large file test.
[arvados.git] / services / keep-web / handler.go
index 04af920b9e868d5db68fae78be9f365e98481f41..03b3e2600bcdc272e2d3a7f93c36fb0c8cffa7b6 100644 (file)
@@ -2,11 +2,14 @@ package main
 
 import (
        "fmt"
+       "html"
        "io"
        "mime"
        "net/http"
+       "net/url"
        "os"
        "strings"
+       "time"
 
        "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
        "git.curoverse.com/arvados.git/sdk/go/auth"
@@ -25,26 +28,49 @@ func init() {
        anonymousTokens = []string{}
 }
 
+// Return the part of s before the first "." (and before any "--" in that part) if it is a UUID or a PDH, otherwise "".
+func parseCollectionIdFromDNSName(s string) string {
+       // Strip domain: keep only the text before the first ".".
+       if i := strings.IndexRune(s, '.'); i >= 0 {
+               s = s[:i]
+       }
+       // Names like {uuid}--dl.example.com serve the same purpose as
+       // {uuid}.dl.example.com but can reduce cost/effort of using
+       // [additional] wildcard certificates.
+       if i := strings.Index(s, "--"); i >= 0 {
+               s = s[:i]
+       }
+       if !arvadosclient.UUIDMatch(s) && !arvadosclient.PDHMatch(s) {
+               return ""
+       }
+       return s
+}
+
 func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
-       var statusCode int
+       var statusCode = 0
        var statusText string
 
        w := httpserver.WrapResponseWriter(wOrig)
        defer func() {
-               if statusCode > 0 {
-                       if w.WroteStatus() == 0 {
-                               w.WriteHeader(statusCode)
-                       } else {
-                               httpserver.Log(r.RemoteAddr, "WARNING",
-                                       fmt.Sprintf("Our status changed from %d to %d after we sent headers", w.WroteStatus(), statusCode))
-                       }
+               if statusCode == 0 {
+                       statusCode = w.WroteStatus()
+               } else if w.WroteStatus() == 0 {
+                       w.WriteHeader(statusCode)
+               } else if w.WroteStatus() != statusCode {
+                       httpserver.Log(r.RemoteAddr, "WARNING",
+                               fmt.Sprintf("Our status changed from %d to %d after we sent headers", w.WroteStatus(), statusCode))
                }
                if statusText == "" {
                        statusText = http.StatusText(statusCode)
                }
-               httpserver.Log(r.RemoteAddr, statusCode, statusText, w.WroteBodyBytes(), r.Method, r.URL.Path)
+               httpserver.Log(r.RemoteAddr, statusCode, statusText, w.WroteBodyBytes(), r.Method, r.Host, r.URL.Path, r.URL.RawQuery)
        }()
 
+       if r.Method != "GET" && r.Method != "POST" {
+               statusCode, statusText = http.StatusMethodNotAllowed, r.Method
+               return
+       }
+
        arv := clientPool.Get()
        if arv == nil {
                statusCode, statusText = http.StatusInternalServerError, "Pool failed: "+clientPool.Err().Error()
@@ -54,17 +80,70 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
 
        pathParts := strings.Split(r.URL.Path[1:], "/")
 
-       if len(pathParts) < 3 || pathParts[0] != "collections" || pathParts[1] == "" || pathParts[2] == "" {
-               statusCode = http.StatusNotFound
-               return
-       }
-
        var targetId string
        var targetPath []string
        var tokens []string
        var reqTokens []string
        var pathToken bool
-       if len(pathParts) >= 5 && pathParts[1] == "download" {
+
+       if targetId = parseCollectionIdFromDNSName(r.Host); targetId != "" {
+               // "http://{id}.domain.example.com/{path}" form
+               if t := r.FormValue("api_token"); t != "" {
+                       // ...with explicit token in query string or
+                       // form in POST body. We must encrypt the
+                       // token such that it can only be used for
+                       // this collection; put it in an HttpOnly
+                       // cookie; and redirect to the same URL with
+                       // the query param redacted, and method =
+                       // GET.
+                       //
+                       // The HttpOnly flag is necessary to prevent
+                       // JavaScript code (included in, or loaded by,
+                       // a page in the collection being served) from
+                       // employing the user's token beyond reading
+                       // other files in the same domain, i.e., the
+                       // same collection.
+                       //
+                       // The 303 redirect is necessary in the case
+                       // of a GET request to avoid exposing the
+                       // token in the Location bar, and in the case
+                       // of a POST request to avoid raising warnings
+                       // when the user refreshes the resulting page.
+                       http.SetCookie(w, &http.Cookie{
+                               Name:    "api_token",
+                               Value:   auth.EncodeTokenCookie([]byte(t)),
+                               Path:    "/",
+                               Expires: time.Now().AddDate(10,0,0),
+                       })
+                       redir := (&url.URL{Host: r.Host, Path: r.URL.Path}).String()
+
+                       w.Header().Add("Location", redir)
+                       statusCode, statusText = http.StatusSeeOther, redir
+                       w.WriteHeader(statusCode)
+                       io.WriteString(w, `<A href="`)
+                       io.WriteString(w, html.EscapeString(redir))
+                       io.WriteString(w, `">Continue</A>`)
+                       return
+               } else if strings.HasPrefix(pathParts[0], "t=") {
+                       // ...with explicit token in path,
+                       // "{...}.com/t={token}/{path}".  This form
+                       // must only be used to pass scoped tokens
+                       // that give permission for a single
+                       // collection. See FormValue case above.
+                       tokens = []string{pathParts[0][2:]}
+                       targetPath = pathParts[1:]
+                       pathToken = true
+               } else {
+                       // ...with cookie, Authorization header, or
+                       // no token at all
+                       reqTokens = auth.NewCredentialsFromHTTPRequest(r).Tokens
+                       tokens = append(reqTokens, anonymousTokens...)
+                       targetPath = pathParts
+               }
+       } else if len(pathParts) < 3 || pathParts[0] != "collections" || pathParts[1] == "" || pathParts[2] == "" {
+               statusCode = http.StatusNotFound
+               return
+       } else if len(pathParts) >= 5 && pathParts[1] == "download" {
                // "/collections/download/{id}/{token}/path..." form:
                // Don't use our configured anonymous tokens,
                // Authorization headers, etc.  Just use the token in
@@ -86,7 +165,6 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
        found := false
        for _, arv.ApiToken = range tokens {
                err := arv.Get("collections", targetId, nil, &collection)
-               httpserver.Log(err)
                if err == nil {
                        // Success
                        found = true
@@ -131,8 +209,8 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
                // someone trying (anonymously) to download public
                // data that has been deleted.  Allow a referrer to
                // provide this context somehow?
-               statusCode = http.StatusUnauthorized
                w.Header().Add("WWW-Authenticate", "Basic realm=\"dl\"")
+               statusCode = http.StatusUnauthorized
                return
        }
 
@@ -162,6 +240,7 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
                }
        }
 
+       w.WriteHeader(http.StatusOK)
        _, err = io.Copy(w, rdr)
        if err != nil {
                statusCode, statusText = http.StatusBadGateway, err.Error()