X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/b65d8b9008c4d0e6b5816d21bf6f1ae81167ee56..11890d971215ec44b7abb1e24ad748aefa1061a7:/services/keep-web/handler.go?ds=sidebyside diff --git a/services/keep-web/handler.go b/services/keep-web/handler.go index 04af920b9e..03b3e2600b 100644 --- a/services/keep-web/handler.go +++ b/services/keep-web/handler.go @@ -2,11 +2,14 @@ package main import ( "fmt" + "html" "io" "mime" "net/http" + "net/url" "os" "strings" + "time" "git.curoverse.com/arvados.git/sdk/go/arvadosclient" "git.curoverse.com/arvados.git/sdk/go/auth" @@ -25,26 +28,49 @@ func init() { anonymousTokens = []string{} } +// parseCollectionIdFromDNSName returns the part of s before the first "." and before any "--" if that part is a UUID or a PDH, otherwise "" +func parseCollectionIdFromDNSName(s string) string { + // Strip domain. + if i := strings.IndexRune(s, '.'); i >= 0 { + s = s[:i] + } + // Names like {uuid}--dl.example.com serve the same purpose as + // {uuid}.dl.example.com but can reduce cost/effort of using + // [additional] wildcard certificates. + if i := strings.Index(s, "--"); i >= 0 { + s = s[:i] + } + if !arvadosclient.UUIDMatch(s) && !arvadosclient.PDHMatch(s) { + return "" + } + return s +} + func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) { - var statusCode int + var statusCode = 0 var statusText string w := httpserver.WrapResponseWriter(wOrig) defer func() { - if statusCode > 0 { - if w.WroteStatus() == 0 { - w.WriteHeader(statusCode) - } else { - httpserver.Log(r.RemoteAddr, "WARNING", - fmt.Sprintf("Our status changed from %d to %d after we sent headers", w.WroteStatus(), statusCode)) - } + if statusCode == 0 { + statusCode = w.WroteStatus() + } else if w.WroteStatus() == 0 { + w.WriteHeader(statusCode) + } else if w.WroteStatus() != statusCode { + httpserver.Log(r.RemoteAddr, "WARNING", + fmt.Sprintf("Our status changed from %d to %d after we sent headers", w.WroteStatus(), statusCode)) } if statusText == "" { statusText = http.StatusText(statusCode) } - httpserver.Log(r.RemoteAddr, 
statusCode, statusText, w.WroteBodyBytes(), r.Method, r.URL.Path) + httpserver.Log(r.RemoteAddr, statusCode, statusText, w.WroteBodyBytes(), r.Method, r.Host, r.URL.Path, r.URL.RawQuery) }() + if r.Method != "GET" && r.Method != "POST" { + statusCode, statusText = http.StatusMethodNotAllowed, r.Method + return + } + arv := clientPool.Get() if arv == nil { statusCode, statusText = http.StatusInternalServerError, "Pool failed: "+clientPool.Err().Error() @@ -54,17 +80,70 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) { pathParts := strings.Split(r.URL.Path[1:], "/") - if len(pathParts) < 3 || pathParts[0] != "collections" || pathParts[1] == "" || pathParts[2] == "" { - statusCode = http.StatusNotFound - return - } - var targetId string var targetPath []string var tokens []string var reqTokens []string var pathToken bool - if len(pathParts) >= 5 && pathParts[1] == "download" { + + if targetId = parseCollectionIdFromDNSName(r.Host); targetId != "" { + // "http://{id}.domain.example.com/{path}" form + if t := r.FormValue("api_token"); t != "" { + // ...with explicit token in query string or + // form in POST body. We must encrypt the + // token such that it can only be used for + // this collection; put it in an HttpOnly + // cookie; and redirect to the same URL with + // the query param redacted, and method = + // GET. + // + // The HttpOnly flag is necessary to prevent + // JavaScript code (included in, or loaded by, + // a page in the collection being served) from + // employing the user's token beyond reading + // other files in the same domain, i.e., the + // same collection. + // + // The 303 redirect is necessary in the case + // of a GET request to avoid exposing the + // token in the Location bar, and in the case + // of a POST request to avoid raising warnings + // when the user refreshes the resulting page. NOTE(review): the http.Cookie set below has no HttpOnly field, contrary to the requirement stated above -- confirm. 
+ http.SetCookie(w, &http.Cookie{ + Name: "api_token", + Value: auth.EncodeTokenCookie([]byte(t)), + Path: "/", + Expires: time.Now().AddDate(10,0,0), + }) + redir := (&url.URL{Host: r.Host, Path: r.URL.Path}).String() + + w.Header().Add("Location", redir) + statusCode, statusText = http.StatusSeeOther, redir + w.WriteHeader(statusCode) + io.WriteString(w, `Continue`) + return + } else if strings.HasPrefix(pathParts[0], "t=") { + // ...with explicit token in path, + // "{...}.com/t={token}/{path}". This form + // must only be used to pass scoped tokens + // that give permission for a single + // collection. See FormValue case above. + tokens = []string{pathParts[0][2:]} + targetPath = pathParts[1:] + pathToken = true + } else { + // ...with cookie, Authorization header, or + // no token at all + reqTokens = auth.NewCredentialsFromHTTPRequest(r).Tokens + tokens = append(reqTokens, anonymousTokens...) + targetPath = pathParts + } + } else if len(pathParts) < 3 || pathParts[0] != "collections" || pathParts[1] == "" || pathParts[2] == "" { + statusCode = http.StatusNotFound + return + } else if len(pathParts) >= 5 && pathParts[1] == "download" { // "/collections/download/{id}/{token}/path..." form: // Don't use our configured anonymous tokens, // Authorization headers, etc. Just use the token in @@ -86,7 +165,6 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) { found := false for _, arv.ApiToken = range tokens { err := arv.Get("collections", targetId, nil, &collection) - httpserver.Log(err) if err == nil { // Success found = true @@ -131,8 +209,8 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) { // someone trying (anonymously) to download public // data that has been deleted. Allow a referrer to // provide this context somehow? 
- statusCode = http.StatusUnauthorized w.Header().Add("WWW-Authenticate", "Basic realm=\"dl\"") + statusCode = http.StatusUnauthorized return } @@ -162,6 +240,7 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) { } } + w.WriteHeader(http.StatusOK) _, err = io.Copy(w, rdr) if err != nil { statusCode, statusText = http.StatusBadGateway, err.Error()