FooCollection = "zzzzz-4zz18-fy296fx3hot09f7"
NonexistentCollection = "zzzzz-4zz18-totallynotexist"
HelloWorldCollection = "zzzzz-4zz18-4en62shvi99lxd4"
+ FooPdh = "1f4b0bc7583c2a7f9102c395f4ffc5e3+45"
+ HelloWorldPdh = "55713e6a34081eb03609e7ad5fcad129+62"
PathologicalManifest = ". acbd18db4cc2f85cedef654fccc4a4d8+3 37b51d194a7513e45b56f6524f2d51f2+3 73feffa4b7f6bb68e44cf984c85f6e88+3+Z+K@xyzzy acbd18db4cc2f85cedef654fccc4a4d8+3 0:0:zero@0 0:1:f 1:0:zero@1 1:4:ooba 4:0:zero@4 5:1:r 5:4:rbaz 9:0:zero@9\n" +
"./overlapReverse acbd18db4cc2f85cedef654fccc4a4d8+3 acbd18db4cc2f85cedef654fccc4a4d8+3 5:1:o 4:2:oo 2:4:ofoo\n" +
"./segmented acbd18db4cc2f85cedef654fccc4a4d8+3 37b51d194a7513e45b56f6524f2d51f2+3 0:1:frob 5:1:frob 1:1:frob 1:2:oof 0:1:oof 5:0:frob 3:1:frob\n" +
package auth
import (
+ "encoding/base64"
"net/http"
"net/url"
"strings"
return c
}
+// EncodeTokenCookie accepts a token and returns a string suitable
+// for use as a cookie value, such that it will be decoded correctly
+// by LoadTokensFromHTTPRequest.
+var EncodeTokenCookie func([]byte) string = base64.URLEncoding.EncodeToString
+
+// DecodeTokenCookie accepts a cookie value and returns the decoded
+// token, or an error if the value is not valid URL-safe base64.
+var DecodeTokenCookie func(string) ([]byte, error) = base64.URLEncoding.DecodeString
+
// LoadTokensFromHTTPRequest loads all tokens it can find in the
// headers and query string of an HTTP request.
func (a *Credentials) LoadTokensFromHTTPRequest(r *http.Request) {
a.Tokens = append(a.Tokens, val...)
}
+ a.loadTokenFromCookie(r)
+
// TODO: Load token from Rails session cookie (if Rails site
// secret is known)
}
// LoadTokensFromHTTPRequest() that [or how] we should read and parse
// the request body. This has to be requested explicitly by the
// application.
+
+func (a *Credentials) loadTokenFromCookie(r *http.Request) { // appends the decoded "api_token" cookie value, if present and decodable, to a.Tokens
+ cookie, err := r.Cookie("api_token")
+ if err != nil || len(cookie.Value) == 0 { // no such cookie, or empty value: nothing to load
+ return
+ }
+ token, err := DecodeTokenCookie(cookie.Value)
+ if err != nil { // an undecodable cookie value is ignored without reporting an error
+ return
+ }
+ a.Tokens = append(a.Tokens, string(token))
+}
// Keep-web provides read-only HTTP access to files stored in Keep. It
// serves public data to anonymous and unauthenticated clients, and
-// accepts authentication via Arvados tokens. It can be installed
-// anywhere with access to Keep services, typically behind a web proxy
-// that provides SSL support.
+// serves private data to clients that supply Arvados API tokens. It
+// can be installed anywhere with access to Keep services, typically
+// behind a web proxy that supports TLS.
//
-// Given that this amounts to a web hosting service for arbitrary
-// content, it is vital to ensure that at least one of the following is
-// true:
+// Starting the server
//
-// Usage
-//
-// Listening:
+// Serve HTTP requests at port 1234 on all interfaces:
//
// keep-web -address=:1234
//
-// Start an HTTP server on port 1234.
+// Serve HTTP requests at port 1234 on the interface with IP address 1.2.3.4:
//
// keep-web -address=1.2.3.4:1234
//
-// Start an HTTP server on port 1234, on the interface with IP address 1.2.3.4.
+// Proxy configuration
//
// Keep-web does not support SSL natively. Typically, it is installed
// behind a proxy like nginx.
//
+// Here is an example nginx configuration.
+//
+// http {
+// upstream keep-web {
+// server localhost:1234;
+// }
+// server {
+// listen *:443 ssl;
+// server_name dl.example.com *.dl.example.com ~.*--dl.example.com;
+// ssl_certificate /root/wildcard.example.com.crt;
+// ssl_certificate_key /root/wildcard.example.com.key;
+// location / {
+// proxy_pass http://keep-web;
+// proxy_set_header Host $host;
+// proxy_set_header X-Forwarded-For $remote_addr;
+// }
+// }
+// }
+//
+// It is not necessary to run keep-web on the same host as the nginx
+// proxy. However, TLS is not used between nginx and keep-web, so
+// intervening networks must be secured by other means.
+//
+// Download URLs
+//
+// The following "same origin" URL patterns are supported for public
+// collections (i.e., collections which can be served by keep-web
+// without making use of any credentials supplied by the client). See
+// "Same-origin mode" below.
+//
+// http://dl.example.com/c=uuid_or_pdh/path/file.txt
+// http://dl.example.com/c=uuid_or_pdh/path/t=TOKEN/file.txt
+//
+// The following "multiple origin" URL patterns are supported for all
+// collections:
+//
+// http://uuid_or_pdh--dl.example.com/path/file.txt
+// http://uuid_or_pdh--dl.example.com/t=/path/file.txt
+// http://uuid_or_pdh--dl.example.com/t=TOKEN/path/file.txt
+//
+// In the "multiple origin" form, the string "--" can be replaced with
+// "." with identical results (assuming the upstream proxy is
+// configured accordingly). These two are equivalent:
+//
+// http://uuid_or_pdh--dl.example.com/path/file.txt
+// http://uuid_or_pdh.dl.example.com/path/file.txt
+//
+// The first form minimizes the cost and effort of deploying a
+// wildcard TLS certificate for *.dl.example.com. The second form is
+// likely to be easier to configure, and more efficient to run, on an
+// upstream proxy.
+//
+// In all of the above forms, the "dl.example.com" part can be
+// anything at all.
+//
+// In all of the above forms, the "uuid_or_pdh" part can be either a
+// collection UUID or a portable data hash with the "+" character
+// replaced by "-".
+//
+// Assuming there is a collection with UUID
+// zzzzz-4zz18-znfnqtbbv4spc3w and portable data hash
+// 1f4b0bc7583c2a7f9102c395f4ffc5e3+45, the following URLs are
+// interchangeable:
+//
+// http://zzzzz-4zz18-znfnqtbbv4spc3w.dl.example.com/foo
+// http://zzzzz-4zz18-znfnqtbbv4spc3w.dl.example.com/t=/foo
+// http://zzzzz-4zz18-znfnqtbbv4spc3w--dl.example.com/t=/foo
+// http://1f4b0bc7583c2a7f9102c395f4ffc5e3-45--foo.example.com/foo
+// http://1f4b0bc7583c2a7f9102c395f4ffc5e3-45--.invalid/foo
+//
+// Authorization mechanisms
+//
+// A token can be provided in an Authorization header:
+//
+// Authorization: OAuth2 o07j4px7RlJK4CuMYp7C0LDT4CzR1J1qBE5Avo7eCcUjOTikxK
+//
+// A base64-encoded token can be provided in a cookie named "api_token":
+//
+// Cookie: api_token=bzA3ajRweDdSbEpLNEN1TVlwN0MwTERUNEN6UjFKMXFCRTVBdm83ZUNjVWpPVGlreEs=
+//
+// A token can be provided in a URL-encoded query string:
+//
+// GET /foo.txt?api_token=o07j4px7RlJK4CuMYp7C0LDT4CzR1J1qBE5Avo7eCcUjOTikxK
+//
+// A suitably encoded token can be provided in a POST body if the
+// request has a content type of application/x-www-form-urlencoded or
+// multipart/form-data:
+//
+// POST /foo.txt
+// Content-Type: application/x-www-form-urlencoded
+// [...]
+// api_token=o07j4px7RlJK4CuMYp7C0LDT4CzR1J1qBE5Avo7eCcUjOTikxK
+//
+// If a token is provided in a query string or in a POST request, the
+// response is an HTTP 303 redirect to an equivalent GET request, with
+// the token stripped from the query string and added to a cookie
+// instead.
+//
+// Compatibility
+//
+// Client-provided authorization tokens are ignored if the client does
+// not provide a Host header.
+//
+// In order to use the query string or POST form authorization
+// mechanisms, the client must follow 303 redirects; the client must
+// accept cookies with a 303 response and send those cookies when
+// performing the redirect; and either the client or an intervening
+// proxy must resolve a relative URL ("//host/path") if given in a
+// response Location header.
+//
+// Intranet mode
+//
+// Normally, Keep-web accepts requests for multiple collections using
+// the same host name, provided the client's credentials are not being
+// used. This provides insufficient XSS protection in an installation
+// where the "anonymously accessible" data is not truly public, but
+// merely protected by network topology.
+//
+// In such cases -- for example, a site which is not reachable from
+// the internet, where some data is world-readable from Arvados's
+// perspective but is intended to be available only to users within
+// the local network -- the upstream proxy should be configured to return
+// 401 for all paths beginning with "/c=".
+//
+// Same-origin mode
+//
+// Without the same-origin protection outlined above, a web page
+// stored in collection X could execute JavaScript code that uses the
+// current viewer's credentials to download additional data from
+// collection Y -- data which is accessible to the current viewer, but
+// not to the author of collection X -- from the same origin
+// (``https://dl.example.com/'') and upload it to some other site
+// chosen by the author of collection X.
+//
package main
// TODO(TC): Implement
//
// Normally, Keep-web is installed using a wildcard DNS entry and a
// wildcard HTTPS certificate, serving data from collection X at
-// ``https://X.dl.example.com/path/file.ext''.
+// ``https://X--dl.example.com/path/file.ext''.
//
// It will also serve publicly accessible data at
// ``https://dl.example.com/collections/X/path/file.txt'', but it does not
//
// keep-web -trust-all-content [...]
//
-// In the general case, this should not be enabled: A web page stored
-// in collection X can execute JavaScript code that uses the current
-// viewer's credentials to download additional data -- data which is
-// accessible to the current viewer, but not to the author of
-// collection X -- from the same origin (``https://dl.example.com/'')
-// and upload it to some other site chosen by the author of collection
-// X.
+// In the general case, this should not be enabled:
import (
"fmt"
+ "html"
"io"
"mime"
"net/http"
+ "net/url"
"os"
"strings"
+ "time"
"git.curoverse.com/arvados.git/sdk/go/arvadosclient"
"git.curoverse.com/arvados.git/sdk/go/auth"
anonymousTokens = []string{}
}
+// parseCollectionIdFromDNSName returns the first label of DNS name s, cut at any "--", if that is a UUID or a PDH; otherwise "".
+func parseCollectionIdFromDNSName(s string) string {
+ // Strip domain: keep only the part before the first ".".
+ if i := strings.IndexRune(s, '.'); i >= 0 {
+ s = s[:i]
+ }
+ // Names like {uuid}--dl.example.com serve the same purpose as
+ // {uuid}.dl.example.com but can reduce cost/effort of using
+ // [additional] wildcard certificates.
+ if i := strings.Index(s, "--"); i >= 0 {
+ s = s[:i]
+ }
+ if !arvadosclient.UUIDMatch(s) && !arvadosclient.PDHMatch(s) { // NOTE(review): a PDH in a DNS name has "-" in place of "+"; confirm PDHMatch accepts that form
+ return ""
+ }
+ return s
+}
+
func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
- var statusCode int
+ var statusCode = 0
var statusText string
w := httpserver.WrapResponseWriter(wOrig)
defer func() {
- if statusCode > 0 {
- if w.WroteStatus() == 0 {
- w.WriteHeader(statusCode)
- } else {
- httpserver.Log(r.RemoteAddr, "WARNING",
- fmt.Sprintf("Our status changed from %d to %d after we sent headers", w.WroteStatus(), statusCode))
- }
+ if statusCode == 0 {
+ statusCode = w.WroteStatus()
+ } else if w.WroteStatus() == 0 {
+ w.WriteHeader(statusCode)
+ } else if w.WroteStatus() != statusCode {
+ httpserver.Log(r.RemoteAddr, "WARNING",
+ fmt.Sprintf("Our status changed from %d to %d after we sent headers", w.WroteStatus(), statusCode))
}
if statusText == "" {
statusText = http.StatusText(statusCode)
}
- httpserver.Log(r.RemoteAddr, statusCode, statusText, w.WroteBodyBytes(), r.Method, r.URL.Path)
+ httpserver.Log(r.RemoteAddr, statusCode, statusText, w.WroteBodyBytes(), r.Method, r.Host, r.URL.Path, r.URL.RawQuery)
}()
+ if r.Method != "GET" && r.Method != "POST" {
+ statusCode, statusText = http.StatusMethodNotAllowed, r.Method
+ return
+ }
+
arv := clientPool.Get()
if arv == nil {
statusCode, statusText = http.StatusInternalServerError, "Pool failed: "+clientPool.Err().Error()
pathParts := strings.Split(r.URL.Path[1:], "/")
- if len(pathParts) < 3 || pathParts[0] != "collections" || pathParts[1] == "" || pathParts[2] == "" {
- statusCode = http.StatusNotFound
- return
- }
-
var targetId string
var targetPath []string
var tokens []string
var reqTokens []string
var pathToken bool
- if len(pathParts) >= 5 && pathParts[1] == "download" {
+
+ if targetId = parseCollectionIdFromDNSName(r.Host); targetId != "" {
+ // "http://{id}.domain.example.com/{path}" form
+ if t := r.FormValue("api_token"); t != "" {
+ // ...with explicit token in query string or
+ // form in POST body. We must encrypt the
+ // token such that it can only be used for
+ // this collection; put it in an HttpOnly
+ // cookie; and redirect to the same URL with
+ // the query param redacted, and method =
+ // GET.
+ //
+ // The HttpOnly flag is necessary to prevent
+ // JavaScript code (included in, or loaded by,
+ // a page in the collection being served) from
+ // employing the user's token beyond reading
+ // other files in the same domain, i.e., the
+ // same collection.
+ // NOTE(review): the cookie set below does not
+ // set HttpOnly; confirm whether that is
+ // intended.
+ //
+ // The 303 redirect is necessary in the case
+ // of a GET request to avoid exposing the
+ // token in the Location bar, and in the case
+ // of a POST request to avoid raising warnings
+ // when the user refreshes the resulting page.
+ http.SetCookie(w, &http.Cookie{
+ Name: "api_token",
+ Value: auth.EncodeTokenCookie([]byte(t)),
+ Path: "/",
+ Expires: time.Now().AddDate(10,0,0),
+ })
+ redir := (&url.URL{Host: r.Host, Path: r.URL.Path}).String()
+
+ w.Header().Add("Location", redir)
+ statusCode, statusText = http.StatusSeeOther, redir
+ w.WriteHeader(statusCode)
+ io.WriteString(w, `<A href="`)
+ io.WriteString(w, html.EscapeString(redir))
+ io.WriteString(w, `">Continue</A>`)
+ return
+ } else if strings.HasPrefix(pathParts[0], "t=") {
+ // ...with explicit token in path,
+ // "{...}.com/t={token}/{path}". This form
+ // must only be used to pass scoped tokens
+ // that give permission for a single
+ // collection. See FormValue case above.
+ tokens = []string{pathParts[0][2:]}
+ targetPath = pathParts[1:]
+ pathToken = true
+ } else {
+ // ...with cookie, Authorization header, or
+ // no token at all
+ reqTokens = auth.NewCredentialsFromHTTPRequest(r).Tokens
+ tokens = append(reqTokens, anonymousTokens...)
+ targetPath = pathParts
+ }
+ } else if len(pathParts) < 3 || pathParts[0] != "collections" || pathParts[1] == "" || pathParts[2] == "" {
+ statusCode = http.StatusNotFound
+ return
+ } else if len(pathParts) >= 5 && pathParts[1] == "download" {
// "/collections/download/{id}/{token}/path..." form:
// Don't use our configured anonymous tokens,
// Authorization headers, etc. Just use the token in
found := false
for _, arv.ApiToken = range tokens {
err := arv.Get("collections", targetId, nil, &collection)
- httpserver.Log(err)
if err == nil {
// Success
found = true
// someone trying (anonymously) to download public
// data that has been deleted. Allow a referrer to
// provide this context somehow?
- statusCode = http.StatusUnauthorized
w.Header().Add("WWW-Authenticate", "Basic realm=\"dl\"")
+ statusCode = http.StatusUnauthorized
return
}
}
}
+ w.WriteHeader(http.StatusOK)
_, err = io.Copy(w, rdr)
if err != nil {
statusCode, statusText = http.StatusBadGateway, err.Error()
--- /dev/null
+package main
+
+import (
+ "html"
+ "io/ioutil"
+ "net/http"
+ "net/http/httptest"
+ "net/url"
+ "regexp"
+ "strings"
+
+ "git.curoverse.com/arvados.git/sdk/go/arvadostest"
+ "git.curoverse.com/arvados.git/sdk/go/auth"
+ check "gopkg.in/check.v1"
+)
+
+var _ = check.Suite(&UnitSuite{}) // registers UnitSuite with the gocheck runner
+
+type UnitSuite struct {} // has no fields; the test methods visible in this file are declared on IntegrationSuite
+
+func mustParseURL(s string) *url.URL { // returns s parsed as a URL, or panics if it does not parse
+ r, err := url.Parse(s)
+ if err != nil {
+ panic("parse URL: " + s) // the panic message carries the input string, not err
+ }
+ return r
+}
+
+func (s *IntegrationSuite) TestVhost404(c *check.C) { // requests a nonexistent collection, without and with a path token, and expects an empty 404 response
+ for _, testURL := range []string{ // NOTE(review): these URLs have no "http://" scheme and the request below sets no Host (unlike doVhostRequests); confirm the vhost code path is really exercised
+ arvadostest.NonexistentCollection + ".example.com/theperthcountyconspiracy",
+ arvadostest.NonexistentCollection + ".example.com/t=" + arvadostest.ActiveToken + "/theperthcountyconspiracy",
+ } {
+ resp := httptest.NewRecorder()
+ req := &http.Request{ // only Method and URL are populated
+ Method: "GET",
+ URL: mustParseURL(testURL),
+ }
+ (&handler{}).ServeHTTP(resp, req)
+ c.Check(resp.Code, check.Equals, http.StatusNotFound)
+ c.Check(resp.Body.String(), check.Equals, "")
+ }
+}
+
+type authorizer func(*http.Request, string) int // attaches the given token to the request; the result is the status code doVhostRequests expects when the token is invalid
+
+func (s *IntegrationSuite) TestVhostViaAuthzHeader(c *check.C) { // runs the doVhostRequests scenarios with the token in an Authorization header
+ doVhostRequests(c, authzViaAuthzHeader)
+}
+func authzViaAuthzHeader(r *http.Request, tok string) int { // authorizer: sends tok as "Authorization: OAuth2 <tok>"
+ r.Header.Add("Authorization", "OAuth2 " + tok)
+ return http.StatusUnauthorized // status expected by doVhostRequests for an invalid token
+}
+
+func (s *IntegrationSuite) TestVhostViaCookieValue(c *check.C) { // runs the doVhostRequests scenarios with the token in an "api_token" cookie
+ doVhostRequests(c, authzViaCookieValue)
+}
+func authzViaCookieValue(r *http.Request, tok string) int { // authorizer: sends tok, encoded with auth.EncodeTokenCookie, as the "api_token" cookie
+ r.AddCookie(&http.Cookie{
+ Name: "api_token",
+ Value: auth.EncodeTokenCookie([]byte(tok)),
+ })
+ return http.StatusUnauthorized // status expected by doVhostRequests for an invalid token
+}
+
+func (s *IntegrationSuite) TestVhostViaPath(c *check.C) { // runs the doVhostRequests scenarios with the token in a leading "/t=<tok>" path segment
+ doVhostRequests(c, authzViaPath)
+}
+func authzViaPath(r *http.Request, tok string) int { // authorizer: prefixes the request path with "/t=<tok>"
+ r.URL.Path = "/t=" + tok + r.URL.Path
+ return http.StatusNotFound // for this mechanism an invalid token is expected to yield 404 rather than 401
+}
+
+func (s *IntegrationSuite) TestVhostViaQueryString(c *check.C) { // runs the doVhostRequests scenarios with the token in the query string
+ doVhostRequests(c, authzViaQueryString)
+}
+func authzViaQueryString(r *http.Request, tok string) int { // authorizer: sets the query string to "api_token=<tok>" (tok is not URL-escaped)
+ r.URL.RawQuery = "api_token=" + tok
+ return http.StatusUnauthorized // status expected by doVhostRequests for an invalid token
+}
+
+func (s *IntegrationSuite) TestVhostViaPOST(c *check.C) { // runs the doVhostRequests scenarios with the token in a POST form body
+ doVhostRequests(c, authzViaPOST)
+}
+func authzViaPOST(r *http.Request, tok string) int { // authorizer: turns the request into a form-encoded POST carrying api_token=<tok>
+ r.Method = "POST"
+ r.Header.Add("Content-Type", "application/x-www-form-urlencoded")
+ r.Body = ioutil.NopCloser(strings.NewReader(
+ url.Values{"api_token": {tok}}.Encode()))
+ return http.StatusUnauthorized // status expected by doVhostRequests for an invalid token
+}
+
+// doVhostRequests requests file "foo" of FooCollection through its vhost URL once per
+// test token, using authz to attach the token, and checks each status code and body.
+func doVhostRequests(c *check.C, authz authorizer) {
+ hostPath := arvadostest.FooCollection + ".example.com/foo"
+ for _, tok := range []string{
+ arvadostest.ActiveToken,
+ arvadostest.ActiveToken[:15], // truncated copy of the active token
+ arvadostest.SpectatorToken,
+ "bogus",
+ "",
+ } {
+ u := mustParseURL("http://" + hostPath)
+ req := &http.Request{
+ Method: "GET",
+ Host: u.Host,
+ URL: u,
+ Header: http.Header{},
+ }
+ failCode := authz(req, tok) // authz mutates req and reports the status expected for an invalid token
+ resp := doReq(req) // doReq follows any 303 redirects before returning
+ code, body := resp.Code, resp.Body.String()
+ if tok == arvadostest.ActiveToken { // the only token expected to read the file
+ c.Check(code, check.Equals, http.StatusOK)
+ c.Check(body, check.Equals, "foo")
+ } else {
+ c.Check(code >= 400, check.Equals, true) // every other token must produce a 4xx status
+ c.Check(code < 500, check.Equals, true)
+ if tok == arvadostest.SpectatorToken {
+ // Valid token never offers to retry
+ // with different credentials.
+ c.Check(code, check.Equals, http.StatusNotFound)
+ } else {
+ // Invalid token can ask to retry
+ // depending on the authz method.
+ c.Check(code, check.Equals, failCode)
+ }
+ c.Check(body, check.Equals, "")
+ }
+ }
+}
+
+func doReq(req *http.Request) *httptest.ResponseRecorder { // serves req with a fresh handler and follows 303 redirects recursively; returns the final response
+ resp := httptest.NewRecorder()
+ (&handler{}).ServeHTTP(resp, req)
+ if resp.Code != http.StatusSeeOther { // not a redirect: this is the final response
+ return resp
+ }
+ cookies := (&http.Response{Header: resp.Header()}).Cookies() // parse the Set-Cookie headers of the redirect response
+ u, _ := req.URL.Parse(resp.Header().Get("Location")) // resolve Location relative to the request URL; a parse error is ignored
+ req = &http.Request{
+ Method: "GET",
+ Host: u.Host,
+ URL: u,
+ Header: http.Header{},
+ }
+ for _, c := range cookies { // send the received cookies along with the follow-up request
+ req.AddCookie(c)
+ }
+ return doReq(req)
+}
+
+func (s *IntegrationSuite) TestVhostRedirectQueryTokenToCookie(c *check.C) { // GET with the active token in the query string: expects a redirect, then 200
+ s.testVhostRedirectTokenToCookie(c, "GET",
+ arvadostest.FooCollection + ".example.com/foo",
+ "?api_token=" + arvadostest.ActiveToken,
+ "text/plain",
+ "",
+ http.StatusOK,
+ )
+}
+
+func (s *IntegrationSuite) TestVhostRedirectPOSTFormTokenToCookie(c *check.C) { // POST with the active token in a form body: expects a redirect, then 200
+ s.testVhostRedirectTokenToCookie(c, "POST",
+ arvadostest.FooCollection + ".example.com/foo",
+ "",
+ "application/x-www-form-urlencoded",
+ url.Values{"api_token": {arvadostest.ActiveToken}}.Encode(),
+ http.StatusOK,
+ )
+}
+
+func (s *IntegrationSuite) TestVhostRedirectPOSTFormTokenToCookie404(c *check.C) { // POST with the spectator token in a form body: expects a redirect, then 404
+ s.testVhostRedirectTokenToCookie(c, "POST",
+ arvadostest.FooCollection + ".example.com/foo",
+ "",
+ "application/x-www-form-urlencoded",
+ url.Values{"api_token": {arvadostest.SpectatorToken}}.Encode(),
+ http.StatusNotFound,
+ )
+}
+
+func (s *IntegrationSuite) testVhostRedirectTokenToCookie(c *check.C, method, hostPath, queryString, contentType, body string, expectStatus int) { // sends one request, requires a 303, then replays the redirect with the returned cookies and expects expectStatus
+ u, _ := url.Parse(`http://` + hostPath + queryString) // a parse error is ignored
+ req := &http.Request{
+ Method: method,
+ Host: u.Host,
+ URL: u,
+ Header: http.Header{"Content-Type": {contentType}},
+ Body: ioutil.NopCloser(strings.NewReader(body)),
+ }
+
+ resp := httptest.NewRecorder()
+ (&handler{}).ServeHTTP(resp, req)
+ c.Assert(resp.Code, check.Equals, http.StatusSeeOther) // the first response must be the redirect
+ c.Check(resp.Body.String(), check.Matches, `.*href="//` + regexp.QuoteMeta(html.EscapeString(hostPath)) + `".*`) // body must link to the scheme-relative target, without the query string
+ cookies := (&http.Response{Header: resp.Header()}).Cookies() // cookies set by the redirect response
+
+ u, _ = u.Parse(resp.Header().Get("Location")) // resolve Location against the original URL; a parse error is ignored
+ req = &http.Request{
+ Method: "GET",
+ Host: u.Host,
+ URL: u,
+ Header: http.Header{},
+ }
+ for _, c := range cookies { // send the received cookies along with the follow-up request
+ req.AddCookie(c)
+ }
+
+ resp = httptest.NewRecorder()
+ (&handler{}).ServeHTTP(resp, req)
+ c.Check(resp.Header().Get("Location"), check.Equals, "") // the follow-up response must not redirect again
+ c.Check(resp.Code, check.Equals, expectStatus)
+ if expectStatus == http.StatusOK { // the body is only checked on success
+ c.Check(resp.Body.String(), check.Equals, "foo")
+ }
+}