5824: Handle various combinations of c= and t= more consistently. Use vhosts in integ...
authorTom Clegg <tom@curoverse.com>
Fri, 28 Aug 2015 05:09:46 +0000 (01:09 -0400)
committerTom Clegg <tom@curoverse.com>
Sat, 17 Oct 2015 00:02:14 +0000 (20:02 -0400)
services/keep-web/doc.go
services/keep-web/handler.go
services/keep-web/handler_test.go
services/keep-web/server_test.go

index 89085600eeeed6d3bd4a4e208923534d7dd195e4..f6c71c4e70ec56fa4c87ecd8ce6a00b92808209b 100644 (file)
 // "Same-origin mode" below.
 //
 //   http://dl.example.com/c=uuid_or_pdh/path/file.txt
-//   http://dl.example.com/c=uuid_or_pdh/path/t=TOKEN/file.txt
+//   http://dl.example.com/c=uuid_or_pdh/t=TOKEN/path/file.txt
 //
 // The following "multiple origin" URL patterns are supported for all
 // collections:
 //
 //   http://uuid_or_pdh--dl.example.com/path/file.txt
-//   http://uuid_or_pdh--dl.example.com/t=/path/file.txt
 //   http://uuid_or_pdh--dl.example.com/t=TOKEN/path/file.txt
 //
 // In the "multiple origin" form, the string "--" can be replaced with
 // collection UUID or a portable data hash with the "+" character
 // replaced by "-".
 //
+// In all of the above forms, a top level directory called "_" is
+// skipped. In cases where the "path/file.txt" part might start with
+// "t=" or "c=" or "_/", links should be constructed with a leading
+// "_/" to ensure the top level directory is not interpreted as a
+// token or collection ID.
+//
 // Assuming there is a collection with UUID
 // zzzzz-4zz18-znfnqtbbv4spc3w and portable data hash
 // 1f4b0bc7583c2a7f9102c395f4ffc5e3+45, the following URLs are
 // interchangeable:
 //
 //   http://zzzzz-4zz18-znfnqtbbv4spc3w.dl.example.com/foo
-//   http://zzzzz-4zz18-znfnqtbbv4spc3w.dl.example.com/t=/foo
-//   http://zzzzz-4zz18-znfnqtbbv4spc3w--dl.example.com/t=/foo
+//   http://zzzzz-4zz18-znfnqtbbv4spc3w.dl.example.com/_/foo
+//   http://zzzzz-4zz18-znfnqtbbv4spc3w--dl.example.com/_/foo
 //   http://1f4b0bc7583c2a7f9102c395f4ffc5e3-45--foo.example.com/foo
 //   http://1f4b0bc7583c2a7f9102c395f4ffc5e3-45--.invalid/foo
 //
+// An additional form is supported specifically to make it more
+// convenient to maintain support for existing Workbench download
+// links:
+//
+//   http://dl.example.com/collections/download/uuid_or_pdh/TOKEN/path/file.txt
+//
+// A regular Workbench "download" link is also accepted, but
+// credentials passed via cookie, header, etc. are ignored. Only
+// public data can be served this way:
+//
+//   http://dl.example.com/collections/uuid_or_pdh/path/file.txt
+//
 // Authorization mechanisms
 //
 // A token can be provided in an Authorization header:
 //
 package main
 
-// TODO(TC): Implement
+// TODO(TC): Implement?
 //
 // Trusted content
 //
index 657c72debdd2b615a03a281ed3f6593bb41af0e4..7a2124a2d6ac5312e9dc0bcec9c2a758e8d3dca8 100644 (file)
@@ -50,6 +50,20 @@ func parseCollectionIdFromDNSName(s string) string {
        return ""
 }
 
+var urlPDHDecoder = strings.NewReplacer(" ", "+", "-", "+")
+
+// return a UUID or PDH if s is a UUID or a PDH (even if it is a PDH
+// with "+" replaced by " " or "-"); otherwise return "".
+func parseCollectionIdFromURL(s string) string {
+       if arvadosclient.UUIDMatch(s) {
+               return s
+       }
+       if pdh := urlPDHDecoder.Replace(s); arvadosclient.PDHMatch(pdh) {
+               return pdh
+       }
+       return ""
+}
+
 func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
        var statusCode = 0
        var statusText string
@@ -89,79 +103,104 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
        var tokens []string
        var reqTokens []string
        var pathToken bool
+       var credentialsOK bool
 
        if targetId = parseCollectionIdFromDNSName(r.Host); targetId != "" {
-               // "http://{id}.domain.example.com/{path}" form
-               if t := r.FormValue("api_token"); t != "" {
-                       // ...with explicit token in query string or
-                       // form in POST body. We must encrypt the
-                       // token such that it can only be used for
-                       // this collection; put it in an HttpOnly
-                       // cookie; and redirect to the same URL with
-                       // the query param redacted, and method =
-                       // GET.
-                       //
-                       // The HttpOnly flag is necessary to prevent
-                       // JavaScript code (included in, or loaded by,
-                       // a page in the collection being served) from
-                       // employing the user's token beyond reading
-                       // other files in the same domain, i.e., same
-                       // the collection.
-                       //
-                       // The 303 redirect is necessary in the case
-                       // of a GET request to avoid exposing the
-                       // token in the Location bar, and in the case
-                       // of a POST request to avoid raising warnings
-                       // when the user refreshes the resulting page.
-                       http.SetCookie(w, &http.Cookie{
-                               Name:    "api_token",
-                               Value:   auth.EncodeTokenCookie([]byte(t)),
-                               Path:    "/",
-                               Expires: time.Now().AddDate(10,0,0),
-                       })
-                       redir := (&url.URL{Host: r.Host, Path: r.URL.Path}).String()
-
-                       w.Header().Add("Location", redir)
-                       statusCode, statusText = http.StatusSeeOther, redir
-                       w.WriteHeader(statusCode)
-                       io.WriteString(w, `<A href="`)
-                       io.WriteString(w, html.EscapeString(redir))
-                       io.WriteString(w, `">Continue</A>`)
-                       return
-               } else if strings.HasPrefix(pathParts[0], "t=") {
-                       // ...with explicit token in path,
-                       // "{...}.com/t={token}/{path}".  This form
-                       // must only be used to pass scoped tokens
-                       // that give permission for a single
-                       // collection. See FormValue case above.
-                       tokens = []string{pathParts[0][2:]}
-                       targetPath = pathParts[1:]
+               // http://ID.dl.example/PATH...
+               credentialsOK = true
+               targetPath = pathParts
+       } else if len(pathParts) >= 2 && strings.HasPrefix(pathParts[0], "c=") {
+               // /c=ID/PATH...
+               targetId = parseCollectionIdFromURL(pathParts[0][2:])
+               targetPath = pathParts[1:]
+       } else if len(pathParts) >= 3 && pathParts[0] == "collections" {
+               if len(pathParts) >= 5 && pathParts[1] == "download" {
+                       // /collections/download/ID/TOKEN/PATH...
+                       targetId = pathParts[2]
+                       tokens = []string{pathParts[3]}
+                       targetPath = pathParts[4:]
                        pathToken = true
                } else {
-                       // ...with cookie, Authorization header, or
-                       // no token at all
-                       reqTokens = auth.NewCredentialsFromHTTPRequest(r).Tokens
-                       tokens = append(reqTokens, anonymousTokens...)
-                       targetPath = pathParts
+                       // /collections/ID/PATH...
+                       targetId = pathParts[1]
+                       tokens = anonymousTokens
+                       targetPath = pathParts[2:]
                }
-       } else if len(pathParts) < 3 || pathParts[0] != "collections" || pathParts[1] == "" || pathParts[2] == "" {
+       } else {
                statusCode = http.StatusNotFound
                return
-       } else if len(pathParts) >= 5 && pathParts[1] == "download" {
-               // "/collections/download/{id}/{token}/path..." form:
-               // Don't use our configured anonymous tokens,
-               // Authorization headers, etc.  Just use the token in
-               // the path.
-               targetId = pathParts[2]
-               tokens = []string{pathParts[3]}
-               targetPath = pathParts[4:]
+       }
+       if t := r.FormValue("api_token"); t != "" {
+               // The client provided an explicit token in the query
+               // string, or a form in POST body. We must put the
+               // token in an HttpOnly cookie, and redirect to the
+               // same URL with the query param redacted and method =
+               // GET.
+
+               if !credentialsOK {
+                       // It is not safe to copy the provided token
+                       // into a cookie unless the current vhost
+                       // (origin) serves only a single collection.
+                       statusCode = http.StatusBadRequest
+                       return
+               }
+
+               // The HttpOnly flag is necessary to prevent
+               // JavaScript code (included in, or loaded by, a page
+               // in the collection being served) from employing the
+               // user's token beyond reading other files in the same
+               // domain, i.e., same collection.
+               //
+               // The 303 redirect is necessary in the case of a GET
+               // request to avoid exposing the token in the Location
+               // bar, and in the case of a POST request to avoid
+               // raising warnings when the user refreshes the
+               // resulting page.
+
+               http.SetCookie(w, &http.Cookie{
+                       Name:     "api_token",
+                       Value:    auth.EncodeTokenCookie([]byte(t)),
+                       Path:     "/",
+                       Expires:  time.Now().AddDate(10,0,0),
+                       HttpOnly: true,
+               })
+               redir := (&url.URL{Host: r.Host, Path: r.URL.Path}).String()
+
+               w.Header().Add("Location", redir)
+               statusCode, statusText = http.StatusSeeOther, redir
+               w.WriteHeader(statusCode)
+               io.WriteString(w, `<A href="`)
+               io.WriteString(w, html.EscapeString(redir))
+               io.WriteString(w, `">Continue</A>`)
+               return
+       }
+
+       if tokens == nil && strings.HasPrefix(targetPath[0], "t=") {
+               // http://ID.example/t=TOKEN/PATH...
+               // /c=ID/t=TOKEN/PATH...
+               //
+               // This form must only be used to pass scoped tokens
+               // that give permission for a single collection. See
+               // FormValue case above.
+               tokens = []string{targetPath[0][2:]}
                pathToken = true
-       } else {
-               // "/collections/{id}/path..." form
-               targetId = pathParts[1]
-               reqTokens = auth.NewCredentialsFromHTTPRequest(r).Tokens
+               targetPath = targetPath[1:]
+       }
+
+       if tokens == nil {
+               if credentialsOK {
+                       reqTokens = auth.NewCredentialsFromHTTPRequest(r).Tokens
+               }
                tokens = append(reqTokens, anonymousTokens...)
-               targetPath = pathParts[2:]
+       }
+
+       if len(targetPath) > 0 && targetPath[0] == "_" {
+               // If a collection has a directory called "t=foo" or
+               // "_", it can be served at //dl.example/_/t=foo/ or
+               // //dl.example/_/_/ respectively: //dl.example/t=foo/
+               // won't work because t=foo will be interpreted as a
+               // token "foo".
+               targetPath = targetPath[1:]
        }
 
        tokenResult := make(map[string]int)
@@ -188,11 +227,12 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
                return
        }
        if !found {
-               if pathToken {
-                       // The URL is a "secret sharing link", but it
-                       // didn't work out. Asking the client for
-                       // additional credentials would just be
-                       // confusing.
+               if pathToken || !credentialsOK {
+                       // Either the URL is a "secret sharing link"
+                       // that didn't work out (and asking the client
+                       // for additional credentials would just be
+                       // confusing), or we don't even accept
+                       // credentials at this path.
                        statusCode = http.StatusNotFound
                        return
                }
index b788a38d869bd44d8691177985db0e48a222e37f..50fd7175050c1a22a796f104db6cc31fd57cc605 100644 (file)
@@ -99,8 +99,10 @@ func authzViaPOST(r *http.Request, tok string) int {
 func doVhostRequests(c *check.C, authz authorizer) {
        for _, hostPath := range []string{
                arvadostest.FooCollection + ".example.com/foo",
+               arvadostest.FooCollection + "--dl.example.com/foo",
+               arvadostest.FooCollection + "--dl.example.com/_/foo",
                arvadostest.FooPdh + ".example.com/foo",
-               strings.Replace(arvadostest.FooPdh, "+", "-", -1) + ".example.com/foo",
+               strings.Replace(arvadostest.FooPdh, "+", "-", -1) + "--dl.example.com/foo",
        } {
                c.Log("doRequests: ", hostPath)
                doVhostRequestsWithHostPath(c, authz, hostPath)
index a2a5754380cc24c2ce0695f29eebb6ed12939a0d..fdbb50e79f6a42c329df6581ebdc7d7ea1dcee6e 100644 (file)
@@ -29,7 +29,7 @@ func (s *IntegrationSuite) TestNoToken(c *check.C) {
                "bogustoken",
        } {
                hdr, body, _ := s.runCurl(c, token, "dl.example.com", "/collections/"+arvadostest.FooCollection+"/foo")
-               c.Check(hdr, check.Matches, `(?s)HTTP/1.1 401 Unauthorized\r\n.*`)
+               c.Check(hdr, check.Matches, `(?s)HTTP/1.1 404 Not Found\r\n.*`)
                c.Check(body, check.Equals, "")
 
                if token != "" {
@@ -119,6 +119,14 @@ func (s *IntegrationSuite) test100BlockFile(c *check.C, blocksize int) {
        c.Check(size, check.Equals, int64(blocksize)*100)
 }
 
+type curlCase struct {
+       id      string
+       auth    string
+       host    string
+       path    string
+       dataMD5 string
+}
+
 func (s *IntegrationSuite) Test200(c *check.C) {
        anonymousTokens = []string{arvadostest.AnonymousToken}
        arv, err := arvadosclient.MakeArvadosClient()
@@ -128,28 +136,101 @@ func (s *IntegrationSuite) Test200(c *check.C) {
        c.Assert(err, check.Equals, nil)
        kc.PutB([]byte("Hello world\n"))
        kc.PutB([]byte("foo"))
-       for _, spec := range [][]string{
+       for _, spec := range []curlCase{
                // My collection
-               {arvadostest.ActiveToken, "/collections/" + arvadostest.FooCollection + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
-               {"", "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
-               {"tokensobogus", "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
-               {arvadostest.ActiveToken, "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
-               {arvadostest.AnonymousToken, "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
-               // Anonymously accessible user agreement.
-               {"", "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
-               {arvadostest.ActiveToken, "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
-               {arvadostest.SpectatorToken, "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
-               {arvadostest.SpectatorToken, "/collections/download/" + arvadostest.HelloWorldCollection + "/" + arvadostest.SpectatorToken + "/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
+               {
+                       auth: arvadostest.ActiveToken,
+                       host: arvadostest.FooCollection + "--dl.example.com",
+                       path: "/foo",
+                       dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
+               },
+               {
+                       host: strings.Replace(arvadostest.FooPdh, "+", "-", 1) + ".dl.example.com",
+                       path: "/t=" + arvadostest.ActiveToken + "/foo",
+                       dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
+               },
+               {
+                       path: "/c=" + arvadostest.FooPdh + "/t=" + arvadostest.ActiveToken + "/foo",
+                       dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
+               },
+               {
+                       path: "/c=" + strings.Replace(arvadostest.FooPdh, "+", "-", 1) + "/t=" + arvadostest.ActiveToken + "/_/foo",
+                       dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
+               },
+               {
+                       path: "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo",
+                       dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
+               },
+               {
+                       auth: "tokensobogus",
+                       path: "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo",
+                       dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
+               },
+               {
+                       auth: arvadostest.ActiveToken,
+                       path: "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo",
+                       dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
+               },
+               {
+                       auth: arvadostest.AnonymousToken,
+                       path: "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo",
+                       dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
+               },
+
+               // Anonymously accessible user agreement
+               {
+                       path: "/c=" + arvadostest.HelloWorldCollection + "/Hello%20world.txt",
+                       dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
+               },
+               {
+                       host: arvadostest.HelloWorldCollection + ".dl.example.com",
+                       path: "/Hello%20world.txt",
+                       dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
+               },
+               {
+                       host: arvadostest.HelloWorldCollection + ".dl.example.com",
+                       path: "/_/Hello%20world.txt",
+                       dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
+               },
+               {
+                       path: "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt",
+                       dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
+               },
+               {
+                       auth: arvadostest.ActiveToken,
+                       path: "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt",
+                       dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
+               },
+               {
+                       auth: arvadostest.SpectatorToken,
+                       path: "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt",
+                       dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
+               },
+               {
+                       auth: arvadostest.SpectatorToken,
+                       host: arvadostest.HelloWorldCollection + "--dl.example.com",
+                       path: "/Hello%20world.txt",
+                       dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
+               },
+               {
+                       auth: arvadostest.SpectatorToken,
+                       path: "/collections/download/" + arvadostest.HelloWorldCollection + "/" + arvadostest.SpectatorToken + "/Hello%20world.txt",
+                       dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
+               },
        } {
-               hdr, body, _ := s.runCurl(c, spec[0], "dl.example.com", spec[1])
+               host := spec.host
+               if host == "" {
+                       host = "dl.example.com"
+               }
+               hdr, body, _ := s.runCurl(c, spec.auth, host, spec.path)
                c.Check(hdr, check.Matches, `(?s)HTTP/1.1 200 OK\r\n.*`)
-               if strings.HasSuffix(spec[1], ".txt") {
+               if strings.HasSuffix(spec.path, ".txt") {
                        c.Check(hdr, check.Matches, `(?s).*\r\nContent-Type: text/plain.*`)
                        // TODO: Check some types that aren't
                        // automatically detected by Go's http server
                        // by sniffing the content.
                }
-               c.Check(fmt.Sprintf("%x", md5.Sum([]byte(body))), check.Equals, spec[2])
+               c.Check(fmt.Sprintf("%x", md5.Sum([]byte(body))), check.Equals, spec.dataMD5)
        }
 }