"bytes"
"context"
"fmt"
- "html"
"io"
"io/ioutil"
"net/http"
"git.arvados.org/arvados.git/sdk/go/keepclient"
"github.com/prometheus/client_golang/prometheus"
"github.com/sirupsen/logrus"
+ "golang.org/x/net/html"
check "gopkg.in/check.v1"
)
logger: logger,
registry: prometheus.NewRegistry(),
},
+ metrics: newMetrics(prometheus.NewRegistry()),
+ }
+}
+
+func newCollection(collID string) *arvados.Collection {
+ coll := &arvados.Collection{UUID: collID}
+ manifestKey := collID
+ if pdh, ok := arvadostest.TestCollectionUUIDToPDH[collID]; ok {
+ coll.PortableDataHash = pdh
+ manifestKey = pdh
+ }
+ if mtext, ok := arvadostest.TestCollectionPDHToManifest[manifestKey]; ok {
+ coll.ManifestText = mtext
+ }
+ return coll
+}
+
+func newRequest(method, urlStr string) *http.Request {
+ u := mustParseURL(urlStr)
+ return &http.Request{
+ Method: method,
+ Host: u.Host,
+ URL: u,
+ RequestURI: u.RequestURI(),
+ RemoteAddr: "10.20.30.40:56789",
+ Header: http.Header{},
+ }
+}
+
+func newLoggerAndContext() (*bytes.Buffer, context.Context) {
+ var logbuf bytes.Buffer
+ logger := logrus.New()
+ logger.Out = &logbuf
+ return &logbuf, ctxlog.Context(context.Background(), logger)
+}
+
+func (s *UnitSuite) TestLogEventTypes(c *check.C) {
+ collURL := "http://keep-web.example/c=" + arvadostest.FooCollection
+ for method, expected := range map[string]string{
+ "GET": "file_download",
+ "POST": "file_upload",
+ "PUT": "file_upload",
+ } {
+ filePath := "/" + method
+ req := newRequest(method, collURL+filePath)
+ actual := newFileEventLog(s.handler, req, filePath, nil, nil, "")
+ if !c.Check(actual, check.NotNil) {
+ continue
+ }
+ c.Check(actual.eventType, check.Equals, expected)
+ }
+}
+
+func (s *UnitSuite) TestUnloggedEventTypes(c *check.C) {
+ collURL := "http://keep-web.example/c=" + arvadostest.FooCollection
+ for _, method := range []string{"DELETE", "HEAD", "OPTIONS", "PATCH"} {
+ filePath := "/" + method
+ req := newRequest(method, collURL+filePath)
+ actual := newFileEventLog(s.handler, req, filePath, nil, nil, "")
+ c.Check(actual, check.IsNil,
+ check.Commentf("%s request made a log event", method))
+ }
+}
+
+func (s *UnitSuite) TestLogFilePath(c *check.C) {
+ coll := newCollection(arvadostest.FooCollection)
+ collURL := "http://keep-web.example/c=" + arvadostest.FooCollection
+ for _, filePath := range []string{"/foo", "/Foo", "/foo/bar"} {
+ req := newRequest("GET", collURL+filePath)
+ actual := newFileEventLog(s.handler, req, filePath, coll, nil, "")
+ if !c.Check(actual, check.NotNil) {
+ continue
+ }
+ c.Check(actual.collFilePath, check.Equals, filePath)
+ }
+}
+
+func (s *UnitSuite) TestLogRemoteAddr(c *check.C) {
+ collURL := "http://keep-web.example/c=" + arvadostest.FooCollection
+ filePath := "/foo"
+ req := newRequest("GET", collURL+filePath)
+
+ for _, addr := range []string{"10.20.30.55", "192.168.144.120", "192.0.2.4"} {
+ req.RemoteAddr = addr + ":57914"
+ actual := newFileEventLog(s.handler, req, filePath, nil, nil, "")
+ if !c.Check(actual, check.NotNil) {
+ continue
+ }
+ c.Check(actual.clientAddr, check.Equals, addr)
+ }
+
+ for _, addr := range []string{"100::20:30:40", "2001:db8::90:100", "3fff::30"} {
+ req.RemoteAddr = fmt.Sprintf("[%s]:57916", addr)
+ actual := newFileEventLog(s.handler, req, filePath, nil, nil, "")
+ if !c.Check(actual, check.NotNil) {
+ continue
+ }
+ c.Check(actual.clientAddr, check.Equals, addr)
+ }
+}
+
+func (s *UnitSuite) TestLogXForwardedFor(c *check.C) {
+ collURL := "http://keep-web.example/c=" + arvadostest.FooCollection
+ filePath := "/foo"
+ req := newRequest("GET", collURL+filePath)
+ for xff, expected := range map[string]string{
+ "10.20.30.55": "10.20.30.55",
+ "192.168.144.120, 10.20.30.120": "10.20.30.120",
+ "192.0.2.4, 192.0.2.6, 192.0.2.8": "192.0.2.8",
+ "192.0.2.4,192.168.2.4": "192.168.2.4",
+ "10.20.30.60,192.168.144.40,192.0.2.4": "192.0.2.4",
+ "100::20:30:50": "100::20:30:50",
+ "2001:db8::80:90, 100::100": "100::100",
+ "3fff::ff, 3fff::ee, 3fff::fe": "3fff::fe",
+ "3fff::3f,100::1000": "100::1000",
+ "2001:db8::88,100::88,3fff::88": "3fff::88",
+ "10.20.30.60, 2001:db8::60": "2001:db8::60",
+ "2001:db8::20,10.20.30.20": "10.20.30.20",
+ ", 10.20.30.123, 100::123": "100::123",
+ ",100::321,10.30.20.10": "10.30.20.10",
+ } {
+ req.Header.Set("X-Forwarded-For", xff)
+ actual := newFileEventLog(s.handler, req, filePath, nil, nil, "")
+ if !c.Check(actual, check.NotNil) {
+ continue
+ }
+ c.Check(actual.clientAddr, check.Equals, expected)
+ }
+}
+
+func (s *UnitSuite) TestLogXForwardedForMalformed(c *check.C) {
+ collURL := "http://keep-web.example/c=" + arvadostest.FooCollection
+ filePath := "/foo"
+ req := newRequest("GET", collURL+filePath)
+ for _, xff := range []string{"", ",", "10.20,30.40", "foo, bar"} {
+ req.Header.Set("X-Forwarded-For", xff)
+ actual := newFileEventLog(s.handler, req, filePath, nil, nil, "")
+ if !c.Check(actual, check.NotNil) {
+ continue
+ }
+ c.Check(actual.clientAddr, check.Equals, "10.20.30.40")
+ }
+}
+
+func (s *UnitSuite) TestLogXForwardedForMultivalue(c *check.C) {
+ collURL := "http://keep-web.example/c=" + arvadostest.FooCollection
+ filePath := "/foo"
+ req := newRequest("GET", collURL+filePath)
+ req.Header.Set("X-Forwarded-For", ", ")
+ req.Header.Add("X-Forwarded-For", "2001:db8::db9:dbd")
+ req.Header.Add("X-Forwarded-For", "10.20.30.90")
+ actual := newFileEventLog(s.handler, req, filePath, nil, nil, "")
+ c.Assert(actual, check.NotNil)
+ c.Check(actual.clientAddr, check.Equals, "10.20.30.90")
+}
+
+func (s *UnitSuite) TestLogClientAddressCanonicalization(c *check.C) {
+ collURL := "http://keep-web.example/c=" + arvadostest.FooCollection
+ filePath := "/foo"
+ req := newRequest("GET", collURL+filePath)
+ expected := "2001:db8::12:0"
+
+ req.RemoteAddr = "[2001:db8::012:0000]:57918"
+ a := newFileEventLog(s.handler, req, filePath, nil, nil, "")
+ c.Assert(a, check.NotNil)
+ c.Check(a.clientAddr, check.Equals, expected)
+
+ req.RemoteAddr = "10.20.30.40:57919"
+ req.Header.Set("X-Forwarded-For", "2001:db8:0::0:12:00")
+ b := newFileEventLog(s.handler, req, filePath, nil, nil, "")
+ c.Assert(b, check.NotNil)
+ c.Check(b.clientAddr, check.Equals, expected)
+}
+
+func (s *UnitSuite) TestLogAnonymousUser(c *check.C) {
+ collURL := "http://keep-web.example/c=" + arvadostest.FooCollection
+ filePath := "/foo"
+ req := newRequest("GET", collURL+filePath)
+ actual := newFileEventLog(s.handler, req, filePath, nil, nil, arvadostest.AnonymousToken)
+ c.Assert(actual, check.NotNil)
+ c.Check(actual.userUUID, check.Equals, s.handler.Cluster.ClusterID+"-tpzed-anonymouspublic")
+ c.Check(actual.userFullName, check.Equals, "")
+ c.Check(actual.clientToken, check.Equals, arvadostest.AnonymousToken)
+}
+
+func (s *UnitSuite) TestLogUser(c *check.C) {
+ collURL := "http://keep-web.example/c=" + arvadostest.FooCollection
+ for _, trial := range []struct{ uuid, fullName, token string }{
+ {arvadostest.ActiveUserUUID, "Active User", arvadostest.ActiveToken},
+ {arvadostest.SpectatorUserUUID, "Spectator User", arvadostest.SpectatorToken},
+ } {
+ filePath := "/" + trial.uuid
+ req := newRequest("GET", collURL+filePath)
+ user := &arvados.User{
+ UUID: trial.uuid,
+ FullName: trial.fullName,
+ }
+ actual := newFileEventLog(s.handler, req, filePath, nil, user, trial.token)
+ if !c.Check(actual, check.NotNil) {
+ continue
+ }
+ c.Check(actual.userUUID, check.Equals, trial.uuid)
+ c.Check(actual.userFullName, check.Equals, trial.fullName)
+ c.Check(actual.clientToken, check.Equals, trial.token)
+ }
+}
+
+func (s *UnitSuite) TestLogCollectionByUUID(c *check.C) {
+ for collUUID, collPDH := range arvadostest.TestCollectionUUIDToPDH {
+ collURL := "http://keep-web.example/c=" + collUUID
+ filePath := "/" + collUUID
+ req := newRequest("GET", collURL+filePath)
+ coll := newCollection(collUUID)
+ actual := newFileEventLog(s.handler, req, filePath, coll, nil, "")
+ if !c.Check(actual, check.NotNil) {
+ continue
+ }
+ c.Check(actual.collUUID, check.Equals, collUUID)
+ c.Check(actual.collPDH, check.Equals, collPDH)
+ }
+}
+
+func (s *UnitSuite) TestLogCollectionByPDH(c *check.C) {
+ for _, collPDH := range arvadostest.TestCollectionUUIDToPDH {
+ collURL := "http://keep-web.example/c=" + collPDH
+ filePath := "/PDHFile"
+ req := newRequest("GET", collURL+filePath)
+ coll := newCollection(collPDH)
+ actual := newFileEventLog(s.handler, req, filePath, coll, nil, "")
+ if !c.Check(actual, check.NotNil) {
+ continue
+ }
+ c.Check(actual.collPDH, check.Equals, collPDH)
+ c.Check(actual.collUUID, check.Equals, "")
+ }
+}
+
+func (s *UnitSuite) TestLogGETUUIDAsDict(c *check.C) {
+ filePath := "/foo"
+ reqPath := "/c=" + arvadostest.FooCollection + filePath
+ req := newRequest("GET", "http://keep-web.example"+reqPath)
+ coll := newCollection(arvadostest.FooCollection)
+ logEvent := newFileEventLog(s.handler, req, filePath, coll, nil, "")
+ c.Assert(logEvent, check.NotNil)
+ c.Check(logEvent.asDict(), check.DeepEquals, arvadosclient.Dict{
+ "event_type": "file_download",
+ "object_uuid": s.handler.Cluster.ClusterID + "-tpzed-anonymouspublic",
+ "properties": arvadosclient.Dict{
+ "reqPath": reqPath,
+ "collection_uuid": arvadostest.FooCollection,
+ "collection_file_path": filePath,
+ "portable_data_hash": arvadostest.FooCollectionPDH,
+ },
+ })
+}
+
+func (s *UnitSuite) TestLogGETPDHAsDict(c *check.C) {
+ filePath := "/Foo"
+ reqPath := "/c=" + arvadostest.FooCollectionPDH + filePath
+ req := newRequest("GET", "http://keep-web.example"+reqPath)
+ coll := newCollection(arvadostest.FooCollectionPDH)
+ user := &arvados.User{
+ UUID: arvadostest.ActiveUserUUID,
+ FullName: "Active User",
+ }
+ logEvent := newFileEventLog(s.handler, req, filePath, coll, user, "")
+ c.Assert(logEvent, check.NotNil)
+ c.Check(logEvent.asDict(), check.DeepEquals, arvadosclient.Dict{
+ "event_type": "file_download",
+ "object_uuid": arvadostest.ActiveUserUUID,
+ "properties": arvadosclient.Dict{
+ "reqPath": reqPath,
+ "portable_data_hash": arvadostest.FooCollectionPDH,
+ "collection_uuid": "",
+ "collection_file_path": filePath,
+ },
+ })
+}
+
+func (s *UnitSuite) TestLogUploadAsDict(c *check.C) {
+ coll := newCollection(arvadostest.FooCollection)
+ user := &arvados.User{
+ UUID: arvadostest.ActiveUserUUID,
+ FullName: "Active User",
+ }
+ for _, method := range []string{"POST", "PUT"} {
+ filePath := "/" + method + "File"
+ reqPath := "/c=" + arvadostest.FooCollection + filePath
+ req := newRequest(method, "http://keep-web.example"+reqPath)
+ logEvent := newFileEventLog(s.handler, req, filePath, coll, user, "")
+ if !c.Check(logEvent, check.NotNil) {
+ continue
+ }
+ c.Check(logEvent.asDict(), check.DeepEquals, arvadosclient.Dict{
+ "event_type": "file_upload",
+ "object_uuid": arvadostest.ActiveUserUUID,
+ "properties": arvadosclient.Dict{
+ "reqPath": reqPath,
+ "collection_uuid": arvadostest.FooCollection,
+ "collection_file_path": filePath,
+ },
+ })
+ }
+}
+
+func (s *UnitSuite) TestLogGETUUIDAsFields(c *check.C) {
+ filePath := "/foo"
+ reqPath := "/c=" + arvadostest.FooCollection + filePath
+ req := newRequest("GET", "http://keep-web.example"+reqPath)
+ coll := newCollection(arvadostest.FooCollection)
+ logEvent := newFileEventLog(s.handler, req, filePath, coll, nil, "")
+ c.Assert(logEvent, check.NotNil)
+ c.Check(logEvent.asFields(), check.DeepEquals, logrus.Fields{
+ "user_uuid": s.handler.Cluster.ClusterID + "-tpzed-anonymouspublic",
+ "collection_uuid": arvadostest.FooCollection,
+ "collection_file_path": filePath,
+ "portable_data_hash": arvadostest.FooCollectionPDH,
+ })
+}
+
+func (s *UnitSuite) TestLogGETPDHAsFields(c *check.C) {
+ filePath := "/Foo"
+ reqPath := "/c=" + arvadostest.FooCollectionPDH + filePath
+ req := newRequest("GET", "http://keep-web.example"+reqPath)
+ coll := newCollection(arvadostest.FooCollectionPDH)
+ user := &arvados.User{
+ UUID: arvadostest.ActiveUserUUID,
+ FullName: "Active User",
+ }
+ logEvent := newFileEventLog(s.handler, req, filePath, coll, user, "")
+ c.Assert(logEvent, check.NotNil)
+ c.Check(logEvent.asFields(), check.DeepEquals, logrus.Fields{
+ "user_uuid": arvadostest.ActiveUserUUID,
+ "user_full_name": "Active User",
+ "collection_uuid": "",
+ "collection_file_path": filePath,
+ "portable_data_hash": arvadostest.FooCollectionPDH,
+ })
+}
+
+func (s *UnitSuite) TestLogUploadAsFields(c *check.C) {
+ coll := newCollection(arvadostest.FooCollection)
+ user := &arvados.User{
+ UUID: arvadostest.ActiveUserUUID,
+ FullName: "Active User",
+ }
+ for _, method := range []string{"POST", "PUT"} {
+ filePath := "/" + method + "File"
+ reqPath := "/c=" + arvadostest.FooCollection + filePath
+ req := newRequest(method, "http://keep-web.example"+reqPath)
+ logEvent := newFileEventLog(s.handler, req, filePath, coll, user, "")
+ if !c.Check(logEvent, check.NotNil) {
+ continue
+ }
+ c.Check(logEvent.asFields(), check.DeepEquals, logrus.Fields{
+ "user_uuid": arvadostest.ActiveUserUUID,
+ "user_full_name": "Active User",
+ "collection_uuid": arvadostest.FooCollection,
+ "collection_file_path": filePath,
+ })
}
}
}
func (s *UnitSuite) TestEmptyResponse(c *check.C) {
+ // Ensure we start with an empty cache
+ defer os.Setenv("HOME", os.Getenv("HOME"))
+ os.Setenv("HOME", c.MkDir())
+ s.handler.Cluster.Collections.WebDAVLogDownloadInterval = arvados.Duration(0)
+
for _, trial := range []struct {
dataExists bool
sendIMSHeader bool
// the token is invalid.
type authorizer func(*http.Request, string) int
+// We still need to accept "OAuth2 ..." as equivalent to "Bearer ..."
+// for compatibility with older clients, including SDKs before 3.0.
func (s *IntegrationSuite) TestVhostViaAuthzHeaderOAuth2(c *check.C) {
s.doVhostRequests(c, authzViaAuthzHeaderOAuth2)
}
func authzViaAuthzHeaderOAuth2(r *http.Request, tok string) int {
- r.Header.Add("Authorization", "Bearer "+tok)
+ r.Header.Add("Authorization", "OAuth2 "+tok)
return http.StatusUnauthorized
}
+
func (s *IntegrationSuite) TestVhostViaAuthzHeaderBearer(c *check.C) {
s.doVhostRequests(c, authzViaAuthzHeaderBearer)
}
return http.StatusUnauthorized
}
+func (s *IntegrationSuite) TestVhostViaHTTPBasicAuth(c *check.C) {
+ s.doVhostRequests(c, authzViaHTTPBasicAuth)
+}
+func authzViaHTTPBasicAuth(r *http.Request, tok string) int {
+ r.AddCookie(&http.Cookie{
+ Name: "arvados_api_token",
+ Value: auth.EncodeTokenCookie([]byte(tok)),
+ })
+ return http.StatusUnauthorized
+}
+
+func (s *IntegrationSuite) TestVhostViaHTTPBasicAuthWithExtraSpaceChars(c *check.C) {
+ s.doVhostRequests(c, func(r *http.Request, tok string) int {
+ r.AddCookie(&http.Cookie{
+ Name: "arvados_api_token",
+ Value: auth.EncodeTokenCookie([]byte(" " + tok + "\n")),
+ })
+ return http.StatusUnauthorized
+ })
+}
+
func (s *IntegrationSuite) TestVhostViaPath(c *check.C) {
s.doVhostRequests(c, authzViaPath)
}
c.Check(resp.Header().Get("Content-Disposition"), check.Equals, "attachment")
}
+func (s *IntegrationSuite) TestVhostRedirectMultipleTokens(c *check.C) {
+ baseUrl := arvadostest.FooCollection + ".example.com/foo"
+ query := url.Values{}
+
+ // The intent of these tests is to check that requests are redirected
+ // correctly in the presence of multiple API tokens. The exact response
+ // codes and content are not closely considered: they're just how
+ // keep-web responded when we made the smallest possible fix. Changing
+ // those responses may be okay, but you should still test all these
+ // different cases and the associated redirect logic.
+ query["api_token"] = []string{arvadostest.ActiveToken, arvadostest.AnonymousToken}
+ s.testVhostRedirectTokenToCookie(c, "GET", baseUrl, "?"+query.Encode(), nil, "", http.StatusOK, "foo")
+ query["api_token"] = []string{arvadostest.ActiveToken, arvadostest.AnonymousToken, ""}
+ s.testVhostRedirectTokenToCookie(c, "GET", baseUrl, "?"+query.Encode(), nil, "", http.StatusOK, "foo")
+ query["api_token"] = []string{arvadostest.ActiveToken, "", arvadostest.AnonymousToken}
+ s.testVhostRedirectTokenToCookie(c, "GET", baseUrl, "?"+query.Encode(), nil, "", http.StatusOK, "foo")
+ query["api_token"] = []string{"", arvadostest.ActiveToken}
+ s.testVhostRedirectTokenToCookie(c, "GET", baseUrl, "?"+query.Encode(), nil, "", http.StatusOK, "foo")
+
+ expectContent := regexp.QuoteMeta(unauthorizedMessage + "\n")
+ query["api_token"] = []string{arvadostest.AnonymousToken, "invalidtoo"}
+ s.testVhostRedirectTokenToCookie(c, "GET", baseUrl, "?"+query.Encode(), nil, "", http.StatusUnauthorized, expectContent)
+ query["api_token"] = []string{arvadostest.AnonymousToken, ""}
+ s.testVhostRedirectTokenToCookie(c, "GET", baseUrl, "?"+query.Encode(), nil, "", http.StatusUnauthorized, expectContent)
+ query["api_token"] = []string{"", arvadostest.AnonymousToken}
+ s.testVhostRedirectTokenToCookie(c, "GET", baseUrl, "?"+query.Encode(), nil, "", http.StatusUnauthorized, expectContent)
+}
+
func (s *IntegrationSuite) TestVhostRedirectPOSTFormTokenToCookie(c *check.C) {
s.testVhostRedirectTokenToCookie(c, "POST",
arvadostest.FooCollection+".example.com/foo",
client.AuthToken = arvadostest.ActiveToken
fs, err := (&arvados.Collection{}).FileSystem(client, nil)
c.Assert(err, check.IsNil)
- f, err := fs.OpenFile("https:\\\"odd' path chars", os.O_CREATE, 0777)
+ path := `https:\\"odd' path chars`
+ f, err := fs.OpenFile(path, os.O_CREATE, 0777)
c.Assert(err, check.IsNil)
f.Close()
mtxt, err := fs.MarshalManifest(".")
resp := httptest.NewRecorder()
s.handler.ServeHTTP(resp, req)
c.Check(resp.Code, check.Equals, http.StatusOK)
- c.Check(resp.Body.String(), check.Matches, `(?ms).*href="./https:%5c%22odd%27%20path%20chars"\S+https:\\"odd' path chars.*`)
+ doc, err := html.Parse(resp.Body)
+ c.Assert(err, check.IsNil)
+ pathHrefMap := getPathHrefMap(doc)
+ c.Check(pathHrefMap, check.HasLen, 1) // the one leaf added to collection
+ href, hasPath := pathHrefMap[path]
+ c.Assert(hasPath, check.Equals, true) // the path is listed
+ relUrl := mustParseURL(href)
+ c.Check(relUrl.Path, check.Equals, "./"+path) // href can be decoded back to path
}
func (s *IntegrationSuite) TestForwardSlashSubstitution(c *check.C) {
s.handler.Cluster.Collections.ForwardSlashNameSubstitution = "{SOLIDUS}"
name := "foo/bar/baz"
nameShown := strings.Replace(name, "/", "{SOLIDUS}", -1)
- nameShownEscaped := strings.Replace(name, "/", "%7bSOLIDUS%7d", -1)
client := arvados.NewClientFromEnv()
client.AuthToken = arvadostest.ActiveToken
defer arv.RequestAndDecode(&coll, "DELETE", "arvados/v1/collections/"+coll.UUID, nil, nil)
base := "http://download.example.com/by_id/" + coll.OwnerUUID + "/"
- for tryURL, expectRegexp := range map[string]string{
- base: `(?ms).*href="./` + nameShownEscaped + `/"\S+` + nameShown + `.*`,
- base + nameShownEscaped + "/": `(?ms).*href="./filename"\S+filename.*`,
+ for tryURL, expectedAnchorText := range map[string]string{
+ base: nameShown + "/",
+ base + nameShown + "/": "filename",
} {
u, _ := url.Parse(tryURL)
req := &http.Request{
resp := httptest.NewRecorder()
s.handler.ServeHTTP(resp, req)
c.Check(resp.Code, check.Equals, http.StatusOK)
- c.Check(resp.Body.String(), check.Matches, expectRegexp)
+ doc, err := html.Parse(resp.Body)
+ c.Assert(err, check.IsNil) // valid HTML
+ pathHrefMap := getPathHrefMap(doc)
+ href, hasExpected := pathHrefMap[expectedAnchorText]
+ c.Assert(hasExpected, check.Equals, true) // has expected anchor text
+ c.Assert(href, check.Not(check.Equals), "")
+ relUrl := mustParseURL(href)
+ c.Check(relUrl.Path, check.Equals, "./"+expectedAnchorText) // decoded href maps back to the anchor text
}
}
s.handler.ServeHTTP(resp, req)
if resp.Code != http.StatusSeeOther {
+ attachment, _ := regexp.MatchString(`^attachment(;|$)`, resp.Header().Get("Content-Disposition"))
+ // Since we're not redirecting, check that any api_token in the URL is
+ // handled safely.
+ // If there is no token in the URL, then we're good.
+ // Otherwise, if the response code is an error, the body is expected to
+ // be static content, and nothing that might maliciously introspect the
+ // URL. It's considered safe and allowed.
+ // Otherwise, if the response content has attachment disposition,
+ // that's considered safe for all the reasons explained in the
+ // safeAttachment comment in handler.go.
+ c.Check(!u.Query().Has("api_token") || resp.Code >= 400 || attachment, check.Equals, true)
return resp
}
+
+ loc, err := url.Parse(resp.Header().Get("Location"))
+ c.Assert(err, check.IsNil)
+ c.Check(loc.Scheme, check.Equals, u.Scheme)
+ c.Check(loc.Host, check.Equals, u.Host)
+ c.Check(loc.RawPath, check.Equals, u.RawPath)
+ // If the response was a redirect, it should never include an API token.
+ c.Check(loc.Query().Has("api_token"), check.Equals, false)
c.Check(resp.Body.String(), check.Matches, `.*href="http://`+regexp.QuoteMeta(html.EscapeString(hostPath))+`(\?[^"]*)?".*`)
- c.Check(strings.Split(resp.Header().Get("Location"), "?")[0], check.Equals, "http://"+hostPath)
cookies := (&http.Response{Header: resp.Header()}).Cookies()
- u, err := u.Parse(resp.Header().Get("Location"))
- c.Assert(err, check.IsNil)
c.Logf("following redirect to %s", u)
req = &http.Request{
Method: "GET",
- Host: u.Host,
- URL: u,
- RequestURI: u.RequestURI(),
+ Host: loc.Host,
+ URL: loc,
+ RequestURI: loc.RequestURI(),
Header: reqHeader,
}
for _, c := range cookies {
}
func (s *IntegrationSuite) testDirectoryListing(c *check.C) {
+ // The "ownership cycle" test fixtures are reachable from the
+ // "filter group without filters" group, causing webdav's
+ // walkfs to recurse indefinitely. Avoid that by deleting one
+ // of the bogus fixtures.
+ arv := arvados.NewClientFromEnv()
+ err := arv.RequestAndDecode(nil, "DELETE", "arvados/v1/groups/zzzzz-j7d0g-cx2al9cqkmsf1hs", nil, nil)
+ if err != nil {
+ c.Assert(err, check.FitsTypeOf, &arvados.TransactionError{})
+ c.Check(err.(*arvados.TransactionError).StatusCode, check.Equals, 404)
+ }
+
s.handler.Cluster.Services.WebDAVDownload.ExternalURL.Host = "download.example.com"
authHeader := http.Header{
- "Authorization": {"OAuth2 " + arvadostest.ActiveToken},
+ "Authorization": {"Bearer " + arvadostest.ActiveToken},
}
for _, trial := range []struct {
uri string
expect: []string{"waz"},
cutDirs: 2,
},
+ {
+ uri: "download.example.com/users/active/This filter group/",
+ header: authHeader,
+ expect: []string{"A Subproject/"},
+ cutDirs: 3,
+ },
+ {
+ uri: "download.example.com/users/active/This filter group/A Subproject",
+ header: authHeader,
+ expect: []string{"baz_file/"},
+ cutDirs: 4,
+ },
+ {
+ uri: "download.example.com/by_id/" + arvadostest.AFilterGroupUUID,
+ header: authHeader,
+ expect: []string{"A Subproject/"},
+ cutDirs: 2,
+ },
+ {
+ uri: "download.example.com/by_id/" + arvadostest.AFilterGroupUUID + "/A Subproject",
+ header: authHeader,
+ expect: []string{"baz_file/"},
+ cutDirs: 3,
+ },
} {
- comment := check.Commentf("HTML: %q => %q", trial.uri, trial.expect)
+ comment := check.Commentf("HTML: %q redir %q => %q", trial.uri, trial.redirect, trial.expect)
resp := httptest.NewRecorder()
u := mustParseURL("//" + trial.uri)
req := &http.Request{
c.Check(resp.Code, check.Equals, http.StatusUnauthorized, comment)
} else {
c.Check(resp.Code, check.Equals, http.StatusOK, comment)
+ listingPageDoc, err := html.Parse(resp.Body)
+ c.Check(err, check.IsNil, comment) // valid HTML document
+ pathHrefMap := getPathHrefMap(listingPageDoc)
+ c.Assert(pathHrefMap, check.Not(check.HasLen), 0, comment)
for _, e := range trial.expect {
- c.Check(resp.Body.String(), check.Matches, `(?ms).*href="./`+e+`".*`, comment)
+ href, hasE := pathHrefMap[e]
+ c.Check(hasE, check.Equals, true, comment) // expected path is listed
+ relUrl := mustParseURL(href)
+ c.Check(relUrl.Path, check.Equals, "./"+e, comment) // href can be decoded back to path
}
- c.Check(resp.Body.String(), check.Matches, `(?ms).*--cut-dirs=`+fmt.Sprintf("%d", trial.cutDirs)+` .*`, comment)
+ wgetCommand := getWgetExamplePre(listingPageDoc)
+ wgetExpected := regexp.MustCompile(`^\$ wget .*--cut-dirs=(\d+) .*'(https?://[^']+)'$`)
+ wgetMatchGroups := wgetExpected.FindStringSubmatch(wgetCommand)
+ c.Assert(wgetMatchGroups, check.NotNil) // wget command matches
+ c.Check(wgetMatchGroups[1], check.Equals, fmt.Sprintf("%d", trial.cutDirs)) // correct level of cut dirs in wget command
+ printedUrl := mustParseURL(wgetMatchGroups[2])
+ c.Check(printedUrl.Host, check.Equals, req.URL.Host)
+ c.Check(printedUrl.Path, check.Equals, req.URL.Path) // URL arg in wget command can be decoded to the right path
}
comment = check.Commentf("WebDAV: %q => %q", trial.uri, trial.expect)
}
resp = httptest.NewRecorder()
s.handler.ServeHTTP(resp, req)
+ // This check avoids logging a big XML document in the
+ // event webdav throws a 500 error after sending
+ // headers for a 207.
+ if !c.Check(strings.HasSuffix(resp.Body.String(), "Internal Server Error"), check.Equals, false) {
+ continue
+ }
if trial.expect == nil {
c.Check(resp.Code, check.Equals, http.StatusUnauthorized, comment)
} else {
} else {
e = filepath.Join(u.Path, e)
}
+ e = strings.Replace(e, " ", "%20", -1)
c.Check(resp.Body.String(), check.Matches, `(?ms).*<D:href>`+e+`</D:href>.*`, comment)
}
}
}
}
+// Shallow-traverse the HTML document, gathering the nodes satisfying the
+// predicate function in the output slice. If a node matches the predicate,
+// none of its children will be visited.
+func getNodes(document *html.Node, predicate func(*html.Node) bool) []*html.Node {
+ var acc []*html.Node
+ var traverse func(*html.Node, []*html.Node) []*html.Node
+ traverse = func(root *html.Node, sofar []*html.Node) []*html.Node {
+ if root == nil {
+ return sofar
+ }
+ if predicate(root) {
+ return append(sofar, root)
+ }
+ for cur := root.FirstChild; cur != nil; cur = cur.NextSibling {
+ sofar = traverse(cur, sofar)
+ }
+ return sofar
+ }
+ return traverse(document, acc)
+}
+
+// Returns true if a node has the attribute targetAttr with the given value
+func matchesAttributeValue(node *html.Node, targetAttr string, value string) bool {
+ for _, attr := range node.Attr {
+ if attr.Key == targetAttr && attr.Val == value {
+ return true
+ }
+ }
+ return false
+}
+
+// Concatenate the content of text-node children of node; only direct
+// children are visited, and any non-text children are skipped.
+func getNodeText(node *html.Node) string {
+ var recv strings.Builder
+ for c := node.FirstChild; c != nil; c = c.NextSibling {
+ if c.Type == html.TextNode {
+ recv.WriteString(c.Data)
+ }
+ }
+ return recv.String()
+}
+
+// Returns a map from the directory listing item string (a path) to the href
+// value of its <a> tag (an encoded relative URL)
+func getPathHrefMap(document *html.Node) map[string]string {
+ isItemATag := func(node *html.Node) bool {
+ return node.Type == html.ElementNode && node.Data == "a" && matchesAttributeValue(node, "class", "item")
+ }
+ aTags := getNodes(document, isItemATag)
+ output := make(map[string]string)
+ for _, elem := range aTags {
+ textContent := getNodeText(elem)
+ for _, attr := range elem.Attr {
+ if attr.Key == "href" {
+ output[textContent] = attr.Val
+ break
+ }
+ }
+ }
+ return output
+}
+
+func getWgetExamplePre(document *html.Node) string {
+ isWgetPre := func(node *html.Node) bool {
+ return node.Type == html.ElementNode && matchesAttributeValue(node, "id", "wget-example")
+ }
+ elements := getNodes(document, isWgetPre)
+ if len(elements) != 1 {
+ return ""
+ }
+ return getNodeText(elements[0])
+}
+
func (s *IntegrationSuite) TestDeleteLastFile(c *check.C) {
arv := arvados.NewClientFromEnv()
var newCollection arvados.Collection
}
}
-func (s *IntegrationSuite) TestKeepClientBlockCache(c *check.C) {
- s.handler.Cluster.Collections.WebDAVCache.MaxBlockEntries = 42
- c.Check(keepclient.DefaultBlockCache.MaxBlocks, check.Not(check.Equals), 42)
- u := mustParseURL("http://keep-web.example/c=" + arvadostest.FooCollection + "/t=" + arvadostest.ActiveToken + "/foo")
- req := &http.Request{
- Method: "GET",
- Host: u.Host,
- URL: u,
- RequestURI: u.RequestURI(),
- }
+func (s *IntegrationSuite) TestCacheSize(c *check.C) {
+ req, err := http.NewRequest("GET", "http://"+arvadostest.FooCollection+".example.com/foo", nil)
+ req.Header.Set("Authorization", "Bearer "+arvadostest.ActiveTokenV2)
+ c.Assert(err, check.IsNil)
resp := httptest.NewRecorder()
s.handler.ServeHTTP(resp, req)
- c.Check(resp.Code, check.Equals, http.StatusOK)
- c.Check(keepclient.DefaultBlockCache.MaxBlocks, check.Equals, 42)
+ c.Assert(resp.Code, check.Equals, http.StatusOK)
+ c.Check(s.handler.Cache.sessions[arvadostest.ActiveTokenV2].client.DiskCacheSize.Percent(), check.Equals, int64(10))
}
// Writing to a collection shouldn't affect its entry in the
u := mustParseURL("http://" + arvadostest.FooCollection + ".keep-web.example/foo")
s.handler.Cluster.Collections.TrustAllContent = true
+ s.handler.Cluster.Collections.WebDAVLogDownloadInterval = arvados.Duration(0)
for _, adminperm := range []bool{true, false} {
for _, userperm := range []bool{true, false} {
}
}
+func (s *IntegrationSuite) serveAndLogRequests(c *check.C, reqs *map[*http.Request]int) *bytes.Buffer {
+ logbuf, ctx := newLoggerAndContext()
+ var wg sync.WaitGroup
+ for req, expectStatus := range *reqs {
+ req := req.WithContext(ctx)
+ expectStatus := expectStatus
+ wg.Add(1)
+ go func() {
+ defer wg.Done()
+ resp := httptest.NewRecorder()
+ s.handler.ServeHTTP(resp, req)
+ c.Check(resp.Result().StatusCode, check.Equals, expectStatus)
+ }()
+ }
+ wg.Wait()
+ return logbuf
+}
+
+func countLogMatches(c *check.C, logbuf *bytes.Buffer, pattern string, matchCount int) bool {
+ search, err := regexp.Compile(pattern)
+ if !c.Check(err, check.IsNil, check.Commentf("failed to compile regexp: %v", err)) {
+ return false
+ }
+ matches := search.FindAll(logbuf.Bytes(), -1)
+ return c.Check(matches, check.HasLen, matchCount,
+ check.Commentf("%d matching log messages: %+v", len(matches), matches))
+}
+
+func (s *IntegrationSuite) TestLogThrottling(c *check.C) {
+ s.handler.Cluster.Collections.WebDAVLogDownloadInterval = arvados.Duration(time.Hour)
+ fooURL := "http://" + arvadostest.FooCollection + ".keep-web.example/foo"
+ req := newRequest("GET", fooURL)
+ req.Header.Set("Authorization", "Bearer "+arvadostest.ActiveToken)
+ pattern := `\bmsg="File download".* collection_file_path=foo\b`
+
+ // All these requests get byte zero and should be logged.
+ reqs := make(map[*http.Request]int)
+ reqs[req] = http.StatusOK
+ for _, byterange := range []string{"0-2", "0-1", "0-", "-3"} {
+ req := req.Clone(context.Background())
+ req.Header.Set("Range", "bytes="+byterange)
+ reqs[req] = http.StatusPartialContent
+ }
+ logbuf := s.serveAndLogRequests(c, &reqs)
+ countLogMatches(c, logbuf, pattern, len(reqs))
+
+ // None of these requests get byte zero so they should all be throttled
+ // (now that we've made at least one request for byte zero).
+ reqs = make(map[*http.Request]int)
+ for _, byterange := range []string{"1-2", "1-", "2-", "-1", "-2"} {
+ req := req.Clone(context.Background())
+ req.Header.Set("Range", "bytes="+byterange)
+ reqs[req] = http.StatusPartialContent
+ }
+ logbuf = s.serveAndLogRequests(c, &reqs)
+ countLogMatches(c, logbuf, pattern, 0)
+}
+
+func (s *IntegrationSuite) TestLogThrottleInterval(c *check.C) {
+ s.handler.Cluster.Collections.WebDAVLogDownloadInterval = arvados.Duration(time.Nanosecond)
+ logbuf, ctx := newLoggerAndContext()
+ req := newRequest("GET", "http://"+arvadostest.FooCollection+".keep-web.example/foo")
+ req.Header.Set("Authorization", "Bearer "+arvadostest.ActiveToken)
+ req = req.WithContext(ctx)
+
+ re := regexp.MustCompile(`\bmsg="File download".* collection_file_path=foo\b`)
+ for expected := 1; expected < 4; expected++ {
+ time.Sleep(2 * time.Nanosecond)
+ resp := httptest.NewRecorder()
+ s.handler.ServeHTTP(resp, req)
+ c.Assert(resp.Result().StatusCode, check.Equals, http.StatusOK)
+ matches := re.FindAll(logbuf.Bytes(), -1)
+ c.Assert(matches, check.HasLen, expected,
+ check.Commentf("%d matching log messages: %+v", len(matches), matches))
+ }
+}
+
+func (s *IntegrationSuite) TestLogThrottleDifferentTokens(c *check.C) {
+ s.handler.Cluster.Collections.WebDAVLogDownloadInterval = arvados.Duration(time.Hour)
+ req := newRequest("GET", "http://"+arvadostest.FooCollection+".keep-web.example/foo")
+ reqs := make(map[*http.Request]int)
+ for _, token := range []string{arvadostest.ActiveToken, arvadostest.AdminToken} {
+ req := req.Clone(context.Background())
+ req.Header.Set("Authorization", "Bearer "+token)
+ reqs[req] = http.StatusOK
+ }
+ logbuf := s.serveAndLogRequests(c, &reqs)
+ countLogMatches(c, logbuf, `\bmsg="File download".* collection_file_path=foo\b`, len(reqs))
+}
+
+func (s *IntegrationSuite) TestLogThrottleDifferentFiles(c *check.C) {
+ s.handler.Cluster.Collections.WebDAVLogDownloadInterval = arvados.Duration(time.Hour)
+ baseURL := "http://" + arvadostest.MultilevelCollection1 + ".keep-web.example/"
+ reqs := make(map[*http.Request]int)
+ for _, filename := range []string{"file1", "file2", "file3"} {
+ req := newRequest("GET", baseURL+filename)
+ req.Header.Set("Authorization", "Bearer "+arvadostest.ActiveToken)
+ reqs[req] = http.StatusOK
+ }
+ logbuf := s.serveAndLogRequests(c, &reqs)
+ countLogMatches(c, logbuf, `\bmsg="File download".* collection_uuid=`+arvadostest.MultilevelCollection1+`\b`, len(reqs))
+}
+
+func (s *IntegrationSuite) TestLogThrottleDifferentSources(c *check.C) {
+ s.handler.Cluster.Collections.WebDAVLogDownloadInterval = arvados.Duration(time.Hour)
+ req := newRequest("GET", "http://"+arvadostest.FooCollection+".keep-web.example/foo")
+ req.Header.Set("Authorization", "Bearer "+arvadostest.ActiveToken)
+ reqs := make(map[*http.Request]int)
+ reqs[req] = http.StatusOK
+ for _, xff := range []string{"10.22.33.44", "100::123"} {
+ req := req.Clone(context.Background())
+ req.Header.Set("X-Forwarded-For", xff)
+ reqs[req] = http.StatusOK
+ }
+ logbuf := s.serveAndLogRequests(c, &reqs)
+ countLogMatches(c, logbuf, `\bmsg="File download".* collection_file_path=foo\b`, len(reqs))
+}
+
func (s *IntegrationSuite) TestConcurrentWrites(c *check.C) {
s.handler.Cluster.Collections.WebDAVCache.TTL = arvados.Duration(time.Second * 2)
- lockTidyInterval = time.Second
client := arvados.NewClientFromEnv()
client.AuthToken = arvadostest.ActiveTokenV2
+ var handler http.Handler = s.handler
+ // handler = httpserver.AddRequestIDs(httpserver.LogRequests(s.handler)) // ...to enable request logging in test output
+
+ // Each file we upload will consist of some unique content
+ // followed by 2 MiB of filler content.
+ filler := "."
+ for i := 0; i < 21; i++ {
+ filler += filler
+ }
+
// Start small, and increase concurrency (2^2, 4^2, ...)
// only until hitting failure. Avoids unnecessarily long
// failure reports.
req, err := http.NewRequest("MKCOL", u.String(), nil)
c.Assert(err, check.IsNil)
req.Header.Set("Authorization", "Bearer "+client.AuthToken)
- s.handler.ServeHTTP(resp, req)
+ handler.ServeHTTP(resp, req)
c.Assert(resp.Code, check.Equals, http.StatusCreated)
for j := 0; j < n && !c.Failed(); j++ {
j := j
u := mustParseURL("http://" + coll.UUID + ".collections.example.com/" + content)
resp := httptest.NewRecorder()
- req, err := http.NewRequest("PUT", u.String(), strings.NewReader(content))
+ req, err := http.NewRequest("PUT", u.String(), strings.NewReader(content+filler))
c.Assert(err, check.IsNil)
req.Header.Set("Authorization", "Bearer "+client.AuthToken)
- s.handler.ServeHTTP(resp, req)
+ handler.ServeHTTP(resp, req)
c.Check(resp.Code, check.Equals, http.StatusCreated)
time.Sleep(time.Second)
req, err = http.NewRequest("GET", u.String(), nil)
c.Assert(err, check.IsNil)
req.Header.Set("Authorization", "Bearer "+client.AuthToken)
- s.handler.ServeHTTP(resp, req)
+ handler.ServeHTTP(resp, req)
c.Check(resp.Code, check.Equals, http.StatusOK)
- c.Check(resp.Body.String(), check.Equals, content)
+ c.Check(strings.TrimSuffix(resp.Body.String(), filler), check.Equals, content)
}()
}
}()
}
}
}
+
+func (s *IntegrationSuite) TestDepthHeader(c *check.C) {
+ s.handler.Cluster.Collections.WebDAVCache.TTL = arvados.Duration(time.Second * 2)
+ client := arvados.NewClientFromEnv()
+ client.AuthToken = arvadostest.ActiveTokenV2
+
+ var coll arvados.Collection
+ err := client.RequestAndDecode(&coll, "POST", "arvados/v1/collections", nil, nil)
+ c.Assert(err, check.IsNil)
+ defer client.RequestAndDecode(&coll, "DELETE", "arvados/v1/collections/"+coll.UUID, nil, nil)
+ base := "http://" + coll.UUID + ".collections.example.com/"
+
+ for _, trial := range []struct {
+ method string
+ path string
+ destination string
+ depth string
+ expectCode int // 0 means expect 2xx
+ }{
+ // setup...
+ {method: "MKCOL", path: "dir"},
+ {method: "PUT", path: "dir/file"},
+ {method: "MKCOL", path: "dir/dir2"},
+ // delete with no depth = OK
+ {method: "DELETE", path: "dir/dir2", depth: ""},
+ // delete with depth other than infinity = fail
+ {method: "DELETE", path: "dir", depth: "0", expectCode: 400},
+ {method: "DELETE", path: "dir", depth: "1", expectCode: 400},
+ // delete with depth infinity = OK
+ {method: "DELETE", path: "dir", depth: "infinity"},
+
+ // setup...
+ {method: "MKCOL", path: "dir"},
+ {method: "PUT", path: "dir/file"},
+ {method: "MKCOL", path: "dir/dir2"},
+ // move with depth other than infinity = fail
+ {method: "MOVE", path: "dir", destination: "moved", depth: "0", expectCode: 400},
+ {method: "MOVE", path: "dir", destination: "moved", depth: "1", expectCode: 400},
+ // move with depth infinity = OK
+ {method: "MOVE", path: "dir", destination: "moved", depth: "infinity"},
+ {method: "DELETE", path: "moved"},
+
+ // setup...
+ {method: "MKCOL", path: "dir"},
+ {method: "PUT", path: "dir/file"},
+ {method: "MKCOL", path: "dir/dir2"},
+ // copy with depth 0 = create empty destination dir
+ {method: "COPY", path: "dir/", destination: "copied-empty/", depth: "0"},
+ {method: "DELETE", path: "copied-empty/file", expectCode: 404},
+ {method: "DELETE", path: "copied-empty"},
+ // copy with depth 0 = create empty destination dir
+ // (destination dir has no trailing slash this time)
+ {method: "COPY", path: "dir/", destination: "copied-empty-noslash", depth: "0"},
+ {method: "DELETE", path: "copied-empty-noslash/file", expectCode: 404},
+ {method: "DELETE", path: "copied-empty-noslash"},
+ // copy with depth 0 = create empty destination dir
+ // (source dir has no trailing slash this time)
+ {method: "COPY", path: "dir", destination: "copied-empty-noslash", depth: "0"},
+ {method: "DELETE", path: "copied-empty-noslash/file", expectCode: 404},
+ {method: "DELETE", path: "copied-empty-noslash"},
+ // copy with depth 1 = fail
+ {method: "COPY", path: "dir", destination: "copied", depth: "1", expectCode: 400},
+ // copy with depth infinity = copy entire subtree
+ {method: "COPY", path: "dir/", destination: "copied", depth: "infinity"},
+ {method: "DELETE", path: "copied/file"},
+ {method: "DELETE", path: "copied"},
+ // copy with depth infinity = copy entire subtree
+ // (source dir has no trailing slash this time)
+ {method: "COPY", path: "dir", destination: "copied", depth: "infinity"},
+ {method: "DELETE", path: "copied/file"},
+ {method: "DELETE", path: "copied"},
+ // cleanup
+ {method: "DELETE", path: "dir"},
+ } {
+ c.Logf("trial %+v", trial)
+ resp := httptest.NewRecorder()
+ req, err := http.NewRequest(trial.method, base+trial.path, strings.NewReader(""))
+ c.Assert(err, check.IsNil)
+ req.Header.Set("Authorization", "Bearer "+client.AuthToken)
+ if trial.destination != "" {
+ req.Header.Set("Destination", base+trial.destination)
+ }
+ if trial.depth != "" {
+ req.Header.Set("Depth", trial.depth)
+ }
+ s.handler.ServeHTTP(resp, req)
+ if trial.expectCode != 0 {
+ c.Assert(resp.Code, check.Equals, trial.expectCode)
+ } else {
+ c.Assert(resp.Code >= 200, check.Equals, true, check.Commentf("got code %d", resp.Code))
+ c.Assert(resp.Code < 300, check.Equals, true, check.Commentf("got code %d", resp.Code))
+ }
+ c.Logf("resp.Body: %q", resp.Body.String())
+ }
+}