Merge branch '21901-file-log-throttling'
[arvados.git] / services / keep-web / handler_test.go
index 5a12e26e9dcdf75ffd1e3e0b7a90ec7176fbe36e..8926a1a781a3a043e11574940467be62d8ac301f 100644 (file)
@@ -8,7 +8,6 @@ import (
        "bytes"
        "context"
        "fmt"
-       "html"
        "io"
        "io/ioutil"
        "net/http"
@@ -30,6 +29,7 @@ import (
        "git.arvados.org/arvados.git/sdk/go/keepclient"
        "github.com/prometheus/client_golang/prometheus"
        "github.com/sirupsen/logrus"
+       "golang.org/x/net/html"
        check "gopkg.in/check.v1"
 )
 
@@ -60,6 +60,366 @@ func (s *UnitSuite) SetUpTest(c *check.C) {
                        logger:   logger,
                        registry: prometheus.NewRegistry(),
                },
+               metrics: newMetrics(prometheus.NewRegistry()),
+       }
+}
+
+func newCollection(collID string) *arvados.Collection {
+       coll := &arvados.Collection{UUID: collID}
+       manifestKey := collID
+       if pdh, ok := arvadostest.TestCollectionUUIDToPDH[collID]; ok {
+               coll.PortableDataHash = pdh
+               manifestKey = pdh
+       }
+       if mtext, ok := arvadostest.TestCollectionPDHToManifest[manifestKey]; ok {
+               coll.ManifestText = mtext
+       }
+       return coll
+}
+
+func newRequest(method, urlStr string) *http.Request {
+       u := mustParseURL(urlStr)
+       return &http.Request{
+               Method:     method,
+               Host:       u.Host,
+               URL:        u,
+               RequestURI: u.RequestURI(),
+               RemoteAddr: "10.20.30.40:56789",
+               Header:     http.Header{},
+       }
+}
+
+func newLoggerAndContext() (*bytes.Buffer, context.Context) {
+       var logbuf bytes.Buffer
+       logger := logrus.New()
+       logger.Out = &logbuf
+       return &logbuf, ctxlog.Context(context.Background(), logger)
+}
+
+func (s *UnitSuite) TestLogEventTypes(c *check.C) {
+       collURL := "http://keep-web.example/c=" + arvadostest.FooCollection
+       for method, expected := range map[string]string{
+               "GET":  "file_download",
+               "POST": "file_upload",
+               "PUT":  "file_upload",
+       } {
+               filePath := "/" + method
+               req := newRequest(method, collURL+filePath)
+               actual := newFileEventLog(s.handler, req, filePath, nil, nil, "")
+               if !c.Check(actual, check.NotNil) {
+                       continue
+               }
+               c.Check(actual.eventType, check.Equals, expected)
+       }
+}
+
+func (s *UnitSuite) TestUnloggedEventTypes(c *check.C) {
+       collURL := "http://keep-web.example/c=" + arvadostest.FooCollection
+       for _, method := range []string{"DELETE", "HEAD", "OPTIONS", "PATCH"} {
+               filePath := "/" + method
+               req := newRequest(method, collURL+filePath)
+               actual := newFileEventLog(s.handler, req, filePath, nil, nil, "")
+               c.Check(actual, check.IsNil,
+                       check.Commentf("%s request made a log event", method))
+       }
+}
+
+func (s *UnitSuite) TestLogFilePath(c *check.C) {
+       coll := newCollection(arvadostest.FooCollection)
+       collURL := "http://keep-web.example/c=" + arvadostest.FooCollection
+       for _, filePath := range []string{"/foo", "/Foo", "/foo/bar"} {
+               req := newRequest("GET", collURL+filePath)
+               actual := newFileEventLog(s.handler, req, filePath, coll, nil, "")
+               if !c.Check(actual, check.NotNil) {
+                       continue
+               }
+               c.Check(actual.collFilePath, check.Equals, filePath)
+       }
+}
+
+func (s *UnitSuite) TestLogRemoteAddr(c *check.C) {
+       collURL := "http://keep-web.example/c=" + arvadostest.FooCollection
+       filePath := "/foo"
+       req := newRequest("GET", collURL+filePath)
+
+       for _, addr := range []string{"10.20.30.55", "192.168.144.120", "192.0.2.4"} {
+               req.RemoteAddr = addr + ":57914"
+               actual := newFileEventLog(s.handler, req, filePath, nil, nil, "")
+               if !c.Check(actual, check.NotNil) {
+                       continue
+               }
+               c.Check(actual.clientAddr, check.Equals, addr)
+       }
+
+       for _, addr := range []string{"100::20:30:40", "2001:db8::90:100", "3fff::30"} {
+               req.RemoteAddr = fmt.Sprintf("[%s]:57916", addr)
+               actual := newFileEventLog(s.handler, req, filePath, nil, nil, "")
+               if !c.Check(actual, check.NotNil) {
+                       continue
+               }
+               c.Check(actual.clientAddr, check.Equals, addr)
+       }
+}
+
+func (s *UnitSuite) TestLogXForwardedFor(c *check.C) {
+       collURL := "http://keep-web.example/c=" + arvadostest.FooCollection
+       filePath := "/foo"
+       req := newRequest("GET", collURL+filePath)
+       for xff, expected := range map[string]string{
+               "10.20.30.55":                          "10.20.30.55",
+               "192.168.144.120, 10.20.30.120":        "10.20.30.120",
+               "192.0.2.4, 192.0.2.6, 192.0.2.8":      "192.0.2.8",
+               "192.0.2.4,192.168.2.4":                "192.168.2.4",
+               "10.20.30.60,192.168.144.40,192.0.2.4": "192.0.2.4",
+               "100::20:30:50":                        "100::20:30:50",
+               "2001:db8::80:90, 100::100":            "100::100",
+               "3fff::ff, 3fff::ee, 3fff::fe":         "3fff::fe",
+               "3fff::3f,100::1000":                   "100::1000",
+               "2001:db8::88,100::88,3fff::88":        "3fff::88",
+               "10.20.30.60, 2001:db8::60":            "2001:db8::60",
+               "2001:db8::20,10.20.30.20":             "10.20.30.20",
+               ", 10.20.30.123, 100::123":             "100::123",
+               ",100::321,10.30.20.10":                "10.30.20.10",
+       } {
+               req.Header.Set("X-Forwarded-For", xff)
+               actual := newFileEventLog(s.handler, req, filePath, nil, nil, "")
+               if !c.Check(actual, check.NotNil) {
+                       continue
+               }
+               c.Check(actual.clientAddr, check.Equals, expected)
+       }
+}
+
+func (s *UnitSuite) TestLogXForwardedForMalformed(c *check.C) {
+       collURL := "http://keep-web.example/c=" + arvadostest.FooCollection
+       filePath := "/foo"
+       req := newRequest("GET", collURL+filePath)
+       for _, xff := range []string{"", ",", "10.20,30.40", "foo, bar"} {
+               req.Header.Set("X-Forwarded-For", xff)
+               actual := newFileEventLog(s.handler, req, filePath, nil, nil, "")
+               if !c.Check(actual, check.NotNil) {
+                       continue
+               }
+               c.Check(actual.clientAddr, check.Equals, "10.20.30.40")
+       }
+}
+
+func (s *UnitSuite) TestLogXForwardedForMultivalue(c *check.C) {
+       collURL := "http://keep-web.example/c=" + arvadostest.FooCollection
+       filePath := "/foo"
+       req := newRequest("GET", collURL+filePath)
+       req.Header.Set("X-Forwarded-For", ", ")
+       req.Header.Add("X-Forwarded-For", "2001:db8::db9:dbd")
+       req.Header.Add("X-Forwarded-For", "10.20.30.90")
+       actual := newFileEventLog(s.handler, req, filePath, nil, nil, "")
+       c.Assert(actual, check.NotNil)
+       c.Check(actual.clientAddr, check.Equals, "10.20.30.90")
+}
+
+func (s *UnitSuite) TestLogClientAddressCanonicalization(c *check.C) {
+       collURL := "http://keep-web.example/c=" + arvadostest.FooCollection
+       filePath := "/foo"
+       req := newRequest("GET", collURL+filePath)
+       expected := "2001:db8::12:0"
+
+       req.RemoteAddr = "[2001:db8::012:0000]:57918"
+       a := newFileEventLog(s.handler, req, filePath, nil, nil, "")
+       c.Assert(a, check.NotNil)
+       c.Check(a.clientAddr, check.Equals, expected)
+
+       req.RemoteAddr = "10.20.30.40:57919"
+       req.Header.Set("X-Forwarded-For", "2001:db8:0::0:12:00")
+       b := newFileEventLog(s.handler, req, filePath, nil, nil, "")
+       c.Assert(b, check.NotNil)
+       c.Check(b.clientAddr, check.Equals, expected)
+}
+
+func (s *UnitSuite) TestLogAnonymousUser(c *check.C) {
+       collURL := "http://keep-web.example/c=" + arvadostest.FooCollection
+       filePath := "/foo"
+       req := newRequest("GET", collURL+filePath)
+       actual := newFileEventLog(s.handler, req, filePath, nil, nil, arvadostest.AnonymousToken)
+       c.Assert(actual, check.NotNil)
+       c.Check(actual.userUUID, check.Equals, s.handler.Cluster.ClusterID+"-tpzed-anonymouspublic")
+       c.Check(actual.userFullName, check.Equals, "")
+       c.Check(actual.clientToken, check.Equals, arvadostest.AnonymousToken)
+}
+
+func (s *UnitSuite) TestLogUser(c *check.C) {
+       collURL := "http://keep-web.example/c=" + arvadostest.FooCollection
+       for _, trial := range []struct{ uuid, fullName, token string }{
+               {arvadostest.ActiveUserUUID, "Active User", arvadostest.ActiveToken},
+               {arvadostest.SpectatorUserUUID, "Spectator User", arvadostest.SpectatorToken},
+       } {
+               filePath := "/" + trial.uuid
+               req := newRequest("GET", collURL+filePath)
+               user := &arvados.User{
+                       UUID:     trial.uuid,
+                       FullName: trial.fullName,
+               }
+               actual := newFileEventLog(s.handler, req, filePath, nil, user, trial.token)
+               if !c.Check(actual, check.NotNil) {
+                       continue
+               }
+               c.Check(actual.userUUID, check.Equals, trial.uuid)
+               c.Check(actual.userFullName, check.Equals, trial.fullName)
+               c.Check(actual.clientToken, check.Equals, trial.token)
+       }
+}
+
+func (s *UnitSuite) TestLogCollectionByUUID(c *check.C) {
+       for collUUID, collPDH := range arvadostest.TestCollectionUUIDToPDH {
+               collURL := "http://keep-web.example/c=" + collUUID
+               filePath := "/" + collUUID
+               req := newRequest("GET", collURL+filePath)
+               coll := newCollection(collUUID)
+               actual := newFileEventLog(s.handler, req, filePath, coll, nil, "")
+               if !c.Check(actual, check.NotNil) {
+                       continue
+               }
+               c.Check(actual.collUUID, check.Equals, collUUID)
+               c.Check(actual.collPDH, check.Equals, collPDH)
+       }
+}
+
+func (s *UnitSuite) TestLogCollectionByPDH(c *check.C) {
+       for _, collPDH := range arvadostest.TestCollectionUUIDToPDH {
+               collURL := "http://keep-web.example/c=" + collPDH
+               filePath := "/PDHFile"
+               req := newRequest("GET", collURL+filePath)
+               coll := newCollection(collPDH)
+               actual := newFileEventLog(s.handler, req, filePath, coll, nil, "")
+               if !c.Check(actual, check.NotNil) {
+                       continue
+               }
+               c.Check(actual.collPDH, check.Equals, collPDH)
+               c.Check(actual.collUUID, check.Equals, "")
+       }
+}
+
+func (s *UnitSuite) TestLogGETUUIDAsDict(c *check.C) {
+       filePath := "/foo"
+       reqPath := "/c=" + arvadostest.FooCollection + filePath
+       req := newRequest("GET", "http://keep-web.example"+reqPath)
+       coll := newCollection(arvadostest.FooCollection)
+       logEvent := newFileEventLog(s.handler, req, filePath, coll, nil, "")
+       c.Assert(logEvent, check.NotNil)
+       c.Check(logEvent.asDict(), check.DeepEquals, arvadosclient.Dict{
+               "event_type":  "file_download",
+               "object_uuid": s.handler.Cluster.ClusterID + "-tpzed-anonymouspublic",
+               "properties": arvadosclient.Dict{
+                       "reqPath":              reqPath,
+                       "collection_uuid":      arvadostest.FooCollection,
+                       "collection_file_path": filePath,
+                       "portable_data_hash":   arvadostest.FooCollectionPDH,
+               },
+       })
+}
+
+func (s *UnitSuite) TestLogGETPDHAsDict(c *check.C) {
+       filePath := "/Foo"
+       reqPath := "/c=" + arvadostest.FooCollectionPDH + filePath
+       req := newRequest("GET", "http://keep-web.example"+reqPath)
+       coll := newCollection(arvadostest.FooCollectionPDH)
+       user := &arvados.User{
+               UUID:     arvadostest.ActiveUserUUID,
+               FullName: "Active User",
+       }
+       logEvent := newFileEventLog(s.handler, req, filePath, coll, user, "")
+       c.Assert(logEvent, check.NotNil)
+       c.Check(logEvent.asDict(), check.DeepEquals, arvadosclient.Dict{
+               "event_type":  "file_download",
+               "object_uuid": arvadostest.ActiveUserUUID,
+               "properties": arvadosclient.Dict{
+                       "reqPath":              reqPath,
+                       "portable_data_hash":   arvadostest.FooCollectionPDH,
+                       "collection_uuid":      "",
+                       "collection_file_path": filePath,
+               },
+       })
+}
+
+func (s *UnitSuite) TestLogUploadAsDict(c *check.C) {
+       coll := newCollection(arvadostest.FooCollection)
+       user := &arvados.User{
+               UUID:     arvadostest.ActiveUserUUID,
+               FullName: "Active User",
+       }
+       for _, method := range []string{"POST", "PUT"} {
+               filePath := "/" + method + "File"
+               reqPath := "/c=" + arvadostest.FooCollection + filePath
+               req := newRequest(method, "http://keep-web.example"+reqPath)
+               logEvent := newFileEventLog(s.handler, req, filePath, coll, user, "")
+               if !c.Check(logEvent, check.NotNil) {
+                       continue
+               }
+               c.Check(logEvent.asDict(), check.DeepEquals, arvadosclient.Dict{
+                       "event_type":  "file_upload",
+                       "object_uuid": arvadostest.ActiveUserUUID,
+                       "properties": arvadosclient.Dict{
+                               "reqPath":              reqPath,
+                               "collection_uuid":      arvadostest.FooCollection,
+                               "collection_file_path": filePath,
+                       },
+               })
+       }
+}
+
+func (s *UnitSuite) TestLogGETUUIDAsFields(c *check.C) {
+       filePath := "/foo"
+       reqPath := "/c=" + arvadostest.FooCollection + filePath
+       req := newRequest("GET", "http://keep-web.example"+reqPath)
+       coll := newCollection(arvadostest.FooCollection)
+       logEvent := newFileEventLog(s.handler, req, filePath, coll, nil, "")
+       c.Assert(logEvent, check.NotNil)
+       c.Check(logEvent.asFields(), check.DeepEquals, logrus.Fields{
+               "user_uuid":            s.handler.Cluster.ClusterID + "-tpzed-anonymouspublic",
+               "collection_uuid":      arvadostest.FooCollection,
+               "collection_file_path": filePath,
+               "portable_data_hash":   arvadostest.FooCollectionPDH,
+       })
+}
+
+func (s *UnitSuite) TestLogGETPDHAsFields(c *check.C) {
+       filePath := "/Foo"
+       reqPath := "/c=" + arvadostest.FooCollectionPDH + filePath
+       req := newRequest("GET", "http://keep-web.example"+reqPath)
+       coll := newCollection(arvadostest.FooCollectionPDH)
+       user := &arvados.User{
+               UUID:     arvadostest.ActiveUserUUID,
+               FullName: "Active User",
+       }
+       logEvent := newFileEventLog(s.handler, req, filePath, coll, user, "")
+       c.Assert(logEvent, check.NotNil)
+       c.Check(logEvent.asFields(), check.DeepEquals, logrus.Fields{
+               "user_uuid":            arvadostest.ActiveUserUUID,
+               "user_full_name":       "Active User",
+               "collection_uuid":      "",
+               "collection_file_path": filePath,
+               "portable_data_hash":   arvadostest.FooCollectionPDH,
+       })
+}
+
+func (s *UnitSuite) TestLogUploadAsFields(c *check.C) {
+       coll := newCollection(arvadostest.FooCollection)
+       user := &arvados.User{
+               UUID:     arvadostest.ActiveUserUUID,
+               FullName: "Active User",
+       }
+       for _, method := range []string{"POST", "PUT"} {
+               filePath := "/" + method + "File"
+               reqPath := "/c=" + arvadostest.FooCollection + filePath
+               req := newRequest(method, "http://keep-web.example"+reqPath)
+               logEvent := newFileEventLog(s.handler, req, filePath, coll, user, "")
+               if !c.Check(logEvent, check.NotNil) {
+                       continue
+               }
+               c.Check(logEvent.asFields(), check.DeepEquals, logrus.Fields{
+                       "user_uuid":            arvadostest.ActiveUserUUID,
+                       "user_full_name":       "Active User",
+                       "collection_uuid":      arvadostest.FooCollection,
+                       "collection_file_path": filePath,
+               })
        }
 }
 
@@ -209,6 +569,11 @@ func (s *UnitSuite) TestWebdavPrefixAndSource(c *check.C) {
 }
 
 func (s *UnitSuite) TestEmptyResponse(c *check.C) {
+       // Ensure we start with an empty cache
+       defer os.Setenv("HOME", os.Getenv("HOME"))
+       os.Setenv("HOME", c.MkDir())
+       s.handler.Cluster.Collections.WebDAVLogDownloadInterval = arvados.Duration(0)
+
        for _, trial := range []struct {
                dataExists    bool
                sendIMSHeader bool
@@ -324,6 +689,8 @@ func (s *IntegrationSuite) TestVhost404(c *check.C) {
 // the token is invalid.
 type authorizer func(*http.Request, string) int
 
+// We still need to accept "OAuth2 ..." as equivalent to "Bearer ..."
+// for compatibility with older clients, including SDKs before 3.0.
 func (s *IntegrationSuite) TestVhostViaAuthzHeaderOAuth2(c *check.C) {
        s.doVhostRequests(c, authzViaAuthzHeaderOAuth2)
 }
@@ -900,7 +1267,8 @@ func (s *IntegrationSuite) TestSpecialCharsInPath(c *check.C) {
        client.AuthToken = arvadostest.ActiveToken
        fs, err := (&arvados.Collection{}).FileSystem(client, nil)
        c.Assert(err, check.IsNil)
-       f, err := fs.OpenFile("https:\\\"odd' path chars", os.O_CREATE, 0777)
+       path := `https:\\"odd' path chars`
+       f, err := fs.OpenFile(path, os.O_CREATE, 0777)
        c.Assert(err, check.IsNil)
        f.Close()
        mtxt, err := fs.MarshalManifest(".")
@@ -926,7 +1294,14 @@ func (s *IntegrationSuite) TestSpecialCharsInPath(c *check.C) {
        resp := httptest.NewRecorder()
        s.handler.ServeHTTP(resp, req)
        c.Check(resp.Code, check.Equals, http.StatusOK)
-       c.Check(resp.Body.String(), check.Matches, `(?ms).*href="./https:%5c%22odd%27%20path%20chars"\S+https:\\"odd' path chars.*`)
+       doc, err := html.Parse(resp.Body)
+       c.Assert(err, check.IsNil)
+       pathHrefMap := getPathHrefMap(doc)
+       c.Check(pathHrefMap, check.HasLen, 1) // the one leaf added to collection
+       href, hasPath := pathHrefMap[path]
+       c.Assert(hasPath, check.Equals, true) // the path is listed
+       relUrl := mustParseURL(href)
+       c.Check(relUrl.Path, check.Equals, "./"+path) // href can be decoded back to path
 }
 
 func (s *IntegrationSuite) TestForwardSlashSubstitution(c *check.C) {
@@ -935,7 +1310,6 @@ func (s *IntegrationSuite) TestForwardSlashSubstitution(c *check.C) {
        s.handler.Cluster.Collections.ForwardSlashNameSubstitution = "{SOLIDUS}"
        name := "foo/bar/baz"
        nameShown := strings.Replace(name, "/", "{SOLIDUS}", -1)
-       nameShownEscaped := strings.Replace(name, "/", "%7bSOLIDUS%7d", -1)
 
        client := arvados.NewClientFromEnv()
        client.AuthToken = arvadostest.ActiveToken
@@ -958,9 +1332,9 @@ func (s *IntegrationSuite) TestForwardSlashSubstitution(c *check.C) {
        defer arv.RequestAndDecode(&coll, "DELETE", "arvados/v1/collections/"+coll.UUID, nil, nil)
 
        base := "http://download.example.com/by_id/" + coll.OwnerUUID + "/"
-       for tryURL, expectRegexp := range map[string]string{
-               base:                          `(?ms).*href="./` + nameShownEscaped + `/"\S+` + nameShown + `.*`,
-               base + nameShownEscaped + "/": `(?ms).*href="./filename"\S+filename.*`,
+       for tryURL, expectedAnchorText := range map[string]string{
+               base:                   nameShown + "/",
+               base + nameShown + "/": "filename",
        } {
                u, _ := url.Parse(tryURL)
                req := &http.Request{
@@ -975,7 +1349,14 @@ func (s *IntegrationSuite) TestForwardSlashSubstitution(c *check.C) {
                resp := httptest.NewRecorder()
                s.handler.ServeHTTP(resp, req)
                c.Check(resp.Code, check.Equals, http.StatusOK)
-               c.Check(resp.Body.String(), check.Matches, expectRegexp)
+               doc, err := html.Parse(resp.Body)
+               c.Assert(err, check.IsNil) // valid HTML
+               pathHrefMap := getPathHrefMap(doc)
+               href, hasExpected := pathHrefMap[expectedAnchorText]
+               c.Assert(hasExpected, check.Equals, true) // has expected anchor text
+               c.Assert(href, check.Not(check.Equals), "")
+               relUrl := mustParseURL(href)
+               c.Check(relUrl.Path, check.Equals, "./"+expectedAnchorText) // decoded href maps back to the anchor text
        }
 }
 
@@ -1118,7 +1499,7 @@ func (s *IntegrationSuite) testDirectoryListing(c *check.C) {
 
        s.handler.Cluster.Services.WebDAVDownload.ExternalURL.Host = "download.example.com"
        authHeader := http.Header{
-               "Authorization": {"OAuth2 " + arvadostest.ActiveToken},
+               "Authorization": {"Bearer " + arvadostest.ActiveToken},
        }
        for _, trial := range []struct {
                uri      string
@@ -1312,11 +1693,24 @@ func (s *IntegrationSuite) testDirectoryListing(c *check.C) {
                        c.Check(resp.Code, check.Equals, http.StatusUnauthorized, comment)
                } else {
                        c.Check(resp.Code, check.Equals, http.StatusOK, comment)
+                       listingPageDoc, err := html.Parse(resp.Body)
+                       c.Check(err, check.IsNil, comment) // valid HTML document
+                       pathHrefMap := getPathHrefMap(listingPageDoc)
+                       c.Assert(pathHrefMap, check.Not(check.HasLen), 0, comment)
                        for _, e := range trial.expect {
-                               e = strings.Replace(e, " ", "%20", -1)
-                               c.Check(resp.Body.String(), check.Matches, `(?ms).*href="./`+e+`".*`, comment)
+                               href, hasE := pathHrefMap[e]
+                               c.Check(hasE, check.Equals, true, comment) // expected path is listed
+                               relUrl := mustParseURL(href)
+                               c.Check(relUrl.Path, check.Equals, "./"+e, comment) // href can be decoded back to path
                        }
-                       c.Check(resp.Body.String(), check.Matches, `(?ms).*--cut-dirs=`+fmt.Sprintf("%d", trial.cutDirs)+` .*`, comment)
+                       wgetCommand := getWgetExamplePre(listingPageDoc)
+                       wgetExpected := regexp.MustCompile(`^\$ wget .*--cut-dirs=(\d+) .*'(https?://[^']+)'$`)
+                       wgetMatchGroups := wgetExpected.FindStringSubmatch(wgetCommand)
+                       c.Assert(wgetMatchGroups, check.NotNil)                                     // wget command matches
+                       c.Check(wgetMatchGroups[1], check.Equals, fmt.Sprintf("%d", trial.cutDirs)) // correct level of cut dirs in wget command
+                       printedUrl := mustParseURL(wgetMatchGroups[2])
+                       c.Check(printedUrl.Host, check.Equals, req.URL.Host)
+                       c.Check(printedUrl.Path, check.Equals, req.URL.Path) // URL arg in wget command can be decoded to the right path
                }
 
                comment = check.Commentf("WebDAV: %q => %q", trial.uri, trial.expect)
@@ -1369,6 +1763,80 @@ func (s *IntegrationSuite) testDirectoryListing(c *check.C) {
        }
 }
 
+// Shallow-traverse the HTML document, gathering the nodes satisfying the
+// predicate function in the output slice. If a node matches the predicate,
+// none of its children will be visited.
+func getNodes(document *html.Node, predicate func(*html.Node) bool) []*html.Node {
+       var acc []*html.Node
+       var traverse func(*html.Node, []*html.Node) []*html.Node
+       traverse = func(root *html.Node, sofar []*html.Node) []*html.Node {
+               if root == nil {
+                       return sofar
+               }
+               if predicate(root) {
+                       return append(sofar, root)
+               }
+               for cur := root.FirstChild; cur != nil; cur = cur.NextSibling {
+                       sofar = traverse(cur, sofar)
+               }
+               return sofar
+       }
+       return traverse(document, acc)
+}
+
+// Returns true if a node has the attribute targetAttr with the given value
+func matchesAttributeValue(node *html.Node, targetAttr string, value string) bool {
+       for _, attr := range node.Attr {
+               if attr.Key == targetAttr && attr.Val == value {
+                       return true
+               }
+       }
+       return false
+}
+
+// Concatenate the content of text-node children of node; only direct
+// children are visited, and any non-text children are skipped.
+func getNodeText(node *html.Node) string {
+       var recv strings.Builder
+       for c := node.FirstChild; c != nil; c = c.NextSibling {
+               if c.Type == html.TextNode {
+                       recv.WriteString(c.Data)
+               }
+       }
+       return recv.String()
+}
+
+// Returns a map from the directory listing item string (a path) to the href
+// value of its <a> tag (an encoded relative URL)
+func getPathHrefMap(document *html.Node) map[string]string {
+       isItemATag := func(node *html.Node) bool {
+               return node.Type == html.ElementNode && node.Data == "a" && matchesAttributeValue(node, "class", "item")
+       }
+       aTags := getNodes(document, isItemATag)
+       output := make(map[string]string)
+       for _, elem := range aTags {
+               textContent := getNodeText(elem)
+               for _, attr := range elem.Attr {
+                       if attr.Key == "href" {
+                               output[textContent] = attr.Val
+                               break
+                       }
+               }
+       }
+       return output
+}
+
+func getWgetExamplePre(document *html.Node) string {
+       isWgetPre := func(node *html.Node) bool {
+               return node.Type == html.ElementNode && matchesAttributeValue(node, "id", "wget-example")
+       }
+       elements := getNodes(document, isWgetPre)
+       if len(elements) != 1 {
+               return ""
+       }
+       return getNodeText(elements[0])
+}
+
 func (s *IntegrationSuite) TestDeleteLastFile(c *check.C) {
        arv := arvados.NewClientFromEnv()
        var newCollection arvados.Collection
@@ -1469,20 +1937,14 @@ func (s *IntegrationSuite) TestFileContentType(c *check.C) {
        }
 }
 
-func (s *IntegrationSuite) TestKeepClientBlockCache(c *check.C) {
-       s.handler.Cluster.Collections.WebDAVCache.MaxBlockEntries = 42
-       c.Check(keepclient.DefaultBlockCache.MaxBlocks, check.Not(check.Equals), 42)
-       u := mustParseURL("http://keep-web.example/c=" + arvadostest.FooCollection + "/t=" + arvadostest.ActiveToken + "/foo")
-       req := &http.Request{
-               Method:     "GET",
-               Host:       u.Host,
-               URL:        u,
-               RequestURI: u.RequestURI(),
-       }
+func (s *IntegrationSuite) TestCacheSize(c *check.C) {
+       req, err := http.NewRequest("GET", "http://"+arvadostest.FooCollection+".example.com/foo", nil)
+       req.Header.Set("Authorization", "Bearer "+arvadostest.ActiveTokenV2)
+       c.Assert(err, check.IsNil)
        resp := httptest.NewRecorder()
        s.handler.ServeHTTP(resp, req)
-       c.Check(resp.Code, check.Equals, http.StatusOK)
-       c.Check(keepclient.DefaultBlockCache.MaxBlocks, check.Equals, 42)
+       c.Assert(resp.Code, check.Equals, http.StatusOK)
+       c.Check(s.handler.Cache.sessions[arvadostest.ActiveTokenV2].client.DiskCacheSize.Percent(), check.Equals, int64(10))
 }
 
 // Writing to a collection shouldn't affect its entry in the
@@ -1612,6 +2074,7 @@ func (s *IntegrationSuite) TestDownloadLoggingPermission(c *check.C) {
        u := mustParseURL("http://" + arvadostest.FooCollection + ".keep-web.example/foo")
 
        s.handler.Cluster.Collections.TrustAllContent = true
+       s.handler.Cluster.Collections.WebDAVLogDownloadInterval = arvados.Duration(0)
 
        for _, adminperm := range []bool{true, false} {
                for _, userperm := range []bool{true, false} {
@@ -1735,6 +2198,124 @@ func (s *IntegrationSuite) TestUploadLoggingPermission(c *check.C) {
        }
 }
 
+func (s *IntegrationSuite) serveAndLogRequests(c *check.C, reqs *map[*http.Request]int) *bytes.Buffer {
+       logbuf, ctx := newLoggerAndContext()
+       var wg sync.WaitGroup
+       for req, expectStatus := range *reqs {
+               req := req.WithContext(ctx)
+               expectStatus := expectStatus
+               wg.Add(1)
+               go func() {
+                       defer wg.Done()
+                       resp := httptest.NewRecorder()
+                       s.handler.ServeHTTP(resp, req)
+                       c.Check(resp.Result().StatusCode, check.Equals, expectStatus)
+               }()
+       }
+       wg.Wait()
+       return logbuf
+}
+
+func countLogMatches(c *check.C, logbuf *bytes.Buffer, pattern string, matchCount int) bool {
+       search, err := regexp.Compile(pattern)
+       if !c.Check(err, check.IsNil, check.Commentf("failed to compile regexp: %v", err)) {
+               return false
+       }
+       matches := search.FindAll(logbuf.Bytes(), -1)
+       return c.Check(matches, check.HasLen, matchCount,
+               check.Commentf("%d matching log messages: %+v", len(matches), matches))
+}
+
+func (s *IntegrationSuite) TestLogThrottling(c *check.C) {
+       s.handler.Cluster.Collections.WebDAVLogDownloadInterval = arvados.Duration(time.Hour)
+       fooURL := "http://" + arvadostest.FooCollection + ".keep-web.example/foo"
+       req := newRequest("GET", fooURL)
+       req.Header.Set("Authorization", "Bearer "+arvadostest.ActiveToken)
+       pattern := `\bmsg="File download".* collection_file_path=foo\b`
+
+       // All these requests get byte zero and should be logged.
+       reqs := make(map[*http.Request]int)
+       reqs[req] = http.StatusOK
+       for _, byterange := range []string{"0-2", "0-1", "0-", "-3"} {
+               req := req.Clone(context.Background())
+               req.Header.Set("Range", "bytes="+byterange)
+               reqs[req] = http.StatusPartialContent
+       }
+       logbuf := s.serveAndLogRequests(c, &reqs)
+       countLogMatches(c, logbuf, pattern, len(reqs))
+
+       // None of these requests get byte zero so they should all be throttled
+       // (now that we've made at least one request for byte zero).
+       reqs = make(map[*http.Request]int)
+       for _, byterange := range []string{"1-2", "1-", "2-", "-1", "-2"} {
+               req := req.Clone(context.Background())
+               req.Header.Set("Range", "bytes="+byterange)
+               reqs[req] = http.StatusPartialContent
+       }
+       logbuf = s.serveAndLogRequests(c, &reqs)
+       countLogMatches(c, logbuf, pattern, 0)
+}
+
+func (s *IntegrationSuite) TestLogThrottleInterval(c *check.C) {
+       s.handler.Cluster.Collections.WebDAVLogDownloadInterval = arvados.Duration(time.Nanosecond)
+       logbuf, ctx := newLoggerAndContext()
+       req := newRequest("GET", "http://"+arvadostest.FooCollection+".keep-web.example/foo")
+       req.Header.Set("Authorization", "Bearer "+arvadostest.ActiveToken)
+       req = req.WithContext(ctx)
+
+       re := regexp.MustCompile(`\bmsg="File download".* collection_file_path=foo\b`)
+       for expected := 1; expected < 4; expected++ {
+               time.Sleep(2 * time.Nanosecond)
+               resp := httptest.NewRecorder()
+               s.handler.ServeHTTP(resp, req)
+               c.Assert(resp.Result().StatusCode, check.Equals, http.StatusOK)
+               matches := re.FindAll(logbuf.Bytes(), -1)
+               c.Assert(matches, check.HasLen, expected,
+                       check.Commentf("%d matching log messages: %+v", len(matches), matches))
+       }
+}
+
+func (s *IntegrationSuite) TestLogThrottleDifferentTokens(c *check.C) {
+       s.handler.Cluster.Collections.WebDAVLogDownloadInterval = arvados.Duration(time.Hour)
+       req := newRequest("GET", "http://"+arvadostest.FooCollection+".keep-web.example/foo")
+       reqs := make(map[*http.Request]int)
+       for _, token := range []string{arvadostest.ActiveToken, arvadostest.AdminToken} {
+               req := req.Clone(context.Background())
+               req.Header.Set("Authorization", "Bearer "+token)
+               reqs[req] = http.StatusOK
+       }
+       logbuf := s.serveAndLogRequests(c, &reqs)
+       countLogMatches(c, logbuf, `\bmsg="File download".* collection_file_path=foo\b`, len(reqs))
+}
+
+func (s *IntegrationSuite) TestLogThrottleDifferentFiles(c *check.C) {
+       s.handler.Cluster.Collections.WebDAVLogDownloadInterval = arvados.Duration(time.Hour)
+       baseURL := "http://" + arvadostest.MultilevelCollection1 + ".keep-web.example/"
+       reqs := make(map[*http.Request]int)
+       for _, filename := range []string{"file1", "file2", "file3"} {
+               req := newRequest("GET", baseURL+filename)
+               req.Header.Set("Authorization", "Bearer "+arvadostest.ActiveToken)
+               reqs[req] = http.StatusOK
+       }
+       logbuf := s.serveAndLogRequests(c, &reqs)
+       countLogMatches(c, logbuf, `\bmsg="File download".* collection_uuid=`+arvadostest.MultilevelCollection1+`\b`, len(reqs))
+}
+
+func (s *IntegrationSuite) TestLogThrottleDifferentSources(c *check.C) {
+       s.handler.Cluster.Collections.WebDAVLogDownloadInterval = arvados.Duration(time.Hour)
+       req := newRequest("GET", "http://"+arvadostest.FooCollection+".keep-web.example/foo")
+       req.Header.Set("Authorization", "Bearer "+arvadostest.ActiveToken)
+       reqs := make(map[*http.Request]int)
+       reqs[req] = http.StatusOK
+       for _, xff := range []string{"10.22.33.44", "100::123"} {
+               req := req.Clone(context.Background())
+               req.Header.Set("X-Forwarded-For", xff)
+               reqs[req] = http.StatusOK
+       }
+       logbuf := s.serveAndLogRequests(c, &reqs)
+       countLogMatches(c, logbuf, `\bmsg="File download".* collection_file_path=foo\b`, len(reqs))
+}
+
 func (s *IntegrationSuite) TestConcurrentWrites(c *check.C) {
        s.handler.Cluster.Collections.WebDAVCache.TTL = arvados.Duration(time.Second * 2)
        lockTidyInterval = time.Second