21901: Log all keep-web GET requests that request the first byte
[arvados.git] / services / keep-web / handler_test.go
1 // Copyright (C) The Arvados Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: AGPL-3.0
4
5 package keepweb
6
7 import (
8         "bytes"
9         "context"
10         "fmt"
11         "io"
12         "io/ioutil"
13         "net/http"
14         "net/http/httptest"
15         "net/url"
16         "os"
17         "path/filepath"
18         "regexp"
19         "strings"
20         "sync"
21         "time"
22
23         "git.arvados.org/arvados.git/lib/config"
24         "git.arvados.org/arvados.git/sdk/go/arvados"
25         "git.arvados.org/arvados.git/sdk/go/arvadosclient"
26         "git.arvados.org/arvados.git/sdk/go/arvadostest"
27         "git.arvados.org/arvados.git/sdk/go/auth"
28         "git.arvados.org/arvados.git/sdk/go/ctxlog"
29         "git.arvados.org/arvados.git/sdk/go/keepclient"
30         "github.com/prometheus/client_golang/prometheus"
31         "github.com/sirupsen/logrus"
32         "golang.org/x/net/html"
33         check "gopkg.in/check.v1"
34 )
35
// Pass a UnitSuite instance to check.Suite so its Test* methods are
// picked up by the gocheck runner.
var _ = check.Suite(&UnitSuite{})
37
// init sets arvados.DebugLocksPanicMode to true when this test
// package is loaded.
func init() {
	arvados.DebugLocksPanicMode = true
}
41
// UnitSuite holds the fixtures shared by the unit tests in this
// file. Both fields are assigned in SetUpTest.
type UnitSuite struct {
	// cluster is the cluster configuration loaded by SetUpTest.
	cluster *arvados.Cluster
	// handler is the keep-web handler under test.
	handler *handler
}
46
47 func (s *UnitSuite) SetUpTest(c *check.C) {
48         logger := ctxlog.TestLogger(c)
49         ldr := config.NewLoader(bytes.NewBufferString("Clusters: {zzzzz: {}}"), logger)
50         ldr.Path = "-"
51         cfg, err := ldr.Load()
52         c.Assert(err, check.IsNil)
53         cc, err := cfg.GetCluster("")
54         c.Assert(err, check.IsNil)
55         s.cluster = cc
56         s.handler = &handler{
57                 Cluster: cc,
58                 Cache: cache{
59                         cluster:  cc,
60                         logger:   logger,
61                         registry: prometheus.NewRegistry(),
62                 },
63                 metrics: newMetrics(prometheus.NewRegistry()),
64         }
65 }
66
67 func newCollection(collID string) *arvados.Collection {
68         coll := &arvados.Collection{UUID: collID}
69         manifestKey := collID
70         if pdh, ok := arvadostest.TestCollectionUUIDToPDH[collID]; ok {
71                 coll.PortableDataHash = pdh
72                 manifestKey = pdh
73         }
74         if mtext, ok := arvadostest.TestCollectionPDHToManifest[manifestKey]; ok {
75                 coll.ManifestText = mtext
76         }
77         return coll
78 }
79
80 func newRequest(method, urlStr string) *http.Request {
81         u := mustParseURL(urlStr)
82         return &http.Request{
83                 Method:     method,
84                 Host:       u.Host,
85                 URL:        u,
86                 RequestURI: u.RequestURI(),
87                 RemoteAddr: "10.20.30.40:56789",
88                 Header:     http.Header{},
89         }
90 }
91
92 func newLoggerAndContext() (*bytes.Buffer, context.Context) {
93         var logbuf bytes.Buffer
94         logger := logrus.New()
95         logger.Out = &logbuf
96         return &logbuf, ctxlog.Context(context.Background(), logger)
97 }
98
99 func (s *UnitSuite) TestLogEventTypes(c *check.C) {
100         collURL := "http://keep-web.example/c=" + arvadostest.FooCollection
101         for method, expected := range map[string]string{
102                 "GET":  "file_download",
103                 "POST": "file_upload",
104                 "PUT":  "file_upload",
105         } {
106                 filePath := "/" + method
107                 req := newRequest(method, collURL+filePath)
108                 actual := newFileEventLog(s.handler, req, filePath, nil, nil, "")
109                 if !c.Check(actual, check.NotNil) {
110                         continue
111                 }
112                 c.Check(actual.eventType, check.Equals, expected)
113         }
114 }
115
116 func (s *UnitSuite) TestUnloggedEventTypes(c *check.C) {
117         collURL := "http://keep-web.example/c=" + arvadostest.FooCollection
118         for _, method := range []string{"DELETE", "HEAD", "OPTIONS", "PATCH"} {
119                 filePath := "/" + method
120                 req := newRequest(method, collURL+filePath)
121                 actual := newFileEventLog(s.handler, req, filePath, nil, nil, "")
122                 c.Check(actual, check.IsNil,
123                         check.Commentf("%s request made a log event", method))
124         }
125 }
126
127 func (s *UnitSuite) TestLogFilePath(c *check.C) {
128         coll := newCollection(arvadostest.FooCollection)
129         collURL := "http://keep-web.example/c=" + arvadostest.FooCollection
130         for _, filePath := range []string{"/foo", "/Foo", "/foo/bar"} {
131                 req := newRequest("GET", collURL+filePath)
132                 actual := newFileEventLog(s.handler, req, filePath, coll, nil, "")
133                 if !c.Check(actual, check.NotNil) {
134                         continue
135                 }
136                 c.Check(actual.collFilePath, check.Equals, filePath)
137         }
138 }
139
140 func (s *UnitSuite) TestLogRemoteAddr(c *check.C) {
141         collURL := "http://keep-web.example/c=" + arvadostest.FooCollection
142         filePath := "/foo"
143         req := newRequest("GET", collURL+filePath)
144
145         for _, addr := range []string{"10.20.30.55", "192.168.144.120", "192.0.2.4"} {
146                 req.RemoteAddr = addr + ":57914"
147                 actual := newFileEventLog(s.handler, req, filePath, nil, nil, "")
148                 if !c.Check(actual, check.NotNil) {
149                         continue
150                 }
151                 c.Check(actual.clientAddr, check.Equals, addr)
152         }
153
154         for _, addr := range []string{"100::20:30:40", "2001:db8::90:100", "3fff::30"} {
155                 req.RemoteAddr = fmt.Sprintf("[%s]:57916", addr)
156                 actual := newFileEventLog(s.handler, req, filePath, nil, nil, "")
157                 if !c.Check(actual, check.NotNil) {
158                         continue
159                 }
160                 c.Check(actual.clientAddr, check.Equals, addr)
161         }
162 }
163
164 func (s *UnitSuite) TestLogXForwardedFor(c *check.C) {
165         collURL := "http://keep-web.example/c=" + arvadostest.FooCollection
166         filePath := "/foo"
167         req := newRequest("GET", collURL+filePath)
168         for xff, expected := range map[string]string{
169                 "10.20.30.55":                          "10.20.30.55",
170                 "192.168.144.120, 10.20.30.120":        "10.20.30.120",
171                 "192.0.2.4, 192.0.2.6, 192.0.2.8":      "192.0.2.8",
172                 "192.0.2.4,192.168.2.4":                "192.168.2.4",
173                 "10.20.30.60,192.168.144.40,192.0.2.4": "192.0.2.4",
174                 "100::20:30:50":                        "100::20:30:50",
175                 "2001:db8::80:90, 100::100":            "100::100",
176                 "3fff::ff, 3fff::ee, 3fff::fe":         "3fff::fe",
177                 "3fff::3f,100::1000":                   "100::1000",
178                 "2001:db8::88,100::88,3fff::88":        "3fff::88",
179                 "10.20.30.60, 2001:db8::60":            "2001:db8::60",
180                 "2001:db8::20,10.20.30.20":             "10.20.30.20",
181                 ", 10.20.30.123, 100::123":             "100::123",
182                 ",100::321,10.30.20.10":                "10.30.20.10",
183         } {
184                 req.Header.Set("X-Forwarded-For", xff)
185                 actual := newFileEventLog(s.handler, req, filePath, nil, nil, "")
186                 if !c.Check(actual, check.NotNil) {
187                         continue
188                 }
189                 c.Check(actual.clientAddr, check.Equals, expected)
190         }
191 }
192
193 func (s *UnitSuite) TestLogXForwardedForMalformed(c *check.C) {
194         collURL := "http://keep-web.example/c=" + arvadostest.FooCollection
195         filePath := "/foo"
196         req := newRequest("GET", collURL+filePath)
197         for _, xff := range []string{"", ",", "10.20,30.40", "foo, bar"} {
198                 req.Header.Set("X-Forwarded-For", xff)
199                 actual := newFileEventLog(s.handler, req, filePath, nil, nil, "")
200                 if !c.Check(actual, check.NotNil) {
201                         continue
202                 }
203                 c.Check(actual.clientAddr, check.Equals, "10.20.30.40")
204         }
205 }
206
207 func (s *UnitSuite) TestLogXForwardedForMultivalue(c *check.C) {
208         collURL := "http://keep-web.example/c=" + arvadostest.FooCollection
209         filePath := "/foo"
210         req := newRequest("GET", collURL+filePath)
211         req.Header.Set("X-Forwarded-For", ", ")
212         req.Header.Add("X-Forwarded-For", "2001:db8::db9:dbd")
213         req.Header.Add("X-Forwarded-For", "10.20.30.90")
214         actual := newFileEventLog(s.handler, req, filePath, nil, nil, "")
215         c.Assert(actual, check.NotNil)
216         c.Check(actual.clientAddr, check.Equals, "10.20.30.90")
217 }
218
219 func (s *UnitSuite) TestLogClientAddressCanonicalization(c *check.C) {
220         collURL := "http://keep-web.example/c=" + arvadostest.FooCollection
221         filePath := "/foo"
222         req := newRequest("GET", collURL+filePath)
223         expected := "2001:db8::12:0"
224
225         req.RemoteAddr = "[2001:db8::012:0000]:57918"
226         a := newFileEventLog(s.handler, req, filePath, nil, nil, "")
227         c.Assert(a, check.NotNil)
228         c.Check(a.clientAddr, check.Equals, expected)
229
230         req.RemoteAddr = "10.20.30.40:57919"
231         req.Header.Set("X-Forwarded-For", "2001:db8:0::0:12:00")
232         b := newFileEventLog(s.handler, req, filePath, nil, nil, "")
233         c.Assert(b, check.NotNil)
234         c.Check(b.clientAddr, check.Equals, expected)
235 }
236
237 func (s *UnitSuite) TestLogAnonymousUser(c *check.C) {
238         collURL := "http://keep-web.example/c=" + arvadostest.FooCollection
239         filePath := "/foo"
240         req := newRequest("GET", collURL+filePath)
241         actual := newFileEventLog(s.handler, req, filePath, nil, nil, arvadostest.AnonymousToken)
242         c.Assert(actual, check.NotNil)
243         c.Check(actual.userUUID, check.Equals, s.handler.Cluster.ClusterID+"-tpzed-anonymouspublic")
244         c.Check(actual.userFullName, check.Equals, "")
245         c.Check(actual.clientToken, check.Equals, arvadostest.AnonymousToken)
246 }
247
248 func (s *UnitSuite) TestLogUser(c *check.C) {
249         collURL := "http://keep-web.example/c=" + arvadostest.FooCollection
250         for _, trial := range []struct{ uuid, fullName, token string }{
251                 {arvadostest.ActiveUserUUID, "Active User", arvadostest.ActiveToken},
252                 {arvadostest.SpectatorUserUUID, "Spectator User", arvadostest.SpectatorToken},
253         } {
254                 filePath := "/" + trial.uuid
255                 req := newRequest("GET", collURL+filePath)
256                 user := &arvados.User{
257                         UUID:     trial.uuid,
258                         FullName: trial.fullName,
259                 }
260                 actual := newFileEventLog(s.handler, req, filePath, nil, user, trial.token)
261                 if !c.Check(actual, check.NotNil) {
262                         continue
263                 }
264                 c.Check(actual.userUUID, check.Equals, trial.uuid)
265                 c.Check(actual.userFullName, check.Equals, trial.fullName)
266                 c.Check(actual.clientToken, check.Equals, trial.token)
267         }
268 }
269
270 func (s *UnitSuite) TestLogCollectionByUUID(c *check.C) {
271         for collUUID, collPDH := range arvadostest.TestCollectionUUIDToPDH {
272                 collURL := "http://keep-web.example/c=" + collUUID
273                 filePath := "/" + collUUID
274                 req := newRequest("GET", collURL+filePath)
275                 coll := newCollection(collUUID)
276                 actual := newFileEventLog(s.handler, req, filePath, coll, nil, "")
277                 if !c.Check(actual, check.NotNil) {
278                         continue
279                 }
280                 c.Check(actual.collUUID, check.Equals, collUUID)
281                 c.Check(actual.collPDH, check.Equals, collPDH)
282         }
283 }
284
285 func (s *UnitSuite) TestLogCollectionByPDH(c *check.C) {
286         for _, collPDH := range arvadostest.TestCollectionUUIDToPDH {
287                 collURL := "http://keep-web.example/c=" + collPDH
288                 filePath := "/PDHFile"
289                 req := newRequest("GET", collURL+filePath)
290                 coll := newCollection(collPDH)
291                 actual := newFileEventLog(s.handler, req, filePath, coll, nil, "")
292                 if !c.Check(actual, check.NotNil) {
293                         continue
294                 }
295                 c.Check(actual.collPDH, check.Equals, collPDH)
296                 c.Check(actual.collUUID, check.Equals, "")
297         }
298 }
299
300 func (s *UnitSuite) TestLogGETUUIDAsDict(c *check.C) {
301         filePath := "/foo"
302         reqPath := "/c=" + arvadostest.FooCollection + filePath
303         req := newRequest("GET", "http://keep-web.example"+reqPath)
304         coll := newCollection(arvadostest.FooCollection)
305         logEvent := newFileEventLog(s.handler, req, filePath, coll, nil, "")
306         c.Assert(logEvent, check.NotNil)
307         c.Check(logEvent.asDict(), check.DeepEquals, arvadosclient.Dict{
308                 "event_type":  "file_download",
309                 "object_uuid": s.handler.Cluster.ClusterID + "-tpzed-anonymouspublic",
310                 "properties": arvadosclient.Dict{
311                         "reqPath":              reqPath,
312                         "collection_uuid":      arvadostest.FooCollection,
313                         "collection_file_path": filePath,
314                         "portable_data_hash":   arvadostest.FooCollectionPDH,
315                 },
316         })
317 }
318
319 func (s *UnitSuite) TestLogGETPDHAsDict(c *check.C) {
320         filePath := "/Foo"
321         reqPath := "/c=" + arvadostest.FooCollectionPDH + filePath
322         req := newRequest("GET", "http://keep-web.example"+reqPath)
323         coll := newCollection(arvadostest.FooCollectionPDH)
324         user := &arvados.User{
325                 UUID:     arvadostest.ActiveUserUUID,
326                 FullName: "Active User",
327         }
328         logEvent := newFileEventLog(s.handler, req, filePath, coll, user, "")
329         c.Assert(logEvent, check.NotNil)
330         c.Check(logEvent.asDict(), check.DeepEquals, arvadosclient.Dict{
331                 "event_type":  "file_download",
332                 "object_uuid": arvadostest.ActiveUserUUID,
333                 "properties": arvadosclient.Dict{
334                         "reqPath":              reqPath,
335                         "portable_data_hash":   arvadostest.FooCollectionPDH,
336                         "collection_uuid":      "",
337                         "collection_file_path": filePath,
338                 },
339         })
340 }
341
342 func (s *UnitSuite) TestLogUploadAsDict(c *check.C) {
343         coll := newCollection(arvadostest.FooCollection)
344         user := &arvados.User{
345                 UUID:     arvadostest.ActiveUserUUID,
346                 FullName: "Active User",
347         }
348         for _, method := range []string{"POST", "PUT"} {
349                 filePath := "/" + method + "File"
350                 reqPath := "/c=" + arvadostest.FooCollection + filePath
351                 req := newRequest(method, "http://keep-web.example"+reqPath)
352                 logEvent := newFileEventLog(s.handler, req, filePath, coll, user, "")
353                 if !c.Check(logEvent, check.NotNil) {
354                         continue
355                 }
356                 c.Check(logEvent.asDict(), check.DeepEquals, arvadosclient.Dict{
357                         "event_type":  "file_upload",
358                         "object_uuid": arvadostest.ActiveUserUUID,
359                         "properties": arvadosclient.Dict{
360                                 "reqPath":              reqPath,
361                                 "collection_uuid":      arvadostest.FooCollection,
362                                 "collection_file_path": filePath,
363                         },
364                 })
365         }
366 }
367
368 func (s *UnitSuite) TestLogGETUUIDAsFields(c *check.C) {
369         filePath := "/foo"
370         reqPath := "/c=" + arvadostest.FooCollection + filePath
371         req := newRequest("GET", "http://keep-web.example"+reqPath)
372         coll := newCollection(arvadostest.FooCollection)
373         logEvent := newFileEventLog(s.handler, req, filePath, coll, nil, "")
374         c.Assert(logEvent, check.NotNil)
375         c.Check(logEvent.asFields(), check.DeepEquals, logrus.Fields{
376                 "user_uuid":            s.handler.Cluster.ClusterID + "-tpzed-anonymouspublic",
377                 "collection_uuid":      arvadostest.FooCollection,
378                 "collection_file_path": filePath,
379                 "portable_data_hash":   arvadostest.FooCollectionPDH,
380         })
381 }
382
383 func (s *UnitSuite) TestLogGETPDHAsFields(c *check.C) {
384         filePath := "/Foo"
385         reqPath := "/c=" + arvadostest.FooCollectionPDH + filePath
386         req := newRequest("GET", "http://keep-web.example"+reqPath)
387         coll := newCollection(arvadostest.FooCollectionPDH)
388         user := &arvados.User{
389                 UUID:     arvadostest.ActiveUserUUID,
390                 FullName: "Active User",
391         }
392         logEvent := newFileEventLog(s.handler, req, filePath, coll, user, "")
393         c.Assert(logEvent, check.NotNil)
394         c.Check(logEvent.asFields(), check.DeepEquals, logrus.Fields{
395                 "user_uuid":            arvadostest.ActiveUserUUID,
396                 "user_full_name":       "Active User",
397                 "collection_uuid":      "",
398                 "collection_file_path": filePath,
399                 "portable_data_hash":   arvadostest.FooCollectionPDH,
400         })
401 }
402
403 func (s *UnitSuite) TestLogUploadAsFields(c *check.C) {
404         coll := newCollection(arvadostest.FooCollection)
405         user := &arvados.User{
406                 UUID:     arvadostest.ActiveUserUUID,
407                 FullName: "Active User",
408         }
409         for _, method := range []string{"POST", "PUT"} {
410                 filePath := "/" + method + "File"
411                 reqPath := "/c=" + arvadostest.FooCollection + filePath
412                 req := newRequest(method, "http://keep-web.example"+reqPath)
413                 logEvent := newFileEventLog(s.handler, req, filePath, coll, user, "")
414                 if !c.Check(logEvent, check.NotNil) {
415                         continue
416                 }
417                 c.Check(logEvent.asFields(), check.DeepEquals, logrus.Fields{
418                         "user_uuid":            arvadostest.ActiveUserUUID,
419                         "user_full_name":       "Active User",
420                         "collection_uuid":      arvadostest.FooCollection,
421                         "collection_file_path": filePath,
422                 })
423         }
424 }
425
426 func (s *UnitSuite) TestCORSPreflight(c *check.C) {
427         h := s.handler
428         u := mustParseURL("http://keep-web.example/c=" + arvadostest.FooCollection + "/foo")
429         req := &http.Request{
430                 Method:     "OPTIONS",
431                 Host:       u.Host,
432                 URL:        u,
433                 RequestURI: u.RequestURI(),
434                 Header: http.Header{
435                         "Origin":                        {"https://workbench.example"},
436                         "Access-Control-Request-Method": {"POST"},
437                 },
438         }
439
440         // Check preflight for an allowed request
441         resp := httptest.NewRecorder()
442         h.ServeHTTP(resp, req)
443         c.Check(resp.Code, check.Equals, http.StatusOK)
444         c.Check(resp.Body.String(), check.Equals, "")
445         c.Check(resp.Header().Get("Access-Control-Allow-Origin"), check.Equals, "*")
446         c.Check(resp.Header().Get("Access-Control-Allow-Methods"), check.Equals, "COPY, DELETE, GET, LOCK, MKCOL, MOVE, OPTIONS, POST, PROPFIND, PROPPATCH, PUT, RMCOL, UNLOCK")
447         c.Check(resp.Header().Get("Access-Control-Allow-Headers"), check.Equals, "Authorization, Content-Type, Range, Depth, Destination, If, Lock-Token, Overwrite, Timeout, Cache-Control")
448
449         // Check preflight for a disallowed request
450         resp = httptest.NewRecorder()
451         req.Header.Set("Access-Control-Request-Method", "MAKE-COFFEE")
452         h.ServeHTTP(resp, req)
453         c.Check(resp.Body.String(), check.Equals, "")
454         c.Check(resp.Code, check.Equals, http.StatusMethodNotAllowed)
455 }
456
457 func (s *UnitSuite) TestWebdavPrefixAndSource(c *check.C) {
458         for _, trial := range []struct {
459                 method   string
460                 path     string
461                 prefix   string
462                 source   string
463                 notFound bool
464                 seeOther bool
465         }{
466                 {
467                         method: "PROPFIND",
468                         path:   "/",
469                 },
470                 {
471                         method: "PROPFIND",
472                         path:   "/dir1",
473                 },
474                 {
475                         method: "PROPFIND",
476                         path:   "/dir1/",
477                 },
478                 {
479                         method: "PROPFIND",
480                         path:   "/dir1/foo",
481                         prefix: "/dir1",
482                         source: "/dir1",
483                 },
484                 {
485                         method: "PROPFIND",
486                         path:   "/prefix/dir1/foo",
487                         prefix: "/prefix/",
488                         source: "",
489                 },
490                 {
491                         method: "PROPFIND",
492                         path:   "/prefix/dir1/foo",
493                         prefix: "/prefix",
494                         source: "",
495                 },
496                 {
497                         method: "PROPFIND",
498                         path:   "/prefix/dir1/foo",
499                         prefix: "/prefix/",
500                         source: "/",
501                 },
502                 {
503                         method: "PROPFIND",
504                         path:   "/prefix/foo",
505                         prefix: "/prefix/",
506                         source: "/dir1/",
507                 },
508                 {
509                         method: "GET",
510                         path:   "/prefix/foo",
511                         prefix: "/prefix/",
512                         source: "/dir1/",
513                 },
514                 {
515                         method: "PROPFIND",
516                         path:   "/prefix/",
517                         prefix: "/prefix",
518                         source: "/dir1",
519                 },
520                 {
521                         method: "PROPFIND",
522                         path:   "/prefix",
523                         prefix: "/prefix",
524                         source: "/dir1/",
525                 },
526                 {
527                         method:   "GET",
528                         path:     "/prefix",
529                         prefix:   "/prefix",
530                         source:   "/dir1",
531                         seeOther: true,
532                 },
533                 {
534                         method:   "PROPFIND",
535                         path:     "/dir1/foo",
536                         prefix:   "",
537                         source:   "/dir1",
538                         notFound: true,
539                 },
540         } {
541                 c.Logf("trial %+v", trial)
542                 u := mustParseURL("http://" + arvadostest.FooBarDirCollection + ".keep-web.example" + trial.path)
543                 req := &http.Request{
544                         Method:     trial.method,
545                         Host:       u.Host,
546                         URL:        u,
547                         RequestURI: u.RequestURI(),
548                         Header: http.Header{
549                                 "Authorization":   {"Bearer " + arvadostest.ActiveTokenV2},
550                                 "X-Webdav-Prefix": {trial.prefix},
551                                 "X-Webdav-Source": {trial.source},
552                         },
553                         Body: ioutil.NopCloser(bytes.NewReader(nil)),
554                 }
555
556                 resp := httptest.NewRecorder()
557                 s.handler.ServeHTTP(resp, req)
558                 if trial.notFound {
559                         c.Check(resp.Code, check.Equals, http.StatusNotFound)
560                 } else if trial.method == "PROPFIND" {
561                         c.Check(resp.Code, check.Equals, http.StatusMultiStatus)
562                         c.Check(resp.Body.String(), check.Matches, `(?ms).*>\n?$`)
563                 } else if trial.seeOther {
564                         c.Check(resp.Code, check.Equals, http.StatusSeeOther)
565                 } else {
566                         c.Check(resp.Code, check.Equals, http.StatusOK)
567                 }
568         }
569 }
570
571 func (s *UnitSuite) TestEmptyResponse(c *check.C) {
572         // Ensure we start with an empty cache
573         defer os.Setenv("HOME", os.Getenv("HOME"))
574         os.Setenv("HOME", c.MkDir())
575         s.handler.Cluster.Collections.WebDAVLogDownloadInterval = arvados.Duration(0)
576
577         for _, trial := range []struct {
578                 dataExists    bool
579                 sendIMSHeader bool
580                 expectStatus  int
581                 logRegexp     string
582         }{
583                 // If we return no content due to a Keep read error,
584                 // we should emit a log message.
585                 {false, false, http.StatusOK, `(?ms).*only wrote 0 bytes.*`},
586
587                 // If we return no content because the client sent an
588                 // If-Modified-Since header, our response should be
589                 // 304.  We still expect a "File download" log since it
590                 // counts as a file access for auditing.
591                 {true, true, http.StatusNotModified, `(?ms).*msg="File download".*`},
592         } {
593                 c.Logf("trial: %+v", trial)
594                 arvadostest.StartKeep(2, true)
595                 if trial.dataExists {
596                         arv, err := arvadosclient.MakeArvadosClient()
597                         c.Assert(err, check.IsNil)
598                         arv.ApiToken = arvadostest.ActiveToken
599                         kc, err := keepclient.MakeKeepClient(arv)
600                         c.Assert(err, check.IsNil)
601                         _, _, err = kc.PutB([]byte("foo"))
602                         c.Assert(err, check.IsNil)
603                 }
604
605                 u := mustParseURL("http://" + arvadostest.FooCollection + ".keep-web.example/foo")
606                 req := &http.Request{
607                         Method:     "GET",
608                         Host:       u.Host,
609                         URL:        u,
610                         RequestURI: u.RequestURI(),
611                         Header: http.Header{
612                                 "Authorization": {"Bearer " + arvadostest.ActiveToken},
613                         },
614                 }
615                 if trial.sendIMSHeader {
616                         req.Header.Set("If-Modified-Since", strings.Replace(time.Now().UTC().Format(time.RFC1123), "UTC", "GMT", -1))
617                 }
618
619                 var logbuf bytes.Buffer
620                 logger := logrus.New()
621                 logger.Out = &logbuf
622                 req = req.WithContext(ctxlog.Context(context.Background(), logger))
623
624                 resp := httptest.NewRecorder()
625                 s.handler.ServeHTTP(resp, req)
626                 c.Check(resp.Code, check.Equals, trial.expectStatus)
627                 c.Check(resp.Body.String(), check.Equals, "")
628
629                 c.Log(logbuf.String())
630                 c.Check(logbuf.String(), check.Matches, trial.logRegexp)
631         }
632 }
633
634 func (s *UnitSuite) TestInvalidUUID(c *check.C) {
635         bogusID := strings.Replace(arvadostest.FooCollectionPDH, "+", "-", 1) + "-"
636         token := arvadostest.ActiveToken
637         for _, trial := range []string{
638                 "http://keep-web/c=" + bogusID + "/foo",
639                 "http://keep-web/c=" + bogusID + "/t=" + token + "/foo",
640                 "http://keep-web/collections/download/" + bogusID + "/" + token + "/foo",
641                 "http://keep-web/collections/" + bogusID + "/foo",
642                 "http://" + bogusID + ".keep-web/" + bogusID + "/foo",
643                 "http://" + bogusID + ".keep-web/t=" + token + "/" + bogusID + "/foo",
644         } {
645                 c.Log(trial)
646                 u := mustParseURL(trial)
647                 req := &http.Request{
648                         Method:     "GET",
649                         Host:       u.Host,
650                         URL:        u,
651                         RequestURI: u.RequestURI(),
652                 }
653                 resp := httptest.NewRecorder()
654                 s.cluster.Users.AnonymousUserToken = arvadostest.AnonymousToken
655                 s.handler.ServeHTTP(resp, req)
656                 c.Check(resp.Code, check.Equals, http.StatusNotFound)
657         }
658 }
659
// mustParseURL parses s with url.Parse and returns the result. It
// panics if s cannot be parsed, so it is only suitable for URLs that
// are expected to be valid, such as the fixed strings built by these
// tests.
func mustParseURL(s string) *url.URL {
	r, err := url.Parse(s)
	if err != nil {
		// Include the parse error so the panic shows why the
		// URL was rejected, not just which URL it was.
		panic(fmt.Sprintf("parse URL: %s: %v", s, err))
	}
	return r
}
667
668 func (s *IntegrationSuite) TestVhost404(c *check.C) {
669         for _, testURL := range []string{
670                 arvadostest.NonexistentCollection + ".example.com/theperthcountyconspiracy",
671                 arvadostest.NonexistentCollection + ".example.com/t=" + arvadostest.ActiveToken + "/theperthcountyconspiracy",
672         } {
673                 resp := httptest.NewRecorder()
674                 u := mustParseURL(testURL)
675                 req := &http.Request{
676                         Method:     "GET",
677                         URL:        u,
678                         RequestURI: u.RequestURI(),
679                 }
680                 s.handler.ServeHTTP(resp, req)
681                 c.Check(resp.Code, check.Equals, http.StatusNotFound)
682                 c.Check(resp.Body.String(), check.Equals, notFoundMessage+"\n")
683         }
684 }
685
// An authorizer modifies an HTTP request to make use of the given
// token -- by adding it to a header, cookie, query param, or whatever
// -- and returns the HTTP status code we should expect from keep-web if
// the token is invalid.
//
// The request is modified in place; the second argument is the token.
type authorizer func(*http.Request, string) int
691
692 func (s *IntegrationSuite) TestVhostViaAuthzHeaderOAuth2(c *check.C) {
693         s.doVhostRequests(c, authzViaAuthzHeaderOAuth2)
694 }
// authzViaAuthzHeaderOAuth2 is an authorizer that adds an
// "Authorization: OAuth2 <tok>" header to r. It reports 401
// Unauthorized as the expected response to an invalid token.
func authzViaAuthzHeaderOAuth2(r *http.Request, tok string) int {
	credentials := "OAuth2 " + tok
	r.Header.Add("Authorization", credentials)
	return http.StatusUnauthorized
}
699
700 func (s *IntegrationSuite) TestVhostViaAuthzHeaderBearer(c *check.C) {
701         s.doVhostRequests(c, authzViaAuthzHeaderBearer)
702 }
// authzViaAuthzHeaderBearer is an authorizer that adds an
// "Authorization: Bearer <tok>" header to r. It reports 401
// Unauthorized as the expected response to an invalid token.
func authzViaAuthzHeaderBearer(r *http.Request, tok string) int {
	credentials := "Bearer " + tok
	r.Header.Add("Authorization", credentials)
	return http.StatusUnauthorized
}
707
708 func (s *IntegrationSuite) TestVhostViaCookieValue(c *check.C) {
709         s.doVhostRequests(c, authzViaCookieValue)
710 }
711 func authzViaCookieValue(r *http.Request, tok string) int {
712         r.AddCookie(&http.Cookie{
713                 Name:  "arvados_api_token",
714                 Value: auth.EncodeTokenCookie([]byte(tok)),
715         })
716         return http.StatusUnauthorized
717 }
718
719 func (s *IntegrationSuite) TestVhostViaHTTPBasicAuth(c *check.C) {
720         s.doVhostRequests(c, authzViaHTTPBasicAuth)
721 }
// authzViaHTTPBasicAuth is an authorizer that presents the token as
// the password of an HTTP Basic "Authorization" header on r. It
// reports 401 Unauthorized as the expected response to an invalid
// token.
//
// Previously this function was a copy of authzViaCookieValue and
// sent the token in a cookie, so HTTP Basic authentication was never
// exercised.
//
// NOTE(review): the username is a placeholder; keep-web is assumed
// to ignore it and use only the password as the token -- confirm.
func authzViaHTTPBasicAuth(r *http.Request, tok string) int {
	r.SetBasicAuth("username", tok)
	return http.StatusUnauthorized
}
729
730 func (s *IntegrationSuite) TestVhostViaHTTPBasicAuthWithExtraSpaceChars(c *check.C) {
731         s.doVhostRequests(c, func(r *http.Request, tok string) int {
732                 r.AddCookie(&http.Cookie{
733                         Name:  "arvados_api_token",
734                         Value: auth.EncodeTokenCookie([]byte(" " + tok + "\n")),
735                 })
736                 return http.StatusUnauthorized
737         })
738 }
739
740 func (s *IntegrationSuite) TestVhostViaPath(c *check.C) {
741         s.doVhostRequests(c, authzViaPath)
742 }
// authzViaPath is an authorizer that prepends a "/t=<tok>" segment
// to the request's URL path. It reports 404 Not Found as the
// expected response to an invalid token.
func authzViaPath(r *http.Request, tok string) int {
	tokenSegment := "/t=" + tok
	r.URL.Path = tokenSegment + r.URL.Path
	return http.StatusNotFound
}
747
748 func (s *IntegrationSuite) TestVhostViaQueryString(c *check.C) {
749         s.doVhostRequests(c, authzViaQueryString)
750 }
// authzViaQueryString is an authorizer that replaces the request's
// raw query string with "api_token=<tok>" (the token is inserted
// as-is, without escaping). It reports 401 Unauthorized as the
// expected response to an invalid token.
func authzViaQueryString(r *http.Request, tok string) int {
	const param = "api_token="
	r.URL.RawQuery = param + tok
	return http.StatusUnauthorized
}
755
756 func (s *IntegrationSuite) TestVhostViaPOST(c *check.C) {
757         s.doVhostRequests(c, authzViaPOST)
758 }
// authzViaPOST is an authorizer that turns r into a form-encoded
// POST request whose body carries the token in an "api_token" field.
// It reports 401 Unauthorized as the expected response to an invalid
// token.
func authzViaPOST(r *http.Request, tok string) int {
	form := url.Values{}
	form.Set("api_token", tok)

	r.Method = "POST"
	r.Header.Add("Content-Type", "application/x-www-form-urlencoded")
	r.Body = ioutil.NopCloser(strings.NewReader(form.Encode()))
	return http.StatusUnauthorized
}
766
767 func (s *IntegrationSuite) TestVhostViaXHRPOST(c *check.C) {
768         s.doVhostRequests(c, authzViaPOST)
769 }
// authzViaXHRPOST is an authorizer that turns r into a form-encoded
// POST request with an "Origin: https://origin.example" header. The
// body carries the token in an "api_token" field together with
// disposition=attachment. It reports 401 Unauthorized as the
// expected response to an invalid token.
func authzViaXHRPOST(r *http.Request, tok string) int {
	form := url.Values{}
	form.Set("api_token", tok)
	form.Set("disposition", "attachment")

	r.Method = "POST"
	r.Header.Add("Content-Type", "application/x-www-form-urlencoded")
	r.Header.Add("Origin", "https://origin.example")
	r.Body = ioutil.NopCloser(strings.NewReader(form.Encode()))
	return http.StatusUnauthorized
}
781
782 // Try some combinations of {url, token} using the given authorization
783 // mechanism, and verify the result is correct.
784 func (s *IntegrationSuite) doVhostRequests(c *check.C, authz authorizer) {
785         for _, hostPath := range []string{
786                 arvadostest.FooCollection + ".example.com/foo",
787                 arvadostest.FooCollection + "--collections.example.com/foo",
788                 arvadostest.FooCollection + "--collections.example.com/_/foo",
789                 arvadostest.FooCollectionPDH + ".example.com/foo",
790                 strings.Replace(arvadostest.FooCollectionPDH, "+", "-", -1) + "--collections.example.com/foo",
791                 arvadostest.FooBarDirCollection + ".example.com/dir1/foo",
792         } {
793                 c.Log("doRequests: ", hostPath)
794                 s.doVhostRequestsWithHostPath(c, authz, hostPath)
795         }
796 }
797
798 func (s *IntegrationSuite) doVhostRequestsWithHostPath(c *check.C, authz authorizer, hostPath string) {
799         for _, tok := range []string{
800                 arvadostest.ActiveToken,
801                 arvadostest.ActiveToken[:15],
802                 arvadostest.SpectatorToken,
803                 "bogus",
804                 "",
805         } {
806                 u := mustParseURL("http://" + hostPath)
807                 req := &http.Request{
808                         Method:     "GET",
809                         Host:       u.Host,
810                         URL:        u,
811                         RequestURI: u.RequestURI(),
812                         Header:     http.Header{},
813                 }
814                 failCode := authz(req, tok)
815                 req, resp := s.doReq(req)
816                 code, body := resp.Code, resp.Body.String()
817
818                 // If the initial request had a (non-empty) token
819                 // showing in the query string, we should have been
820                 // redirected in order to hide it in a cookie.
821                 c.Check(req.URL.String(), check.Not(check.Matches), `.*api_token=.+`)
822
823                 if tok == arvadostest.ActiveToken {
824                         c.Check(code, check.Equals, http.StatusOK)
825                         c.Check(body, check.Equals, "foo")
826                 } else {
827                         c.Check(code >= 400, check.Equals, true)
828                         c.Check(code < 500, check.Equals, true)
829                         if tok == arvadostest.SpectatorToken {
830                                 // Valid token never offers to retry
831                                 // with different credentials.
832                                 c.Check(code, check.Equals, http.StatusNotFound)
833                         } else {
834                                 // Invalid token can ask to retry
835                                 // depending on the authz method.
836                                 c.Check(code, check.Equals, failCode)
837                         }
838                         if code == 404 {
839                                 c.Check(body, check.Equals, notFoundMessage+"\n")
840                         } else {
841                                 c.Check(body, check.Equals, unauthorizedMessage+"\n")
842                         }
843                 }
844         }
845 }
846
847 func (s *IntegrationSuite) TestVhostPortMatch(c *check.C) {
848         for _, host := range []string{"download.example.com", "DOWNLOAD.EXAMPLE.COM"} {
849                 for _, port := range []string{"80", "443", "8000"} {
850                         s.handler.Cluster.Services.WebDAVDownload.ExternalURL.Host = fmt.Sprintf("download.example.com:%v", port)
851                         u := mustParseURL(fmt.Sprintf("http://%v/by_id/%v/foo", host, arvadostest.FooCollection))
852                         req := &http.Request{
853                                 Method:     "GET",
854                                 Host:       u.Host,
855                                 URL:        u,
856                                 RequestURI: u.RequestURI(),
857                                 Header:     http.Header{"Authorization": []string{"Bearer " + arvadostest.ActiveToken}},
858                         }
859                         req, resp := s.doReq(req)
860                         code, _ := resp.Code, resp.Body.String()
861
862                         if port == "8000" {
863                                 c.Check(code, check.Equals, 401)
864                         } else {
865                                 c.Check(code, check.Equals, 200)
866                         }
867                 }
868         }
869 }
870
871 func (s *IntegrationSuite) do(method string, urlstring string, token string, hdr http.Header) (*http.Request, *httptest.ResponseRecorder) {
872         u := mustParseURL(urlstring)
873         if hdr == nil && token != "" {
874                 hdr = http.Header{"Authorization": {"Bearer " + token}}
875         } else if hdr == nil {
876                 hdr = http.Header{}
877         } else if token != "" {
878                 panic("must not pass both token and hdr")
879         }
880         return s.doReq(&http.Request{
881                 Method:     method,
882                 Host:       u.Host,
883                 URL:        u,
884                 RequestURI: u.RequestURI(),
885                 Header:     hdr,
886         })
887 }
888
889 func (s *IntegrationSuite) doReq(req *http.Request) (*http.Request, *httptest.ResponseRecorder) {
890         resp := httptest.NewRecorder()
891         s.handler.ServeHTTP(resp, req)
892         if resp.Code != http.StatusSeeOther {
893                 return req, resp
894         }
895         cookies := (&http.Response{Header: resp.Header()}).Cookies()
896         u, _ := req.URL.Parse(resp.Header().Get("Location"))
897         req = &http.Request{
898                 Method:     "GET",
899                 Host:       u.Host,
900                 URL:        u,
901                 RequestURI: u.RequestURI(),
902                 Header:     http.Header{},
903         }
904         for _, c := range cookies {
905                 req.AddCookie(c)
906         }
907         return s.doReq(req)
908 }
909
910 func (s *IntegrationSuite) TestVhostRedirectQueryTokenToCookie(c *check.C) {
911         s.testVhostRedirectTokenToCookie(c, "GET",
912                 arvadostest.FooCollection+".example.com/foo",
913                 "?api_token="+arvadostest.ActiveToken,
914                 nil,
915                 "",
916                 http.StatusOK,
917                 "foo",
918         )
919 }
920
921 func (s *IntegrationSuite) TestSingleOriginSecretLink(c *check.C) {
922         s.testVhostRedirectTokenToCookie(c, "GET",
923                 "example.com/c="+arvadostest.FooCollection+"/t="+arvadostest.ActiveToken+"/foo",
924                 "",
925                 nil,
926                 "",
927                 http.StatusOK,
928                 "foo",
929         )
930 }
931
932 func (s *IntegrationSuite) TestCollectionSharingToken(c *check.C) {
933         s.testVhostRedirectTokenToCookie(c, "GET",
934                 "example.com/c="+arvadostest.FooFileCollectionUUID+"/t="+arvadostest.FooFileCollectionSharingToken+"/foo",
935                 "",
936                 nil,
937                 "",
938                 http.StatusOK,
939                 "foo",
940         )
941         // Same valid sharing token, but requesting a different collection
942         s.testVhostRedirectTokenToCookie(c, "GET",
943                 "example.com/c="+arvadostest.FooCollection+"/t="+arvadostest.FooFileCollectionSharingToken+"/foo",
944                 "",
945                 nil,
946                 "",
947                 http.StatusNotFound,
948                 regexp.QuoteMeta(notFoundMessage+"\n"),
949         )
950 }
951
952 // Bad token in URL is 404 Not Found because it doesn't make sense to
953 // retry the same URL with different authorization.
954 func (s *IntegrationSuite) TestSingleOriginSecretLinkBadToken(c *check.C) {
955         s.testVhostRedirectTokenToCookie(c, "GET",
956                 "example.com/c="+arvadostest.FooCollection+"/t=bogus/foo",
957                 "",
958                 nil,
959                 "",
960                 http.StatusNotFound,
961                 regexp.QuoteMeta(notFoundMessage+"\n"),
962         )
963 }
964
965 // Bad token in a cookie (even if it got there via our own
966 // query-string-to-cookie redirect) is, in principle, retryable via
967 // wb2-login-and-redirect flow.
968 func (s *IntegrationSuite) TestVhostRedirectQueryTokenToBogusCookie(c *check.C) {
969         // Inline
970         resp := s.testVhostRedirectTokenToCookie(c, "GET",
971                 arvadostest.FooCollection+".example.com/foo",
972                 "?api_token=thisisabogustoken",
973                 http.Header{"Sec-Fetch-Mode": {"navigate"}},
974                 "",
975                 http.StatusSeeOther,
976                 "",
977         )
978         u, err := url.Parse(resp.Header().Get("Location"))
979         c.Assert(err, check.IsNil)
980         c.Logf("redirected to %s", u)
981         c.Check(u.Host, check.Equals, s.handler.Cluster.Services.Workbench2.ExternalURL.Host)
982         c.Check(u.Query().Get("redirectToPreview"), check.Equals, "/c="+arvadostest.FooCollection+"/foo")
983         c.Check(u.Query().Get("redirectToDownload"), check.Equals, "")
984
985         // Download/attachment indicated by ?disposition=attachment
986         resp = s.testVhostRedirectTokenToCookie(c, "GET",
987                 arvadostest.FooCollection+".example.com/foo",
988                 "?api_token=thisisabogustoken&disposition=attachment",
989                 http.Header{"Sec-Fetch-Mode": {"navigate"}},
990                 "",
991                 http.StatusSeeOther,
992                 "",
993         )
994         u, err = url.Parse(resp.Header().Get("Location"))
995         c.Assert(err, check.IsNil)
996         c.Logf("redirected to %s", u)
997         c.Check(u.Host, check.Equals, s.handler.Cluster.Services.Workbench2.ExternalURL.Host)
998         c.Check(u.Query().Get("redirectToPreview"), check.Equals, "")
999         c.Check(u.Query().Get("redirectToDownload"), check.Equals, "/c="+arvadostest.FooCollection+"/foo")
1000
1001         // Download/attachment indicated by vhost
1002         resp = s.testVhostRedirectTokenToCookie(c, "GET",
1003                 s.handler.Cluster.Services.WebDAVDownload.ExternalURL.Host+"/c="+arvadostest.FooCollection+"/foo",
1004                 "?api_token=thisisabogustoken",
1005                 http.Header{"Sec-Fetch-Mode": {"navigate"}},
1006                 "",
1007                 http.StatusSeeOther,
1008                 "",
1009         )
1010         u, err = url.Parse(resp.Header().Get("Location"))
1011         c.Assert(err, check.IsNil)
1012         c.Logf("redirected to %s", u)
1013         c.Check(u.Host, check.Equals, s.handler.Cluster.Services.Workbench2.ExternalURL.Host)
1014         c.Check(u.Query().Get("redirectToPreview"), check.Equals, "")
1015         c.Check(u.Query().Get("redirectToDownload"), check.Equals, "/c="+arvadostest.FooCollection+"/foo")
1016
1017         // Without "Sec-Fetch-Mode: navigate" header, just 401.
1018         s.testVhostRedirectTokenToCookie(c, "GET",
1019                 s.handler.Cluster.Services.WebDAVDownload.ExternalURL.Host+"/c="+arvadostest.FooCollection+"/foo",
1020                 "?api_token=thisisabogustoken",
1021                 http.Header{"Sec-Fetch-Mode": {"cors"}},
1022                 "",
1023                 http.StatusUnauthorized,
1024                 regexp.QuoteMeta(unauthorizedMessage+"\n"),
1025         )
1026         s.testVhostRedirectTokenToCookie(c, "GET",
1027                 s.handler.Cluster.Services.WebDAVDownload.ExternalURL.Host+"/c="+arvadostest.FooCollection+"/foo",
1028                 "?api_token=thisisabogustoken",
1029                 nil,
1030                 "",
1031                 http.StatusUnauthorized,
1032                 regexp.QuoteMeta(unauthorizedMessage+"\n"),
1033         )
1034 }
1035
1036 func (s *IntegrationSuite) TestVhostRedirectWithNoCache(c *check.C) {
1037         resp := s.testVhostRedirectTokenToCookie(c, "GET",
1038                 arvadostest.FooCollection+".example.com/foo",
1039                 "?api_token=thisisabogustoken",
1040                 http.Header{
1041                         "Sec-Fetch-Mode": {"navigate"},
1042                         "Cache-Control":  {"no-cache"},
1043                 },
1044                 "",
1045                 http.StatusSeeOther,
1046                 "",
1047         )
1048         u, err := url.Parse(resp.Header().Get("Location"))
1049         c.Assert(err, check.IsNil)
1050         c.Logf("redirected to %s", u)
1051         c.Check(u.Host, check.Equals, s.handler.Cluster.Services.Workbench2.ExternalURL.Host)
1052         c.Check(u.Query().Get("redirectToPreview"), check.Equals, "/c="+arvadostest.FooCollection+"/foo")
1053         c.Check(u.Query().Get("redirectToDownload"), check.Equals, "")
1054 }
1055
1056 func (s *IntegrationSuite) TestNoTokenWorkbench2LoginFlow(c *check.C) {
1057         for _, trial := range []struct {
1058                 anonToken    bool
1059                 cacheControl string
1060         }{
1061                 {},
1062                 {cacheControl: "no-cache"},
1063                 {anonToken: true},
1064                 {anonToken: true, cacheControl: "no-cache"},
1065         } {
1066                 c.Logf("trial: %+v", trial)
1067
1068                 if trial.anonToken {
1069                         s.handler.Cluster.Users.AnonymousUserToken = arvadostest.AnonymousToken
1070                 } else {
1071                         s.handler.Cluster.Users.AnonymousUserToken = ""
1072                 }
1073                 req, err := http.NewRequest("GET", "http://"+arvadostest.FooCollection+".example.com/foo", nil)
1074                 c.Assert(err, check.IsNil)
1075                 req.Header.Set("Sec-Fetch-Mode", "navigate")
1076                 if trial.cacheControl != "" {
1077                         req.Header.Set("Cache-Control", trial.cacheControl)
1078                 }
1079                 resp := httptest.NewRecorder()
1080                 s.handler.ServeHTTP(resp, req)
1081                 c.Check(resp.Code, check.Equals, http.StatusSeeOther)
1082                 u, err := url.Parse(resp.Header().Get("Location"))
1083                 c.Assert(err, check.IsNil)
1084                 c.Logf("redirected to %q", u)
1085                 c.Check(u.Host, check.Equals, s.handler.Cluster.Services.Workbench2.ExternalURL.Host)
1086                 c.Check(u.Query().Get("redirectToPreview"), check.Equals, "/c="+arvadostest.FooCollection+"/foo")
1087                 c.Check(u.Query().Get("redirectToDownload"), check.Equals, "")
1088         }
1089 }
1090
1091 func (s *IntegrationSuite) TestVhostRedirectQueryTokenSingleOriginError(c *check.C) {
1092         s.testVhostRedirectTokenToCookie(c, "GET",
1093                 "example.com/c="+arvadostest.FooCollection+"/foo",
1094                 "?api_token="+arvadostest.ActiveToken,
1095                 nil,
1096                 "",
1097                 http.StatusBadRequest,
1098                 regexp.QuoteMeta("cannot serve inline content at this URL (possible configuration error; see https://doc.arvados.org/install/install-keep-web.html#dns)\n"),
1099         )
1100 }
1101
1102 // If client requests an attachment by putting ?disposition=attachment
1103 // in the query string, and gets redirected, the redirect target
1104 // should respond with an attachment.
1105 func (s *IntegrationSuite) TestVhostRedirectQueryTokenRequestAttachment(c *check.C) {
1106         resp := s.testVhostRedirectTokenToCookie(c, "GET",
1107                 arvadostest.FooCollection+".example.com/foo",
1108                 "?disposition=attachment&api_token="+arvadostest.ActiveToken,
1109                 nil,
1110                 "",
1111                 http.StatusOK,
1112                 "foo",
1113         )
1114         c.Check(resp.Header().Get("Content-Disposition"), check.Matches, "attachment(;.*)?")
1115 }
1116
1117 func (s *IntegrationSuite) TestVhostRedirectQueryTokenSiteFS(c *check.C) {
1118         s.handler.Cluster.Services.WebDAVDownload.ExternalURL.Host = "download.example.com"
1119         resp := s.testVhostRedirectTokenToCookie(c, "GET",
1120                 "download.example.com/by_id/"+arvadostest.FooCollection+"/foo",
1121                 "?api_token="+arvadostest.ActiveToken,
1122                 nil,
1123                 "",
1124                 http.StatusOK,
1125                 "foo",
1126         )
1127         c.Check(resp.Header().Get("Content-Disposition"), check.Matches, "attachment(;.*)?")
1128 }
1129
1130 func (s *IntegrationSuite) TestPastCollectionVersionFileAccess(c *check.C) {
1131         s.handler.Cluster.Services.WebDAVDownload.ExternalURL.Host = "download.example.com"
1132         resp := s.testVhostRedirectTokenToCookie(c, "GET",
1133                 "download.example.com/c="+arvadostest.WazVersion1Collection+"/waz",
1134                 "?api_token="+arvadostest.ActiveToken,
1135                 nil,
1136                 "",
1137                 http.StatusOK,
1138                 "waz",
1139         )
1140         c.Check(resp.Header().Get("Content-Disposition"), check.Matches, "attachment(;.*)?")
1141         resp = s.testVhostRedirectTokenToCookie(c, "GET",
1142                 "download.example.com/by_id/"+arvadostest.WazVersion1Collection+"/waz",
1143                 "?api_token="+arvadostest.ActiveToken,
1144                 nil,
1145                 "",
1146                 http.StatusOK,
1147                 "waz",
1148         )
1149         c.Check(resp.Header().Get("Content-Disposition"), check.Matches, "attachment(;.*)?")
1150 }
1151
1152 func (s *IntegrationSuite) TestVhostRedirectQueryTokenTrustAllContent(c *check.C) {
1153         s.handler.Cluster.Collections.TrustAllContent = true
1154         s.testVhostRedirectTokenToCookie(c, "GET",
1155                 "example.com/c="+arvadostest.FooCollection+"/foo",
1156                 "?api_token="+arvadostest.ActiveToken,
1157                 nil,
1158                 "",
1159                 http.StatusOK,
1160                 "foo",
1161         )
1162 }
1163
1164 func (s *IntegrationSuite) TestVhostRedirectQueryTokenAttachmentOnlyHost(c *check.C) {
1165         s.handler.Cluster.Services.WebDAVDownload.ExternalURL.Host = "example.com:1234"
1166
1167         s.testVhostRedirectTokenToCookie(c, "GET",
1168                 "example.com/c="+arvadostest.FooCollection+"/foo",
1169                 "?api_token="+arvadostest.ActiveToken,
1170                 nil,
1171                 "",
1172                 http.StatusBadRequest,
1173                 regexp.QuoteMeta("cannot serve inline content at this URL (possible configuration error; see https://doc.arvados.org/install/install-keep-web.html#dns)\n"),
1174         )
1175
1176         resp := s.testVhostRedirectTokenToCookie(c, "GET",
1177                 "example.com:1234/c="+arvadostest.FooCollection+"/foo",
1178                 "?api_token="+arvadostest.ActiveToken,
1179                 nil,
1180                 "",
1181                 http.StatusOK,
1182                 "foo",
1183         )
1184         c.Check(resp.Header().Get("Content-Disposition"), check.Equals, "attachment")
1185 }
1186
1187 func (s *IntegrationSuite) TestVhostRedirectMultipleTokens(c *check.C) {
1188         baseUrl := arvadostest.FooCollection + ".example.com/foo"
1189         query := url.Values{}
1190
1191         // The intent of these tests is to check that requests are redirected
1192         // correctly in the presence of multiple API tokens. The exact response
1193         // codes and content are not closely considered: they're just how
1194         // keep-web responded when we made the smallest possible fix. Changing
1195         // those responses may be okay, but you should still test all these
1196         // different cases and the associated redirect logic.
1197         query["api_token"] = []string{arvadostest.ActiveToken, arvadostest.AnonymousToken}
1198         s.testVhostRedirectTokenToCookie(c, "GET", baseUrl, "?"+query.Encode(), nil, "", http.StatusOK, "foo")
1199         query["api_token"] = []string{arvadostest.ActiveToken, arvadostest.AnonymousToken, ""}
1200         s.testVhostRedirectTokenToCookie(c, "GET", baseUrl, "?"+query.Encode(), nil, "", http.StatusOK, "foo")
1201         query["api_token"] = []string{arvadostest.ActiveToken, "", arvadostest.AnonymousToken}
1202         s.testVhostRedirectTokenToCookie(c, "GET", baseUrl, "?"+query.Encode(), nil, "", http.StatusOK, "foo")
1203         query["api_token"] = []string{"", arvadostest.ActiveToken}
1204         s.testVhostRedirectTokenToCookie(c, "GET", baseUrl, "?"+query.Encode(), nil, "", http.StatusOK, "foo")
1205
1206         expectContent := regexp.QuoteMeta(unauthorizedMessage + "\n")
1207         query["api_token"] = []string{arvadostest.AnonymousToken, "invalidtoo"}
1208         s.testVhostRedirectTokenToCookie(c, "GET", baseUrl, "?"+query.Encode(), nil, "", http.StatusUnauthorized, expectContent)
1209         query["api_token"] = []string{arvadostest.AnonymousToken, ""}
1210         s.testVhostRedirectTokenToCookie(c, "GET", baseUrl, "?"+query.Encode(), nil, "", http.StatusUnauthorized, expectContent)
1211         query["api_token"] = []string{"", arvadostest.AnonymousToken}
1212         s.testVhostRedirectTokenToCookie(c, "GET", baseUrl, "?"+query.Encode(), nil, "", http.StatusUnauthorized, expectContent)
1213 }
1214
1215 func (s *IntegrationSuite) TestVhostRedirectPOSTFormTokenToCookie(c *check.C) {
1216         s.testVhostRedirectTokenToCookie(c, "POST",
1217                 arvadostest.FooCollection+".example.com/foo",
1218                 "",
1219                 http.Header{"Content-Type": {"application/x-www-form-urlencoded"}},
1220                 url.Values{"api_token": {arvadostest.ActiveToken}}.Encode(),
1221                 http.StatusOK,
1222                 "foo",
1223         )
1224 }
1225
1226 func (s *IntegrationSuite) TestVhostRedirectPOSTFormTokenToCookie404(c *check.C) {
1227         s.testVhostRedirectTokenToCookie(c, "POST",
1228                 arvadostest.FooCollection+".example.com/foo",
1229                 "",
1230                 http.Header{"Content-Type": {"application/x-www-form-urlencoded"}},
1231                 url.Values{"api_token": {arvadostest.SpectatorToken}}.Encode(),
1232                 http.StatusNotFound,
1233                 regexp.QuoteMeta(notFoundMessage+"\n"),
1234         )
1235 }
1236
1237 func (s *IntegrationSuite) TestAnonymousTokenOK(c *check.C) {
1238         s.handler.Cluster.Users.AnonymousUserToken = arvadostest.AnonymousToken
1239         s.testVhostRedirectTokenToCookie(c, "GET",
1240                 "example.com/c="+arvadostest.HelloWorldCollection+"/Hello%20world.txt",
1241                 "",
1242                 nil,
1243                 "",
1244                 http.StatusOK,
1245                 "Hello world\n",
1246         )
1247 }
1248
1249 func (s *IntegrationSuite) TestAnonymousTokenError(c *check.C) {
1250         s.handler.Cluster.Users.AnonymousUserToken = "anonymousTokenConfiguredButInvalid"
1251         s.testVhostRedirectTokenToCookie(c, "GET",
1252                 "example.com/c="+arvadostest.HelloWorldCollection+"/Hello%20world.txt",
1253                 "",
1254                 nil,
1255                 "",
1256                 http.StatusUnauthorized,
1257                 "Authorization tokens are not accepted here: .*\n",
1258         )
1259 }
1260
1261 func (s *IntegrationSuite) TestSpecialCharsInPath(c *check.C) {
1262         s.handler.Cluster.Services.WebDAVDownload.ExternalURL.Host = "download.example.com"
1263
1264         client := arvados.NewClientFromEnv()
1265         client.AuthToken = arvadostest.ActiveToken
1266         fs, err := (&arvados.Collection{}).FileSystem(client, nil)
1267         c.Assert(err, check.IsNil)
1268         path := `https:\\"odd' path chars`
1269         f, err := fs.OpenFile(path, os.O_CREATE, 0777)
1270         c.Assert(err, check.IsNil)
1271         f.Close()
1272         mtxt, err := fs.MarshalManifest(".")
1273         c.Assert(err, check.IsNil)
1274         var coll arvados.Collection
1275         err = client.RequestAndDecode(&coll, "POST", "arvados/v1/collections", nil, map[string]interface{}{
1276                 "collection": map[string]string{
1277                         "manifest_text": mtxt,
1278                 },
1279         })
1280         c.Assert(err, check.IsNil)
1281
1282         u, _ := url.Parse("http://download.example.com/c=" + coll.UUID + "/")
1283         req := &http.Request{
1284                 Method:     "GET",
1285                 Host:       u.Host,
1286                 URL:        u,
1287                 RequestURI: u.RequestURI(),
1288                 Header: http.Header{
1289                         "Authorization": {"Bearer " + client.AuthToken},
1290                 },
1291         }
1292         resp := httptest.NewRecorder()
1293         s.handler.ServeHTTP(resp, req)
1294         c.Check(resp.Code, check.Equals, http.StatusOK)
1295         doc, err := html.Parse(resp.Body)
1296         c.Assert(err, check.IsNil)
1297         pathHrefMap := getPathHrefMap(doc)
1298         c.Check(pathHrefMap, check.HasLen, 1) // the one leaf added to collection
1299         href, hasPath := pathHrefMap[path]
1300         c.Assert(hasPath, check.Equals, true) // the path is listed
1301         relUrl := mustParseURL(href)
1302         c.Check(relUrl.Path, check.Equals, "./"+path) // href can be decoded back to path
1303 }
1304
1305 func (s *IntegrationSuite) TestForwardSlashSubstitution(c *check.C) {
1306         arv := arvados.NewClientFromEnv()
1307         s.handler.Cluster.Services.WebDAVDownload.ExternalURL.Host = "download.example.com"
1308         s.handler.Cluster.Collections.ForwardSlashNameSubstitution = "{SOLIDUS}"
1309         name := "foo/bar/baz"
1310         nameShown := strings.Replace(name, "/", "{SOLIDUS}", -1)
1311
1312         client := arvados.NewClientFromEnv()
1313         client.AuthToken = arvadostest.ActiveToken
1314         fs, err := (&arvados.Collection{}).FileSystem(client, nil)
1315         c.Assert(err, check.IsNil)
1316         f, err := fs.OpenFile("filename", os.O_CREATE, 0777)
1317         c.Assert(err, check.IsNil)
1318         f.Close()
1319         mtxt, err := fs.MarshalManifest(".")
1320         c.Assert(err, check.IsNil)
1321         var coll arvados.Collection
1322         err = client.RequestAndDecode(&coll, "POST", "arvados/v1/collections", nil, map[string]interface{}{
1323                 "collection": map[string]string{
1324                         "manifest_text": mtxt,
1325                         "name":          name,
1326                         "owner_uuid":    arvadostest.AProjectUUID,
1327                 },
1328         })
1329         c.Assert(err, check.IsNil)
1330         defer arv.RequestAndDecode(&coll, "DELETE", "arvados/v1/collections/"+coll.UUID, nil, nil)
1331
1332         base := "http://download.example.com/by_id/" + coll.OwnerUUID + "/"
1333         for tryURL, expectedAnchorText := range map[string]string{
1334                 base:                   nameShown + "/",
1335                 base + nameShown + "/": "filename",
1336         } {
1337                 u, _ := url.Parse(tryURL)
1338                 req := &http.Request{
1339                         Method:     "GET",
1340                         Host:       u.Host,
1341                         URL:        u,
1342                         RequestURI: u.RequestURI(),
1343                         Header: http.Header{
1344                                 "Authorization": {"Bearer " + client.AuthToken},
1345                         },
1346                 }
1347                 resp := httptest.NewRecorder()
1348                 s.handler.ServeHTTP(resp, req)
1349                 c.Check(resp.Code, check.Equals, http.StatusOK)
1350                 doc, err := html.Parse(resp.Body)
1351                 c.Assert(err, check.IsNil) // valid HTML
1352                 pathHrefMap := getPathHrefMap(doc)
1353                 href, hasExpected := pathHrefMap[expectedAnchorText]
1354                 c.Assert(hasExpected, check.Equals, true) // has expected anchor text
1355                 c.Assert(href, check.Not(check.Equals), "")
1356                 relUrl := mustParseURL(href)
1357                 c.Check(relUrl.Path, check.Equals, "./"+expectedAnchorText) // decoded href maps back to the anchor text
1358         }
1359 }
1360
1361 // XHRs can't follow redirect-with-cookie so they rely on method=POST
1362 // and disposition=attachment (telling us it's acceptable to respond
1363 // with content instead of a redirect) and an Origin header that gets
1364 // added automatically by the browser (telling us it's desirable to do
1365 // so).
1366 func (s *IntegrationSuite) TestXHRNoRedirect(c *check.C) {
1367         u, _ := url.Parse("http://example.com/c=" + arvadostest.FooCollection + "/foo")
1368         req := &http.Request{
1369                 Method:     "POST",
1370                 Host:       u.Host,
1371                 URL:        u,
1372                 RequestURI: u.RequestURI(),
1373                 Header: http.Header{
1374                         "Origin":       {"https://origin.example"},
1375                         "Content-Type": {"application/x-www-form-urlencoded"},
1376                 },
1377                 Body: ioutil.NopCloser(strings.NewReader(url.Values{
1378                         "api_token":   {arvadostest.ActiveToken},
1379                         "disposition": {"attachment"},
1380                 }.Encode())),
1381         }
1382         resp := httptest.NewRecorder()
1383         s.handler.ServeHTTP(resp, req)
1384         c.Check(resp.Code, check.Equals, http.StatusOK)
1385         c.Check(resp.Body.String(), check.Equals, "foo")
1386         c.Check(resp.Header().Get("Access-Control-Allow-Origin"), check.Equals, "*")
1387
1388         // GET + Origin header is representative of both AJAX GET
1389         // requests and inline images via <IMG crossorigin="anonymous"
1390         // src="...">.
1391         u.RawQuery = "api_token=" + url.QueryEscape(arvadostest.ActiveTokenV2)
1392         req = &http.Request{
1393                 Method:     "GET",
1394                 Host:       u.Host,
1395                 URL:        u,
1396                 RequestURI: u.RequestURI(),
1397                 Header: http.Header{
1398                         "Origin": {"https://origin.example"},
1399                 },
1400         }
1401         resp = httptest.NewRecorder()
1402         s.handler.ServeHTTP(resp, req)
1403         c.Check(resp.Code, check.Equals, http.StatusOK)
1404         c.Check(resp.Body.String(), check.Equals, "foo")
1405         c.Check(resp.Header().Get("Access-Control-Allow-Origin"), check.Equals, "*")
1406 }
1407
1408 func (s *IntegrationSuite) testVhostRedirectTokenToCookie(c *check.C, method, hostPath, queryString string, reqHeader http.Header, reqBody string, expectStatus int, matchRespBody string) *httptest.ResponseRecorder {
1409         if reqHeader == nil {
1410                 reqHeader = http.Header{}
1411         }
1412         u, _ := url.Parse(`http://` + hostPath + queryString)
1413         c.Logf("requesting %s", u)
1414         req := &http.Request{
1415                 Method:     method,
1416                 Host:       u.Host,
1417                 URL:        u,
1418                 RequestURI: u.RequestURI(),
1419                 Header:     reqHeader,
1420                 Body:       ioutil.NopCloser(strings.NewReader(reqBody)),
1421         }
1422
1423         resp := httptest.NewRecorder()
1424         defer func() {
1425                 c.Check(resp.Code, check.Equals, expectStatus)
1426                 c.Check(resp.Body.String(), check.Matches, matchRespBody)
1427         }()
1428
1429         s.handler.ServeHTTP(resp, req)
1430         if resp.Code != http.StatusSeeOther {
1431                 attachment, _ := regexp.MatchString(`^attachment(;|$)`, resp.Header().Get("Content-Disposition"))
1432                 // Since we're not redirecting, check that any api_token in the URL is
1433                 // handled safely.
1434                 // If there is no token in the URL, then we're good.
1435                 // Otherwise, if the response code is an error, the body is expected to
1436                 // be static content, and nothing that might maliciously introspect the
1437                 // URL. It's considered safe and allowed.
1438                 // Otherwise, if the response content has attachment disposition,
1439                 // that's considered safe for all the reasons explained in the
1440                 // safeAttachment comment in handler.go.
1441                 c.Check(!u.Query().Has("api_token") || resp.Code >= 400 || attachment, check.Equals, true)
1442                 return resp
1443         }
1444
1445         loc, err := url.Parse(resp.Header().Get("Location"))
1446         c.Assert(err, check.IsNil)
1447         c.Check(loc.Scheme, check.Equals, u.Scheme)
1448         c.Check(loc.Host, check.Equals, u.Host)
1449         c.Check(loc.RawPath, check.Equals, u.RawPath)
1450         // If the response was a redirect, it should never include an API token.
1451         c.Check(loc.Query().Has("api_token"), check.Equals, false)
1452         c.Check(resp.Body.String(), check.Matches, `.*href="http://`+regexp.QuoteMeta(html.EscapeString(hostPath))+`(\?[^"]*)?".*`)
1453         cookies := (&http.Response{Header: resp.Header()}).Cookies()
1454
1455         c.Logf("following redirect to %s", u)
1456         req = &http.Request{
1457                 Method:     "GET",
1458                 Host:       loc.Host,
1459                 URL:        loc,
1460                 RequestURI: loc.RequestURI(),
1461                 Header:     reqHeader,
1462         }
1463         for _, c := range cookies {
1464                 req.AddCookie(c)
1465         }
1466
1467         resp = httptest.NewRecorder()
1468         s.handler.ServeHTTP(resp, req)
1469
1470         if resp.Code != http.StatusSeeOther {
1471                 c.Check(resp.Header().Get("Location"), check.Equals, "")
1472         }
1473         return resp
1474 }
1475
1476 func (s *IntegrationSuite) TestDirectoryListingWithAnonymousToken(c *check.C) {
1477         s.handler.Cluster.Users.AnonymousUserToken = arvadostest.AnonymousToken
1478         s.testDirectoryListing(c)
1479 }
1480
1481 func (s *IntegrationSuite) TestDirectoryListingWithNoAnonymousToken(c *check.C) {
1482         s.handler.Cluster.Users.AnonymousUserToken = ""
1483         s.testDirectoryListing(c)
1484 }
1485
1486 func (s *IntegrationSuite) testDirectoryListing(c *check.C) {
1487         // The "ownership cycle" test fixtures are reachable from the
1488         // "filter group without filters" group, causing webdav's
1489         // walkfs to recurse indefinitely. Avoid that by deleting one
1490         // of the bogus fixtures.
1491         arv := arvados.NewClientFromEnv()
1492         err := arv.RequestAndDecode(nil, "DELETE", "arvados/v1/groups/zzzzz-j7d0g-cx2al9cqkmsf1hs", nil, nil)
1493         if err != nil {
1494                 c.Assert(err, check.FitsTypeOf, &arvados.TransactionError{})
1495                 c.Check(err.(*arvados.TransactionError).StatusCode, check.Equals, 404)
1496         }
1497
1498         s.handler.Cluster.Services.WebDAVDownload.ExternalURL.Host = "download.example.com"
1499         authHeader := http.Header{
1500                 "Authorization": {"OAuth2 " + arvadostest.ActiveToken},
1501         }
1502         for _, trial := range []struct {
1503                 uri      string
1504                 header   http.Header
1505                 expect   []string
1506                 redirect string
1507                 cutDirs  int
1508         }{
1509                 {
1510                         uri:     strings.Replace(arvadostest.FooAndBarFilesInDirPDH, "+", "-", -1) + ".example.com/",
1511                         header:  authHeader,
1512                         expect:  []string{"dir1/foo", "dir1/bar"},
1513                         cutDirs: 0,
1514                 },
1515                 {
1516                         uri:     strings.Replace(arvadostest.FooAndBarFilesInDirPDH, "+", "-", -1) + ".example.com/dir1/",
1517                         header:  authHeader,
1518                         expect:  []string{"foo", "bar"},
1519                         cutDirs: 1,
1520                 },
1521                 {
1522                         // URLs of this form ignore authHeader, and
1523                         // FooAndBarFilesInDirUUID isn't public, so
1524                         // this returns 401.
1525                         uri:    "download.example.com/collections/" + arvadostest.FooAndBarFilesInDirUUID + "/",
1526                         header: authHeader,
1527                         expect: nil,
1528                 },
1529                 {
1530                         uri:     "download.example.com/users/active/foo_file_in_dir/",
1531                         header:  authHeader,
1532                         expect:  []string{"dir1/"},
1533                         cutDirs: 3,
1534                 },
1535                 {
1536                         uri:     "download.example.com/users/active/foo_file_in_dir/dir1/",
1537                         header:  authHeader,
1538                         expect:  []string{"bar"},
1539                         cutDirs: 4,
1540                 },
1541                 {
1542                         uri:     "download.example.com/",
1543                         header:  authHeader,
1544                         expect:  []string{"users/"},
1545                         cutDirs: 0,
1546                 },
1547                 {
1548                         uri:      "download.example.com/users",
1549                         header:   authHeader,
1550                         redirect: "/users/",
1551                         expect:   []string{"active/"},
1552                         cutDirs:  1,
1553                 },
1554                 {
1555                         uri:     "download.example.com/users/",
1556                         header:  authHeader,
1557                         expect:  []string{"active/"},
1558                         cutDirs: 1,
1559                 },
1560                 {
1561                         uri:      "download.example.com/users/active",
1562                         header:   authHeader,
1563                         redirect: "/users/active/",
1564                         expect:   []string{"foo_file_in_dir/"},
1565                         cutDirs:  2,
1566                 },
1567                 {
1568                         uri:     "download.example.com/users/active/",
1569                         header:  authHeader,
1570                         expect:  []string{"foo_file_in_dir/"},
1571                         cutDirs: 2,
1572                 },
1573                 {
1574                         uri:     "collections.example.com/collections/download/" + arvadostest.FooAndBarFilesInDirUUID + "/" + arvadostest.ActiveToken + "/",
1575                         header:  nil,
1576                         expect:  []string{"dir1/foo", "dir1/bar"},
1577                         cutDirs: 4,
1578                 },
1579                 {
1580                         uri:     "collections.example.com/c=" + arvadostest.FooAndBarFilesInDirUUID + "/t=" + arvadostest.ActiveToken + "/",
1581                         header:  nil,
1582                         expect:  []string{"dir1/foo", "dir1/bar"},
1583                         cutDirs: 2,
1584                 },
1585                 {
1586                         uri:     "collections.example.com/c=" + arvadostest.FooAndBarFilesInDirUUID + "/t=" + arvadostest.ActiveToken,
1587                         header:  nil,
1588                         expect:  []string{"dir1/foo", "dir1/bar"},
1589                         cutDirs: 2,
1590                 },
1591                 {
1592                         uri:     "download.example.com/c=" + arvadostest.FooAndBarFilesInDirUUID,
1593                         header:  authHeader,
1594                         expect:  []string{"dir1/foo", "dir1/bar"},
1595                         cutDirs: 1,
1596                 },
1597                 {
1598                         uri:      "download.example.com/c=" + arvadostest.FooAndBarFilesInDirUUID + "/dir1",
1599                         header:   authHeader,
1600                         redirect: "/c=" + arvadostest.FooAndBarFilesInDirUUID + "/dir1/",
1601                         expect:   []string{"foo", "bar"},
1602                         cutDirs:  2,
1603                 },
1604                 {
1605                         uri:     "download.example.com/c=" + arvadostest.FooAndBarFilesInDirUUID + "/_/dir1/",
1606                         header:  authHeader,
1607                         expect:  []string{"foo", "bar"},
1608                         cutDirs: 3,
1609                 },
1610                 {
1611                         uri:      arvadostest.FooAndBarFilesInDirUUID + ".example.com/dir1?api_token=" + arvadostest.ActiveToken,
1612                         header:   authHeader,
1613                         redirect: "/dir1/",
1614                         expect:   []string{"foo", "bar"},
1615                         cutDirs:  1,
1616                 },
1617                 {
1618                         uri:    "collections.example.com/c=" + arvadostest.FooAndBarFilesInDirUUID + "/theperthcountyconspiracydoesnotexist/",
1619                         header: authHeader,
1620                         expect: nil,
1621                 },
1622                 {
1623                         uri:     "download.example.com/c=" + arvadostest.WazVersion1Collection,
1624                         header:  authHeader,
1625                         expect:  []string{"waz"},
1626                         cutDirs: 1,
1627                 },
1628                 {
1629                         uri:     "download.example.com/by_id/" + arvadostest.WazVersion1Collection,
1630                         header:  authHeader,
1631                         expect:  []string{"waz"},
1632                         cutDirs: 2,
1633                 },
1634                 {
1635                         uri:     "download.example.com/users/active/This filter group/",
1636                         header:  authHeader,
1637                         expect:  []string{"A Subproject/"},
1638                         cutDirs: 3,
1639                 },
1640                 {
1641                         uri:     "download.example.com/users/active/This filter group/A Subproject",
1642                         header:  authHeader,
1643                         expect:  []string{"baz_file/"},
1644                         cutDirs: 4,
1645                 },
1646                 {
1647                         uri:     "download.example.com/by_id/" + arvadostest.AFilterGroupUUID,
1648                         header:  authHeader,
1649                         expect:  []string{"A Subproject/"},
1650                         cutDirs: 2,
1651                 },
1652                 {
1653                         uri:     "download.example.com/by_id/" + arvadostest.AFilterGroupUUID + "/A Subproject",
1654                         header:  authHeader,
1655                         expect:  []string{"baz_file/"},
1656                         cutDirs: 3,
1657                 },
1658         } {
1659                 comment := check.Commentf("HTML: %q redir %q => %q", trial.uri, trial.redirect, trial.expect)
1660                 resp := httptest.NewRecorder()
1661                 u := mustParseURL("//" + trial.uri)
1662                 req := &http.Request{
1663                         Method:     "GET",
1664                         Host:       u.Host,
1665                         URL:        u,
1666                         RequestURI: u.RequestURI(),
1667                         Header:     copyHeader(trial.header),
1668                 }
1669                 s.handler.ServeHTTP(resp, req)
1670                 var cookies []*http.Cookie
1671                 for resp.Code == http.StatusSeeOther {
1672                         u, _ := req.URL.Parse(resp.Header().Get("Location"))
1673                         req = &http.Request{
1674                                 Method:     "GET",
1675                                 Host:       u.Host,
1676                                 URL:        u,
1677                                 RequestURI: u.RequestURI(),
1678                                 Header:     copyHeader(trial.header),
1679                         }
1680                         cookies = append(cookies, (&http.Response{Header: resp.Header()}).Cookies()...)
1681                         for _, c := range cookies {
1682                                 req.AddCookie(c)
1683                         }
1684                         resp = httptest.NewRecorder()
1685                         s.handler.ServeHTTP(resp, req)
1686                 }
1687                 if trial.redirect != "" {
1688                         c.Check(req.URL.Path, check.Equals, trial.redirect, comment)
1689                 }
1690                 if trial.expect == nil {
1691                         c.Check(resp.Code, check.Equals, http.StatusUnauthorized, comment)
1692                 } else {
1693                         c.Check(resp.Code, check.Equals, http.StatusOK, comment)
1694                         listingPageDoc, err := html.Parse(resp.Body)
1695                         c.Check(err, check.IsNil, comment) // valid HTML document
1696                         pathHrefMap := getPathHrefMap(listingPageDoc)
1697                         c.Assert(pathHrefMap, check.Not(check.HasLen), 0, comment)
1698                         for _, e := range trial.expect {
1699                                 href, hasE := pathHrefMap[e]
1700                                 c.Check(hasE, check.Equals, true, comment) // expected path is listed
1701                                 relUrl := mustParseURL(href)
1702                                 c.Check(relUrl.Path, check.Equals, "./"+e, comment) // href can be decoded back to path
1703                         }
1704                         wgetCommand := getWgetExamplePre(listingPageDoc)
1705                         wgetExpected := regexp.MustCompile(`^\$ wget .*--cut-dirs=(\d+) .*'(https?://[^']+)'$`)
1706                         wgetMatchGroups := wgetExpected.FindStringSubmatch(wgetCommand)
1707                         c.Assert(wgetMatchGroups, check.NotNil)                                     // wget command matches
1708                         c.Check(wgetMatchGroups[1], check.Equals, fmt.Sprintf("%d", trial.cutDirs)) // correct level of cut dirs in wget command
1709                         printedUrl := mustParseURL(wgetMatchGroups[2])
1710                         c.Check(printedUrl.Host, check.Equals, req.URL.Host)
1711                         c.Check(printedUrl.Path, check.Equals, req.URL.Path) // URL arg in wget command can be decoded to the right path
1712                 }
1713
1714                 comment = check.Commentf("WebDAV: %q => %q", trial.uri, trial.expect)
1715                 req = &http.Request{
1716                         Method:     "OPTIONS",
1717                         Host:       u.Host,
1718                         URL:        u,
1719                         RequestURI: u.RequestURI(),
1720                         Header:     copyHeader(trial.header),
1721                         Body:       ioutil.NopCloser(&bytes.Buffer{}),
1722                 }
1723                 resp = httptest.NewRecorder()
1724                 s.handler.ServeHTTP(resp, req)
1725                 if trial.expect == nil {
1726                         c.Check(resp.Code, check.Equals, http.StatusUnauthorized, comment)
1727                 } else {
1728                         c.Check(resp.Code, check.Equals, http.StatusOK, comment)
1729                 }
1730
1731                 req = &http.Request{
1732                         Method:     "PROPFIND",
1733                         Host:       u.Host,
1734                         URL:        u,
1735                         RequestURI: u.RequestURI(),
1736                         Header:     copyHeader(trial.header),
1737                         Body:       ioutil.NopCloser(&bytes.Buffer{}),
1738                 }
1739                 resp = httptest.NewRecorder()
1740                 s.handler.ServeHTTP(resp, req)
1741                 // This check avoids logging a big XML document in the
1742                 // event webdav throws a 500 error after sending
1743                 // headers for a 207.
1744                 if !c.Check(strings.HasSuffix(resp.Body.String(), "Internal Server Error"), check.Equals, false) {
1745                         continue
1746                 }
1747                 if trial.expect == nil {
1748                         c.Check(resp.Code, check.Equals, http.StatusUnauthorized, comment)
1749                 } else {
1750                         c.Check(resp.Code, check.Equals, http.StatusMultiStatus, comment)
1751                         for _, e := range trial.expect {
1752                                 if strings.HasSuffix(e, "/") {
1753                                         e = filepath.Join(u.Path, e) + "/"
1754                                 } else {
1755                                         e = filepath.Join(u.Path, e)
1756                                 }
1757                                 e = strings.Replace(e, " ", "%20", -1)
1758                                 c.Check(resp.Body.String(), check.Matches, `(?ms).*<D:href>`+e+`</D:href>.*`, comment)
1759                         }
1760                 }
1761         }
1762 }
1763
1764 // Shallow-traverse the HTML document, gathering the nodes satisfying the
1765 // predicate function in the output slice. If a node matches the predicate,
1766 // none of its children will be visited.
1767 func getNodes(document *html.Node, predicate func(*html.Node) bool) []*html.Node {
1768         var acc []*html.Node
1769         var traverse func(*html.Node, []*html.Node) []*html.Node
1770         traverse = func(root *html.Node, sofar []*html.Node) []*html.Node {
1771                 if root == nil {
1772                         return sofar
1773                 }
1774                 if predicate(root) {
1775                         return append(sofar, root)
1776                 }
1777                 for cur := root.FirstChild; cur != nil; cur = cur.NextSibling {
1778                         sofar = traverse(cur, sofar)
1779                 }
1780                 return sofar
1781         }
1782         return traverse(document, acc)
1783 }
1784
1785 // Returns true if a node has the attribute targetAttr with the given value
1786 func matchesAttributeValue(node *html.Node, targetAttr string, value string) bool {
1787         for _, attr := range node.Attr {
1788                 if attr.Key == targetAttr && attr.Val == value {
1789                         return true
1790                 }
1791         }
1792         return false
1793 }
1794
1795 // Concatenate the content of text-node children of node; only direct
1796 // children are visited, and any non-text children are skipped.
1797 func getNodeText(node *html.Node) string {
1798         var recv strings.Builder
1799         for c := node.FirstChild; c != nil; c = c.NextSibling {
1800                 if c.Type == html.TextNode {
1801                         recv.WriteString(c.Data)
1802                 }
1803         }
1804         return recv.String()
1805 }
1806
1807 // Returns a map from the directory listing item string (a path) to the href
1808 // value of its <a> tag (an encoded relative URL)
1809 func getPathHrefMap(document *html.Node) map[string]string {
1810         isItemATag := func(node *html.Node) bool {
1811                 return node.Type == html.ElementNode && node.Data == "a" && matchesAttributeValue(node, "class", "item")
1812         }
1813         aTags := getNodes(document, isItemATag)
1814         output := make(map[string]string)
1815         for _, elem := range aTags {
1816                 textContent := getNodeText(elem)
1817                 for _, attr := range elem.Attr {
1818                         if attr.Key == "href" {
1819                                 output[textContent] = attr.Val
1820                                 break
1821                         }
1822                 }
1823         }
1824         return output
1825 }
1826
1827 func getWgetExamplePre(document *html.Node) string {
1828         isWgetPre := func(node *html.Node) bool {
1829                 return node.Type == html.ElementNode && matchesAttributeValue(node, "id", "wget-example")
1830         }
1831         elements := getNodes(document, isWgetPre)
1832         if len(elements) != 1 {
1833                 return ""
1834         }
1835         return getNodeText(elements[0])
1836 }
1837
1838 func (s *IntegrationSuite) TestDeleteLastFile(c *check.C) {
1839         arv := arvados.NewClientFromEnv()
1840         var newCollection arvados.Collection
1841         err := arv.RequestAndDecode(&newCollection, "POST", "arvados/v1/collections", nil, map[string]interface{}{
1842                 "collection": map[string]string{
1843                         "owner_uuid":    arvadostest.ActiveUserUUID,
1844                         "manifest_text": ". acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:foo.txt 0:3:bar.txt\n",
1845                         "name":          "keep-web test collection",
1846                 },
1847                 "ensure_unique_name": true,
1848         })
1849         c.Assert(err, check.IsNil)
1850         defer arv.RequestAndDecode(&newCollection, "DELETE", "arvados/v1/collections/"+newCollection.UUID, nil, nil)
1851
1852         var updated arvados.Collection
1853         for _, fnm := range []string{"foo.txt", "bar.txt"} {
1854                 s.handler.Cluster.Services.WebDAVDownload.ExternalURL.Host = "example.com"
1855                 u, _ := url.Parse("http://example.com/c=" + newCollection.UUID + "/" + fnm)
1856                 req := &http.Request{
1857                         Method:     "DELETE",
1858                         Host:       u.Host,
1859                         URL:        u,
1860                         RequestURI: u.RequestURI(),
1861                         Header: http.Header{
1862                                 "Authorization": {"Bearer " + arvadostest.ActiveToken},
1863                         },
1864                 }
1865                 resp := httptest.NewRecorder()
1866                 s.handler.ServeHTTP(resp, req)
1867                 c.Check(resp.Code, check.Equals, http.StatusNoContent)
1868
1869                 updated = arvados.Collection{}
1870                 err = arv.RequestAndDecode(&updated, "GET", "arvados/v1/collections/"+newCollection.UUID, nil, nil)
1871                 c.Check(err, check.IsNil)
1872                 c.Check(updated.ManifestText, check.Not(check.Matches), `(?ms).*\Q`+fnm+`\E.*`)
1873                 c.Logf("updated manifest_text %q", updated.ManifestText)
1874         }
1875         c.Check(updated.ManifestText, check.Equals, "")
1876 }
1877
1878 func (s *IntegrationSuite) TestFileContentType(c *check.C) {
1879         s.handler.Cluster.Services.WebDAVDownload.ExternalURL.Host = "download.example.com"
1880
1881         client := arvados.NewClientFromEnv()
1882         client.AuthToken = arvadostest.ActiveToken
1883         arv, err := arvadosclient.New(client)
1884         c.Assert(err, check.Equals, nil)
1885         kc, err := keepclient.MakeKeepClient(arv)
1886         c.Assert(err, check.Equals, nil)
1887
1888         fs, err := (&arvados.Collection{}).FileSystem(client, kc)
1889         c.Assert(err, check.IsNil)
1890
1891         trials := []struct {
1892                 filename    string
1893                 content     string
1894                 contentType string
1895         }{
1896                 {"picture.txt", "BMX bikes are small this year\n", "text/plain; charset=utf-8"},
1897                 {"picture.bmp", "BMX bikes are small this year\n", "image/(x-ms-)?bmp"},
1898                 {"picture.jpg", "BMX bikes are small this year\n", "image/jpeg"},
1899                 {"picture1", "BMX bikes are small this year\n", "image/bmp"},            // content sniff; "BM" is the magic signature for .bmp
1900                 {"picture2", "Cars are small this year\n", "text/plain; charset=utf-8"}, // content sniff
1901         }
1902         for _, trial := range trials {
1903                 f, err := fs.OpenFile(trial.filename, os.O_CREATE|os.O_WRONLY, 0777)
1904                 c.Assert(err, check.IsNil)
1905                 _, err = f.Write([]byte(trial.content))
1906                 c.Assert(err, check.IsNil)
1907                 c.Assert(f.Close(), check.IsNil)
1908         }
1909         mtxt, err := fs.MarshalManifest(".")
1910         c.Assert(err, check.IsNil)
1911         var coll arvados.Collection
1912         err = client.RequestAndDecode(&coll, "POST", "arvados/v1/collections", nil, map[string]interface{}{
1913                 "collection": map[string]string{
1914                         "manifest_text": mtxt,
1915                 },
1916         })
1917         c.Assert(err, check.IsNil)
1918
1919         for _, trial := range trials {
1920                 u, _ := url.Parse("http://download.example.com/by_id/" + coll.UUID + "/" + trial.filename)
1921                 req := &http.Request{
1922                         Method:     "GET",
1923                         Host:       u.Host,
1924                         URL:        u,
1925                         RequestURI: u.RequestURI(),
1926                         Header: http.Header{
1927                                 "Authorization": {"Bearer " + client.AuthToken},
1928                         },
1929                 }
1930                 resp := httptest.NewRecorder()
1931                 s.handler.ServeHTTP(resp, req)
1932                 c.Check(resp.Code, check.Equals, http.StatusOK)
1933                 c.Check(resp.Header().Get("Content-Type"), check.Matches, trial.contentType)
1934                 c.Check(resp.Body.String(), check.Equals, trial.content)
1935         }
1936 }
1937
1938 func (s *IntegrationSuite) TestCacheSize(c *check.C) {
1939         req, err := http.NewRequest("GET", "http://"+arvadostest.FooCollection+".example.com/foo", nil)
1940         req.Header.Set("Authorization", "Bearer "+arvadostest.ActiveTokenV2)
1941         c.Assert(err, check.IsNil)
1942         resp := httptest.NewRecorder()
1943         s.handler.ServeHTTP(resp, req)
1944         c.Assert(resp.Code, check.Equals, http.StatusOK)
1945         c.Check(s.handler.Cache.sessions[arvadostest.ActiveTokenV2].client.DiskCacheSize.Percent(), check.Equals, int64(10))
1946 }
1947
1948 // Writing to a collection shouldn't affect its entry in the
1949 // PDH-to-manifest cache.
1950 func (s *IntegrationSuite) TestCacheWriteCollectionSamePDH(c *check.C) {
1951         arv, err := arvadosclient.MakeArvadosClient()
1952         c.Assert(err, check.Equals, nil)
1953         arv.ApiToken = arvadostest.ActiveToken
1954
1955         u := mustParseURL("http://x.example/testfile")
1956         req := &http.Request{
1957                 Method:     "GET",
1958                 Host:       u.Host,
1959                 URL:        u,
1960                 RequestURI: u.RequestURI(),
1961                 Header:     http.Header{"Authorization": {"Bearer " + arv.ApiToken}},
1962         }
1963
1964         checkWithID := func(id string, status int) {
1965                 req.URL.Host = strings.Replace(id, "+", "-", -1) + ".example"
1966                 req.Host = req.URL.Host
1967                 resp := httptest.NewRecorder()
1968                 s.handler.ServeHTTP(resp, req)
1969                 c.Check(resp.Code, check.Equals, status)
1970         }
1971
1972         var colls [2]arvados.Collection
1973         for i := range colls {
1974                 err := arv.Create("collections",
1975                         map[string]interface{}{
1976                                 "ensure_unique_name": true,
1977                                 "collection": map[string]interface{}{
1978                                         "name": "test collection",
1979                                 },
1980                         }, &colls[i])
1981                 c.Assert(err, check.Equals, nil)
1982         }
1983
1984         // Populate cache with empty collection
1985         checkWithID(colls[0].PortableDataHash, http.StatusNotFound)
1986
1987         // write a file to colls[0]
1988         reqPut := *req
1989         reqPut.Method = "PUT"
1990         reqPut.URL.Host = colls[0].UUID + ".example"
1991         reqPut.Host = req.URL.Host
1992         reqPut.Body = ioutil.NopCloser(bytes.NewBufferString("testdata"))
1993         resp := httptest.NewRecorder()
1994         s.handler.ServeHTTP(resp, &reqPut)
1995         c.Check(resp.Code, check.Equals, http.StatusCreated)
1996
1997         // new file should not appear in colls[1]
1998         checkWithID(colls[1].PortableDataHash, http.StatusNotFound)
1999         checkWithID(colls[1].UUID, http.StatusNotFound)
2000
2001         checkWithID(colls[0].UUID, http.StatusOK)
2002 }
2003
// copyHeader returns a new http.Header holding the same keys as h,
// where each value slice is freshly allocated so it does not share
// storage with the corresponding slice in h. The result is non-nil
// even when h is nil or empty.
func copyHeader(h http.Header) http.Header {
	dup := make(http.Header, len(h))
	for name, values := range h {
		dup[name] = append([]string(nil), values...)
	}
	return dup
}
2011
2012 func (s *IntegrationSuite) checkUploadDownloadRequest(c *check.C, req *http.Request,
2013         successCode int, direction string, perm bool, userUuid, collectionUuid, collectionPDH, filepath string) {
2014
2015         client := arvados.NewClientFromEnv()
2016         client.AuthToken = arvadostest.AdminToken
2017         var logentries arvados.LogList
2018         limit1 := 1
2019         err := client.RequestAndDecode(&logentries, "GET", "arvados/v1/logs", nil,
2020                 arvados.ResourceListParams{
2021                         Limit: &limit1,
2022                         Order: "created_at desc"})
2023         c.Check(err, check.IsNil)
2024         c.Check(logentries.Items, check.HasLen, 1)
2025         lastLogId := logentries.Items[0].ID
2026         c.Logf("lastLogId: %d", lastLogId)
2027
2028         var logbuf bytes.Buffer
2029         logger := logrus.New()
2030         logger.Out = &logbuf
2031         resp := httptest.NewRecorder()
2032         req = req.WithContext(ctxlog.Context(context.Background(), logger))
2033         s.handler.ServeHTTP(resp, req)
2034
2035         if perm {
2036                 c.Check(resp.Result().StatusCode, check.Equals, successCode)
2037                 c.Check(logbuf.String(), check.Matches, `(?ms).*msg="File `+direction+`".*`)
2038                 c.Check(logbuf.String(), check.Not(check.Matches), `(?ms).*level=error.*`)
2039
2040                 deadline := time.Now().Add(time.Second)
2041                 for {
2042                         c.Assert(time.Now().After(deadline), check.Equals, false, check.Commentf("timed out waiting for log entry"))
2043                         logentries = arvados.LogList{}
2044                         err = client.RequestAndDecode(&logentries, "GET", "arvados/v1/logs", nil,
2045                                 arvados.ResourceListParams{
2046                                         Filters: []arvados.Filter{
2047                                                 {Attr: "event_type", Operator: "=", Operand: "file_" + direction},
2048                                                 {Attr: "object_uuid", Operator: "=", Operand: userUuid},
2049                                         },
2050                                         Limit: &limit1,
2051                                         Order: "created_at desc",
2052                                 })
2053                         c.Assert(err, check.IsNil)
2054                         if len(logentries.Items) > 0 &&
2055                                 logentries.Items[0].ID > lastLogId &&
2056                                 logentries.Items[0].ObjectUUID == userUuid &&
2057                                 logentries.Items[0].Properties["collection_uuid"] == collectionUuid &&
2058                                 (collectionPDH == "" || logentries.Items[0].Properties["portable_data_hash"] == collectionPDH) &&
2059                                 logentries.Items[0].Properties["collection_file_path"] == filepath {
2060                                 break
2061                         }
2062                         c.Logf("logentries.Items: %+v", logentries.Items)
2063                         time.Sleep(50 * time.Millisecond)
2064                 }
2065         } else {
2066                 c.Check(resp.Result().StatusCode, check.Equals, http.StatusForbidden)
2067                 c.Check(logbuf.String(), check.Equals, "")
2068         }
2069 }
2070
2071 func (s *IntegrationSuite) TestDownloadLoggingPermission(c *check.C) {
2072         u := mustParseURL("http://" + arvadostest.FooCollection + ".keep-web.example/foo")
2073
2074         s.handler.Cluster.Collections.TrustAllContent = true
2075         s.handler.Cluster.Collections.WebDAVLogDownloadInterval = arvados.Duration(0)
2076
2077         for _, adminperm := range []bool{true, false} {
2078                 for _, userperm := range []bool{true, false} {
2079                         s.handler.Cluster.Collections.WebDAVPermission.Admin.Download = adminperm
2080                         s.handler.Cluster.Collections.WebDAVPermission.User.Download = userperm
2081
2082                         // Test admin permission
2083                         req := &http.Request{
2084                                 Method:     "GET",
2085                                 Host:       u.Host,
2086                                 URL:        u,
2087                                 RequestURI: u.RequestURI(),
2088                                 Header: http.Header{
2089                                         "Authorization": {"Bearer " + arvadostest.AdminToken},
2090                                 },
2091                         }
2092                         s.checkUploadDownloadRequest(c, req, http.StatusOK, "download", adminperm,
2093                                 arvadostest.AdminUserUUID, arvadostest.FooCollection, arvadostest.FooCollectionPDH, "foo")
2094
2095                         // Test user permission
2096                         req = &http.Request{
2097                                 Method:     "GET",
2098                                 Host:       u.Host,
2099                                 URL:        u,
2100                                 RequestURI: u.RequestURI(),
2101                                 Header: http.Header{
2102                                         "Authorization": {"Bearer " + arvadostest.ActiveToken},
2103                                 },
2104                         }
2105                         s.checkUploadDownloadRequest(c, req, http.StatusOK, "download", userperm,
2106                                 arvadostest.ActiveUserUUID, arvadostest.FooCollection, arvadostest.FooCollectionPDH, "foo")
2107                 }
2108         }
2109
2110         s.handler.Cluster.Collections.WebDAVPermission.User.Download = true
2111
2112         for _, tryurl := range []string{"http://" + arvadostest.MultilevelCollection1 + ".keep-web.example/dir1/subdir/file1",
2113                 "http://keep-web/users/active/multilevel_collection_1/dir1/subdir/file1"} {
2114
2115                 u = mustParseURL(tryurl)
2116                 req := &http.Request{
2117                         Method:     "GET",
2118                         Host:       u.Host,
2119                         URL:        u,
2120                         RequestURI: u.RequestURI(),
2121                         Header: http.Header{
2122                                 "Authorization": {"Bearer " + arvadostest.ActiveToken},
2123                         },
2124                 }
2125                 s.checkUploadDownloadRequest(c, req, http.StatusOK, "download", true,
2126                         arvadostest.ActiveUserUUID, arvadostest.MultilevelCollection1, arvadostest.MultilevelCollection1PDH, "dir1/subdir/file1")
2127         }
2128
2129         u = mustParseURL("http://" + strings.Replace(arvadostest.FooCollectionPDH, "+", "-", 1) + ".keep-web.example/foo")
2130         req := &http.Request{
2131                 Method:     "GET",
2132                 Host:       u.Host,
2133                 URL:        u,
2134                 RequestURI: u.RequestURI(),
2135                 Header: http.Header{
2136                         "Authorization": {"Bearer " + arvadostest.ActiveToken},
2137                 },
2138         }
2139         s.checkUploadDownloadRequest(c, req, http.StatusOK, "download", true,
2140                 arvadostest.ActiveUserUUID, "", arvadostest.FooCollectionPDH, "foo")
2141 }
2142
2143 func (s *IntegrationSuite) TestUploadLoggingPermission(c *check.C) {
2144         for _, adminperm := range []bool{true, false} {
2145                 for _, userperm := range []bool{true, false} {
2146
2147                         arv := arvados.NewClientFromEnv()
2148                         arv.AuthToken = arvadostest.ActiveToken
2149
2150                         var coll arvados.Collection
2151                         err := arv.RequestAndDecode(&coll,
2152                                 "POST",
2153                                 "/arvados/v1/collections",
2154                                 nil,
2155                                 map[string]interface{}{
2156                                         "ensure_unique_name": true,
2157                                         "collection": map[string]interface{}{
2158                                                 "name": "test collection",
2159                                         },
2160                                 })
2161                         c.Assert(err, check.Equals, nil)
2162
2163                         u := mustParseURL("http://" + coll.UUID + ".keep-web.example/bar")
2164
2165                         s.handler.Cluster.Collections.WebDAVPermission.Admin.Upload = adminperm
2166                         s.handler.Cluster.Collections.WebDAVPermission.User.Upload = userperm
2167
2168                         // Test admin permission
2169                         req := &http.Request{
2170                                 Method:     "PUT",
2171                                 Host:       u.Host,
2172                                 URL:        u,
2173                                 RequestURI: u.RequestURI(),
2174                                 Header: http.Header{
2175                                         "Authorization": {"Bearer " + arvadostest.AdminToken},
2176                                 },
2177                                 Body: io.NopCloser(bytes.NewReader([]byte("bar"))),
2178                         }
2179                         s.checkUploadDownloadRequest(c, req, http.StatusCreated, "upload", adminperm,
2180                                 arvadostest.AdminUserUUID, coll.UUID, "", "bar")
2181
2182                         // Test user permission
2183                         req = &http.Request{
2184                                 Method:     "PUT",
2185                                 Host:       u.Host,
2186                                 URL:        u,
2187                                 RequestURI: u.RequestURI(),
2188                                 Header: http.Header{
2189                                         "Authorization": {"Bearer " + arvadostest.ActiveToken},
2190                                 },
2191                                 Body: io.NopCloser(bytes.NewReader([]byte("bar"))),
2192                         }
2193                         s.checkUploadDownloadRequest(c, req, http.StatusCreated, "upload", userperm,
2194                                 arvadostest.ActiveUserUUID, coll.UUID, "", "bar")
2195                 }
2196         }
2197 }
2198
2199 func (s *IntegrationSuite) serveAndLogRequests(c *check.C, reqs *map[*http.Request]int) *bytes.Buffer {
2200         logbuf, ctx := newLoggerAndContext()
2201         var wg sync.WaitGroup
2202         for req, expectStatus := range *reqs {
2203                 req := req.WithContext(ctx)
2204                 expectStatus := expectStatus
2205                 wg.Add(1)
2206                 go func() {
2207                         defer wg.Done()
2208                         resp := httptest.NewRecorder()
2209                         s.handler.ServeHTTP(resp, req)
2210                         c.Check(resp.Result().StatusCode, check.Equals, expectStatus)
2211                 }()
2212         }
2213         wg.Wait()
2214         return logbuf
2215 }
2216
2217 func countLogMatches(c *check.C, logbuf *bytes.Buffer, pattern string, matchCount int) bool {
2218         search, err := regexp.Compile(pattern)
2219         if !c.Check(err, check.IsNil, check.Commentf("failed to compile regexp: %v", err)) {
2220                 return false
2221         }
2222         matches := search.FindAll(logbuf.Bytes(), -1)
2223         return c.Check(matches, check.HasLen, matchCount,
2224                 check.Commentf("%d matching log messages: %+v", len(matches), matches))
2225 }
2226
2227 func (s *IntegrationSuite) TestLogThrottling(c *check.C) {
2228         s.handler.Cluster.Collections.WebDAVLogDownloadInterval = arvados.Duration(time.Hour)
2229         fooURL := "http://" + arvadostest.FooCollection + ".keep-web.example/foo"
2230         req := newRequest("GET", fooURL)
2231         req.Header.Set("Authorization", "Bearer "+arvadostest.ActiveToken)
2232         pattern := `\bmsg="File download".* collection_file_path=foo\b`
2233
2234         // All these requests get byte zero and should be logged.
2235         reqs := make(map[*http.Request]int)
2236         reqs[req] = http.StatusOK
2237         for _, byterange := range []string{"0-2", "0-1", "0-", "-3"} {
2238                 req := req.Clone(context.Background())
2239                 req.Header.Set("Range", "bytes="+byterange)
2240                 reqs[req] = http.StatusPartialContent
2241         }
2242         logbuf := s.serveAndLogRequests(c, &reqs)
2243         countLogMatches(c, logbuf, pattern, len(reqs))
2244
2245         // None of these requests get byte zero so they should all be throttled
2246         // (now that we've made at least one request for byte zero).
2247         reqs = make(map[*http.Request]int)
2248         for _, byterange := range []string{"1-2", "1-", "2-", "-1", "-2"} {
2249                 req := req.Clone(context.Background())
2250                 req.Header.Set("Range", "bytes="+byterange)
2251                 reqs[req] = http.StatusPartialContent
2252         }
2253         logbuf = s.serveAndLogRequests(c, &reqs)
2254         countLogMatches(c, logbuf, pattern, 0)
2255 }
2256
2257 func (s *IntegrationSuite) TestLogThrottleInterval(c *check.C) {
2258         s.handler.Cluster.Collections.WebDAVLogDownloadInterval = arvados.Duration(time.Nanosecond)
2259         logbuf, ctx := newLoggerAndContext()
2260         req := newRequest("GET", "http://"+arvadostest.FooCollection+".keep-web.example/foo")
2261         req.Header.Set("Authorization", "Bearer "+arvadostest.ActiveToken)
2262         req = req.WithContext(ctx)
2263
2264         re := regexp.MustCompile(`\bmsg="File download".* collection_file_path=foo\b`)
2265         for expected := 1; expected < 4; expected++ {
2266                 time.Sleep(2 * time.Nanosecond)
2267                 resp := httptest.NewRecorder()
2268                 s.handler.ServeHTTP(resp, req)
2269                 c.Assert(resp.Result().StatusCode, check.Equals, http.StatusOK)
2270                 matches := re.FindAll(logbuf.Bytes(), -1)
2271                 c.Assert(matches, check.HasLen, expected,
2272                         check.Commentf("%d matching log messages: %+v", len(matches), matches))
2273         }
2274 }
2275
2276 func (s *IntegrationSuite) TestLogThrottleDifferentTokens(c *check.C) {
2277         s.handler.Cluster.Collections.WebDAVLogDownloadInterval = arvados.Duration(time.Hour)
2278         req := newRequest("GET", "http://"+arvadostest.FooCollection+".keep-web.example/foo")
2279         reqs := make(map[*http.Request]int)
2280         for _, token := range []string{arvadostest.ActiveToken, arvadostest.AdminToken} {
2281                 req := req.Clone(context.Background())
2282                 req.Header.Set("Authorization", "Bearer "+token)
2283                 reqs[req] = http.StatusOK
2284         }
2285         logbuf := s.serveAndLogRequests(c, &reqs)
2286         countLogMatches(c, logbuf, `\bmsg="File download".* collection_file_path=foo\b`, len(reqs))
2287 }
2288
2289 func (s *IntegrationSuite) TestLogThrottleDifferentFiles(c *check.C) {
2290         s.handler.Cluster.Collections.WebDAVLogDownloadInterval = arvados.Duration(time.Hour)
2291         baseURL := "http://" + arvadostest.MultilevelCollection1 + ".keep-web.example/"
2292         reqs := make(map[*http.Request]int)
2293         for _, filename := range []string{"file1", "file2", "file3"} {
2294                 req := newRequest("GET", baseURL+filename)
2295                 req.Header.Set("Authorization", "Bearer "+arvadostest.ActiveToken)
2296                 reqs[req] = http.StatusOK
2297         }
2298         logbuf := s.serveAndLogRequests(c, &reqs)
2299         countLogMatches(c, logbuf, `\bmsg="File download".* collection_uuid=`+arvadostest.MultilevelCollection1+`\b`, len(reqs))
2300 }
2301
2302 func (s *IntegrationSuite) TestLogThrottleDifferentSources(c *check.C) {
2303         s.handler.Cluster.Collections.WebDAVLogDownloadInterval = arvados.Duration(time.Hour)
2304         req := newRequest("GET", "http://"+arvadostest.FooCollection+".keep-web.example/foo")
2305         req.Header.Set("Authorization", "Bearer "+arvadostest.ActiveToken)
2306         reqs := make(map[*http.Request]int)
2307         reqs[req] = http.StatusOK
2308         for _, xff := range []string{"10.22.33.44", "100::123"} {
2309                 req := req.Clone(context.Background())
2310                 req.Header.Set("X-Forwarded-For", xff)
2311                 reqs[req] = http.StatusOK
2312         }
2313         logbuf := s.serveAndLogRequests(c, &reqs)
2314         countLogMatches(c, logbuf, `\bmsg="File download".* collection_file_path=foo\b`, len(reqs))
2315 }
2316
2317 func (s *IntegrationSuite) TestConcurrentWrites(c *check.C) {
2318         s.handler.Cluster.Collections.WebDAVCache.TTL = arvados.Duration(time.Second * 2)
2319         lockTidyInterval = time.Second
2320         client := arvados.NewClientFromEnv()
2321         client.AuthToken = arvadostest.ActiveTokenV2
2322         // Start small, and increase concurrency (2^2, 4^2, ...)
2323         // only until hitting failure. Avoids unnecessarily long
2324         // failure reports.
2325         for n := 2; n < 16 && !c.Failed(); n = n * 2 {
2326                 c.Logf("%s: n=%d", c.TestName(), n)
2327
2328                 var coll arvados.Collection
2329                 err := client.RequestAndDecode(&coll, "POST", "arvados/v1/collections", nil, nil)
2330                 c.Assert(err, check.IsNil)
2331                 defer client.RequestAndDecode(&coll, "DELETE", "arvados/v1/collections/"+coll.UUID, nil, nil)
2332
2333                 var wg sync.WaitGroup
2334                 for i := 0; i < n && !c.Failed(); i++ {
2335                         i := i
2336                         wg.Add(1)
2337                         go func() {
2338                                 defer wg.Done()
2339                                 u := mustParseURL(fmt.Sprintf("http://%s.collections.example.com/i=%d", coll.UUID, i))
2340                                 resp := httptest.NewRecorder()
2341                                 req, err := http.NewRequest("MKCOL", u.String(), nil)
2342                                 c.Assert(err, check.IsNil)
2343                                 req.Header.Set("Authorization", "Bearer "+client.AuthToken)
2344                                 s.handler.ServeHTTP(resp, req)
2345                                 c.Assert(resp.Code, check.Equals, http.StatusCreated)
2346                                 for j := 0; j < n && !c.Failed(); j++ {
2347                                         j := j
2348                                         wg.Add(1)
2349                                         go func() {
2350                                                 defer wg.Done()
2351                                                 content := fmt.Sprintf("i=%d/j=%d", i, j)
2352                                                 u := mustParseURL("http://" + coll.UUID + ".collections.example.com/" + content)
2353
2354                                                 resp := httptest.NewRecorder()
2355                                                 req, err := http.NewRequest("PUT", u.String(), strings.NewReader(content))
2356                                                 c.Assert(err, check.IsNil)
2357                                                 req.Header.Set("Authorization", "Bearer "+client.AuthToken)
2358                                                 s.handler.ServeHTTP(resp, req)
2359                                                 c.Check(resp.Code, check.Equals, http.StatusCreated)
2360
2361                                                 time.Sleep(time.Second)
2362                                                 resp = httptest.NewRecorder()
2363                                                 req, err = http.NewRequest("GET", u.String(), nil)
2364                                                 c.Assert(err, check.IsNil)
2365                                                 req.Header.Set("Authorization", "Bearer "+client.AuthToken)
2366                                                 s.handler.ServeHTTP(resp, req)
2367                                                 c.Check(resp.Code, check.Equals, http.StatusOK)
2368                                                 c.Check(resp.Body.String(), check.Equals, content)
2369                                         }()
2370                                 }
2371                         }()
2372                 }
2373                 wg.Wait()
2374                 for i := 0; i < n; i++ {
2375                         u := mustParseURL(fmt.Sprintf("http://%s.collections.example.com/i=%d", coll.UUID, i))
2376                         resp := httptest.NewRecorder()
2377                         req, err := http.NewRequest("PROPFIND", u.String(), &bytes.Buffer{})
2378                         c.Assert(err, check.IsNil)
2379                         req.Header.Set("Authorization", "Bearer "+client.AuthToken)
2380                         s.handler.ServeHTTP(resp, req)
2381                         c.Assert(resp.Code, check.Equals, http.StatusMultiStatus)
2382                 }
2383         }
2384 }