]> git.arvados.org - arvados.git/blob - services/keep-web/handler_test.go
22845: counts now update when fetched
[arvados.git] / services / keep-web / handler_test.go
1 // Copyright (C) The Arvados Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: AGPL-3.0
4
5 package keepweb
6
7 import (
8         "bytes"
9         "context"
10         "fmt"
11         "io"
12         "io/ioutil"
13         "net/http"
14         "net/http/httptest"
15         "net/url"
16         "os"
17         "path/filepath"
18         "regexp"
19         "strings"
20         "sync"
21         "sync/atomic"
22         "time"
23
24         "git.arvados.org/arvados.git/lib/config"
25         "git.arvados.org/arvados.git/sdk/go/arvados"
26         "git.arvados.org/arvados.git/sdk/go/arvadosclient"
27         "git.arvados.org/arvados.git/sdk/go/arvadostest"
28         "git.arvados.org/arvados.git/sdk/go/auth"
29         "git.arvados.org/arvados.git/sdk/go/ctxlog"
30         "git.arvados.org/arvados.git/sdk/go/keepclient"
31         "github.com/prometheus/client_golang/prometheus"
32         "github.com/sirupsen/logrus"
33         "golang.org/x/net/html"
34         check "gopkg.in/check.v1"
35 )
36
37 var _ = check.Suite(&UnitSuite{})
38
39 func init() {
40         arvados.DebugLocksPanicMode = true
41 }
42
43 type UnitSuite struct {
44         cluster *arvados.Cluster
45         handler *handler
46 }
47
48 func (s *UnitSuite) SetUpTest(c *check.C) {
49         logger := ctxlog.TestLogger(c)
50         ldr := config.NewLoader(&bytes.Buffer{}, logger)
51         cfg, err := ldr.Load()
52         c.Assert(err, check.IsNil)
53         cc, err := cfg.GetCluster("")
54         c.Assert(err, check.IsNil)
55         s.cluster = cc
56         s.handler = &handler{
57                 Cluster: cc,
58                 Cache: cache{
59                         cluster:  cc,
60                         logger:   logger,
61                         registry: prometheus.NewRegistry(),
62                 },
63                 metrics: newMetrics(prometheus.NewRegistry()),
64         }
65 }
66
67 func newCollection(collID string) *arvados.Collection {
68         coll := &arvados.Collection{UUID: collID}
69         manifestKey := collID
70         if pdh, ok := arvadostest.TestCollectionUUIDToPDH[collID]; ok {
71                 coll.PortableDataHash = pdh
72                 manifestKey = pdh
73         }
74         if mtext, ok := arvadostest.TestCollectionPDHToManifest[manifestKey]; ok {
75                 coll.ManifestText = mtext
76         }
77         return coll
78 }
79
80 func newRequest(method, urlStr string) *http.Request {
81         u := mustParseURL(urlStr)
82         return &http.Request{
83                 Method:     method,
84                 Host:       u.Host,
85                 URL:        u,
86                 RequestURI: u.RequestURI(),
87                 RemoteAddr: "10.20.30.40:56789",
88                 Header:     http.Header{},
89         }
90 }
91
92 func newLoggerAndContext() (*bytes.Buffer, context.Context) {
93         var logbuf bytes.Buffer
94         logger := logrus.New()
95         logger.Out = &logbuf
96         return &logbuf, ctxlog.Context(context.Background(), logger)
97 }
98
99 func (s *UnitSuite) TestLogEventTypes(c *check.C) {
100         collURL := "http://keep-web.example/c=" + arvadostest.FooCollection
101         for method, expected := range map[string]string{
102                 "GET":  "file_download",
103                 "POST": "file_upload",
104                 "PUT":  "file_upload",
105         } {
106                 filePath := "/" + method
107                 req := newRequest(method, collURL+filePath)
108                 actual := newFileEventLog(s.handler, req, filePath, nil, nil, "")
109                 if !c.Check(actual, check.NotNil) {
110                         continue
111                 }
112                 c.Check(actual.eventType, check.Equals, expected)
113         }
114 }
115
116 func (s *UnitSuite) TestUnloggedEventTypes(c *check.C) {
117         collURL := "http://keep-web.example/c=" + arvadostest.FooCollection
118         for _, method := range []string{"DELETE", "HEAD", "OPTIONS", "PATCH"} {
119                 filePath := "/" + method
120                 req := newRequest(method, collURL+filePath)
121                 actual := newFileEventLog(s.handler, req, filePath, nil, nil, "")
122                 c.Check(actual, check.IsNil,
123                         check.Commentf("%s request made a log event", method))
124         }
125 }
126
127 func (s *UnitSuite) TestLogFilePath(c *check.C) {
128         coll := newCollection(arvadostest.FooCollection)
129         collURL := "http://keep-web.example/c=" + arvadostest.FooCollection
130         for _, filePath := range []string{"/foo", "/Foo", "/foo/bar"} {
131                 req := newRequest("GET", collURL+filePath)
132                 actual := newFileEventLog(s.handler, req, filePath, coll, nil, "")
133                 if !c.Check(actual, check.NotNil) {
134                         continue
135                 }
136                 c.Check(actual.collFilePath, check.Equals, filePath)
137         }
138 }
139
140 func (s *UnitSuite) TestLogRemoteAddr(c *check.C) {
141         collURL := "http://keep-web.example/c=" + arvadostest.FooCollection
142         filePath := "/foo"
143         req := newRequest("GET", collURL+filePath)
144
145         for _, addr := range []string{"10.20.30.55", "192.168.144.120", "192.0.2.4"} {
146                 req.RemoteAddr = addr + ":57914"
147                 actual := newFileEventLog(s.handler, req, filePath, nil, nil, "")
148                 if !c.Check(actual, check.NotNil) {
149                         continue
150                 }
151                 c.Check(actual.clientAddr, check.Equals, addr)
152         }
153
154         for _, addr := range []string{"100::20:30:40", "2001:db8::90:100", "3fff::30"} {
155                 req.RemoteAddr = fmt.Sprintf("[%s]:57916", addr)
156                 actual := newFileEventLog(s.handler, req, filePath, nil, nil, "")
157                 if !c.Check(actual, check.NotNil) {
158                         continue
159                 }
160                 c.Check(actual.clientAddr, check.Equals, addr)
161         }
162 }
163
164 func (s *UnitSuite) TestLogXForwardedFor(c *check.C) {
165         collURL := "http://keep-web.example/c=" + arvadostest.FooCollection
166         filePath := "/foo"
167         req := newRequest("GET", collURL+filePath)
168         for xff, expected := range map[string]string{
169                 "10.20.30.55":                          "10.20.30.55",
170                 "192.168.144.120, 10.20.30.120":        "10.20.30.120",
171                 "192.0.2.4, 192.0.2.6, 192.0.2.8":      "192.0.2.8",
172                 "192.0.2.4,192.168.2.4":                "192.168.2.4",
173                 "10.20.30.60,192.168.144.40,192.0.2.4": "192.0.2.4",
174                 "100::20:30:50":                        "100::20:30:50",
175                 "2001:db8::80:90, 100::100":            "100::100",
176                 "3fff::ff, 3fff::ee, 3fff::fe":         "3fff::fe",
177                 "3fff::3f,100::1000":                   "100::1000",
178                 "2001:db8::88,100::88,3fff::88":        "3fff::88",
179                 "10.20.30.60, 2001:db8::60":            "2001:db8::60",
180                 "2001:db8::20,10.20.30.20":             "10.20.30.20",
181                 ", 10.20.30.123, 100::123":             "100::123",
182                 ",100::321,10.30.20.10":                "10.30.20.10",
183         } {
184                 req.Header.Set("X-Forwarded-For", xff)
185                 actual := newFileEventLog(s.handler, req, filePath, nil, nil, "")
186                 if !c.Check(actual, check.NotNil) {
187                         continue
188                 }
189                 c.Check(actual.clientAddr, check.Equals, expected)
190         }
191 }
192
193 func (s *UnitSuite) TestLogXForwardedForMalformed(c *check.C) {
194         collURL := "http://keep-web.example/c=" + arvadostest.FooCollection
195         filePath := "/foo"
196         req := newRequest("GET", collURL+filePath)
197         for _, xff := range []string{"", ",", "10.20,30.40", "foo, bar"} {
198                 req.Header.Set("X-Forwarded-For", xff)
199                 actual := newFileEventLog(s.handler, req, filePath, nil, nil, "")
200                 if !c.Check(actual, check.NotNil) {
201                         continue
202                 }
203                 c.Check(actual.clientAddr, check.Equals, "10.20.30.40")
204         }
205 }
206
207 func (s *UnitSuite) TestLogXForwardedForMultivalue(c *check.C) {
208         collURL := "http://keep-web.example/c=" + arvadostest.FooCollection
209         filePath := "/foo"
210         req := newRequest("GET", collURL+filePath)
211         req.Header.Set("X-Forwarded-For", ", ")
212         req.Header.Add("X-Forwarded-For", "2001:db8::db9:dbd")
213         req.Header.Add("X-Forwarded-For", "10.20.30.90")
214         actual := newFileEventLog(s.handler, req, filePath, nil, nil, "")
215         c.Assert(actual, check.NotNil)
216         c.Check(actual.clientAddr, check.Equals, "10.20.30.90")
217 }
218
219 func (s *UnitSuite) TestLogClientAddressCanonicalization(c *check.C) {
220         collURL := "http://keep-web.example/c=" + arvadostest.FooCollection
221         filePath := "/foo"
222         req := newRequest("GET", collURL+filePath)
223         expected := "2001:db8::12:0"
224
225         req.RemoteAddr = "[2001:db8::012:0000]:57918"
226         a := newFileEventLog(s.handler, req, filePath, nil, nil, "")
227         c.Assert(a, check.NotNil)
228         c.Check(a.clientAddr, check.Equals, expected)
229
230         req.RemoteAddr = "10.20.30.40:57919"
231         req.Header.Set("X-Forwarded-For", "2001:db8:0::0:12:00")
232         b := newFileEventLog(s.handler, req, filePath, nil, nil, "")
233         c.Assert(b, check.NotNil)
234         c.Check(b.clientAddr, check.Equals, expected)
235 }
236
237 func (s *UnitSuite) TestLogAnonymousUser(c *check.C) {
238         collURL := "http://keep-web.example/c=" + arvadostest.FooCollection
239         filePath := "/foo"
240         req := newRequest("GET", collURL+filePath)
241         actual := newFileEventLog(s.handler, req, filePath, nil, nil, arvadostest.AnonymousToken)
242         c.Assert(actual, check.NotNil)
243         c.Check(actual.userUUID, check.Equals, s.handler.Cluster.ClusterID+"-tpzed-anonymouspublic")
244         c.Check(actual.userFullName, check.Equals, "")
245         c.Check(actual.clientToken, check.Equals, arvadostest.AnonymousToken)
246 }
247
248 func (s *UnitSuite) TestLogUser(c *check.C) {
249         collURL := "http://keep-web.example/c=" + arvadostest.FooCollection
250         for _, trial := range []struct{ uuid, fullName, token string }{
251                 {arvadostest.ActiveUserUUID, "Active User", arvadostest.ActiveToken},
252                 {arvadostest.SpectatorUserUUID, "Spectator User", arvadostest.SpectatorToken},
253         } {
254                 filePath := "/" + trial.uuid
255                 req := newRequest("GET", collURL+filePath)
256                 user := &arvados.User{
257                         UUID:     trial.uuid,
258                         FullName: trial.fullName,
259                 }
260                 actual := newFileEventLog(s.handler, req, filePath, nil, user, trial.token)
261                 if !c.Check(actual, check.NotNil) {
262                         continue
263                 }
264                 c.Check(actual.userUUID, check.Equals, trial.uuid)
265                 c.Check(actual.userFullName, check.Equals, trial.fullName)
266                 c.Check(actual.clientToken, check.Equals, trial.token)
267         }
268 }
269
270 func (s *UnitSuite) TestLogCollectionByUUID(c *check.C) {
271         for collUUID, collPDH := range arvadostest.TestCollectionUUIDToPDH {
272                 collURL := "http://keep-web.example/c=" + collUUID
273                 filePath := "/" + collUUID
274                 req := newRequest("GET", collURL+filePath)
275                 coll := newCollection(collUUID)
276                 actual := newFileEventLog(s.handler, req, filePath, coll, nil, "")
277                 if !c.Check(actual, check.NotNil) {
278                         continue
279                 }
280                 c.Check(actual.collUUID, check.Equals, collUUID)
281                 c.Check(actual.collPDH, check.Equals, collPDH)
282         }
283 }
284
285 func (s *UnitSuite) TestLogCollectionByPDH(c *check.C) {
286         for _, collPDH := range arvadostest.TestCollectionUUIDToPDH {
287                 collURL := "http://keep-web.example/c=" + collPDH
288                 filePath := "/PDHFile"
289                 req := newRequest("GET", collURL+filePath)
290                 coll := newCollection(collPDH)
291                 actual := newFileEventLog(s.handler, req, filePath, coll, nil, "")
292                 if !c.Check(actual, check.NotNil) {
293                         continue
294                 }
295                 c.Check(actual.collPDH, check.Equals, collPDH)
296                 c.Check(actual.collUUID, check.Equals, "")
297         }
298 }
299
300 func (s *UnitSuite) TestLogGETUUIDAsDict(c *check.C) {
301         filePath := "/foo"
302         reqPath := "/c=" + arvadostest.FooCollection + filePath
303         req := newRequest("GET", "http://keep-web.example"+reqPath)
304         coll := newCollection(arvadostest.FooCollection)
305         logEvent := newFileEventLog(s.handler, req, filePath, coll, nil, "")
306         c.Assert(logEvent, check.NotNil)
307         c.Check(logEvent.asDict(), check.DeepEquals, arvadosclient.Dict{
308                 "event_type":  "file_download",
309                 "object_uuid": s.handler.Cluster.ClusterID + "-tpzed-anonymouspublic",
310                 "properties": arvadosclient.Dict{
311                         "reqPath":              reqPath,
312                         "collection_uuid":      arvadostest.FooCollection,
313                         "collection_file_path": filePath,
314                         "portable_data_hash":   arvadostest.FooCollectionPDH,
315                 },
316         })
317 }
318
319 func (s *UnitSuite) TestLogGETPDHAsDict(c *check.C) {
320         filePath := "/Foo"
321         reqPath := "/c=" + arvadostest.FooCollectionPDH + filePath
322         req := newRequest("GET", "http://keep-web.example"+reqPath)
323         coll := newCollection(arvadostest.FooCollectionPDH)
324         user := &arvados.User{
325                 UUID:     arvadostest.ActiveUserUUID,
326                 FullName: "Active User",
327         }
328         logEvent := newFileEventLog(s.handler, req, filePath, coll, user, "")
329         c.Assert(logEvent, check.NotNil)
330         c.Check(logEvent.asDict(), check.DeepEquals, arvadosclient.Dict{
331                 "event_type":  "file_download",
332                 "object_uuid": arvadostest.ActiveUserUUID,
333                 "properties": arvadosclient.Dict{
334                         "reqPath":              reqPath,
335                         "portable_data_hash":   arvadostest.FooCollectionPDH,
336                         "collection_uuid":      "",
337                         "collection_file_path": filePath,
338                 },
339         })
340 }
341
342 func (s *UnitSuite) TestLogUploadAsDict(c *check.C) {
343         coll := newCollection(arvadostest.FooCollection)
344         user := &arvados.User{
345                 UUID:     arvadostest.ActiveUserUUID,
346                 FullName: "Active User",
347         }
348         for _, method := range []string{"POST", "PUT"} {
349                 filePath := "/" + method + "File"
350                 reqPath := "/c=" + arvadostest.FooCollection + filePath
351                 req := newRequest(method, "http://keep-web.example"+reqPath)
352                 logEvent := newFileEventLog(s.handler, req, filePath, coll, user, "")
353                 if !c.Check(logEvent, check.NotNil) {
354                         continue
355                 }
356                 c.Check(logEvent.asDict(), check.DeepEquals, arvadosclient.Dict{
357                         "event_type":  "file_upload",
358                         "object_uuid": arvadostest.ActiveUserUUID,
359                         "properties": arvadosclient.Dict{
360                                 "reqPath":              reqPath,
361                                 "collection_uuid":      arvadostest.FooCollection,
362                                 "collection_file_path": filePath,
363                         },
364                 })
365         }
366 }
367
368 func (s *UnitSuite) TestLogGETUUIDAsFields(c *check.C) {
369         filePath := "/foo"
370         reqPath := "/c=" + arvadostest.FooCollection + filePath
371         req := newRequest("GET", "http://keep-web.example"+reqPath)
372         coll := newCollection(arvadostest.FooCollection)
373         logEvent := newFileEventLog(s.handler, req, filePath, coll, nil, "")
374         c.Assert(logEvent, check.NotNil)
375         c.Check(logEvent.asFields(), check.DeepEquals, logrus.Fields{
376                 "user_uuid":            s.handler.Cluster.ClusterID + "-tpzed-anonymouspublic",
377                 "collection_uuid":      arvadostest.FooCollection,
378                 "collection_file_path": filePath,
379                 "portable_data_hash":   arvadostest.FooCollectionPDH,
380         })
381 }
382
383 func (s *UnitSuite) TestLogGETPDHAsFields(c *check.C) {
384         filePath := "/Foo"
385         reqPath := "/c=" + arvadostest.FooCollectionPDH + filePath
386         req := newRequest("GET", "http://keep-web.example"+reqPath)
387         coll := newCollection(arvadostest.FooCollectionPDH)
388         user := &arvados.User{
389                 UUID:     arvadostest.ActiveUserUUID,
390                 FullName: "Active User",
391         }
392         logEvent := newFileEventLog(s.handler, req, filePath, coll, user, "")
393         c.Assert(logEvent, check.NotNil)
394         c.Check(logEvent.asFields(), check.DeepEquals, logrus.Fields{
395                 "user_uuid":            arvadostest.ActiveUserUUID,
396                 "user_full_name":       "Active User",
397                 "collection_uuid":      "",
398                 "collection_file_path": filePath,
399                 "portable_data_hash":   arvadostest.FooCollectionPDH,
400         })
401 }
402
403 func (s *UnitSuite) TestLogUploadAsFields(c *check.C) {
404         coll := newCollection(arvadostest.FooCollection)
405         user := &arvados.User{
406                 UUID:     arvadostest.ActiveUserUUID,
407                 FullName: "Active User",
408         }
409         for _, method := range []string{"POST", "PUT"} {
410                 filePath := "/" + method + "File"
411                 reqPath := "/c=" + arvadostest.FooCollection + filePath
412                 req := newRequest(method, "http://keep-web.example"+reqPath)
413                 logEvent := newFileEventLog(s.handler, req, filePath, coll, user, "")
414                 if !c.Check(logEvent, check.NotNil) {
415                         continue
416                 }
417                 c.Check(logEvent.asFields(), check.DeepEquals, logrus.Fields{
418                         "user_uuid":            arvadostest.ActiveUserUUID,
419                         "user_full_name":       "Active User",
420                         "collection_uuid":      arvadostest.FooCollection,
421                         "collection_file_path": filePath,
422                 })
423         }
424 }
425
426 func (s *UnitSuite) TestCORSPreflight(c *check.C) {
427         h := s.handler
428         u := mustParseURL("http://keep-web.example/c=" + arvadostest.FooCollection + "/foo")
429         req := &http.Request{
430                 Method:     "OPTIONS",
431                 Host:       u.Host,
432                 URL:        u,
433                 RequestURI: u.RequestURI(),
434                 Header: http.Header{
435                         "Origin":                        {"https://workbench.example"},
436                         "Access-Control-Request-Method": {"POST"},
437                 },
438         }
439
440         // Check preflight for an allowed request
441         resp := httptest.NewRecorder()
442         h.ServeHTTP(resp, req)
443         c.Check(resp.Code, check.Equals, http.StatusOK)
444         c.Check(resp.Body.String(), check.Equals, "")
445         c.Check(resp.Header().Get("Access-Control-Allow-Origin"), check.Equals, "*")
446         c.Check(resp.Header().Get("Access-Control-Allow-Methods"), check.Equals, "COPY, DELETE, GET, LOCK, MKCOL, MOVE, OPTIONS, POST, PROPFIND, PROPPATCH, PUT, RMCOL, UNLOCK")
447         c.Check(resp.Header().Get("Access-Control-Allow-Headers"), check.Equals, "Authorization, Content-Type, Range, Depth, Destination, If, Lock-Token, Overwrite, Timeout, Cache-Control")
448
449         // Check preflight for a disallowed request
450         resp = httptest.NewRecorder()
451         req.Header.Set("Access-Control-Request-Method", "MAKE-COFFEE")
452         h.ServeHTTP(resp, req)
453         c.Check(resp.Body.String(), check.Equals, "")
454         c.Check(resp.Code, check.Equals, http.StatusMethodNotAllowed)
455 }
456
457 func (s *UnitSuite) TestWebdavPrefixAndSource(c *check.C) {
458         for _, trial := range []struct {
459                 method   string
460                 path     string
461                 prefix   string
462                 source   string
463                 notFound bool
464                 seeOther bool
465         }{
466                 {
467                         method: "PROPFIND",
468                         path:   "/",
469                 },
470                 {
471                         method: "PROPFIND",
472                         path:   "/dir1",
473                 },
474                 {
475                         method: "PROPFIND",
476                         path:   "/dir1/",
477                 },
478                 {
479                         method: "PROPFIND",
480                         path:   "/dir1/foo",
481                         prefix: "/dir1",
482                         source: "/dir1",
483                 },
484                 {
485                         method: "PROPFIND",
486                         path:   "/prefix/dir1/foo",
487                         prefix: "/prefix/",
488                         source: "",
489                 },
490                 {
491                         method: "PROPFIND",
492                         path:   "/prefix/dir1/foo",
493                         prefix: "/prefix",
494                         source: "",
495                 },
496                 {
497                         method: "PROPFIND",
498                         path:   "/prefix/dir1/foo",
499                         prefix: "/prefix/",
500                         source: "/",
501                 },
502                 {
503                         method: "PROPFIND",
504                         path:   "/prefix/foo",
505                         prefix: "/prefix/",
506                         source: "/dir1/",
507                 },
508                 {
509                         method: "GET",
510                         path:   "/prefix/foo",
511                         prefix: "/prefix/",
512                         source: "/dir1/",
513                 },
514                 {
515                         method: "PROPFIND",
516                         path:   "/prefix/",
517                         prefix: "/prefix",
518                         source: "/dir1",
519                 },
520                 {
521                         method: "PROPFIND",
522                         path:   "/prefix",
523                         prefix: "/prefix",
524                         source: "/dir1/",
525                 },
526                 {
527                         method:   "GET",
528                         path:     "/prefix",
529                         prefix:   "/prefix",
530                         source:   "/dir1",
531                         seeOther: true,
532                 },
533                 {
534                         method:   "PROPFIND",
535                         path:     "/dir1/foo",
536                         prefix:   "",
537                         source:   "/dir1",
538                         notFound: true,
539                 },
540         } {
541                 c.Logf("trial %+v", trial)
542                 u := mustParseURL("http://" + arvadostest.FooBarDirCollection + ".keep-web.example" + trial.path)
543                 req := &http.Request{
544                         Method:     trial.method,
545                         Host:       u.Host,
546                         URL:        u,
547                         RequestURI: u.RequestURI(),
548                         Header: http.Header{
549                                 "Authorization":   {"Bearer " + arvadostest.ActiveTokenV2},
550                                 "X-Webdav-Prefix": {trial.prefix},
551                                 "X-Webdav-Source": {trial.source},
552                         },
553                         Body: ioutil.NopCloser(bytes.NewReader(nil)),
554                 }
555
556                 resp := httptest.NewRecorder()
557                 s.handler.ServeHTTP(resp, req)
558                 if trial.notFound {
559                         c.Check(resp.Code, check.Equals, http.StatusNotFound)
560                 } else if trial.method == "PROPFIND" {
561                         c.Check(resp.Code, check.Equals, http.StatusMultiStatus)
562                         c.Check(resp.Body.String(), check.Matches, `(?ms).*>\n?$`)
563                 } else if trial.seeOther {
564                         c.Check(resp.Code, check.Equals, http.StatusSeeOther)
565                 } else {
566                         c.Check(resp.Code, check.Equals, http.StatusOK)
567                 }
568         }
569 }
570
571 func (s *UnitSuite) TestEmptyResponse(c *check.C) {
572         // Ensure we start with an empty cache
573         defer os.Setenv("HOME", os.Getenv("HOME"))
574         os.Setenv("HOME", c.MkDir())
575         s.handler.Cluster.Collections.WebDAVLogDownloadInterval = arvados.Duration(0)
576
577         for _, trial := range []struct {
578                 dataExists    bool
579                 sendIMSHeader bool
580                 expectStatus  int
581                 logRegexp     string
582         }{
583                 // If we return no content due to a Keep read error,
584                 // we should emit a log message.
585                 {false, false, http.StatusOK, `(?ms).*only wrote 0 bytes.*`},
586
587                 // If we return no content because the client sent an
588                 // If-Modified-Since header, our response should be
589                 // 304.  We still expect a "File download" log since it
590                 // counts as a file access for auditing.
591                 {true, true, http.StatusNotModified, `(?ms).*msg="File download".*`},
592         } {
593                 c.Logf("trial: %+v", trial)
594                 arvadostest.StartKeep(2, true)
595                 if trial.dataExists {
596                         arv, err := arvadosclient.MakeArvadosClient()
597                         c.Assert(err, check.IsNil)
598                         arv.ApiToken = arvadostest.ActiveToken
599                         kc, err := keepclient.MakeKeepClient(arv)
600                         c.Assert(err, check.IsNil)
601                         _, _, err = kc.PutB([]byte("foo"))
602                         c.Assert(err, check.IsNil)
603                 }
604
605                 u := mustParseURL("http://" + arvadostest.FooCollection + ".keep-web.example/foo")
606                 req := &http.Request{
607                         Method:     "GET",
608                         Host:       u.Host,
609                         URL:        u,
610                         RequestURI: u.RequestURI(),
611                         Header: http.Header{
612                                 "Authorization": {"Bearer " + arvadostest.ActiveToken},
613                         },
614                 }
615                 if trial.sendIMSHeader {
616                         req.Header.Set("If-Modified-Since", strings.Replace(time.Now().UTC().Format(time.RFC1123), "UTC", "GMT", -1))
617                 }
618
619                 var logbuf bytes.Buffer
620                 logger := logrus.New()
621                 logger.Out = &logbuf
622                 req = req.WithContext(ctxlog.Context(context.Background(), logger))
623
624                 resp := httptest.NewRecorder()
625                 s.handler.ServeHTTP(resp, req)
626                 c.Check(resp.Code, check.Equals, trial.expectStatus)
627                 c.Check(resp.Body.String(), check.Equals, "")
628
629                 c.Log(logbuf.String())
630                 c.Check(logbuf.String(), check.Matches, trial.logRegexp)
631         }
632 }
633
634 func (s *UnitSuite) TestInvalidUUID(c *check.C) {
635         bogusID := strings.Replace(arvadostest.FooCollectionPDH, "+", "-", 1) + "-"
636         token := arvadostest.ActiveToken
637         for _, trial := range []string{
638                 "http://keep-web/c=" + bogusID + "/foo",
639                 "http://keep-web/c=" + bogusID + "/t=" + token + "/foo",
640                 "http://keep-web/collections/download/" + bogusID + "/" + token + "/foo",
641                 "http://keep-web/collections/" + bogusID + "/foo",
642                 "http://" + bogusID + ".keep-web/" + bogusID + "/foo",
643                 "http://" + bogusID + ".keep-web/t=" + token + "/" + bogusID + "/foo",
644         } {
645                 c.Log(trial)
646                 u := mustParseURL(trial)
647                 req := &http.Request{
648                         Method:     "GET",
649                         Host:       u.Host,
650                         URL:        u,
651                         RequestURI: u.RequestURI(),
652                 }
653                 resp := httptest.NewRecorder()
654                 s.cluster.Users.AnonymousUserToken = arvadostest.AnonymousToken
655                 s.handler.ServeHTTP(resp, req)
656                 c.Check(resp.Code, check.Equals, http.StatusNotFound)
657         }
658 }
659
// mustParseURL parses s with url.Parse and returns the result. It is a
// test helper for URLs that are expected to be valid: if parsing fails
// it panics with a message that names the offending string and includes
// the parser's error, so the failure can be diagnosed from the test
// output.
func mustParseURL(s string) *url.URL {
	r, err := url.Parse(s)
	if err != nil {
		panic("parse URL: " + s + ": " + err.Error())
	}
	return r
}
667
668 func (s *IntegrationSuite) TestVhost404(c *check.C) {
669         for _, testURL := range []string{
670                 arvadostest.NonexistentCollection + ".example.com/theperthcountyconspiracy",
671                 arvadostest.NonexistentCollection + ".example.com/t=" + arvadostest.ActiveToken + "/theperthcountyconspiracy",
672         } {
673                 resp := httptest.NewRecorder()
674                 u := mustParseURL(testURL)
675                 req := &http.Request{
676                         Method:     "GET",
677                         URL:        u,
678                         RequestURI: u.RequestURI(),
679                 }
680                 s.handler.ServeHTTP(resp, req)
681                 c.Check(resp.Code, check.Equals, http.StatusNotFound)
682                 c.Check(resp.Body.String(), check.Equals, notFoundMessage+"\n")
683         }
684 }
685
// An authorizer attaches the given token to an HTTP request -- in a
// header, a cookie, a query parameter, or by any other means -- and
// reports the HTTP status code keep-web is expected to answer with when
// that token is invalid.
type authorizer func(r *http.Request, token string) int
691
692 // We still need to accept "OAuth2 ..." as equivalent to "Bearer ..."
693 // for compatibility with older clients, including SDKs before 3.0.
694 func (s *IntegrationSuite) TestVhostViaAuthzHeaderOAuth2(c *check.C) {
695         s.doVhostRequests(c, authzViaAuthzHeaderOAuth2)
696 }
// authzViaAuthzHeaderOAuth2 is an authorizer that adds tok to r as an
// "Authorization: OAuth2 <tok>" header. It returns 401 as the status
// expected for an invalid token.
func authzViaAuthzHeaderOAuth2(r *http.Request, tok string) int {
	const scheme = "OAuth2"
	r.Header.Add("Authorization", scheme+" "+tok)
	return http.StatusUnauthorized
}
701
702 func (s *IntegrationSuite) TestVhostViaAuthzHeaderBearer(c *check.C) {
703         s.doVhostRequests(c, authzViaAuthzHeaderBearer)
704 }
// authzViaAuthzHeaderBearer is an authorizer that adds tok to r as an
// "Authorization: Bearer <tok>" header. It returns 401 as the status
// expected for an invalid token.
func authzViaAuthzHeaderBearer(r *http.Request, tok string) int {
	const scheme = "Bearer"
	r.Header.Add("Authorization", scheme+" "+tok)
	return http.StatusUnauthorized
}
709
710 func (s *IntegrationSuite) TestVhostViaCookieValue(c *check.C) {
711         s.doVhostRequests(c, authzViaCookieValue)
712 }
713 func authzViaCookieValue(r *http.Request, tok string) int {
714         r.AddCookie(&http.Cookie{
715                 Name:  "arvados_api_token",
716                 Value: auth.EncodeTokenCookie([]byte(tok)),
717         })
718         return http.StatusUnauthorized
719 }
720
721 func (s *IntegrationSuite) TestVhostViaHTTPBasicAuth(c *check.C) {
722         s.doVhostRequests(c, authzViaHTTPBasicAuth)
723 }
// authzViaHTTPBasicAuth is an authorizer that presents tok as the
// password of an "Authorization: Basic ..." header on r. It returns
// 401 as the status expected for an invalid token.
//
// This used to set the arvados_api_token cookie, which made it an
// exact duplicate of authzViaCookieValue and left HTTP Basic
// authentication unexercised.
//
// NOTE(review): the username is an arbitrary placeholder; this
// assumes keep-web only looks at the password -- confirm against the
// auth package.
func authzViaHTTPBasicAuth(r *http.Request, tok string) int {
	r.SetBasicAuth("username", tok)
	return http.StatusUnauthorized
}
731
732 func (s *IntegrationSuite) TestVhostViaHTTPBasicAuthWithExtraSpaceChars(c *check.C) {
733         s.doVhostRequests(c, func(r *http.Request, tok string) int {
734                 r.AddCookie(&http.Cookie{
735                         Name:  "arvados_api_token",
736                         Value: auth.EncodeTokenCookie([]byte(" " + tok + "\n")),
737                 })
738                 return http.StatusUnauthorized
739         })
740 }
741
742 func (s *IntegrationSuite) TestVhostViaPath(c *check.C) {
743         s.doVhostRequests(c, authzViaPath)
744 }
// authzViaPath is an authorizer that prepends "/t=<tok>" to the path
// of r.URL. It returns 404 as the status expected for an invalid
// token.
func authzViaPath(r *http.Request, tok string) int {
	tokenSegment := "/t=" + tok
	r.URL.Path = tokenSegment + r.URL.Path
	return http.StatusNotFound
}
749
750 func (s *IntegrationSuite) TestVhostViaQueryString(c *check.C) {
751         s.doVhostRequests(c, authzViaQueryString)
752 }
// authzViaQueryString is an authorizer that replaces the raw query
// string of r.URL with "api_token=<tok>" (tok is inserted as-is,
// without escaping). It returns 401 as the status expected for an
// invalid token.
func authzViaQueryString(r *http.Request, tok string) int {
	const param = "api_token="
	r.URL.RawQuery = param + tok
	return http.StatusUnauthorized
}
757
758 func (s *IntegrationSuite) TestVhostViaPOST(c *check.C) {
759         s.doVhostRequests(c, authzViaPOST)
760 }
// authzViaPOST is an authorizer that turns r into a POST request with
// a form-urlencoded body carrying tok in the "api_token" field. It
// returns 401 as the status expected for an invalid token.
func authzViaPOST(r *http.Request, tok string) int {
	form := url.Values{}
	form["api_token"] = []string{tok}

	r.Method = http.MethodPost
	r.Header.Add("Content-Type", "application/x-www-form-urlencoded")
	r.Body = ioutil.NopCloser(strings.NewReader(form.Encode()))
	return http.StatusUnauthorized
}
768
769 func (s *IntegrationSuite) TestVhostViaXHRPOST(c *check.C) {
770         s.doVhostRequests(c, authzViaPOST)
771 }
// authzViaXHRPOST is an authorizer that turns r into a POST request
// with an Origin header and a form-urlencoded body carrying tok in
// the "api_token" field along with disposition=attachment. It
// returns 401 as the status expected for an invalid token.
func authzViaXHRPOST(r *http.Request, tok string) int {
	form := url.Values{}
	form["api_token"] = []string{tok}
	form["disposition"] = []string{"attachment"}

	r.Method = http.MethodPost
	r.Header.Add("Content-Type", "application/x-www-form-urlencoded")
	r.Header.Add("Origin", "https://origin.example")
	r.Body = ioutil.NopCloser(strings.NewReader(form.Encode()))
	return http.StatusUnauthorized
}
783
784 // Try some combinations of {url, token} using the given authorization
785 // mechanism, and verify the result is correct.
786 func (s *IntegrationSuite) doVhostRequests(c *check.C, authz authorizer) {
787         for _, hostPath := range []string{
788                 arvadostest.FooCollection + ".example.com/foo",
789                 arvadostest.FooCollection + "--collections.example.com/foo",
790                 arvadostest.FooCollection + "--collections.example.com/_/foo",
791                 arvadostest.FooCollectionPDH + ".example.com/foo",
792                 strings.Replace(arvadostest.FooCollectionPDH, "+", "-", -1) + "--collections.example.com/foo",
793                 arvadostest.FooBarDirCollection + ".example.com/dir1/foo",
794         } {
795                 c.Log("doRequests: ", hostPath)
796                 s.doVhostRequestsWithHostPath(c, authz, hostPath)
797         }
798 }
799
800 func (s *IntegrationSuite) doVhostRequestsWithHostPath(c *check.C, authz authorizer, hostPath string) {
801         for _, tok := range []string{
802                 arvadostest.ActiveToken,
803                 arvadostest.ActiveToken[:15],
804                 arvadostest.SpectatorToken,
805                 "bogus",
806                 "",
807         } {
808                 u := mustParseURL("http://" + hostPath)
809                 req := &http.Request{
810                         Method:     "GET",
811                         Host:       u.Host,
812                         URL:        u,
813                         RequestURI: u.RequestURI(),
814                         Header:     http.Header{},
815                 }
816                 failCode := authz(req, tok)
817                 req, resp := s.doReq(req)
818                 code := resp.StatusCode
819                 buf, _ := io.ReadAll(resp.Body)
820                 body := string(buf)
821
822                 // If the initial request had a (non-empty) token
823                 // showing in the query string, we should have been
824                 // redirected in order to hide it in a cookie.
825                 c.Check(req.URL.String(), check.Not(check.Matches), `.*api_token=.+`)
826
827                 if tok == arvadostest.ActiveToken {
828                         c.Check(code, check.Equals, http.StatusOK)
829                         c.Check(body, check.Equals, "foo")
830                 } else {
831                         c.Check(code >= 400, check.Equals, true)
832                         c.Check(code < 500, check.Equals, true)
833                         if tok == arvadostest.SpectatorToken {
834                                 // Valid token never offers to retry
835                                 // with different credentials.
836                                 c.Check(code, check.Equals, http.StatusNotFound)
837                         } else {
838                                 // Invalid token can ask to retry
839                                 // depending on the authz method.
840                                 c.Check(code, check.Equals, failCode)
841                         }
842                         if code == 404 {
843                                 c.Check(body, check.Equals, notFoundMessage+"\n")
844                         } else {
845                                 c.Check(body, check.Equals, unauthorizedMessage+"\n")
846                         }
847                 }
848         }
849 }
850
851 func (s *IntegrationSuite) TestVhostPortMatch(c *check.C) {
852         for _, host := range []string{"download.example.com", "DOWNLOAD.EXAMPLE.COM"} {
853                 for _, port := range []string{"80", "443", "8000"} {
854                         s.handler.Cluster.Services.WebDAVDownload.ExternalURL.Host = fmt.Sprintf("download.example.com:%v", port)
855                         u := mustParseURL(fmt.Sprintf("http://%v/by_id/%v/foo", host, arvadostest.FooCollection))
856                         req := &http.Request{
857                                 Method:     "GET",
858                                 Host:       u.Host,
859                                 URL:        u,
860                                 RequestURI: u.RequestURI(),
861                                 Header:     http.Header{"Authorization": []string{"Bearer " + arvadostest.ActiveToken}},
862                         }
863                         req, resp := s.doReq(req)
864                         if port == "8000" {
865                                 c.Check(resp.StatusCode, check.Equals, 401)
866                         } else {
867                                 c.Check(resp.StatusCode, check.Equals, 200)
868                         }
869                 }
870         }
871 }
872
873 func (s *IntegrationSuite) collectionURL(uuid, path string) string {
874         return "http://" + uuid + ".collections.example.com/" + path
875 }
876
877 // Create a request and process it using s.handler.
878 func (s *IntegrationSuite) do(method string, urlstring string, token string, hdr http.Header, body []byte) (*http.Request, *http.Response) {
879         u := mustParseURL(urlstring)
880         if hdr == nil && token != "" {
881                 hdr = http.Header{"Authorization": {"Bearer " + token}}
882         } else if hdr == nil {
883                 hdr = http.Header{}
884         } else if token != "" {
885                 panic("must not pass both token and hdr")
886         }
887         return s.doReq(&http.Request{
888                 Method:     method,
889                 Host:       u.Host,
890                 URL:        u,
891                 RequestURI: u.RequestURI(),
892                 Header:     hdr,
893                 Body:       io.NopCloser(bytes.NewReader(body)),
894         })
895 }
896
897 // Process req using s.handler, and follow redirects if any.
898 func (s *IntegrationSuite) doReq(req *http.Request) (*http.Request, *http.Response) {
899         resp := httptest.NewRecorder()
900         var handler http.Handler = s.handler
901         // // Uncomment to enable request logging in test output:
902         // handler = httpserver.AddRequestIDs(httpserver.LogRequests(handler))
903         handler.ServeHTTP(resp, req)
904         if resp.Code != http.StatusSeeOther {
905                 return req, resp.Result()
906         }
907         cookies := (&http.Response{Header: resp.Header()}).Cookies()
908         u, _ := req.URL.Parse(resp.Header().Get("Location"))
909         req = &http.Request{
910                 Method:     "GET",
911                 Host:       u.Host,
912                 URL:        u,
913                 RequestURI: u.RequestURI(),
914                 Header:     http.Header{},
915         }
916         for _, c := range cookies {
917                 req.AddCookie(c)
918         }
919         return s.doReq(req)
920 }
921
922 func (s *IntegrationSuite) TestVhostRedirectQueryTokenToCookie(c *check.C) {
923         s.testVhostRedirectTokenToCookie(c, "GET",
924                 arvadostest.FooCollection+".example.com/foo",
925                 "?api_token="+arvadostest.ActiveToken,
926                 nil,
927                 "",
928                 http.StatusOK,
929                 "foo",
930         )
931 }
932
933 func (s *IntegrationSuite) TestSingleOriginSecretLink(c *check.C) {
934         s.testVhostRedirectTokenToCookie(c, "GET",
935                 "example.com/c="+arvadostest.FooCollection+"/t="+arvadostest.ActiveToken+"/foo",
936                 "",
937                 nil,
938                 "",
939                 http.StatusOK,
940                 "foo",
941         )
942 }
943
944 func (s *IntegrationSuite) TestCollectionSharingToken(c *check.C) {
945         s.testVhostRedirectTokenToCookie(c, "GET",
946                 "example.com/c="+arvadostest.FooFileCollectionUUID+"/t="+arvadostest.FooFileCollectionSharingToken+"/foo",
947                 "",
948                 nil,
949                 "",
950                 http.StatusOK,
951                 "foo",
952         )
953         // Same valid sharing token, but requesting a different collection
954         s.testVhostRedirectTokenToCookie(c, "GET",
955                 "example.com/c="+arvadostest.FooCollection+"/t="+arvadostest.FooFileCollectionSharingToken+"/foo",
956                 "",
957                 nil,
958                 "",
959                 http.StatusNotFound,
960                 regexp.QuoteMeta(notFoundMessage+"\n"),
961         )
962 }
963
964 // Bad token in URL is 404 Not Found because it doesn't make sense to
965 // retry the same URL with different authorization.
966 func (s *IntegrationSuite) TestSingleOriginSecretLinkBadToken(c *check.C) {
967         s.testVhostRedirectTokenToCookie(c, "GET",
968                 "example.com/c="+arvadostest.FooCollection+"/t=bogus/foo",
969                 "",
970                 nil,
971                 "",
972                 http.StatusNotFound,
973                 regexp.QuoteMeta(notFoundMessage+"\n"),
974         )
975 }
976
977 // Bad token in a cookie (even if it got there via our own
978 // query-string-to-cookie redirect) is, in principle, retryable via
979 // wb2-login-and-redirect flow.
980 func (s *IntegrationSuite) TestVhostRedirectQueryTokenToBogusCookie(c *check.C) {
981         // Inline
982         resp := s.testVhostRedirectTokenToCookie(c, "GET",
983                 arvadostest.FooCollection+".example.com/foo",
984                 "?api_token=thisisabogustoken",
985                 http.Header{"Sec-Fetch-Mode": {"navigate"}},
986                 "",
987                 http.StatusSeeOther,
988                 "",
989         )
990         u, err := url.Parse(resp.Header().Get("Location"))
991         c.Assert(err, check.IsNil)
992         c.Logf("redirected to %s", u)
993         c.Check(u.Host, check.Equals, s.handler.Cluster.Services.Workbench2.ExternalURL.Host)
994         c.Check(u.Query().Get("redirectToPreview"), check.Equals, "/c="+arvadostest.FooCollection+"/foo")
995         c.Check(u.Query().Get("redirectToDownload"), check.Equals, "")
996
997         // Download/attachment indicated by ?disposition=attachment
998         resp = s.testVhostRedirectTokenToCookie(c, "GET",
999                 arvadostest.FooCollection+".example.com/foo",
1000                 "?api_token=thisisabogustoken&disposition=attachment",
1001                 http.Header{"Sec-Fetch-Mode": {"navigate"}},
1002                 "",
1003                 http.StatusSeeOther,
1004                 "",
1005         )
1006         u, err = url.Parse(resp.Header().Get("Location"))
1007         c.Assert(err, check.IsNil)
1008         c.Logf("redirected to %s", u)
1009         c.Check(u.Host, check.Equals, s.handler.Cluster.Services.Workbench2.ExternalURL.Host)
1010         c.Check(u.Query().Get("redirectToPreview"), check.Equals, "")
1011         c.Check(u.Query().Get("redirectToDownload"), check.Equals, "/c="+arvadostest.FooCollection+"/foo")
1012
1013         // Download/attachment indicated by vhost
1014         resp = s.testVhostRedirectTokenToCookie(c, "GET",
1015                 s.handler.Cluster.Services.WebDAVDownload.ExternalURL.Host+"/c="+arvadostest.FooCollection+"/foo",
1016                 "?api_token=thisisabogustoken",
1017                 http.Header{"Sec-Fetch-Mode": {"navigate"}},
1018                 "",
1019                 http.StatusSeeOther,
1020                 "",
1021         )
1022         u, err = url.Parse(resp.Header().Get("Location"))
1023         c.Assert(err, check.IsNil)
1024         c.Logf("redirected to %s", u)
1025         c.Check(u.Host, check.Equals, s.handler.Cluster.Services.Workbench2.ExternalURL.Host)
1026         c.Check(u.Query().Get("redirectToPreview"), check.Equals, "")
1027         c.Check(u.Query().Get("redirectToDownload"), check.Equals, "/c="+arvadostest.FooCollection+"/foo")
1028
1029         // Without "Sec-Fetch-Mode: navigate" header, just 401.
1030         s.testVhostRedirectTokenToCookie(c, "GET",
1031                 s.handler.Cluster.Services.WebDAVDownload.ExternalURL.Host+"/c="+arvadostest.FooCollection+"/foo",
1032                 "?api_token=thisisabogustoken",
1033                 http.Header{"Sec-Fetch-Mode": {"cors"}},
1034                 "",
1035                 http.StatusUnauthorized,
1036                 regexp.QuoteMeta(unauthorizedMessage+"\n"),
1037         )
1038         s.testVhostRedirectTokenToCookie(c, "GET",
1039                 s.handler.Cluster.Services.WebDAVDownload.ExternalURL.Host+"/c="+arvadostest.FooCollection+"/foo",
1040                 "?api_token=thisisabogustoken",
1041                 nil,
1042                 "",
1043                 http.StatusUnauthorized,
1044                 regexp.QuoteMeta(unauthorizedMessage+"\n"),
1045         )
1046 }
1047
1048 func (s *IntegrationSuite) TestVhostRedirectWithNoCache(c *check.C) {
1049         resp := s.testVhostRedirectTokenToCookie(c, "GET",
1050                 arvadostest.FooCollection+".example.com/foo",
1051                 "?api_token=thisisabogustoken",
1052                 http.Header{
1053                         "Sec-Fetch-Mode": {"navigate"},
1054                         "Cache-Control":  {"no-cache"},
1055                 },
1056                 "",
1057                 http.StatusSeeOther,
1058                 "",
1059         )
1060         u, err := url.Parse(resp.Header().Get("Location"))
1061         c.Assert(err, check.IsNil)
1062         c.Logf("redirected to %s", u)
1063         c.Check(u.Host, check.Equals, s.handler.Cluster.Services.Workbench2.ExternalURL.Host)
1064         c.Check(u.Query().Get("redirectToPreview"), check.Equals, "/c="+arvadostest.FooCollection+"/foo")
1065         c.Check(u.Query().Get("redirectToDownload"), check.Equals, "")
1066 }
1067
1068 func (s *IntegrationSuite) TestNoTokenWorkbench2LoginFlow(c *check.C) {
1069         for _, trial := range []struct {
1070                 anonToken    bool
1071                 cacheControl string
1072         }{
1073                 {},
1074                 {cacheControl: "no-cache"},
1075                 {anonToken: true},
1076                 {anonToken: true, cacheControl: "no-cache"},
1077         } {
1078                 c.Logf("trial: %+v", trial)
1079
1080                 if trial.anonToken {
1081                         s.handler.Cluster.Users.AnonymousUserToken = arvadostest.AnonymousToken
1082                 } else {
1083                         s.handler.Cluster.Users.AnonymousUserToken = ""
1084                 }
1085                 req, err := http.NewRequest("GET", "http://"+arvadostest.FooCollection+".example.com/foo", nil)
1086                 c.Assert(err, check.IsNil)
1087                 req.Header.Set("Sec-Fetch-Mode", "navigate")
1088                 if trial.cacheControl != "" {
1089                         req.Header.Set("Cache-Control", trial.cacheControl)
1090                 }
1091                 resp := httptest.NewRecorder()
1092                 s.handler.ServeHTTP(resp, req)
1093                 c.Check(resp.Code, check.Equals, http.StatusSeeOther)
1094                 u, err := url.Parse(resp.Header().Get("Location"))
1095                 c.Assert(err, check.IsNil)
1096                 c.Logf("redirected to %q", u)
1097                 c.Check(u.Host, check.Equals, s.handler.Cluster.Services.Workbench2.ExternalURL.Host)
1098                 c.Check(u.Query().Get("redirectToPreview"), check.Equals, "/c="+arvadostest.FooCollection+"/foo")
1099                 c.Check(u.Query().Get("redirectToDownload"), check.Equals, "")
1100         }
1101 }
1102
1103 func (s *IntegrationSuite) TestVhostRedirectQueryTokenSingleOriginError(c *check.C) {
1104         s.testVhostRedirectTokenToCookie(c, "GET",
1105                 "example.com/c="+arvadostest.FooCollection+"/foo",
1106                 "?api_token="+arvadostest.ActiveToken,
1107                 nil,
1108                 "",
1109                 http.StatusBadRequest,
1110                 regexp.QuoteMeta("cannot serve inline content at this URL (possible configuration error; see https://doc.arvados.org/install/install-keep-web.html#dns)\n"),
1111         )
1112 }
1113
1114 // If client requests an attachment by putting ?disposition=attachment
1115 // in the query string, and gets redirected, the redirect target
1116 // should respond with an attachment.
1117 func (s *IntegrationSuite) TestVhostRedirectQueryTokenRequestAttachment(c *check.C) {
1118         resp := s.testVhostRedirectTokenToCookie(c, "GET",
1119                 arvadostest.FooCollection+".example.com/foo",
1120                 "?disposition=attachment&api_token="+arvadostest.ActiveToken,
1121                 nil,
1122                 "",
1123                 http.StatusOK,
1124                 "foo",
1125         )
1126         c.Check(resp.Header().Get("Content-Disposition"), check.Matches, "attachment(;.*)?")
1127 }
1128
1129 func (s *IntegrationSuite) TestVhostRedirectQueryTokenSiteFS(c *check.C) {
1130         s.handler.Cluster.Services.WebDAVDownload.ExternalURL.Host = "download.example.com"
1131         resp := s.testVhostRedirectTokenToCookie(c, "GET",
1132                 "download.example.com/by_id/"+arvadostest.FooCollection+"/foo",
1133                 "?api_token="+arvadostest.ActiveToken,
1134                 nil,
1135                 "",
1136                 http.StatusOK,
1137                 "foo",
1138         )
1139         c.Check(resp.Header().Get("Content-Disposition"), check.Matches, "attachment(;.*)?")
1140 }
1141
1142 func (s *IntegrationSuite) TestPastCollectionVersionFileAccess(c *check.C) {
1143         s.handler.Cluster.Services.WebDAVDownload.ExternalURL.Host = "download.example.com"
1144         resp := s.testVhostRedirectTokenToCookie(c, "GET",
1145                 "download.example.com/c="+arvadostest.WazVersion1Collection+"/waz",
1146                 "?api_token="+arvadostest.ActiveToken,
1147                 nil,
1148                 "",
1149                 http.StatusOK,
1150                 "waz",
1151         )
1152         c.Check(resp.Header().Get("Content-Disposition"), check.Matches, "attachment(;.*)?")
1153         resp = s.testVhostRedirectTokenToCookie(c, "GET",
1154                 "download.example.com/by_id/"+arvadostest.WazVersion1Collection+"/waz",
1155                 "?api_token="+arvadostest.ActiveToken,
1156                 nil,
1157                 "",
1158                 http.StatusOK,
1159                 "waz",
1160         )
1161         c.Check(resp.Header().Get("Content-Disposition"), check.Matches, "attachment(;.*)?")
1162 }
1163
1164 func (s *IntegrationSuite) TestVhostRedirectQueryTokenTrustAllContent(c *check.C) {
1165         s.handler.Cluster.Collections.TrustAllContent = true
1166         s.testVhostRedirectTokenToCookie(c, "GET",
1167                 "example.com/c="+arvadostest.FooCollection+"/foo",
1168                 "?api_token="+arvadostest.ActiveToken,
1169                 nil,
1170                 "",
1171                 http.StatusOK,
1172                 "foo",
1173         )
1174 }
1175
1176 func (s *IntegrationSuite) TestVhostRedirectQueryTokenAttachmentOnlyHost(c *check.C) {
1177         s.handler.Cluster.Services.WebDAVDownload.ExternalURL.Host = "example.com:1234"
1178
1179         s.testVhostRedirectTokenToCookie(c, "GET",
1180                 "example.com/c="+arvadostest.FooCollection+"/foo",
1181                 "?api_token="+arvadostest.ActiveToken,
1182                 nil,
1183                 "",
1184                 http.StatusBadRequest,
1185                 regexp.QuoteMeta("cannot serve inline content at this URL (possible configuration error; see https://doc.arvados.org/install/install-keep-web.html#dns)\n"),
1186         )
1187
1188         resp := s.testVhostRedirectTokenToCookie(c, "GET",
1189                 "example.com:1234/c="+arvadostest.FooCollection+"/foo",
1190                 "?api_token="+arvadostest.ActiveToken,
1191                 nil,
1192                 "",
1193                 http.StatusOK,
1194                 "foo",
1195         )
1196         c.Check(resp.Header().Get("Content-Disposition"), check.Equals, "attachment")
1197 }
1198
1199 func (s *IntegrationSuite) TestVhostRedirectMultipleTokens(c *check.C) {
1200         baseUrl := arvadostest.FooCollection + ".example.com/foo"
1201         query := url.Values{}
1202
1203         // The intent of these tests is to check that requests are redirected
1204         // correctly in the presence of multiple API tokens. The exact response
1205         // codes and content are not closely considered: they're just how
1206         // keep-web responded when we made the smallest possible fix. Changing
1207         // those responses may be okay, but you should still test all these
1208         // different cases and the associated redirect logic.
1209         query["api_token"] = []string{arvadostest.ActiveToken, arvadostest.AnonymousToken}
1210         s.testVhostRedirectTokenToCookie(c, "GET", baseUrl, "?"+query.Encode(), nil, "", http.StatusOK, "foo")
1211         query["api_token"] = []string{arvadostest.ActiveToken, arvadostest.AnonymousToken, ""}
1212         s.testVhostRedirectTokenToCookie(c, "GET", baseUrl, "?"+query.Encode(), nil, "", http.StatusOK, "foo")
1213         query["api_token"] = []string{arvadostest.ActiveToken, "", arvadostest.AnonymousToken}
1214         s.testVhostRedirectTokenToCookie(c, "GET", baseUrl, "?"+query.Encode(), nil, "", http.StatusOK, "foo")
1215         query["api_token"] = []string{"", arvadostest.ActiveToken}
1216         s.testVhostRedirectTokenToCookie(c, "GET", baseUrl, "?"+query.Encode(), nil, "", http.StatusOK, "foo")
1217
1218         expectContent := regexp.QuoteMeta(unauthorizedMessage + "\n")
1219         query["api_token"] = []string{arvadostest.AnonymousToken, "invalidtoo"}
1220         s.testVhostRedirectTokenToCookie(c, "GET", baseUrl, "?"+query.Encode(), nil, "", http.StatusUnauthorized, expectContent)
1221         query["api_token"] = []string{arvadostest.AnonymousToken, ""}
1222         s.testVhostRedirectTokenToCookie(c, "GET", baseUrl, "?"+query.Encode(), nil, "", http.StatusUnauthorized, expectContent)
1223         query["api_token"] = []string{"", arvadostest.AnonymousToken}
1224         s.testVhostRedirectTokenToCookie(c, "GET", baseUrl, "?"+query.Encode(), nil, "", http.StatusUnauthorized, expectContent)
1225 }
1226
1227 func (s *IntegrationSuite) TestVhostRedirectPOSTFormTokenToCookie(c *check.C) {
1228         s.testVhostRedirectTokenToCookie(c, "POST",
1229                 arvadostest.FooCollection+".example.com/foo",
1230                 "",
1231                 http.Header{"Content-Type": {"application/x-www-form-urlencoded"}},
1232                 url.Values{"api_token": {arvadostest.ActiveToken}}.Encode(),
1233                 http.StatusOK,
1234                 "foo",
1235         )
1236 }
1237
1238 func (s *IntegrationSuite) TestVhostRedirectPOSTFormTokenToCookie404(c *check.C) {
1239         s.testVhostRedirectTokenToCookie(c, "POST",
1240                 arvadostest.FooCollection+".example.com/foo",
1241                 "",
1242                 http.Header{"Content-Type": {"application/x-www-form-urlencoded"}},
1243                 url.Values{"api_token": {arvadostest.SpectatorToken}}.Encode(),
1244                 http.StatusNotFound,
1245                 regexp.QuoteMeta(notFoundMessage+"\n"),
1246         )
1247 }
1248
1249 func (s *IntegrationSuite) TestAnonymousTokenOK(c *check.C) {
1250         s.handler.Cluster.Users.AnonymousUserToken = arvadostest.AnonymousToken
1251         s.testVhostRedirectTokenToCookie(c, "GET",
1252                 "example.com/c="+arvadostest.HelloWorldCollection+"/Hello%20world.txt",
1253                 "",
1254                 nil,
1255                 "",
1256                 http.StatusOK,
1257                 "Hello world\n",
1258         )
1259 }
1260
1261 func (s *IntegrationSuite) TestAnonymousTokenError(c *check.C) {
1262         s.handler.Cluster.Users.AnonymousUserToken = "anonymousTokenConfiguredButInvalid"
1263         s.testVhostRedirectTokenToCookie(c, "GET",
1264                 "example.com/c="+arvadostest.HelloWorldCollection+"/Hello%20world.txt",
1265                 "",
1266                 nil,
1267                 "",
1268                 http.StatusUnauthorized,
1269                 "Authorization tokens are not accepted here: .*\n",
1270         )
1271 }
1272
1273 func (s *IntegrationSuite) TestSpecialCharsInPath(c *check.C) {
1274         s.handler.Cluster.Services.WebDAVDownload.ExternalURL.Host = "download.example.com"
1275
1276         client := arvados.NewClientFromEnv()
1277         client.AuthToken = arvadostest.ActiveToken
1278         fs, err := (&arvados.Collection{}).FileSystem(client, nil)
1279         c.Assert(err, check.IsNil)
1280         path := `https:\\"odd' path chars`
1281         f, err := fs.OpenFile(path, os.O_CREATE, 0777)
1282         c.Assert(err, check.IsNil)
1283         f.Close()
1284         mtxt, err := fs.MarshalManifest(".")
1285         c.Assert(err, check.IsNil)
1286         var coll arvados.Collection
1287         err = client.RequestAndDecode(&coll, "POST", "arvados/v1/collections", nil, map[string]interface{}{
1288                 "collection": map[string]string{
1289                         "manifest_text": mtxt,
1290                 },
1291         })
1292         c.Assert(err, check.IsNil)
1293
1294         u, _ := url.Parse("http://download.example.com/c=" + coll.UUID + "/")
1295         req := &http.Request{
1296                 Method:     "GET",
1297                 Host:       u.Host,
1298                 URL:        u,
1299                 RequestURI: u.RequestURI(),
1300                 Header: http.Header{
1301                         "Authorization": {"Bearer " + client.AuthToken},
1302                 },
1303         }
1304         resp := httptest.NewRecorder()
1305         s.handler.ServeHTTP(resp, req)
1306         c.Check(resp.Code, check.Equals, http.StatusOK)
1307         doc, err := html.Parse(resp.Body)
1308         c.Assert(err, check.IsNil)
1309         pathHrefMap := getPathHrefMap(doc)
1310         c.Check(pathHrefMap, check.HasLen, 1) // the one leaf added to collection
1311         href, hasPath := pathHrefMap[path]
1312         c.Assert(hasPath, check.Equals, true) // the path is listed
1313         relUrl := mustParseURL(href)
1314         c.Check(relUrl.Path, check.Equals, "./"+path) // href can be decoded back to path
1315 }
1316
1317 func (s *IntegrationSuite) TestForwardSlashSubstitution(c *check.C) {
1318         arv := arvados.NewClientFromEnv()
1319         s.handler.Cluster.Services.WebDAVDownload.ExternalURL.Host = "download.example.com"
1320         s.handler.Cluster.Collections.ForwardSlashNameSubstitution = "{SOLIDUS}"
1321         name := "foo/bar/baz"
1322         nameShown := strings.Replace(name, "/", "{SOLIDUS}", -1)
1323
1324         client := arvados.NewClientFromEnv()
1325         client.AuthToken = arvadostest.ActiveToken
1326         fs, err := (&arvados.Collection{}).FileSystem(client, nil)
1327         c.Assert(err, check.IsNil)
1328         f, err := fs.OpenFile("filename", os.O_CREATE, 0777)
1329         c.Assert(err, check.IsNil)
1330         f.Close()
1331         mtxt, err := fs.MarshalManifest(".")
1332         c.Assert(err, check.IsNil)
1333         var coll arvados.Collection
1334         err = client.RequestAndDecode(&coll, "POST", "arvados/v1/collections", nil, map[string]interface{}{
1335                 "collection": map[string]string{
1336                         "manifest_text": mtxt,
1337                         "name":          name,
1338                         "owner_uuid":    arvadostest.AProjectUUID,
1339                 },
1340         })
1341         c.Assert(err, check.IsNil)
1342         defer arv.RequestAndDecode(&coll, "DELETE", "arvados/v1/collections/"+coll.UUID, nil, nil)
1343
1344         base := "http://download.example.com/by_id/" + coll.OwnerUUID + "/"
1345         for tryURL, expectedAnchorText := range map[string]string{
1346                 base:                   nameShown + "/",
1347                 base + nameShown + "/": "filename",
1348         } {
1349                 u, _ := url.Parse(tryURL)
1350                 req := &http.Request{
1351                         Method:     "GET",
1352                         Host:       u.Host,
1353                         URL:        u,
1354                         RequestURI: u.RequestURI(),
1355                         Header: http.Header{
1356                                 "Authorization": {"Bearer " + client.AuthToken},
1357                         },
1358                 }
1359                 resp := httptest.NewRecorder()
1360                 s.handler.ServeHTTP(resp, req)
1361                 c.Check(resp.Code, check.Equals, http.StatusOK)
1362                 doc, err := html.Parse(resp.Body)
1363                 c.Assert(err, check.IsNil) // valid HTML
1364                 pathHrefMap := getPathHrefMap(doc)
1365                 href, hasExpected := pathHrefMap[expectedAnchorText]
1366                 c.Assert(hasExpected, check.Equals, true) // has expected anchor text
1367                 c.Assert(href, check.Not(check.Equals), "")
1368                 relUrl := mustParseURL(href)
1369                 c.Check(relUrl.Path, check.Equals, "./"+expectedAnchorText) // decoded href maps back to the anchor text
1370         }
1371 }
1372
1373 // XHRs can't follow redirect-with-cookie so they rely on method=POST
1374 // and disposition=attachment (telling us it's acceptable to respond
1375 // with content instead of a redirect) and an Origin header that gets
1376 // added automatically by the browser (telling us it's desirable to do
1377 // so).
1378 func (s *IntegrationSuite) TestXHRNoRedirect(c *check.C) {
1379         u, _ := url.Parse("http://example.com/c=" + arvadostest.FooCollection + "/foo")
1380         req := &http.Request{
1381                 Method:     "POST",
1382                 Host:       u.Host,
1383                 URL:        u,
1384                 RequestURI: u.RequestURI(),
1385                 Header: http.Header{
1386                         "Origin":       {"https://origin.example"},
1387                         "Content-Type": {"application/x-www-form-urlencoded"},
1388                 },
1389                 Body: ioutil.NopCloser(strings.NewReader(url.Values{
1390                         "api_token":   {arvadostest.ActiveToken},
1391                         "disposition": {"attachment"},
1392                 }.Encode())),
1393         }
1394         resp := httptest.NewRecorder()
1395         s.handler.ServeHTTP(resp, req)
1396         c.Check(resp.Code, check.Equals, http.StatusOK)
1397         c.Check(resp.Body.String(), check.Equals, "foo")
1398         c.Check(resp.Header().Get("Access-Control-Allow-Origin"), check.Equals, "*")
1399
1400         // GET + Origin header is representative of both AJAX GET
1401         // requests and inline images via <IMG crossorigin="anonymous"
1402         // src="...">.
1403         u.RawQuery = "api_token=" + url.QueryEscape(arvadostest.ActiveTokenV2)
1404         req = &http.Request{
1405                 Method:     "GET",
1406                 Host:       u.Host,
1407                 URL:        u,
1408                 RequestURI: u.RequestURI(),
1409                 Header: http.Header{
1410                         "Origin": {"https://origin.example"},
1411                 },
1412         }
1413         resp = httptest.NewRecorder()
1414         s.handler.ServeHTTP(resp, req)
1415         c.Check(resp.Code, check.Equals, http.StatusOK)
1416         c.Check(resp.Body.String(), check.Equals, "foo")
1417         c.Check(resp.Header().Get("Access-Control-Allow-Origin"), check.Equals, "*")
1418 }
1419
1420 func (s *IntegrationSuite) testVhostRedirectTokenToCookie(c *check.C, method, hostPath, queryString string, reqHeader http.Header, reqBody string, expectStatus int, matchRespBody string) *httptest.ResponseRecorder {
1421         if reqHeader == nil {
1422                 reqHeader = http.Header{}
1423         }
1424         u, _ := url.Parse(`http://` + hostPath + queryString)
1425         c.Logf("requesting %s", u)
1426         req := &http.Request{
1427                 Method:     method,
1428                 Host:       u.Host,
1429                 URL:        u,
1430                 RequestURI: u.RequestURI(),
1431                 Header:     reqHeader,
1432                 Body:       ioutil.NopCloser(strings.NewReader(reqBody)),
1433         }
1434
1435         resp := httptest.NewRecorder()
1436         defer func() {
1437                 c.Check(resp.Code, check.Equals, expectStatus)
1438                 c.Check(resp.Body.String(), check.Matches, matchRespBody)
1439         }()
1440
1441         s.handler.ServeHTTP(resp, req)
1442         if resp.Code != http.StatusSeeOther {
1443                 attachment, _ := regexp.MatchString(`^attachment(;|$)`, resp.Header().Get("Content-Disposition"))
1444                 // Since we're not redirecting, check that any api_token in the URL is
1445                 // handled safely.
1446                 // If there is no token in the URL, then we're good.
1447                 // Otherwise, if the response code is an error, the body is expected to
1448                 // be static content, and nothing that might maliciously introspect the
1449                 // URL. It's considered safe and allowed.
1450                 // Otherwise, if the response content has attachment disposition,
1451                 // that's considered safe for all the reasons explained in the
1452                 // safeAttachment comment in handler.go.
1453                 c.Check(!u.Query().Has("api_token") || resp.Code >= 400 || attachment, check.Equals, true)
1454                 return resp
1455         }
1456
1457         loc, err := url.Parse(resp.Header().Get("Location"))
1458         c.Assert(err, check.IsNil)
1459         c.Check(loc.Scheme, check.Equals, u.Scheme)
1460         c.Check(loc.Host, check.Equals, u.Host)
1461         c.Check(loc.RawPath, check.Equals, u.RawPath)
1462         // If the response was a redirect, it should never include an API token.
1463         c.Check(loc.Query().Has("api_token"), check.Equals, false)
1464         c.Check(resp.Body.String(), check.Matches, `.*href="http://`+regexp.QuoteMeta(html.EscapeString(hostPath))+`(\?[^"]*)?".*`)
1465         cookies := (&http.Response{Header: resp.Header()}).Cookies()
1466
1467         c.Logf("following redirect to %s", u)
1468         req = &http.Request{
1469                 Method:     "GET",
1470                 Host:       loc.Host,
1471                 URL:        loc,
1472                 RequestURI: loc.RequestURI(),
1473                 Header:     reqHeader,
1474         }
1475         for _, c := range cookies {
1476                 req.AddCookie(c)
1477         }
1478
1479         resp = httptest.NewRecorder()
1480         s.handler.ServeHTTP(resp, req)
1481
1482         if resp.Code != http.StatusSeeOther {
1483                 c.Check(resp.Header().Get("Location"), check.Equals, "")
1484         }
1485         return resp
1486 }
1487
1488 func (s *IntegrationSuite) TestDirectoryListingWithAnonymousToken(c *check.C) {
1489         s.handler.Cluster.Users.AnonymousUserToken = arvadostest.AnonymousToken
1490         s.testDirectoryListing(c)
1491 }
1492
1493 func (s *IntegrationSuite) TestDirectoryListingWithNoAnonymousToken(c *check.C) {
1494         s.handler.Cluster.Users.AnonymousUserToken = ""
1495         s.testDirectoryListing(c)
1496 }
1497
1498 func (s *IntegrationSuite) testDirectoryListing(c *check.C) {
1499         // The "ownership cycle" test fixtures are reachable from the
1500         // "filter group without filters" group, causing webdav's
1501         // walkfs to recurse indefinitely. Avoid that by deleting one
1502         // of the bogus fixtures.
1503         arv := arvados.NewClientFromEnv()
1504         err := arv.RequestAndDecode(nil, "DELETE", "arvados/v1/groups/zzzzz-j7d0g-cx2al9cqkmsf1hs", nil, nil)
1505         if err != nil {
1506                 c.Assert(err, check.FitsTypeOf, &arvados.TransactionError{})
1507                 c.Check(err.(*arvados.TransactionError).StatusCode, check.Equals, 404)
1508         }
1509
1510         s.handler.Cluster.Services.WebDAVDownload.ExternalURL.Host = "download.example.com"
1511         authHeader := http.Header{
1512                 "Authorization": {"Bearer " + arvadostest.ActiveToken},
1513         }
1514         for _, trial := range []struct {
1515                 uri      string
1516                 header   http.Header
1517                 expect   []string
1518                 redirect string
1519                 cutDirs  int
1520         }{
1521                 {
1522                         uri:     strings.Replace(arvadostest.FooAndBarFilesInDirPDH, "+", "-", -1) + ".example.com/",
1523                         header:  authHeader,
1524                         expect:  []string{"dir1/foo", "dir1/bar"},
1525                         cutDirs: 0,
1526                 },
1527                 {
1528                         uri:     strings.Replace(arvadostest.FooAndBarFilesInDirPDH, "+", "-", -1) + ".example.com/dir1/",
1529                         header:  authHeader,
1530                         expect:  []string{"foo", "bar"},
1531                         cutDirs: 1,
1532                 },
1533                 {
1534                         // URLs of this form ignore authHeader, and
1535                         // FooAndBarFilesInDirUUID isn't public, so
1536                         // this returns 401.
1537                         uri:    "download.example.com/collections/" + arvadostest.FooAndBarFilesInDirUUID + "/",
1538                         header: authHeader,
1539                         expect: nil,
1540                 },
1541                 {
1542                         uri:     "download.example.com/users/active/foo_file_in_dir/",
1543                         header:  authHeader,
1544                         expect:  []string{"dir1/"},
1545                         cutDirs: 3,
1546                 },
1547                 {
1548                         uri:     "download.example.com/users/active/foo_file_in_dir/dir1/",
1549                         header:  authHeader,
1550                         expect:  []string{"bar"},
1551                         cutDirs: 4,
1552                 },
1553                 {
1554                         uri:     "download.example.com/",
1555                         header:  authHeader,
1556                         expect:  []string{"users/"},
1557                         cutDirs: 0,
1558                 },
1559                 {
1560                         uri:      "download.example.com/users",
1561                         header:   authHeader,
1562                         redirect: "/users/",
1563                         expect:   []string{"active/"},
1564                         cutDirs:  1,
1565                 },
1566                 {
1567                         uri:     "download.example.com/users/",
1568                         header:  authHeader,
1569                         expect:  []string{"active/"},
1570                         cutDirs: 1,
1571                 },
1572                 {
1573                         uri:      "download.example.com/users/active",
1574                         header:   authHeader,
1575                         redirect: "/users/active/",
1576                         expect:   []string{"foo_file_in_dir/"},
1577                         cutDirs:  2,
1578                 },
1579                 {
1580                         uri:     "download.example.com/users/active/",
1581                         header:  authHeader,
1582                         expect:  []string{"foo_file_in_dir/"},
1583                         cutDirs: 2,
1584                 },
1585                 {
1586                         uri:     "collections.example.com/collections/download/" + arvadostest.FooAndBarFilesInDirUUID + "/" + arvadostest.ActiveToken + "/",
1587                         header:  nil,
1588                         expect:  []string{"dir1/foo", "dir1/bar"},
1589                         cutDirs: 4,
1590                 },
1591                 {
1592                         uri:     "collections.example.com/c=" + arvadostest.FooAndBarFilesInDirUUID + "/t=" + arvadostest.ActiveToken + "/",
1593                         header:  nil,
1594                         expect:  []string{"dir1/foo", "dir1/bar"},
1595                         cutDirs: 2,
1596                 },
1597                 {
1598                         uri:     "collections.example.com/c=" + arvadostest.FooAndBarFilesInDirUUID + "/t=" + arvadostest.ActiveToken,
1599                         header:  nil,
1600                         expect:  []string{"dir1/foo", "dir1/bar"},
1601                         cutDirs: 2,
1602                 },
1603                 {
1604                         uri:     "download.example.com/c=" + arvadostest.FooAndBarFilesInDirUUID,
1605                         header:  authHeader,
1606                         expect:  []string{"dir1/foo", "dir1/bar"},
1607                         cutDirs: 1,
1608                 },
1609                 {
1610                         uri:      "download.example.com/c=" + arvadostest.FooAndBarFilesInDirUUID + "/dir1",
1611                         header:   authHeader,
1612                         redirect: "/c=" + arvadostest.FooAndBarFilesInDirUUID + "/dir1/",
1613                         expect:   []string{"foo", "bar"},
1614                         cutDirs:  2,
1615                 },
1616                 {
1617                         uri:     "download.example.com/c=" + arvadostest.FooAndBarFilesInDirUUID + "/_/dir1/",
1618                         header:  authHeader,
1619                         expect:  []string{"foo", "bar"},
1620                         cutDirs: 3,
1621                 },
1622                 {
1623                         uri:      arvadostest.FooAndBarFilesInDirUUID + ".example.com/dir1?api_token=" + arvadostest.ActiveToken,
1624                         header:   authHeader,
1625                         redirect: "/dir1/",
1626                         expect:   []string{"foo", "bar"},
1627                         cutDirs:  1,
1628                 },
1629                 {
1630                         uri:    "collections.example.com/c=" + arvadostest.FooAndBarFilesInDirUUID + "/theperthcountyconspiracydoesnotexist/",
1631                         header: authHeader,
1632                         expect: nil,
1633                 },
1634                 {
1635                         uri:     "download.example.com/c=" + arvadostest.WazVersion1Collection,
1636                         header:  authHeader,
1637                         expect:  []string{"waz"},
1638                         cutDirs: 1,
1639                 },
1640                 {
1641                         uri:     "download.example.com/by_id/" + arvadostest.WazVersion1Collection,
1642                         header:  authHeader,
1643                         expect:  []string{"waz"},
1644                         cutDirs: 2,
1645                 },
1646                 {
1647                         uri:     "download.example.com/users/active/This filter group/",
1648                         header:  authHeader,
1649                         expect:  []string{"A Subproject/"},
1650                         cutDirs: 3,
1651                 },
1652                 {
1653                         uri:     "download.example.com/users/active/This filter group/A Subproject",
1654                         header:  authHeader,
1655                         expect:  []string{"baz_file/"},
1656                         cutDirs: 4,
1657                 },
1658                 {
1659                         uri:     "download.example.com/by_id/" + arvadostest.AFilterGroupUUID,
1660                         header:  authHeader,
1661                         expect:  []string{"A Subproject/"},
1662                         cutDirs: 2,
1663                 },
1664                 {
1665                         uri:     "download.example.com/by_id/" + arvadostest.AFilterGroupUUID + "/A Subproject",
1666                         header:  authHeader,
1667                         expect:  []string{"baz_file/"},
1668                         cutDirs: 3,
1669                 },
1670         } {
1671                 comment := check.Commentf("HTML: %q redir %q => %q", trial.uri, trial.redirect, trial.expect)
1672                 resp := httptest.NewRecorder()
1673                 u := mustParseURL("//" + trial.uri)
1674                 req := &http.Request{
1675                         Method:     "GET",
1676                         Host:       u.Host,
1677                         URL:        u,
1678                         RequestURI: u.RequestURI(),
1679                         Header:     copyHeader(trial.header),
1680                 }
1681                 s.handler.ServeHTTP(resp, req)
1682                 var cookies []*http.Cookie
1683                 for resp.Code == http.StatusSeeOther {
1684                         u, _ := req.URL.Parse(resp.Header().Get("Location"))
1685                         req = &http.Request{
1686                                 Method:     "GET",
1687                                 Host:       u.Host,
1688                                 URL:        u,
1689                                 RequestURI: u.RequestURI(),
1690                                 Header:     copyHeader(trial.header),
1691                         }
1692                         cookies = append(cookies, (&http.Response{Header: resp.Header()}).Cookies()...)
1693                         for _, c := range cookies {
1694                                 req.AddCookie(c)
1695                         }
1696                         resp = httptest.NewRecorder()
1697                         s.handler.ServeHTTP(resp, req)
1698                 }
1699                 if trial.redirect != "" {
1700                         c.Check(req.URL.Path, check.Equals, trial.redirect, comment)
1701                 }
1702                 if trial.expect == nil {
1703                         c.Check(resp.Code, check.Equals, http.StatusUnauthorized, comment)
1704                 } else {
1705                         c.Check(resp.Code, check.Equals, http.StatusOK, comment)
1706                         listingPageDoc, err := html.Parse(resp.Body)
1707                         c.Check(err, check.IsNil, comment) // valid HTML document
1708                         pathHrefMap := getPathHrefMap(listingPageDoc)
1709                         c.Assert(pathHrefMap, check.Not(check.HasLen), 0, comment)
1710                         for _, e := range trial.expect {
1711                                 href, hasE := pathHrefMap[e]
1712                                 c.Check(hasE, check.Equals, true, comment) // expected path is listed
1713                                 relUrl := mustParseURL(href)
1714                                 c.Check(relUrl.Path, check.Equals, "./"+e, comment) // href can be decoded back to path
1715                         }
1716                         wgetCommand := getWgetExamplePre(listingPageDoc)
1717                         wgetExpected := regexp.MustCompile(`^\$ wget .*--cut-dirs=(\d+) .*'(https?://[^']+)'$`)
1718                         wgetMatchGroups := wgetExpected.FindStringSubmatch(wgetCommand)
1719                         c.Assert(wgetMatchGroups, check.NotNil)                                     // wget command matches
1720                         c.Check(wgetMatchGroups[1], check.Equals, fmt.Sprintf("%d", trial.cutDirs)) // correct level of cut dirs in wget command
1721                         printedUrl := mustParseURL(wgetMatchGroups[2])
1722                         c.Check(printedUrl.Host, check.Equals, req.URL.Host)
1723                         c.Check(printedUrl.Path, check.Equals, req.URL.Path) // URL arg in wget command can be decoded to the right path
1724                 }
1725
1726                 comment = check.Commentf("WebDAV: %q => %q", trial.uri, trial.expect)
1727                 req = &http.Request{
1728                         Method:     "OPTIONS",
1729                         Host:       u.Host,
1730                         URL:        u,
1731                         RequestURI: u.RequestURI(),
1732                         Header:     copyHeader(trial.header),
1733                         Body:       ioutil.NopCloser(&bytes.Buffer{}),
1734                 }
1735                 resp = httptest.NewRecorder()
1736                 s.handler.ServeHTTP(resp, req)
1737                 if trial.expect == nil {
1738                         c.Check(resp.Code, check.Equals, http.StatusUnauthorized, comment)
1739                 } else {
1740                         c.Check(resp.Code, check.Equals, http.StatusOK, comment)
1741                 }
1742
1743                 req = &http.Request{
1744                         Method:     "PROPFIND",
1745                         Host:       u.Host,
1746                         URL:        u,
1747                         RequestURI: u.RequestURI(),
1748                         Header:     copyHeader(trial.header),
1749                         Body:       ioutil.NopCloser(&bytes.Buffer{}),
1750                 }
1751                 resp = httptest.NewRecorder()
1752                 s.handler.ServeHTTP(resp, req)
1753                 // This check avoids logging a big XML document in the
1754                 // event webdav throws a 500 error after sending
1755                 // headers for a 207.
1756                 if !c.Check(strings.HasSuffix(resp.Body.String(), "Internal Server Error"), check.Equals, false) {
1757                         continue
1758                 }
1759                 if trial.expect == nil {
1760                         c.Check(resp.Code, check.Equals, http.StatusUnauthorized, comment)
1761                 } else {
1762                         c.Check(resp.Code, check.Equals, http.StatusMultiStatus, comment)
1763                         for _, e := range trial.expect {
1764                                 if strings.HasSuffix(e, "/") {
1765                                         e = filepath.Join(u.Path, e) + "/"
1766                                 } else {
1767                                         e = filepath.Join(u.Path, e)
1768                                 }
1769                                 e = strings.Replace(e, " ", "%20", -1)
1770                                 c.Check(resp.Body.String(), check.Matches, `(?ms).*<D:href>`+e+`</D:href>.*`, comment)
1771                         }
1772                 }
1773         }
1774 }
1775
1776 // Shallow-traverse the HTML document, gathering the nodes satisfying the
1777 // predicate function in the output slice. If a node matches the predicate,
1778 // none of its children will be visited.
1779 func getNodes(document *html.Node, predicate func(*html.Node) bool) []*html.Node {
1780         var acc []*html.Node
1781         var traverse func(*html.Node, []*html.Node) []*html.Node
1782         traverse = func(root *html.Node, sofar []*html.Node) []*html.Node {
1783                 if root == nil {
1784                         return sofar
1785                 }
1786                 if predicate(root) {
1787                         return append(sofar, root)
1788                 }
1789                 for cur := root.FirstChild; cur != nil; cur = cur.NextSibling {
1790                         sofar = traverse(cur, sofar)
1791                 }
1792                 return sofar
1793         }
1794         return traverse(document, acc)
1795 }
1796
1797 // Returns true if a node has the attribute targetAttr with the given value
1798 func matchesAttributeValue(node *html.Node, targetAttr string, value string) bool {
1799         for _, attr := range node.Attr {
1800                 if attr.Key == targetAttr && attr.Val == value {
1801                         return true
1802                 }
1803         }
1804         return false
1805 }
1806
1807 // Concatenate the content of text-node children of node; only direct
1808 // children are visited, and any non-text children are skipped.
1809 func getNodeText(node *html.Node) string {
1810         var recv strings.Builder
1811         for c := node.FirstChild; c != nil; c = c.NextSibling {
1812                 if c.Type == html.TextNode {
1813                         recv.WriteString(c.Data)
1814                 }
1815         }
1816         return recv.String()
1817 }
1818
1819 // Returns a map from the directory listing item string (a path) to the href
1820 // value of its <a> tag (an encoded relative URL)
1821 func getPathHrefMap(document *html.Node) map[string]string {
1822         isItemATag := func(node *html.Node) bool {
1823                 return node.Type == html.ElementNode && node.Data == "a" && matchesAttributeValue(node, "class", "item")
1824         }
1825         aTags := getNodes(document, isItemATag)
1826         output := make(map[string]string)
1827         for _, elem := range aTags {
1828                 textContent := getNodeText(elem)
1829                 for _, attr := range elem.Attr {
1830                         if attr.Key == "href" {
1831                                 output[textContent] = attr.Val
1832                                 break
1833                         }
1834                 }
1835         }
1836         return output
1837 }
1838
1839 func getWgetExamplePre(document *html.Node) string {
1840         isWgetPre := func(node *html.Node) bool {
1841                 return node.Type == html.ElementNode && matchesAttributeValue(node, "id", "wget-example")
1842         }
1843         elements := getNodes(document, isWgetPre)
1844         if len(elements) != 1 {
1845                 return ""
1846         }
1847         return getNodeText(elements[0])
1848 }
1849
1850 func (s *IntegrationSuite) TestDeleteLastFile(c *check.C) {
1851         arv := arvados.NewClientFromEnv()
1852         var newCollection arvados.Collection
1853         err := arv.RequestAndDecode(&newCollection, "POST", "arvados/v1/collections", nil, map[string]interface{}{
1854                 "collection": map[string]string{
1855                         "owner_uuid":    arvadostest.ActiveUserUUID,
1856                         "manifest_text": ". acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:foo.txt 0:3:bar.txt\n",
1857                         "name":          "keep-web test collection",
1858                 },
1859                 "ensure_unique_name": true,
1860         })
1861         c.Assert(err, check.IsNil)
1862         defer arv.RequestAndDecode(&newCollection, "DELETE", "arvados/v1/collections/"+newCollection.UUID, nil, nil)
1863
1864         var updated arvados.Collection
1865         for _, fnm := range []string{"foo.txt", "bar.txt"} {
1866                 s.handler.Cluster.Services.WebDAVDownload.ExternalURL.Host = "example.com"
1867                 u, _ := url.Parse("http://example.com/c=" + newCollection.UUID + "/" + fnm)
1868                 req := &http.Request{
1869                         Method:     "DELETE",
1870                         Host:       u.Host,
1871                         URL:        u,
1872                         RequestURI: u.RequestURI(),
1873                         Header: http.Header{
1874                                 "Authorization": {"Bearer " + arvadostest.ActiveToken},
1875                         },
1876                 }
1877                 resp := httptest.NewRecorder()
1878                 s.handler.ServeHTTP(resp, req)
1879                 c.Check(resp.Code, check.Equals, http.StatusNoContent)
1880
1881                 updated = arvados.Collection{}
1882                 err = arv.RequestAndDecode(&updated, "GET", "arvados/v1/collections/"+newCollection.UUID, nil, nil)
1883                 c.Check(err, check.IsNil)
1884                 c.Check(updated.ManifestText, check.Not(check.Matches), `(?ms).*\Q`+fnm+`\E.*`)
1885                 c.Logf("updated manifest_text %q", updated.ManifestText)
1886         }
1887         c.Check(updated.ManifestText, check.Equals, "")
1888 }
1889
1890 func (s *IntegrationSuite) TestFileContentType(c *check.C) {
1891         s.handler.Cluster.Services.WebDAVDownload.ExternalURL.Host = "download.example.com"
1892
1893         client := arvados.NewClientFromEnv()
1894         client.AuthToken = arvadostest.ActiveToken
1895         arv, err := arvadosclient.New(client)
1896         c.Assert(err, check.Equals, nil)
1897         kc, err := keepclient.MakeKeepClient(arv)
1898         c.Assert(err, check.Equals, nil)
1899
1900         fs, err := (&arvados.Collection{}).FileSystem(client, kc)
1901         c.Assert(err, check.IsNil)
1902
1903         trials := []struct {
1904                 filename    string
1905                 content     string
1906                 contentType string
1907         }{
1908                 {"picture.txt", "BMX bikes are small this year\n", "text/plain; charset=utf-8"},
1909                 {"picture.bmp", "BMX bikes are small this year\n", "image/(x-ms-)?bmp"},
1910                 {"picture.jpg", "BMX bikes are small this year\n", "image/jpeg"},
1911                 {"picture1", "BMX bikes are small this year\n", "image/bmp"},            // content sniff; "BM" is the magic signature for .bmp
1912                 {"picture2", "Cars are small this year\n", "text/plain; charset=utf-8"}, // content sniff
1913         }
1914         for _, trial := range trials {
1915                 f, err := fs.OpenFile(trial.filename, os.O_CREATE|os.O_WRONLY, 0777)
1916                 c.Assert(err, check.IsNil)
1917                 _, err = f.Write([]byte(trial.content))
1918                 c.Assert(err, check.IsNil)
1919                 c.Assert(f.Close(), check.IsNil)
1920         }
1921         mtxt, err := fs.MarshalManifest(".")
1922         c.Assert(err, check.IsNil)
1923         var coll arvados.Collection
1924         err = client.RequestAndDecode(&coll, "POST", "arvados/v1/collections", nil, map[string]interface{}{
1925                 "collection": map[string]string{
1926                         "manifest_text": mtxt,
1927                 },
1928         })
1929         c.Assert(err, check.IsNil)
1930
1931         for _, trial := range trials {
1932                 u, _ := url.Parse("http://download.example.com/by_id/" + coll.UUID + "/" + trial.filename)
1933                 req := &http.Request{
1934                         Method:     "GET",
1935                         Host:       u.Host,
1936                         URL:        u,
1937                         RequestURI: u.RequestURI(),
1938                         Header: http.Header{
1939                                 "Authorization": {"Bearer " + client.AuthToken},
1940                         },
1941                 }
1942                 resp := httptest.NewRecorder()
1943                 s.handler.ServeHTTP(resp, req)
1944                 c.Check(resp.Code, check.Equals, http.StatusOK)
1945                 c.Check(resp.Header().Get("Content-Type"), check.Matches, trial.contentType)
1946                 c.Check(resp.Body.String(), check.Equals, trial.content)
1947         }
1948 }
1949
1950 func (s *IntegrationSuite) TestCacheSize(c *check.C) {
1951         req, err := http.NewRequest("GET", "http://"+arvadostest.FooCollection+".example.com/foo", nil)
1952         req.Header.Set("Authorization", "Bearer "+arvadostest.ActiveTokenV2)
1953         c.Assert(err, check.IsNil)
1954         resp := httptest.NewRecorder()
1955         s.handler.ServeHTTP(resp, req)
1956         c.Assert(resp.Code, check.Equals, http.StatusOK)
1957         c.Check(s.handler.Cache.sessions[arvadostest.ActiveTokenV2].client.DiskCacheSize.Percent(), check.Equals, int64(10))
1958 }
1959
1960 // Writing to a collection shouldn't affect its entry in the
1961 // PDH-to-manifest cache.
1962 func (s *IntegrationSuite) TestCacheWriteCollectionSamePDH(c *check.C) {
1963         arv, err := arvadosclient.MakeArvadosClient()
1964         c.Assert(err, check.Equals, nil)
1965         arv.ApiToken = arvadostest.ActiveToken
1966
1967         u := mustParseURL("http://x.example/testfile")
1968         req := &http.Request{
1969                 Method:     "GET",
1970                 Host:       u.Host,
1971                 URL:        u,
1972                 RequestURI: u.RequestURI(),
1973                 Header:     http.Header{"Authorization": {"Bearer " + arv.ApiToken}},
1974         }
1975
1976         checkWithID := func(id string, status int) {
1977                 req.URL.Host = strings.Replace(id, "+", "-", -1) + ".example"
1978                 req.Host = req.URL.Host
1979                 resp := httptest.NewRecorder()
1980                 s.handler.ServeHTTP(resp, req)
1981                 c.Check(resp.Code, check.Equals, status)
1982         }
1983
1984         var colls [2]arvados.Collection
1985         for i := range colls {
1986                 err := arv.Create("collections",
1987                         map[string]interface{}{
1988                                 "ensure_unique_name": true,
1989                                 "collection": map[string]interface{}{
1990                                         "name": "test collection",
1991                                 },
1992                         }, &colls[i])
1993                 c.Assert(err, check.Equals, nil)
1994         }
1995
1996         // Populate cache with empty collection
1997         checkWithID(colls[0].PortableDataHash, http.StatusNotFound)
1998
1999         // write a file to colls[0]
2000         reqPut := *req
2001         reqPut.Method = "PUT"
2002         reqPut.URL.Host = colls[0].UUID + ".example"
2003         reqPut.Host = req.URL.Host
2004         reqPut.Body = ioutil.NopCloser(bytes.NewBufferString("testdata"))
2005         resp := httptest.NewRecorder()
2006         s.handler.ServeHTTP(resp, &reqPut)
2007         c.Check(resp.Code, check.Equals, http.StatusCreated)
2008
2009         // new file should not appear in colls[1]
2010         checkWithID(colls[1].PortableDataHash, http.StatusNotFound)
2011         checkWithID(colls[1].UUID, http.StatusNotFound)
2012
2013         checkWithID(colls[0].UUID, http.StatusOK)
2014 }
2015
// copyHeader returns a new, non-nil http.Header holding the same keys
// as h, with each value slice duplicated so that later changes to the
// copy do not show through in h (and vice versa).
func copyHeader(h http.Header) http.Header {
	dup := make(http.Header, len(h))
	for name, values := range h {
		dup[name] = append([]string(nil), values...)
	}
	return dup
}
2023
2024 func (s *IntegrationSuite) checkUploadDownloadRequest(c *check.C, req *http.Request,
2025         successCode int, direction string, perm bool, userUuid, collectionUuid, collectionPDH, filepath string) {
2026
2027         client := arvados.NewClientFromEnv()
2028         client.AuthToken = arvadostest.AdminToken
2029         var logentries arvados.LogList
2030         limit1 := 1
2031         err := client.RequestAndDecode(&logentries, "GET", "arvados/v1/logs", nil,
2032                 arvados.ResourceListParams{
2033                         Limit: &limit1,
2034                         Order: "created_at desc"})
2035         c.Check(err, check.IsNil)
2036         c.Check(logentries.Items, check.HasLen, 1)
2037         lastLogId := logentries.Items[0].ID
2038         c.Logf("lastLogId: %d", lastLogId)
2039
2040         var logbuf bytes.Buffer
2041         logger := logrus.New()
2042         logger.Out = &logbuf
2043         resp := httptest.NewRecorder()
2044         req = req.WithContext(ctxlog.Context(context.Background(), logger))
2045         s.handler.ServeHTTP(resp, req)
2046
2047         if perm {
2048                 c.Check(resp.Result().StatusCode, check.Equals, successCode)
2049                 c.Check(logbuf.String(), check.Matches, `(?ms).*msg="File `+direction+`".*`)
2050                 c.Check(logbuf.String(), check.Not(check.Matches), `(?ms).*level=error.*`)
2051
2052                 deadline := time.Now().Add(time.Second)
2053                 for {
2054                         c.Assert(time.Now().After(deadline), check.Equals, false, check.Commentf("timed out waiting for log entry"))
2055                         logentries = arvados.LogList{}
2056                         err = client.RequestAndDecode(&logentries, "GET", "arvados/v1/logs", nil,
2057                                 arvados.ResourceListParams{
2058                                         Filters: []arvados.Filter{
2059                                                 {Attr: "event_type", Operator: "=", Operand: "file_" + direction},
2060                                                 {Attr: "object_uuid", Operator: "=", Operand: userUuid},
2061                                         },
2062                                         Limit: &limit1,
2063                                         Order: "created_at desc",
2064                                 })
2065                         c.Assert(err, check.IsNil)
2066                         if len(logentries.Items) > 0 &&
2067                                 logentries.Items[0].ID > lastLogId &&
2068                                 logentries.Items[0].ObjectUUID == userUuid &&
2069                                 logentries.Items[0].Properties["collection_uuid"] == collectionUuid &&
2070                                 (collectionPDH == "" || logentries.Items[0].Properties["portable_data_hash"] == collectionPDH) &&
2071                                 logentries.Items[0].Properties["collection_file_path"] == filepath {
2072                                 break
2073                         }
2074                         c.Logf("logentries.Items: %+v", logentries.Items)
2075                         time.Sleep(50 * time.Millisecond)
2076                 }
2077         } else {
2078                 c.Check(resp.Result().StatusCode, check.Equals, http.StatusForbidden)
2079                 c.Check(logbuf.String(), check.Equals, "")
2080         }
2081 }
2082
2083 func (s *IntegrationSuite) TestDownloadLoggingPermission(c *check.C) {
2084         u := mustParseURL("http://" + arvadostest.FooCollection + ".keep-web.example/foo")
2085
2086         s.handler.Cluster.Collections.TrustAllContent = true
2087         s.handler.Cluster.Collections.WebDAVLogDownloadInterval = arvados.Duration(0)
2088
2089         for _, adminperm := range []bool{true, false} {
2090                 for _, userperm := range []bool{true, false} {
2091                         s.handler.Cluster.Collections.WebDAVPermission.Admin.Download = adminperm
2092                         s.handler.Cluster.Collections.WebDAVPermission.User.Download = userperm
2093
2094                         // Test admin permission
2095                         req := &http.Request{
2096                                 Method:     "GET",
2097                                 Host:       u.Host,
2098                                 URL:        u,
2099                                 RequestURI: u.RequestURI(),
2100                                 Header: http.Header{
2101                                         "Authorization": {"Bearer " + arvadostest.AdminToken},
2102                                 },
2103                         }
2104                         s.checkUploadDownloadRequest(c, req, http.StatusOK, "download", adminperm,
2105                                 arvadostest.AdminUserUUID, arvadostest.FooCollection, arvadostest.FooCollectionPDH, "foo")
2106
2107                         // Test user permission
2108                         req = &http.Request{
2109                                 Method:     "GET",
2110                                 Host:       u.Host,
2111                                 URL:        u,
2112                                 RequestURI: u.RequestURI(),
2113                                 Header: http.Header{
2114                                         "Authorization": {"Bearer " + arvadostest.ActiveToken},
2115                                 },
2116                         }
2117                         s.checkUploadDownloadRequest(c, req, http.StatusOK, "download", userperm,
2118                                 arvadostest.ActiveUserUUID, arvadostest.FooCollection, arvadostest.FooCollectionPDH, "foo")
2119                 }
2120         }
2121
2122         s.handler.Cluster.Collections.WebDAVPermission.User.Download = true
2123
2124         for _, tryurl := range []string{"http://" + arvadostest.MultilevelCollection1 + ".keep-web.example/dir1/subdir/file1",
2125                 "http://keep-web/users/active/multilevel_collection_1/dir1/subdir/file1"} {
2126
2127                 u = mustParseURL(tryurl)
2128                 req := &http.Request{
2129                         Method:     "GET",
2130                         Host:       u.Host,
2131                         URL:        u,
2132                         RequestURI: u.RequestURI(),
2133                         Header: http.Header{
2134                                 "Authorization": {"Bearer " + arvadostest.ActiveToken},
2135                         },
2136                 }
2137                 s.checkUploadDownloadRequest(c, req, http.StatusOK, "download", true,
2138                         arvadostest.ActiveUserUUID, arvadostest.MultilevelCollection1, arvadostest.MultilevelCollection1PDH, "dir1/subdir/file1")
2139         }
2140
2141         u = mustParseURL("http://" + strings.Replace(arvadostest.FooCollectionPDH, "+", "-", 1) + ".keep-web.example/foo")
2142         req := &http.Request{
2143                 Method:     "GET",
2144                 Host:       u.Host,
2145                 URL:        u,
2146                 RequestURI: u.RequestURI(),
2147                 Header: http.Header{
2148                         "Authorization": {"Bearer " + arvadostest.ActiveToken},
2149                 },
2150         }
2151         s.checkUploadDownloadRequest(c, req, http.StatusOK, "download", true,
2152                 arvadostest.ActiveUserUUID, "", arvadostest.FooCollectionPDH, "foo")
2153 }
2154
2155 func (s *IntegrationSuite) TestUploadLoggingPermission(c *check.C) {
2156         for _, adminperm := range []bool{true, false} {
2157                 for _, userperm := range []bool{true, false} {
2158
2159                         arv := arvados.NewClientFromEnv()
2160                         arv.AuthToken = arvadostest.ActiveToken
2161
2162                         var coll arvados.Collection
2163                         err := arv.RequestAndDecode(&coll,
2164                                 "POST",
2165                                 "/arvados/v1/collections",
2166                                 nil,
2167                                 map[string]interface{}{
2168                                         "ensure_unique_name": true,
2169                                         "collection": map[string]interface{}{
2170                                                 "name": "test collection",
2171                                         },
2172                                 })
2173                         c.Assert(err, check.Equals, nil)
2174
2175                         u := mustParseURL("http://" + coll.UUID + ".keep-web.example/bar")
2176
2177                         s.handler.Cluster.Collections.WebDAVPermission.Admin.Upload = adminperm
2178                         s.handler.Cluster.Collections.WebDAVPermission.User.Upload = userperm
2179
2180                         // Test admin permission
2181                         req := &http.Request{
2182                                 Method:     "PUT",
2183                                 Host:       u.Host,
2184                                 URL:        u,
2185                                 RequestURI: u.RequestURI(),
2186                                 Header: http.Header{
2187                                         "Authorization": {"Bearer " + arvadostest.AdminToken},
2188                                 },
2189                                 Body: io.NopCloser(bytes.NewReader([]byte("bar"))),
2190                         }
2191                         s.checkUploadDownloadRequest(c, req, http.StatusCreated, "upload", adminperm,
2192                                 arvadostest.AdminUserUUID, coll.UUID, "", "bar")
2193
2194                         // Test user permission
2195                         req = &http.Request{
2196                                 Method:     "PUT",
2197                                 Host:       u.Host,
2198                                 URL:        u,
2199                                 RequestURI: u.RequestURI(),
2200                                 Header: http.Header{
2201                                         "Authorization": {"Bearer " + arvadostest.ActiveToken},
2202                                 },
2203                                 Body: io.NopCloser(bytes.NewReader([]byte("bar"))),
2204                         }
2205                         s.checkUploadDownloadRequest(c, req, http.StatusCreated, "upload", userperm,
2206                                 arvadostest.ActiveUserUUID, coll.UUID, "", "bar")
2207                 }
2208         }
2209 }
2210
2211 func (s *IntegrationSuite) serveAndLogRequests(c *check.C, reqs *map[*http.Request]int) *bytes.Buffer {
2212         logbuf, ctx := newLoggerAndContext()
2213         var wg sync.WaitGroup
2214         for req, expectStatus := range *reqs {
2215                 req := req.WithContext(ctx)
2216                 expectStatus := expectStatus
2217                 wg.Add(1)
2218                 go func() {
2219                         defer wg.Done()
2220                         resp := httptest.NewRecorder()
2221                         s.handler.ServeHTTP(resp, req)
2222                         c.Check(resp.Result().StatusCode, check.Equals, expectStatus)
2223                 }()
2224         }
2225         wg.Wait()
2226         return logbuf
2227 }
2228
2229 func countLogMatches(c *check.C, logbuf *bytes.Buffer, pattern string, matchCount int) bool {
2230         search, err := regexp.Compile(pattern)
2231         if !c.Check(err, check.IsNil, check.Commentf("failed to compile regexp: %v", err)) {
2232                 return false
2233         }
2234         matches := search.FindAll(logbuf.Bytes(), -1)
2235         return c.Check(matches, check.HasLen, matchCount,
2236                 check.Commentf("%d matching log messages: %+v", len(matches), matches))
2237 }
2238
2239 func (s *IntegrationSuite) TestLogThrottling(c *check.C) {
2240         s.handler.Cluster.Collections.WebDAVLogDownloadInterval = arvados.Duration(time.Hour)
2241         fooURL := "http://" + arvadostest.FooCollection + ".keep-web.example/foo"
2242         req := newRequest("GET", fooURL)
2243         req.Header.Set("Authorization", "Bearer "+arvadostest.ActiveToken)
2244         pattern := `\bmsg="File download".* collection_file_path=foo\b`
2245
2246         // All these requests get byte zero and should be logged.
2247         reqs := make(map[*http.Request]int)
2248         reqs[req] = http.StatusOK
2249         for _, byterange := range []string{"0-2", "0-1", "0-", "-3"} {
2250                 req := req.Clone(context.Background())
2251                 req.Header.Set("Range", "bytes="+byterange)
2252                 reqs[req] = http.StatusPartialContent
2253         }
2254         logbuf := s.serveAndLogRequests(c, &reqs)
2255         countLogMatches(c, logbuf, pattern, len(reqs))
2256
2257         // None of these requests get byte zero so they should all be throttled
2258         // (now that we've made at least one request for byte zero).
2259         reqs = make(map[*http.Request]int)
2260         for _, byterange := range []string{"1-2", "1-", "2-", "-1", "-2"} {
2261                 req := req.Clone(context.Background())
2262                 req.Header.Set("Range", "bytes="+byterange)
2263                 reqs[req] = http.StatusPartialContent
2264         }
2265         logbuf = s.serveAndLogRequests(c, &reqs)
2266         countLogMatches(c, logbuf, pattern, 0)
2267 }
2268
2269 func (s *IntegrationSuite) TestLogThrottleInterval(c *check.C) {
2270         s.handler.Cluster.Collections.WebDAVLogDownloadInterval = arvados.Duration(time.Nanosecond)
2271         logbuf, ctx := newLoggerAndContext()
2272         req := newRequest("GET", "http://"+arvadostest.FooCollection+".keep-web.example/foo")
2273         req.Header.Set("Authorization", "Bearer "+arvadostest.ActiveToken)
2274         req = req.WithContext(ctx)
2275
2276         re := regexp.MustCompile(`\bmsg="File download".* collection_file_path=foo\b`)
2277         for expected := 1; expected < 4; expected++ {
2278                 time.Sleep(2 * time.Nanosecond)
2279                 resp := httptest.NewRecorder()
2280                 s.handler.ServeHTTP(resp, req)
2281                 c.Assert(resp.Result().StatusCode, check.Equals, http.StatusOK)
2282                 matches := re.FindAll(logbuf.Bytes(), -1)
2283                 c.Assert(matches, check.HasLen, expected,
2284                         check.Commentf("%d matching log messages: %+v", len(matches), matches))
2285         }
2286 }
2287
2288 func (s *IntegrationSuite) TestLogThrottleDifferentTokens(c *check.C) {
2289         s.handler.Cluster.Collections.WebDAVLogDownloadInterval = arvados.Duration(time.Hour)
2290         req := newRequest("GET", "http://"+arvadostest.FooCollection+".keep-web.example/foo")
2291         reqs := make(map[*http.Request]int)
2292         for _, token := range []string{arvadostest.ActiveToken, arvadostest.AdminToken} {
2293                 req := req.Clone(context.Background())
2294                 req.Header.Set("Authorization", "Bearer "+token)
2295                 reqs[req] = http.StatusOK
2296         }
2297         logbuf := s.serveAndLogRequests(c, &reqs)
2298         countLogMatches(c, logbuf, `\bmsg="File download".* collection_file_path=foo\b`, len(reqs))
2299 }
2300
2301 func (s *IntegrationSuite) TestLogThrottleDifferentFiles(c *check.C) {
2302         s.handler.Cluster.Collections.WebDAVLogDownloadInterval = arvados.Duration(time.Hour)
2303         baseURL := "http://" + arvadostest.MultilevelCollection1 + ".keep-web.example/"
2304         reqs := make(map[*http.Request]int)
2305         for _, filename := range []string{"file1", "file2", "file3"} {
2306                 req := newRequest("GET", baseURL+filename)
2307                 req.Header.Set("Authorization", "Bearer "+arvadostest.ActiveToken)
2308                 reqs[req] = http.StatusOK
2309         }
2310         logbuf := s.serveAndLogRequests(c, &reqs)
2311         countLogMatches(c, logbuf, `\bmsg="File download".* collection_uuid=`+arvadostest.MultilevelCollection1+`\b`, len(reqs))
2312 }
2313
2314 func (s *IntegrationSuite) TestLogThrottleDifferentSources(c *check.C) {
2315         s.handler.Cluster.Collections.WebDAVLogDownloadInterval = arvados.Duration(time.Hour)
2316         req := newRequest("GET", "http://"+arvadostest.FooCollection+".keep-web.example/foo")
2317         req.Header.Set("Authorization", "Bearer "+arvadostest.ActiveToken)
2318         reqs := make(map[*http.Request]int)
2319         reqs[req] = http.StatusOK
2320         for _, xff := range []string{"10.22.33.44", "100::123"} {
2321                 req := req.Clone(context.Background())
2322                 req.Header.Set("X-Forwarded-For", xff)
2323                 reqs[req] = http.StatusOK
2324         }
2325         logbuf := s.serveAndLogRequests(c, &reqs)
2326         countLogMatches(c, logbuf, `\bmsg="File download".* collection_file_path=foo\b`, len(reqs))
2327 }
2328
2329 func (s *IntegrationSuite) TestConcurrentWrites(c *check.C) {
2330         s.handler.Cluster.Collections.WebDAVCache.TTL = arvados.Duration(time.Second * 2)
2331         client := arvados.NewClientFromEnv()
2332         client.AuthToken = arvadostest.ActiveTokenV2
2333
2334         // Each file we upload will consist of some unique content
2335         // followed by 2 MiB of filler content.
2336         filler := "."
2337         for i := 0; i < 21; i++ {
2338                 filler += filler
2339         }
2340
2341         // Start small, and increase concurrency (2^2, 4^2, ...)
2342         // only until hitting failure. Avoids unnecessarily long
2343         // failure reports.
2344         for n := 2; n < 16 && !c.Failed(); n = n * 2 {
2345                 c.Logf("%s: n=%d", c.TestName(), n)
2346
2347                 var coll arvados.Collection
2348                 err := client.RequestAndDecode(&coll, "POST", "arvados/v1/collections", nil, nil)
2349                 c.Assert(err, check.IsNil)
2350                 defer client.RequestAndDecode(&coll, "DELETE", "arvados/v1/collections/"+coll.UUID, nil, nil)
2351
2352                 var wg sync.WaitGroup
2353                 for i := 0; i < n && !c.Failed(); i++ {
2354                         i := i
2355                         wg.Add(1)
2356                         go func() {
2357                                 defer wg.Done()
2358                                 _, resp := s.do("MKCOL", s.collectionURL(coll.UUID, fmt.Sprintf("i=%d", i)), client.AuthToken, nil, nil)
2359                                 c.Assert(resp.StatusCode, check.Equals, http.StatusCreated)
2360                                 for j := 0; j < n && !c.Failed(); j++ {
2361                                         j := j
2362                                         wg.Add(1)
2363                                         go func() {
2364                                                 defer wg.Done()
2365                                                 content := fmt.Sprintf("i=%d/j=%d", i, j)
2366                                                 _, resp := s.do("PUT", s.collectionURL(coll.UUID, content), client.AuthToken, nil, []byte(content+filler))
2367                                                 c.Check(resp.StatusCode, check.Equals, http.StatusCreated, check.Commentf("%s", content))
2368
2369                                                 time.Sleep(time.Second)
2370
2371                                                 _, resp = s.do("GET", s.collectionURL(coll.UUID, content), client.AuthToken, nil, nil)
2372                                                 c.Check(resp.StatusCode, check.Equals, http.StatusOK, check.Commentf("%s", content))
2373                                                 body, _ := io.ReadAll(resp.Body)
2374                                                 c.Check(strings.TrimSuffix(string(body), filler), check.Equals, content)
2375                                         }()
2376                                 }
2377                         }()
2378                 }
2379                 wg.Wait()
2380                 for i := 0; i < n; i++ {
2381                         _, resp := s.do("PROPFIND", s.collectionURL(coll.UUID, fmt.Sprintf("i=%d", i)), client.AuthToken, nil, nil)
2382                         c.Assert(resp.StatusCode, check.Equals, http.StatusMultiStatus)
2383                 }
2384         }
2385 }
2386
2387 func (s *IntegrationSuite) TestRepack(c *check.C) {
2388         client := arvados.NewClientFromEnv()
2389         client.AuthToken = arvadostest.ActiveTokenV2
2390
2391         // Each file we upload will consist of some unique content
2392         // followed by 1 MiB of filler content.
2393         filler := "."
2394         for i := 0; i < 20; i++ {
2395                 filler += filler
2396         }
2397
2398         var coll arvados.Collection
2399         err := client.RequestAndDecode(&coll, "POST", "arvados/v1/collections", nil, nil)
2400         c.Assert(err, check.IsNil)
2401         defer client.RequestAndDecode(&coll, "DELETE", "arvados/v1/collections/"+coll.UUID, nil, nil)
2402
2403         countblocks := func() int {
2404                 var current arvados.Collection
2405                 err = client.RequestAndDecode(&current, "GET", "arvados/v1/collections/"+coll.UUID, nil, nil)
2406                 c.Assert(err, check.IsNil)
2407                 block := map[string]bool{}
2408                 for _, hash := range regexp.MustCompile(` [0-9a-f]{32}`).FindAllString(current.ManifestText, -1) {
2409                         block[hash] = true
2410                 }
2411                 return len(block)
2412         }
2413
2414         throttle := make(chan bool, 8) // len(throttle) is max upload concurrency
2415         n := 5                         // nested loop below will write n^2 + 1 files
2416         var nfiles atomic.Int64
2417         var totalsize atomic.Int64
2418         var wg sync.WaitGroup
2419         for i := 0; i < n && !c.Failed(); i++ {
2420                 i := i
2421                 wg.Add(1)
2422                 go func() {
2423                         defer wg.Done()
2424                         throttle <- true
2425                         _, resp := s.do("MKCOL", s.collectionURL(coll.UUID, fmt.Sprintf("i=%d", i)), client.AuthToken, nil, nil)
2426                         <-throttle
2427                         c.Assert(resp.StatusCode, check.Equals, http.StatusCreated)
2428
2429                         for j := 0; j < n && !c.Failed(); j++ {
2430                                 j := j
2431                                 wg.Add(1)
2432                                 go func() {
2433                                         defer wg.Done()
2434                                         content := fmt.Sprintf("i=%d/j=%d", i, j)
2435                                         throttle <- true
2436                                         _, resp := s.do("PUT", s.collectionURL(coll.UUID, content), client.AuthToken, nil, []byte(content+filler))
2437                                         <-throttle
2438                                         c.Check(resp.StatusCode, check.Equals, http.StatusCreated, check.Commentf("%s", content))
2439                                         totalsize.Add(int64(len(content) + len(filler)))
2440                                         c.Logf("after writing %d files, manifest has %d blocks", nfiles.Add(1), countblocks())
2441                                 }()
2442                         }
2443                 }()
2444         }
2445         wg.Wait()
2446
2447         content := "lastfile"
2448         _, resp := s.do("PUT", s.collectionURL(coll.UUID, content), client.AuthToken, nil, []byte(content+filler))
2449         c.Check(resp.StatusCode, check.Equals, http.StatusCreated, check.Commentf("%s", content))
2450         nfiles.Add(1)
2451
2452         // Check that all files can still be retrieved
2453         for i := 0; i < n && !c.Failed(); i++ {
2454                 i := i
2455                 for j := 0; j < n && !c.Failed(); j++ {
2456                         j := j
2457                         wg.Add(1)
2458                         go func() {
2459                                 defer wg.Done()
2460                                 path := fmt.Sprintf("i=%d/j=%d", i, j)
2461
2462                                 _, resp := s.do("GET", s.collectionURL(coll.UUID, path), client.AuthToken, nil, nil)
2463                                 c.Check(resp.StatusCode, check.Equals, http.StatusOK, check.Commentf("%s", content))
2464                                 size, _ := io.Copy(io.Discard, resp.Body)
2465                                 c.Check(int(size), check.Equals, len(path)+len(filler))
2466                         }()
2467                 }
2468         }
2469         wg.Wait()
2470
2471         // Check that the final manifest has been repacked so average
2472         // block size is at least double the "small file" size
2473         nblocks := countblocks()
2474         c.Logf("nblocks == %d", nblocks)
2475         c.Logf("nfiles == %d", nfiles.Load())
2476         c.Check(nblocks < int(nfiles.Load()), check.Equals, true)
2477         c.Logf("totalsize == %d", totalsize.Load())
2478         meanblocksize := int(totalsize.Load()) / nblocks
2479         c.Logf("meanblocksize == %d", meanblocksize)
2480         minblocksize := 2 * int(totalsize.Load()) / int(nfiles.Load())
2481         c.Logf("expecting minblocksize %d", minblocksize)
2482         c.Check(meanblocksize >= minblocksize, check.Equals, true)
2483 }
2484
2485 func (s *IntegrationSuite) TestDepthHeader(c *check.C) {
2486         s.handler.Cluster.Collections.WebDAVCache.TTL = arvados.Duration(time.Second * 2)
2487         client := arvados.NewClientFromEnv()
2488         client.AuthToken = arvadostest.ActiveTokenV2
2489
2490         var coll arvados.Collection
2491         err := client.RequestAndDecode(&coll, "POST", "arvados/v1/collections", nil, nil)
2492         c.Assert(err, check.IsNil)
2493         defer client.RequestAndDecode(&coll, "DELETE", "arvados/v1/collections/"+coll.UUID, nil, nil)
2494         base := "http://" + coll.UUID + ".collections.example.com/"
2495
2496         for _, trial := range []struct {
2497                 method      string
2498                 path        string
2499                 destination string
2500                 depth       string
2501                 expectCode  int // 0 means expect 2xx
2502         }{
2503                 // setup...
2504                 {method: "MKCOL", path: "dir"},
2505                 {method: "PUT", path: "dir/file"},
2506                 {method: "MKCOL", path: "dir/dir2"},
2507                 // delete with no depth = OK
2508                 {method: "DELETE", path: "dir/dir2", depth: ""},
2509                 // delete with depth other than infinity = fail
2510                 {method: "DELETE", path: "dir", depth: "0", expectCode: 400},
2511                 {method: "DELETE", path: "dir", depth: "1", expectCode: 400},
2512                 // delete with depth infinity = OK
2513                 {method: "DELETE", path: "dir", depth: "infinity"},
2514
2515                 // setup...
2516                 {method: "MKCOL", path: "dir"},
2517                 {method: "PUT", path: "dir/file"},
2518                 {method: "MKCOL", path: "dir/dir2"},
2519                 // move with depth other than infinity = fail
2520                 {method: "MOVE", path: "dir", destination: "moved", depth: "0", expectCode: 400},
2521                 {method: "MOVE", path: "dir", destination: "moved", depth: "1", expectCode: 400},
2522                 // move with depth infinity = OK
2523                 {method: "MOVE", path: "dir", destination: "moved", depth: "infinity"},
2524                 {method: "DELETE", path: "moved"},
2525
2526                 // setup...
2527                 {method: "MKCOL", path: "dir"},
2528                 {method: "PUT", path: "dir/file"},
2529                 {method: "MKCOL", path: "dir/dir2"},
2530                 // copy with depth 0 = create empty destination dir
2531                 {method: "COPY", path: "dir/", destination: "copied-empty/", depth: "0"},
2532                 {method: "DELETE", path: "copied-empty/file", expectCode: 404},
2533                 {method: "DELETE", path: "copied-empty"},
2534                 // copy with depth 0 = create empty destination dir
2535                 // (destination dir has no trailing slash this time)
2536                 {method: "COPY", path: "dir/", destination: "copied-empty-noslash", depth: "0"},
2537                 {method: "DELETE", path: "copied-empty-noslash/file", expectCode: 404},
2538                 {method: "DELETE", path: "copied-empty-noslash"},
2539                 // copy with depth 0 = create empty destination dir
2540                 // (source dir has no trailing slash this time)
2541                 {method: "COPY", path: "dir", destination: "copied-empty-noslash", depth: "0"},
2542                 {method: "DELETE", path: "copied-empty-noslash/file", expectCode: 404},
2543                 {method: "DELETE", path: "copied-empty-noslash"},
2544                 // copy with depth 1 = fail
2545                 {method: "COPY", path: "dir", destination: "copied", depth: "1", expectCode: 400},
2546                 // copy with depth infinity = copy entire subtree
2547                 {method: "COPY", path: "dir/", destination: "copied", depth: "infinity"},
2548                 {method: "DELETE", path: "copied/file"},
2549                 {method: "DELETE", path: "copied"},
2550                 // copy with depth infinity = copy entire subtree
2551                 // (source dir has no trailing slash this time)
2552                 {method: "COPY", path: "dir", destination: "copied", depth: "infinity"},
2553                 {method: "DELETE", path: "copied/file"},
2554                 {method: "DELETE", path: "copied"},
2555                 // cleanup
2556                 {method: "DELETE", path: "dir"},
2557         } {
2558                 c.Logf("trial %+v", trial)
2559                 resp := httptest.NewRecorder()
2560                 req, err := http.NewRequest(trial.method, base+trial.path, strings.NewReader(""))
2561                 c.Assert(err, check.IsNil)
2562                 req.Header.Set("Authorization", "Bearer "+client.AuthToken)
2563                 if trial.destination != "" {
2564                         req.Header.Set("Destination", base+trial.destination)
2565                 }
2566                 if trial.depth != "" {
2567                         req.Header.Set("Depth", trial.depth)
2568                 }
2569                 s.handler.ServeHTTP(resp, req)
2570                 if trial.expectCode != 0 {
2571                         c.Assert(resp.Code, check.Equals, trial.expectCode)
2572                 } else {
2573                         c.Assert(resp.Code >= 200, check.Equals, true, check.Commentf("got code %d", resp.Code))
2574                         c.Assert(resp.Code < 300, check.Equals, true, check.Commentf("got code %d", resp.Code))
2575                 }
2576                 c.Logf("resp.Body: %q", resp.Body.String())
2577         }
2578 }