]> git.arvados.org - arvados.git/blob - services/keep-web/handler_test.go
23044: De-dup ContainerWebServices routing logic.
[arvados.git] / services / keep-web / handler_test.go
1 // Copyright (C) The Arvados Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: AGPL-3.0
4
5 package keepweb
6
7 import (
8         "bytes"
9         "context"
10         "fmt"
11         "io"
12         "io/ioutil"
13         "net/http"
14         "net/http/httptest"
15         "net/url"
16         "os"
17         "path/filepath"
18         "regexp"
19         "strings"
20         "sync"
21         "sync/atomic"
22         "time"
23
24         "git.arvados.org/arvados.git/lib/config"
25         "git.arvados.org/arvados.git/sdk/go/arvados"
26         "git.arvados.org/arvados.git/sdk/go/arvadosclient"
27         "git.arvados.org/arvados.git/sdk/go/arvadostest"
28         "git.arvados.org/arvados.git/sdk/go/auth"
29         "git.arvados.org/arvados.git/sdk/go/ctxlog"
30         "git.arvados.org/arvados.git/sdk/go/keepclient"
31         "github.com/prometheus/client_golang/prometheus"
32         "github.com/sirupsen/logrus"
33         "golang.org/x/net/html"
34         check "gopkg.in/check.v1"
35 )
36
37 var _ = check.Suite(&UnitSuite{})
38
39 func init() {
40         arvados.DebugLocksPanicMode = true
41 }
42
43 type UnitSuite struct {
44         cluster *arvados.Cluster
45         handler *handler
46 }
47
48 func (s *UnitSuite) SetUpTest(c *check.C) {
49         logger := ctxlog.TestLogger(c)
50         ldr := config.NewLoader(&bytes.Buffer{}, logger)
51         cfg, err := ldr.Load()
52         c.Assert(err, check.IsNil)
53         cc, err := cfg.GetCluster("")
54         c.Assert(err, check.IsNil)
55         s.cluster = cc
56         s.handler = &handler{
57                 Cluster: cc,
58                 Cache: cache{
59                         cluster:  cc,
60                         logger:   logger,
61                         registry: prometheus.NewRegistry(),
62                 },
63                 metrics: newMetrics(prometheus.NewRegistry()),
64         }
65 }
66
67 func newCollection(collID string) *arvados.Collection {
68         coll := &arvados.Collection{UUID: collID}
69         manifestKey := collID
70         if pdh, ok := arvadostest.TestCollectionUUIDToPDH[collID]; ok {
71                 coll.PortableDataHash = pdh
72                 manifestKey = pdh
73         }
74         if mtext, ok := arvadostest.TestCollectionPDHToManifest[manifestKey]; ok {
75                 coll.ManifestText = mtext
76         }
77         return coll
78 }
79
80 func newRequest(method, urlStr string) *http.Request {
81         u := mustParseURL(urlStr)
82         return &http.Request{
83                 Method:     method,
84                 Host:       u.Host,
85                 URL:        u,
86                 RequestURI: u.RequestURI(),
87                 RemoteAddr: "10.20.30.40:56789",
88                 Header:     http.Header{},
89         }
90 }
91
92 func newLoggerAndContext() (*bytes.Buffer, context.Context) {
93         var logbuf bytes.Buffer
94         logger := logrus.New()
95         logger.Out = &logbuf
96         return &logbuf, ctxlog.Context(context.Background(), logger)
97 }
98
99 func (s *UnitSuite) TestLogEventTypes(c *check.C) {
100         collURL := "http://keep-web.example/c=" + arvadostest.FooCollection
101         for method, expected := range map[string]string{
102                 "GET":  "file_download",
103                 "POST": "file_upload",
104                 "PUT":  "file_upload",
105         } {
106                 filePath := "/" + method
107                 req := newRequest(method, collURL+filePath)
108                 actual := newFileEventLog(s.handler, req, filePath, 1, nil, nil, "")
109                 if !c.Check(actual, check.NotNil) {
110                         continue
111                 }
112                 c.Check(actual.eventType, check.Equals, expected)
113         }
114 }
115
116 func (s *UnitSuite) TestUnloggedEventTypes(c *check.C) {
117         collURL := "http://keep-web.example/c=" + arvadostest.FooCollection
118         for _, method := range []string{"DELETE", "HEAD", "OPTIONS", "PATCH"} {
119                 filePath := "/" + method
120                 req := newRequest(method, collURL+filePath)
121                 actual := newFileEventLog(s.handler, req, filePath, 1, nil, nil, "")
122                 c.Check(actual, check.IsNil,
123                         check.Commentf("%s request made a log event", method))
124         }
125 }
126
127 func (s *UnitSuite) TestLogFilePath(c *check.C) {
128         coll := newCollection(arvadostest.FooCollection)
129         collURL := "http://keep-web.example/c=" + arvadostest.FooCollection
130         for _, filePath := range []string{"/foo", "/Foo", "/foo/bar"} {
131                 req := newRequest("GET", collURL+filePath)
132                 actual := newFileEventLog(s.handler, req, filePath, 1, coll, nil, "")
133                 if !c.Check(actual, check.NotNil) {
134                         continue
135                 }
136                 c.Check(actual.collFilePath, check.Equals, filePath)
137         }
138 }
139
140 func (s *UnitSuite) TestLogRemoteAddr(c *check.C) {
141         collURL := "http://keep-web.example/c=" + arvadostest.FooCollection
142         filePath := "/foo"
143         req := newRequest("GET", collURL+filePath)
144
145         for _, addr := range []string{"10.20.30.55", "192.168.144.120", "192.0.2.4"} {
146                 req.RemoteAddr = addr + ":57914"
147                 actual := newFileEventLog(s.handler, req, filePath, 1, nil, nil, "")
148                 if !c.Check(actual, check.NotNil) {
149                         continue
150                 }
151                 c.Check(actual.clientAddr, check.Equals, addr)
152         }
153
154         for _, addr := range []string{"100::20:30:40", "2001:db8::90:100", "3fff::30"} {
155                 req.RemoteAddr = fmt.Sprintf("[%s]:57916", addr)
156                 actual := newFileEventLog(s.handler, req, filePath, 1, nil, nil, "")
157                 if !c.Check(actual, check.NotNil) {
158                         continue
159                 }
160                 c.Check(actual.clientAddr, check.Equals, addr)
161         }
162 }
163
164 func (s *UnitSuite) TestLogXForwardedFor(c *check.C) {
165         collURL := "http://keep-web.example/c=" + arvadostest.FooCollection
166         filePath := "/foo"
167         req := newRequest("GET", collURL+filePath)
168         for xff, expected := range map[string]string{
169                 "10.20.30.55":                          "10.20.30.55",
170                 "192.168.144.120, 10.20.30.120":        "10.20.30.120",
171                 "192.0.2.4, 192.0.2.6, 192.0.2.8":      "192.0.2.8",
172                 "192.0.2.4,192.168.2.4":                "192.168.2.4",
173                 "10.20.30.60,192.168.144.40,192.0.2.4": "192.0.2.4",
174                 "100::20:30:50":                        "100::20:30:50",
175                 "2001:db8::80:90, 100::100":            "100::100",
176                 "3fff::ff, 3fff::ee, 3fff::fe":         "3fff::fe",
177                 "3fff::3f,100::1000":                   "100::1000",
178                 "2001:db8::88,100::88,3fff::88":        "3fff::88",
179                 "10.20.30.60, 2001:db8::60":            "2001:db8::60",
180                 "2001:db8::20,10.20.30.20":             "10.20.30.20",
181                 ", 10.20.30.123, 100::123":             "100::123",
182                 ",100::321,10.30.20.10":                "10.30.20.10",
183         } {
184                 req.Header.Set("X-Forwarded-For", xff)
185                 actual := newFileEventLog(s.handler, req, filePath, 1, nil, nil, "")
186                 if !c.Check(actual, check.NotNil) {
187                         continue
188                 }
189                 c.Check(actual.clientAddr, check.Equals, expected)
190         }
191 }
192
193 func (s *UnitSuite) TestLogXForwardedForMalformed(c *check.C) {
194         collURL := "http://keep-web.example/c=" + arvadostest.FooCollection
195         filePath := "/foo"
196         req := newRequest("GET", collURL+filePath)
197         for _, xff := range []string{"", ",", "10.20,30.40", "foo, bar"} {
198                 req.Header.Set("X-Forwarded-For", xff)
199                 actual := newFileEventLog(s.handler, req, filePath, 1, nil, nil, "")
200                 if !c.Check(actual, check.NotNil) {
201                         continue
202                 }
203                 c.Check(actual.clientAddr, check.Equals, "10.20.30.40")
204         }
205 }
206
207 func (s *UnitSuite) TestLogXForwardedForMultivalue(c *check.C) {
208         collURL := "http://keep-web.example/c=" + arvadostest.FooCollection
209         filePath := "/foo"
210         req := newRequest("GET", collURL+filePath)
211         req.Header.Set("X-Forwarded-For", ", ")
212         req.Header.Add("X-Forwarded-For", "2001:db8::db9:dbd")
213         req.Header.Add("X-Forwarded-For", "10.20.30.90")
214         actual := newFileEventLog(s.handler, req, filePath, 1, nil, nil, "")
215         c.Assert(actual, check.NotNil)
216         c.Check(actual.clientAddr, check.Equals, "10.20.30.90")
217 }
218
219 func (s *UnitSuite) TestLogClientAddressCanonicalization(c *check.C) {
220         collURL := "http://keep-web.example/c=" + arvadostest.FooCollection
221         filePath := "/foo"
222         req := newRequest("GET", collURL+filePath)
223         expected := "2001:db8::12:0"
224
225         req.RemoteAddr = "[2001:db8::012:0000]:57918"
226         a := newFileEventLog(s.handler, req, filePath, 1, nil, nil, "")
227         c.Assert(a, check.NotNil)
228         c.Check(a.clientAddr, check.Equals, expected)
229
230         req.RemoteAddr = "10.20.30.40:57919"
231         req.Header.Set("X-Forwarded-For", "2001:db8:0::0:12:00")
232         b := newFileEventLog(s.handler, req, filePath, 1, nil, nil, "")
233         c.Assert(b, check.NotNil)
234         c.Check(b.clientAddr, check.Equals, expected)
235 }
236
237 func (s *UnitSuite) TestLogAnonymousUser(c *check.C) {
238         collURL := "http://keep-web.example/c=" + arvadostest.FooCollection
239         filePath := "/foo"
240         req := newRequest("GET", collURL+filePath)
241         actual := newFileEventLog(s.handler, req, filePath, 1, nil, nil, arvadostest.AnonymousToken)
242         c.Assert(actual, check.NotNil)
243         c.Check(actual.userUUID, check.Equals, s.handler.Cluster.ClusterID+"-tpzed-anonymouspublic")
244         c.Check(actual.userFullName, check.Equals, "")
245         c.Check(actual.clientToken, check.Equals, arvadostest.AnonymousToken)
246 }
247
248 func (s *UnitSuite) TestLogUser(c *check.C) {
249         collURL := "http://keep-web.example/c=" + arvadostest.FooCollection
250         for _, trial := range []struct{ uuid, fullName, token string }{
251                 {arvadostest.ActiveUserUUID, "Active User", arvadostest.ActiveToken},
252                 {arvadostest.SpectatorUserUUID, "Spectator User", arvadostest.SpectatorToken},
253         } {
254                 filePath := "/" + trial.uuid
255                 req := newRequest("GET", collURL+filePath)
256                 user := &arvados.User{
257                         UUID:     trial.uuid,
258                         FullName: trial.fullName,
259                 }
260                 actual := newFileEventLog(s.handler, req, filePath, 1, nil, user, trial.token)
261                 if !c.Check(actual, check.NotNil) {
262                         continue
263                 }
264                 c.Check(actual.userUUID, check.Equals, trial.uuid)
265                 c.Check(actual.userFullName, check.Equals, trial.fullName)
266                 c.Check(actual.clientToken, check.Equals, trial.token)
267         }
268 }
269
270 func (s *UnitSuite) TestLogCollectionByUUID(c *check.C) {
271         for collUUID, collPDH := range arvadostest.TestCollectionUUIDToPDH {
272                 collURL := "http://keep-web.example/c=" + collUUID
273                 filePath := "/" + collUUID
274                 req := newRequest("GET", collURL+filePath)
275                 coll := newCollection(collUUID)
276                 actual := newFileEventLog(s.handler, req, filePath, 1, coll, nil, "")
277                 if !c.Check(actual, check.NotNil) {
278                         continue
279                 }
280                 c.Check(actual.collUUID, check.Equals, collUUID)
281                 c.Check(actual.collPDH, check.Equals, collPDH)
282         }
283 }
284
285 func (s *UnitSuite) TestLogCollectionByPDH(c *check.C) {
286         for _, collPDH := range arvadostest.TestCollectionUUIDToPDH {
287                 collURL := "http://keep-web.example/c=" + collPDH
288                 filePath := "/PDHFile"
289                 req := newRequest("GET", collURL+filePath)
290                 coll := newCollection(collPDH)
291                 actual := newFileEventLog(s.handler, req, filePath, 1, coll, nil, "")
292                 if !c.Check(actual, check.NotNil) {
293                         continue
294                 }
295                 c.Check(actual.collPDH, check.Equals, collPDH)
296                 c.Check(actual.collUUID, check.Equals, "")
297         }
298 }
299
300 func (s *UnitSuite) TestLogGETUUIDAsDict(c *check.C) {
301         filePath := "/foo"
302         reqPath := "/c=" + arvadostest.FooCollection + filePath
303         req := newRequest("GET", "http://keep-web.example"+reqPath)
304         coll := newCollection(arvadostest.FooCollection)
305         logEvent := newFileEventLog(s.handler, req, filePath, 1, coll, nil, "")
306         c.Assert(logEvent, check.NotNil)
307         c.Check(logEvent.asDict(), check.DeepEquals, arvadosclient.Dict{
308                 "event_type":  "file_download",
309                 "object_uuid": s.handler.Cluster.ClusterID + "-tpzed-anonymouspublic",
310                 "properties": arvadosclient.Dict{
311                         "reqPath":              reqPath,
312                         "collection_uuid":      arvadostest.FooCollection,
313                         "collection_file_path": filePath,
314                         "file_count":           1,
315                         "portable_data_hash":   arvadostest.FooCollectionPDH,
316                 },
317         })
318 }
319
320 func (s *UnitSuite) TestLogGETPDHAsDict(c *check.C) {
321         filePath := "/Foo"
322         reqPath := "/c=" + arvadostest.FooCollectionPDH + filePath
323         req := newRequest("GET", "http://keep-web.example"+reqPath)
324         coll := newCollection(arvadostest.FooCollectionPDH)
325         user := &arvados.User{
326                 UUID:     arvadostest.ActiveUserUUID,
327                 FullName: "Active User",
328         }
329         logEvent := newFileEventLog(s.handler, req, filePath, 1, coll, user, "")
330         c.Assert(logEvent, check.NotNil)
331         c.Check(logEvent.asDict(), check.DeepEquals, arvadosclient.Dict{
332                 "event_type":  "file_download",
333                 "object_uuid": arvadostest.ActiveUserUUID,
334                 "properties": arvadosclient.Dict{
335                         "reqPath":              reqPath,
336                         "portable_data_hash":   arvadostest.FooCollectionPDH,
337                         "collection_uuid":      "",
338                         "collection_file_path": filePath,
339                         "file_count":           1,
340                 },
341         })
342 }
343
344 func (s *UnitSuite) TestLogUploadAsDict(c *check.C) {
345         coll := newCollection(arvadostest.FooCollection)
346         user := &arvados.User{
347                 UUID:     arvadostest.ActiveUserUUID,
348                 FullName: "Active User",
349         }
350         for _, method := range []string{"POST", "PUT"} {
351                 filePath := "/" + method + "File"
352                 reqPath := "/c=" + arvadostest.FooCollection + filePath
353                 req := newRequest(method, "http://keep-web.example"+reqPath)
354                 logEvent := newFileEventLog(s.handler, req, filePath, 1, coll, user, "")
355                 if !c.Check(logEvent, check.NotNil) {
356                         continue
357                 }
358                 c.Check(logEvent.asDict(), check.DeepEquals, arvadosclient.Dict{
359                         "event_type":  "file_upload",
360                         "object_uuid": arvadostest.ActiveUserUUID,
361                         "properties": arvadosclient.Dict{
362                                 "reqPath":              reqPath,
363                                 "collection_uuid":      arvadostest.FooCollection,
364                                 "collection_file_path": filePath,
365                                 "file_count":           1,
366                         },
367                 })
368         }
369 }
370
371 func (s *UnitSuite) TestLogGETUUIDAsFields(c *check.C) {
372         filePath := "/foo"
373         reqPath := "/c=" + arvadostest.FooCollection + filePath
374         req := newRequest("GET", "http://keep-web.example"+reqPath)
375         coll := newCollection(arvadostest.FooCollection)
376         logEvent := newFileEventLog(s.handler, req, filePath, 1, coll, nil, "")
377         c.Assert(logEvent, check.NotNil)
378         c.Check(logEvent.asFields(), check.DeepEquals, logrus.Fields{
379                 "user_uuid":            s.handler.Cluster.ClusterID + "-tpzed-anonymouspublic",
380                 "collection_uuid":      arvadostest.FooCollection,
381                 "collection_file_path": filePath,
382                 "file_count":           1,
383                 "portable_data_hash":   arvadostest.FooCollectionPDH,
384         })
385 }
386
387 func (s *UnitSuite) TestLogGETPDHAsFields(c *check.C) {
388         filePath := "/Foo"
389         reqPath := "/c=" + arvadostest.FooCollectionPDH + filePath
390         req := newRequest("GET", "http://keep-web.example"+reqPath)
391         coll := newCollection(arvadostest.FooCollectionPDH)
392         user := &arvados.User{
393                 UUID:     arvadostest.ActiveUserUUID,
394                 FullName: "Active User",
395         }
396         logEvent := newFileEventLog(s.handler, req, filePath, 1, coll, user, "")
397         c.Assert(logEvent, check.NotNil)
398         c.Check(logEvent.asFields(), check.DeepEquals, logrus.Fields{
399                 "user_uuid":            arvadostest.ActiveUserUUID,
400                 "user_full_name":       "Active User",
401                 "collection_uuid":      "",
402                 "collection_file_path": filePath,
403                 "file_count":           1,
404                 "portable_data_hash":   arvadostest.FooCollectionPDH,
405         })
406 }
407
408 func (s *UnitSuite) TestLogUploadAsFields(c *check.C) {
409         coll := newCollection(arvadostest.FooCollection)
410         user := &arvados.User{
411                 UUID:     arvadostest.ActiveUserUUID,
412                 FullName: "Active User",
413         }
414         for _, method := range []string{"POST", "PUT"} {
415                 filePath := "/" + method + "File"
416                 reqPath := "/c=" + arvadostest.FooCollection + filePath
417                 req := newRequest(method, "http://keep-web.example"+reqPath)
418                 logEvent := newFileEventLog(s.handler, req, filePath, 1, coll, user, "")
419                 if !c.Check(logEvent, check.NotNil) {
420                         continue
421                 }
422                 c.Check(logEvent.asFields(), check.DeepEquals, logrus.Fields{
423                         "user_uuid":            arvadostest.ActiveUserUUID,
424                         "user_full_name":       "Active User",
425                         "collection_uuid":      arvadostest.FooCollection,
426                         "collection_file_path": filePath,
427                         "file_count":           1,
428                 })
429         }
430 }
431
432 func (s *UnitSuite) TestCORSPreflight(c *check.C) {
433         h := s.handler
434         u := mustParseURL("http://keep-web.example/c=" + arvadostest.FooCollection + "/foo")
435         req := &http.Request{
436                 Method:     "OPTIONS",
437                 Host:       u.Host,
438                 URL:        u,
439                 RequestURI: u.RequestURI(),
440                 Header: http.Header{
441                         "Origin":                        {"https://workbench.example"},
442                         "Access-Control-Request-Method": {"POST"},
443                 },
444         }
445
446         // Check preflight for an allowed request
447         resp := httptest.NewRecorder()
448         h.ServeHTTP(resp, req)
449         c.Check(resp.Code, check.Equals, http.StatusOK)
450         c.Check(resp.Body.String(), check.Equals, "")
451         c.Check(resp.Header().Get("Access-Control-Allow-Origin"), check.Equals, "*")
452         c.Check(resp.Header().Get("Access-Control-Allow-Methods"), check.Equals, "COPY, DELETE, GET, LOCK, MKCOL, MOVE, OPTIONS, POST, PROPFIND, PROPPATCH, PUT, RMCOL, UNLOCK")
453         c.Check(resp.Header().Get("Access-Control-Allow-Headers"), check.Equals, "Authorization, Content-Type, Range, Depth, Destination, If, Lock-Token, Overwrite, Timeout, Cache-Control")
454
455         // Check preflight for a disallowed request
456         resp = httptest.NewRecorder()
457         req.Header.Set("Access-Control-Request-Method", "MAKE-COFFEE")
458         h.ServeHTTP(resp, req)
459         c.Check(resp.Body.String(), check.Equals, "")
460         c.Check(resp.Code, check.Equals, http.StatusMethodNotAllowed)
461 }
462
463 func (s *UnitSuite) TestWebdavPrefixAndSource(c *check.C) {
464         for _, trial := range []struct {
465                 method   string
466                 path     string
467                 prefix   string
468                 source   string
469                 notFound bool
470                 seeOther bool
471         }{
472                 {
473                         method: "PROPFIND",
474                         path:   "/",
475                 },
476                 {
477                         method: "PROPFIND",
478                         path:   "/dir1",
479                 },
480                 {
481                         method: "PROPFIND",
482                         path:   "/dir1/",
483                 },
484                 {
485                         method: "PROPFIND",
486                         path:   "/dir1/foo",
487                         prefix: "/dir1",
488                         source: "/dir1",
489                 },
490                 {
491                         method: "PROPFIND",
492                         path:   "/prefix/dir1/foo",
493                         prefix: "/prefix/",
494                         source: "",
495                 },
496                 {
497                         method: "PROPFIND",
498                         path:   "/prefix/dir1/foo",
499                         prefix: "/prefix",
500                         source: "",
501                 },
502                 {
503                         method: "PROPFIND",
504                         path:   "/prefix/dir1/foo",
505                         prefix: "/prefix/",
506                         source: "/",
507                 },
508                 {
509                         method: "PROPFIND",
510                         path:   "/prefix/foo",
511                         prefix: "/prefix/",
512                         source: "/dir1/",
513                 },
514                 {
515                         method: "GET",
516                         path:   "/prefix/foo",
517                         prefix: "/prefix/",
518                         source: "/dir1/",
519                 },
520                 {
521                         method: "PROPFIND",
522                         path:   "/prefix/",
523                         prefix: "/prefix",
524                         source: "/dir1",
525                 },
526                 {
527                         method: "PROPFIND",
528                         path:   "/prefix",
529                         prefix: "/prefix",
530                         source: "/dir1/",
531                 },
532                 {
533                         method:   "GET",
534                         path:     "/prefix",
535                         prefix:   "/prefix",
536                         source:   "/dir1",
537                         seeOther: true,
538                 },
539                 {
540                         method:   "PROPFIND",
541                         path:     "/dir1/foo",
542                         prefix:   "",
543                         source:   "/dir1",
544                         notFound: true,
545                 },
546         } {
547                 c.Logf("trial %+v", trial)
548                 u := mustParseURL("http://" + arvadostest.FooBarDirCollection + ".keep-web.example" + trial.path)
549                 req := &http.Request{
550                         Method:     trial.method,
551                         Host:       u.Host,
552                         URL:        u,
553                         RequestURI: u.RequestURI(),
554                         Header: http.Header{
555                                 "Authorization":   {"Bearer " + arvadostest.ActiveTokenV2},
556                                 "X-Webdav-Prefix": {trial.prefix},
557                                 "X-Webdav-Source": {trial.source},
558                         },
559                         Body: ioutil.NopCloser(bytes.NewReader(nil)),
560                 }
561
562                 resp := httptest.NewRecorder()
563                 s.handler.ServeHTTP(resp, req)
564                 if trial.notFound {
565                         c.Check(resp.Code, check.Equals, http.StatusNotFound)
566                 } else if trial.method == "PROPFIND" {
567                         c.Check(resp.Code, check.Equals, http.StatusMultiStatus)
568                         c.Check(resp.Body.String(), check.Matches, `(?ms).*>\n?$`)
569                 } else if trial.seeOther {
570                         c.Check(resp.Code, check.Equals, http.StatusSeeOther)
571                 } else {
572                         c.Check(resp.Code, check.Equals, http.StatusOK)
573                 }
574         }
575 }
576
577 func (s *UnitSuite) TestEmptyResponse(c *check.C) {
578         // Ensure we start with an empty cache
579         defer os.Setenv("HOME", os.Getenv("HOME"))
580         os.Setenv("HOME", c.MkDir())
581         s.handler.Cluster.Collections.WebDAVLogDownloadInterval = arvados.Duration(0)
582
583         for _, trial := range []struct {
584                 dataExists    bool
585                 sendIMSHeader bool
586                 expectStatus  int
587                 logRegexp     string
588         }{
589                 // If we return no content due to a Keep read error,
590                 // we should emit a log message.
591                 {false, false, http.StatusOK, `(?ms).*only wrote 0 bytes.*`},
592
593                 // If we return no content because the client sent an
594                 // If-Modified-Since header, our response should be
595                 // 304.  We still expect a "File download" log since it
596                 // counts as a file access for auditing.
597                 {true, true, http.StatusNotModified, `(?ms).*msg="File download".*`},
598         } {
599                 c.Logf("trial: %+v", trial)
600                 arvadostest.StartKeep(2, true)
601                 if trial.dataExists {
602                         arv, err := arvadosclient.MakeArvadosClient()
603                         c.Assert(err, check.IsNil)
604                         arv.ApiToken = arvadostest.ActiveToken
605                         kc, err := keepclient.MakeKeepClient(arv)
606                         c.Assert(err, check.IsNil)
607                         _, _, err = kc.PutB([]byte("foo"))
608                         c.Assert(err, check.IsNil)
609                 }
610
611                 u := mustParseURL("http://" + arvadostest.FooCollection + ".keep-web.example/foo")
612                 req := &http.Request{
613                         Method:     "GET",
614                         Host:       u.Host,
615                         URL:        u,
616                         RequestURI: u.RequestURI(),
617                         Header: http.Header{
618                                 "Authorization": {"Bearer " + arvadostest.ActiveToken},
619                         },
620                 }
621                 if trial.sendIMSHeader {
622                         req.Header.Set("If-Modified-Since", strings.Replace(time.Now().UTC().Format(time.RFC1123), "UTC", "GMT", -1))
623                 }
624
625                 var logbuf bytes.Buffer
626                 logger := logrus.New()
627                 logger.Out = &logbuf
628                 req = req.WithContext(ctxlog.Context(context.Background(), logger))
629
630                 resp := httptest.NewRecorder()
631                 s.handler.ServeHTTP(resp, req)
632                 c.Check(resp.Code, check.Equals, trial.expectStatus)
633                 c.Check(resp.Body.String(), check.Equals, "")
634
635                 c.Log(logbuf.String())
636                 c.Check(logbuf.String(), check.Matches, trial.logRegexp)
637         }
638 }
639
640 func (s *UnitSuite) TestInvalidUUID(c *check.C) {
641         bogusID := strings.Replace(arvadostest.FooCollectionPDH, "+", "-", 1) + "-"
642         token := arvadostest.ActiveToken
643         for _, trial := range []string{
644                 "http://keep-web/c=" + bogusID + "/foo",
645                 "http://keep-web/c=" + bogusID + "/t=" + token + "/foo",
646                 "http://keep-web/collections/download/" + bogusID + "/" + token + "/foo",
647                 "http://keep-web/collections/" + bogusID + "/foo",
648                 "http://" + bogusID + ".keep-web/" + bogusID + "/foo",
649                 "http://" + bogusID + ".keep-web/t=" + token + "/" + bogusID + "/foo",
650         } {
651                 c.Log(trial)
652                 u := mustParseURL(trial)
653                 req := &http.Request{
654                         Method:     "GET",
655                         Host:       u.Host,
656                         URL:        u,
657                         RequestURI: u.RequestURI(),
658                 }
659                 resp := httptest.NewRecorder()
660                 s.cluster.Users.AnonymousUserToken = arvadostest.AnonymousToken
661                 s.handler.ServeHTTP(resp, req)
662                 c.Check(resp.Code, check.Equals, http.StatusNotFound)
663         }
664 }
665
// mustParseURL parses s with url.Parse and returns the result.
//
// It is a test helper for URLs that are expected to be valid: if
// parsing fails it panics with a message that starts with
// "parse URL: " followed by s and the underlying parse error, so the
// reason for the failure is visible in the test output.
func mustParseURL(s string) *url.URL {
	r, err := url.Parse(s)
	if err != nil {
		panic("parse URL: " + s + ": " + err.Error())
	}
	return r
}
673
674 func (s *IntegrationSuite) TestVhost404(c *check.C) {
675         for _, testURL := range []string{
676                 arvadostest.NonexistentCollection + ".example.com/theperthcountyconspiracy",
677                 arvadostest.NonexistentCollection + ".example.com/t=" + arvadostest.ActiveToken + "/theperthcountyconspiracy",
678         } {
679                 resp := httptest.NewRecorder()
680                 u := mustParseURL(testURL)
681                 req := &http.Request{
682                         Method:     "GET",
683                         URL:        u,
684                         RequestURI: u.RequestURI(),
685                 }
686                 s.handler.ServeHTTP(resp, req)
687                 c.Check(resp.Code, check.Equals, http.StatusNotFound)
688                 c.Check(resp.Body.String(), check.Equals, notFoundMessage+"\n")
689         }
690 }
691
// An authorizer modifies an HTTP request to make use of the given
// token -- by adding it to a header, cookie, query param, or whatever
// -- and returns the HTTP status code we should expect from keep-web if
// the token is invalid.
//
// The request is modified in place; the string argument is the token
// to present.
type authorizer func(*http.Request, string) int
697
// TestVhostViaAuthzHeaderOAuth2 runs the doVhostRequests trials with
// the token presented by authzViaAuthzHeaderOAuth2.
//
// We still need to accept "OAuth2 ..." as equivalent to "Bearer ..."
// for compatibility with older clients, including SDKs before 3.0.
func (s *IntegrationSuite) TestVhostViaAuthzHeaderOAuth2(c *check.C) {
	s.doVhostRequests(c, authzViaAuthzHeaderOAuth2)
}
// authzViaAuthzHeaderOAuth2 is an authorizer that appends an
// "Authorization: OAuth2 <tok>" header value to r. It reports 401
// Unauthorized as the status to expect when the token is invalid.
func authzViaAuthzHeaderOAuth2(r *http.Request, tok string) int {
	const scheme = "OAuth2"
	r.Header.Add("Authorization", scheme+" "+tok)
	return http.StatusUnauthorized
}
707
// TestVhostViaAuthzHeaderBearer runs the doVhostRequests trials with
// the token presented by authzViaAuthzHeaderBearer.
func (s *IntegrationSuite) TestVhostViaAuthzHeaderBearer(c *check.C) {
	s.doVhostRequests(c, authzViaAuthzHeaderBearer)
}
// authzViaAuthzHeaderBearer is an authorizer that appends an
// "Authorization: Bearer <tok>" header value to r. It reports 401
// Unauthorized as the status to expect when the token is invalid.
func authzViaAuthzHeaderBearer(r *http.Request, tok string) int {
	const scheme = "Bearer"
	r.Header.Add("Authorization", scheme+" "+tok)
	return http.StatusUnauthorized
}
715
// TestVhostViaCookieValue runs the doVhostRequests trials with the
// token presented by authzViaCookieValue.
func (s *IntegrationSuite) TestVhostViaCookieValue(c *check.C) {
	s.doVhostRequests(c, authzViaCookieValue)
}
719 func authzViaCookieValue(r *http.Request, tok string) int {
720         r.AddCookie(&http.Cookie{
721                 Name:  "arvados_api_token",
722                 Value: auth.EncodeTokenCookie([]byte(tok)),
723         })
724         return http.StatusUnauthorized
725 }
726
// TestVhostViaHTTPBasicAuth runs the doVhostRequests trials with the
// token presented by authzViaHTTPBasicAuth.
func (s *IntegrationSuite) TestVhostViaHTTPBasicAuth(c *check.C) {
	s.doVhostRequests(c, authzViaHTTPBasicAuth)
}
730 func authzViaHTTPBasicAuth(r *http.Request, tok string) int {
731         r.AddCookie(&http.Cookie{
732                 Name:  "arvados_api_token",
733                 Value: auth.EncodeTokenCookie([]byte(tok)),
734         })
735         return http.StatusUnauthorized
736 }
737
738 func (s *IntegrationSuite) TestVhostViaHTTPBasicAuthWithExtraSpaceChars(c *check.C) {
739         s.doVhostRequests(c, func(r *http.Request, tok string) int {
740                 r.AddCookie(&http.Cookie{
741                         Name:  "arvados_api_token",
742                         Value: auth.EncodeTokenCookie([]byte(" " + tok + "\n")),
743                 })
744                 return http.StatusUnauthorized
745         })
746 }
747
// TestVhostViaPath runs the doVhostRequests trials with the token
// presented by authzViaPath.
func (s *IntegrationSuite) TestVhostViaPath(c *check.C) {
	s.doVhostRequests(c, authzViaPath)
}
// authzViaPath is an authorizer that prepends a "/t=<tok>" segment
// to r.URL.Path. It reports 404 Not Found as the status to expect
// when the token is invalid.
func authzViaPath(r *http.Request, tok string) int {
	tokenSegment := "/t=" + tok
	r.URL.Path = tokenSegment + r.URL.Path
	return http.StatusNotFound
}
755
// TestVhostViaQueryString runs the doVhostRequests trials with the
// token presented by authzViaQueryString.
func (s *IntegrationSuite) TestVhostViaQueryString(c *check.C) {
	s.doVhostRequests(c, authzViaQueryString)
}
// authzViaQueryString is an authorizer that replaces the query
// string of r.URL with "api_token=<tok>". The token is inserted
// verbatim, without URL escaping. It reports 401 Unauthorized as
// the status to expect when the token is invalid.
func authzViaQueryString(r *http.Request, tok string) int {
	const param = "api_token"
	r.URL.RawQuery = param + "=" + tok
	return http.StatusUnauthorized
}
763
// TestVhostViaPOST runs the doVhostRequests trials with the token
// presented by authzViaPOST.
func (s *IntegrationSuite) TestVhostViaPOST(c *check.C) {
	s.doVhostRequests(c, authzViaPOST)
}
// authzViaPOST is an authorizer that turns r into a form POST: the
// method becomes POST, a form-urlencoded Content-Type header is
// added, and the body is replaced with an encoded "api_token" form
// field. It reports 401 Unauthorized as the status to expect when
// the token is invalid.
func authzViaPOST(r *http.Request, tok string) int {
	form := url.Values{}
	form.Set("api_token", tok)

	r.Method = "POST"
	r.Header.Add("Content-Type", "application/x-www-form-urlencoded")
	r.Body = ioutil.NopCloser(strings.NewReader(form.Encode()))
	return http.StatusUnauthorized
}
774
775 func (s *IntegrationSuite) TestVhostViaXHRPOST(c *check.C) {
776         s.doVhostRequests(c, authzViaPOST)
777 }
// authzViaXHRPOST is an authorizer that turns r into a form POST
// resembling a browser XHR: the method becomes POST, a
// form-urlencoded Content-Type header and an Origin header are
// added, and the body is replaced with encoded "api_token" and
// "disposition=attachment" form fields. It reports 401 Unauthorized
// as the status to expect when the token is invalid.
func authzViaXHRPOST(r *http.Request, tok string) int {
	form := url.Values{}
	form.Set("api_token", tok)
	form.Set("disposition", "attachment")

	r.Method = "POST"
	r.Header.Add("Content-Type", "application/x-www-form-urlencoded")
	r.Header.Add("Origin", "https://origin.example")
	r.Body = ioutil.NopCloser(strings.NewReader(form.Encode()))
	return http.StatusUnauthorized
}
789
790 // Try some combinations of {url, token} using the given authorization
791 // mechanism, and verify the result is correct.
792 func (s *IntegrationSuite) doVhostRequests(c *check.C, authz authorizer) {
793         for _, hostPath := range []string{
794                 arvadostest.FooCollection + ".example.com/foo",
795                 arvadostest.FooCollection + "--collections.example.com/foo",
796                 arvadostest.FooCollection + "--collections.example.com/_/foo",
797                 arvadostest.FooCollectionPDH + ".example.com/foo",
798                 strings.Replace(arvadostest.FooCollectionPDH, "+", "-", -1) + "--collections.example.com/foo",
799                 arvadostest.FooBarDirCollection + ".example.com/dir1/foo",
800         } {
801                 c.Log("doRequests: ", hostPath)
802                 s.doVhostRequestsWithHostPath(c, authz, hostPath)
803         }
804 }
805
806 func (s *IntegrationSuite) doVhostRequestsWithHostPath(c *check.C, authz authorizer, hostPath string) {
807         for _, tok := range []string{
808                 arvadostest.ActiveToken,
809                 arvadostest.ActiveToken[:15],
810                 arvadostest.SpectatorToken,
811                 "bogus",
812                 "",
813         } {
814                 u := mustParseURL("http://" + hostPath)
815                 req := &http.Request{
816                         Method:     "GET",
817                         Host:       u.Host,
818                         URL:        u,
819                         RequestURI: u.RequestURI(),
820                         Header:     http.Header{},
821                 }
822                 failCode := authz(req, tok)
823                 req, resp := s.doReq(req)
824                 code := resp.StatusCode
825                 buf, _ := io.ReadAll(resp.Body)
826                 body := string(buf)
827
828                 // If the initial request had a (non-empty) token
829                 // showing in the query string, we should have been
830                 // redirected in order to hide it in a cookie.
831                 c.Check(req.URL.String(), check.Not(check.Matches), `.*api_token=.+`)
832
833                 if tok == arvadostest.ActiveToken {
834                         c.Check(code, check.Equals, http.StatusOK)
835                         c.Check(body, check.Equals, "foo")
836                 } else {
837                         c.Check(code >= 400, check.Equals, true)
838                         c.Check(code < 500, check.Equals, true)
839                         if tok == arvadostest.SpectatorToken {
840                                 // Valid token never offers to retry
841                                 // with different credentials.
842                                 c.Check(code, check.Equals, http.StatusNotFound)
843                         } else {
844                                 // Invalid token can ask to retry
845                                 // depending on the authz method.
846                                 c.Check(code, check.Equals, failCode)
847                         }
848                         if code == 404 {
849                                 c.Check(body, check.Equals, notFoundMessage+"\n")
850                         } else {
851                                 c.Check(body, check.Equals, unauthorizedMessage+"\n")
852                         }
853                 }
854         }
855 }
856
857 func (s *IntegrationSuite) TestVhostPortMatch(c *check.C) {
858         for _, host := range []string{"download.example.com", "DOWNLOAD.EXAMPLE.COM"} {
859                 for _, port := range []string{"80", "443", "8000"} {
860                         s.handler.Cluster.Services.WebDAVDownload.ExternalURL.Host = fmt.Sprintf("download.example.com:%v", port)
861                         u := mustParseURL(fmt.Sprintf("http://%v/by_id/%v/foo", host, arvadostest.FooCollection))
862                         req := &http.Request{
863                                 Method:     "GET",
864                                 Host:       u.Host,
865                                 URL:        u,
866                                 RequestURI: u.RequestURI(),
867                                 Header:     http.Header{"Authorization": []string{"Bearer " + arvadostest.ActiveToken}},
868                         }
869                         req, resp := s.doReq(req)
870                         if port == "8000" {
871                                 c.Check(resp.StatusCode, check.Equals, 401)
872                         } else {
873                                 c.Check(resp.StatusCode, check.Equals, 200)
874                         }
875                 }
876         }
877 }
878
879 func (s *IntegrationSuite) collectionURL(uuid, path string) string {
880         return "http://" + uuid + ".collections.example.com/" + path
881 }
882
883 // Create a request and process it using s.handler.
884 func (s *IntegrationSuite) do(method string, urlstring string, token string, hdr http.Header, body []byte) (*http.Request, *http.Response) {
885         u := mustParseURL(urlstring)
886         if _, ok := hdr["Authorization"]; ok && token != "" {
887                 panic("must not pass token if Authorization is already in hdr")
888         }
889         if hdr == nil {
890                 hdr = http.Header{}
891         }
892         if token != "" {
893                 hdr["Authorization"] = []string{"Bearer " + token}
894         }
895         return s.doReq((&http.Request{
896                 Method:     method,
897                 Host:       u.Host,
898                 URL:        u,
899                 RequestURI: u.RequestURI(),
900                 Header:     hdr,
901                 Body:       io.NopCloser(bytes.NewReader(body)),
902         }).WithContext(s.ctx))
903 }
904
905 // Process req using s.handler, and follow redirects if any.
906 func (s *IntegrationSuite) doReq(req *http.Request) (*http.Request, *http.Response) {
907         resp := httptest.NewRecorder()
908         var handler http.Handler = s.handler
909         // // Uncomment to enable request logging in test output:
910         // handler = httpserver.AddRequestIDs(httpserver.LogRequests(handler))
911         handler.ServeHTTP(resp, req)
912         if resp.Code != http.StatusSeeOther {
913                 return req, resp.Result()
914         }
915         cookies := (&http.Response{Header: resp.Header()}).Cookies()
916         u, _ := req.URL.Parse(resp.Header().Get("Location"))
917         req = &http.Request{
918                 Method:     "GET",
919                 Host:       u.Host,
920                 URL:        u,
921                 RequestURI: u.RequestURI(),
922                 Header:     http.Header{},
923         }
924         for _, c := range cookies {
925                 req.AddCookie(c)
926         }
927         return s.doReq(req.WithContext(s.ctx))
928 }
929
930 func (s *IntegrationSuite) TestVhostRedirectQueryTokenToCookie(c *check.C) {
931         s.testVhostRedirectTokenToCookie(c, "GET",
932                 arvadostest.FooCollection+".example.com/foo",
933                 "?api_token="+arvadostest.ActiveToken,
934                 nil,
935                 "",
936                 http.StatusOK,
937                 "foo",
938         )
939 }
940
941 func (s *IntegrationSuite) TestSingleOriginSecretLink(c *check.C) {
942         s.testVhostRedirectTokenToCookie(c, "GET",
943                 "example.com/c="+arvadostest.FooCollection+"/t="+arvadostest.ActiveToken+"/foo",
944                 "",
945                 nil,
946                 "",
947                 http.StatusOK,
948                 "foo",
949         )
950 }
951
952 func (s *IntegrationSuite) TestCollectionSharingToken(c *check.C) {
953         s.testVhostRedirectTokenToCookie(c, "GET",
954                 "example.com/c="+arvadostest.FooFileCollectionUUID+"/t="+arvadostest.FooFileCollectionSharingToken+"/foo",
955                 "",
956                 nil,
957                 "",
958                 http.StatusOK,
959                 "foo",
960         )
961         // Same valid sharing token, but requesting a different collection
962         s.testVhostRedirectTokenToCookie(c, "GET",
963                 "example.com/c="+arvadostest.FooCollection+"/t="+arvadostest.FooFileCollectionSharingToken+"/foo",
964                 "",
965                 nil,
966                 "",
967                 http.StatusNotFound,
968                 regexp.QuoteMeta(notFoundMessage+"\n"),
969         )
970 }
971
972 // Bad token in URL is 404 Not Found because it doesn't make sense to
973 // retry the same URL with different authorization.
974 func (s *IntegrationSuite) TestSingleOriginSecretLinkBadToken(c *check.C) {
975         s.testVhostRedirectTokenToCookie(c, "GET",
976                 "example.com/c="+arvadostest.FooCollection+"/t=bogus/foo",
977                 "",
978                 nil,
979                 "",
980                 http.StatusNotFound,
981                 regexp.QuoteMeta(notFoundMessage+"\n"),
982         )
983 }
984
985 // Bad token in a cookie (even if it got there via our own
986 // query-string-to-cookie redirect) is, in principle, retryable via
987 // wb2-login-and-redirect flow.
988 func (s *IntegrationSuite) TestVhostRedirectQueryTokenToBogusCookie(c *check.C) {
989         // Inline
990         resp := s.testVhostRedirectTokenToCookie(c, "GET",
991                 arvadostest.FooCollection+".example.com/foo",
992                 "?api_token=thisisabogustoken",
993                 http.Header{"Sec-Fetch-Mode": {"navigate"}},
994                 "",
995                 http.StatusSeeOther,
996                 "",
997         )
998         u, err := url.Parse(resp.Header().Get("Location"))
999         c.Assert(err, check.IsNil)
1000         c.Logf("redirected to %s", u)
1001         c.Check(u.Host, check.Equals, s.handler.Cluster.Services.Workbench2.ExternalURL.Host)
1002         c.Check(u.Query().Get("redirectToPreview"), check.Equals, "/c="+arvadostest.FooCollection+"/foo")
1003         c.Check(u.Query().Get("redirectToDownload"), check.Equals, "")
1004
1005         // Download/attachment indicated by ?disposition=attachment
1006         resp = s.testVhostRedirectTokenToCookie(c, "GET",
1007                 arvadostest.FooCollection+".example.com/foo",
1008                 "?api_token=thisisabogustoken&disposition=attachment",
1009                 http.Header{"Sec-Fetch-Mode": {"navigate"}},
1010                 "",
1011                 http.StatusSeeOther,
1012                 "",
1013         )
1014         u, err = url.Parse(resp.Header().Get("Location"))
1015         c.Assert(err, check.IsNil)
1016         c.Logf("redirected to %s", u)
1017         c.Check(u.Host, check.Equals, s.handler.Cluster.Services.Workbench2.ExternalURL.Host)
1018         c.Check(u.Query().Get("redirectToPreview"), check.Equals, "")
1019         c.Check(u.Query().Get("redirectToDownload"), check.Equals, "/c="+arvadostest.FooCollection+"/foo")
1020
1021         // Download/attachment indicated by vhost
1022         resp = s.testVhostRedirectTokenToCookie(c, "GET",
1023                 s.handler.Cluster.Services.WebDAVDownload.ExternalURL.Host+"/c="+arvadostest.FooCollection+"/foo",
1024                 "?api_token=thisisabogustoken",
1025                 http.Header{"Sec-Fetch-Mode": {"navigate"}},
1026                 "",
1027                 http.StatusSeeOther,
1028                 "",
1029         )
1030         u, err = url.Parse(resp.Header().Get("Location"))
1031         c.Assert(err, check.IsNil)
1032         c.Logf("redirected to %s", u)
1033         c.Check(u.Host, check.Equals, s.handler.Cluster.Services.Workbench2.ExternalURL.Host)
1034         c.Check(u.Query().Get("redirectToPreview"), check.Equals, "")
1035         c.Check(u.Query().Get("redirectToDownload"), check.Equals, "/c="+arvadostest.FooCollection+"/foo")
1036
1037         // Without "Sec-Fetch-Mode: navigate" header, just 401.
1038         s.testVhostRedirectTokenToCookie(c, "GET",
1039                 s.handler.Cluster.Services.WebDAVDownload.ExternalURL.Host+"/c="+arvadostest.FooCollection+"/foo",
1040                 "?api_token=thisisabogustoken",
1041                 http.Header{"Sec-Fetch-Mode": {"cors"}},
1042                 "",
1043                 http.StatusUnauthorized,
1044                 regexp.QuoteMeta(unauthorizedMessage+"\n"),
1045         )
1046         s.testVhostRedirectTokenToCookie(c, "GET",
1047                 s.handler.Cluster.Services.WebDAVDownload.ExternalURL.Host+"/c="+arvadostest.FooCollection+"/foo",
1048                 "?api_token=thisisabogustoken",
1049                 nil,
1050                 "",
1051                 http.StatusUnauthorized,
1052                 regexp.QuoteMeta(unauthorizedMessage+"\n"),
1053         )
1054 }
1055
1056 func (s *IntegrationSuite) TestVhostRedirectWithNoCache(c *check.C) {
1057         resp := s.testVhostRedirectTokenToCookie(c, "GET",
1058                 arvadostest.FooCollection+".example.com/foo",
1059                 "?api_token=thisisabogustoken",
1060                 http.Header{
1061                         "Sec-Fetch-Mode": {"navigate"},
1062                         "Cache-Control":  {"no-cache"},
1063                 },
1064                 "",
1065                 http.StatusSeeOther,
1066                 "",
1067         )
1068         u, err := url.Parse(resp.Header().Get("Location"))
1069         c.Assert(err, check.IsNil)
1070         c.Logf("redirected to %s", u)
1071         c.Check(u.Host, check.Equals, s.handler.Cluster.Services.Workbench2.ExternalURL.Host)
1072         c.Check(u.Query().Get("redirectToPreview"), check.Equals, "/c="+arvadostest.FooCollection+"/foo")
1073         c.Check(u.Query().Get("redirectToDownload"), check.Equals, "")
1074 }
1075
1076 func (s *IntegrationSuite) TestNoTokenWorkbench2LoginFlow(c *check.C) {
1077         for _, trial := range []struct {
1078                 anonToken    bool
1079                 cacheControl string
1080         }{
1081                 {},
1082                 {cacheControl: "no-cache"},
1083                 {anonToken: true},
1084                 {anonToken: true, cacheControl: "no-cache"},
1085         } {
1086                 c.Logf("trial: %+v", trial)
1087
1088                 if trial.anonToken {
1089                         s.handler.Cluster.Users.AnonymousUserToken = arvadostest.AnonymousToken
1090                 } else {
1091                         s.handler.Cluster.Users.AnonymousUserToken = ""
1092                 }
1093                 req, err := http.NewRequest("GET", "http://"+arvadostest.FooCollection+".example.com/foo", nil)
1094                 c.Assert(err, check.IsNil)
1095                 req.Header.Set("Sec-Fetch-Mode", "navigate")
1096                 if trial.cacheControl != "" {
1097                         req.Header.Set("Cache-Control", trial.cacheControl)
1098                 }
1099                 resp := httptest.NewRecorder()
1100                 s.handler.ServeHTTP(resp, req)
1101                 c.Check(resp.Code, check.Equals, http.StatusSeeOther)
1102                 u, err := url.Parse(resp.Header().Get("Location"))
1103                 c.Assert(err, check.IsNil)
1104                 c.Logf("redirected to %q", u)
1105                 c.Check(u.Host, check.Equals, s.handler.Cluster.Services.Workbench2.ExternalURL.Host)
1106                 c.Check(u.Query().Get("redirectToPreview"), check.Equals, "/c="+arvadostest.FooCollection+"/foo")
1107                 c.Check(u.Query().Get("redirectToDownload"), check.Equals, "")
1108         }
1109 }
1110
1111 func (s *IntegrationSuite) TestVhostRedirectQueryTokenSingleOriginError(c *check.C) {
1112         s.testVhostRedirectTokenToCookie(c, "GET",
1113                 "example.com/c="+arvadostest.FooCollection+"/foo",
1114                 "?api_token="+arvadostest.ActiveToken,
1115                 nil,
1116                 "",
1117                 http.StatusBadRequest,
1118                 regexp.QuoteMeta("cannot serve inline content at this URL (possible configuration error; see https://doc.arvados.org/install/install-keep-web.html#dns)\n"),
1119         )
1120 }
1121
1122 // If client requests an attachment by putting ?disposition=attachment
1123 // in the query string, and gets redirected, the redirect target
1124 // should respond with an attachment.
1125 func (s *IntegrationSuite) TestVhostRedirectQueryTokenRequestAttachment(c *check.C) {
1126         resp := s.testVhostRedirectTokenToCookie(c, "GET",
1127                 arvadostest.FooCollection+".example.com/foo",
1128                 "?disposition=attachment&api_token="+arvadostest.ActiveToken,
1129                 nil,
1130                 "",
1131                 http.StatusOK,
1132                 "foo",
1133         )
1134         c.Check(resp.Header().Get("Content-Disposition"), check.Matches, "attachment(;.*)?")
1135 }
1136
1137 func (s *IntegrationSuite) TestVhostRedirectQueryTokenSiteFS(c *check.C) {
1138         s.handler.Cluster.Services.WebDAVDownload.ExternalURL.Host = "download.example.com"
1139         resp := s.testVhostRedirectTokenToCookie(c, "GET",
1140                 "download.example.com/by_id/"+arvadostest.FooCollection+"/foo",
1141                 "?api_token="+arvadostest.ActiveToken,
1142                 nil,
1143                 "",
1144                 http.StatusOK,
1145                 "foo",
1146         )
1147         c.Check(resp.Header().Get("Content-Disposition"), check.Matches, "attachment(;.*)?")
1148 }
1149
1150 func (s *IntegrationSuite) TestPastCollectionVersionFileAccess(c *check.C) {
1151         s.handler.Cluster.Services.WebDAVDownload.ExternalURL.Host = "download.example.com"
1152         resp := s.testVhostRedirectTokenToCookie(c, "GET",
1153                 "download.example.com/c="+arvadostest.WazVersion1Collection+"/waz",
1154                 "?api_token="+arvadostest.ActiveToken,
1155                 nil,
1156                 "",
1157                 http.StatusOK,
1158                 "waz",
1159         )
1160         c.Check(resp.Header().Get("Content-Disposition"), check.Matches, "attachment(;.*)?")
1161         resp = s.testVhostRedirectTokenToCookie(c, "GET",
1162                 "download.example.com/by_id/"+arvadostest.WazVersion1Collection+"/waz",
1163                 "?api_token="+arvadostest.ActiveToken,
1164                 nil,
1165                 "",
1166                 http.StatusOK,
1167                 "waz",
1168         )
1169         c.Check(resp.Header().Get("Content-Disposition"), check.Matches, "attachment(;.*)?")
1170 }
1171
1172 func (s *IntegrationSuite) TestVhostRedirectQueryTokenTrustAllContent(c *check.C) {
1173         s.handler.Cluster.Collections.TrustAllContent = true
1174         s.testVhostRedirectTokenToCookie(c, "GET",
1175                 "example.com/c="+arvadostest.FooCollection+"/foo",
1176                 "?api_token="+arvadostest.ActiveToken,
1177                 nil,
1178                 "",
1179                 http.StatusOK,
1180                 "foo",
1181         )
1182 }
1183
1184 func (s *IntegrationSuite) TestVhostRedirectQueryTokenAttachmentOnlyHost(c *check.C) {
1185         s.handler.Cluster.Services.WebDAVDownload.ExternalURL.Host = "example.com:1234"
1186
1187         s.testVhostRedirectTokenToCookie(c, "GET",
1188                 "example.com/c="+arvadostest.FooCollection+"/foo",
1189                 "?api_token="+arvadostest.ActiveToken,
1190                 nil,
1191                 "",
1192                 http.StatusBadRequest,
1193                 regexp.QuoteMeta("cannot serve inline content at this URL (possible configuration error; see https://doc.arvados.org/install/install-keep-web.html#dns)\n"),
1194         )
1195
1196         resp := s.testVhostRedirectTokenToCookie(c, "GET",
1197                 "example.com:1234/c="+arvadostest.FooCollection+"/foo",
1198                 "?api_token="+arvadostest.ActiveToken,
1199                 nil,
1200                 "",
1201                 http.StatusOK,
1202                 "foo",
1203         )
1204         c.Check(resp.Header().Get("Content-Disposition"), check.Equals, "attachment")
1205 }
1206
1207 func (s *IntegrationSuite) TestVhostRedirectMultipleTokens(c *check.C) {
1208         baseUrl := arvadostest.FooCollection + ".example.com/foo"
1209         query := url.Values{}
1210
1211         // The intent of these tests is to check that requests are redirected
1212         // correctly in the presence of multiple API tokens. The exact response
1213         // codes and content are not closely considered: they're just how
1214         // keep-web responded when we made the smallest possible fix. Changing
1215         // those responses may be okay, but you should still test all these
1216         // different cases and the associated redirect logic.
1217         query["api_token"] = []string{arvadostest.ActiveToken, arvadostest.AnonymousToken}
1218         s.testVhostRedirectTokenToCookie(c, "GET", baseUrl, "?"+query.Encode(), nil, "", http.StatusOK, "foo")
1219         query["api_token"] = []string{arvadostest.ActiveToken, arvadostest.AnonymousToken, ""}
1220         s.testVhostRedirectTokenToCookie(c, "GET", baseUrl, "?"+query.Encode(), nil, "", http.StatusOK, "foo")
1221         query["api_token"] = []string{arvadostest.ActiveToken, "", arvadostest.AnonymousToken}
1222         s.testVhostRedirectTokenToCookie(c, "GET", baseUrl, "?"+query.Encode(), nil, "", http.StatusOK, "foo")
1223         query["api_token"] = []string{"", arvadostest.ActiveToken}
1224         s.testVhostRedirectTokenToCookie(c, "GET", baseUrl, "?"+query.Encode(), nil, "", http.StatusOK, "foo")
1225
1226         expectContent := regexp.QuoteMeta(unauthorizedMessage + "\n")
1227         query["api_token"] = []string{arvadostest.AnonymousToken, "invalidtoo"}
1228         s.testVhostRedirectTokenToCookie(c, "GET", baseUrl, "?"+query.Encode(), nil, "", http.StatusUnauthorized, expectContent)
1229         query["api_token"] = []string{arvadostest.AnonymousToken, ""}
1230         s.testVhostRedirectTokenToCookie(c, "GET", baseUrl, "?"+query.Encode(), nil, "", http.StatusUnauthorized, expectContent)
1231         query["api_token"] = []string{"", arvadostest.AnonymousToken}
1232         s.testVhostRedirectTokenToCookie(c, "GET", baseUrl, "?"+query.Encode(), nil, "", http.StatusUnauthorized, expectContent)
1233 }
1234
1235 func (s *IntegrationSuite) TestVhostRedirectPOSTFormTokenToCookie(c *check.C) {
1236         s.testVhostRedirectTokenToCookie(c, "POST",
1237                 arvadostest.FooCollection+".example.com/foo",
1238                 "",
1239                 http.Header{"Content-Type": {"application/x-www-form-urlencoded"}},
1240                 url.Values{"api_token": {arvadostest.ActiveToken}}.Encode(),
1241                 http.StatusOK,
1242                 "foo",
1243         )
1244 }
1245
1246 func (s *IntegrationSuite) TestVhostRedirectPOSTFormTokenToCookie404(c *check.C) {
1247         s.testVhostRedirectTokenToCookie(c, "POST",
1248                 arvadostest.FooCollection+".example.com/foo",
1249                 "",
1250                 http.Header{"Content-Type": {"application/x-www-form-urlencoded"}},
1251                 url.Values{"api_token": {arvadostest.SpectatorToken}}.Encode(),
1252                 http.StatusNotFound,
1253                 regexp.QuoteMeta(notFoundMessage+"\n"),
1254         )
1255 }
1256
1257 func (s *IntegrationSuite) TestAnonymousTokenOK(c *check.C) {
1258         s.handler.Cluster.Users.AnonymousUserToken = arvadostest.AnonymousToken
1259         s.testVhostRedirectTokenToCookie(c, "GET",
1260                 "example.com/c="+arvadostest.HelloWorldCollection+"/Hello%20world.txt",
1261                 "",
1262                 nil,
1263                 "",
1264                 http.StatusOK,
1265                 "Hello world\n",
1266         )
1267 }
1268
1269 func (s *IntegrationSuite) TestAnonymousTokenError(c *check.C) {
1270         s.handler.Cluster.Users.AnonymousUserToken = "anonymousTokenConfiguredButInvalid"
1271         s.testVhostRedirectTokenToCookie(c, "GET",
1272                 "example.com/c="+arvadostest.HelloWorldCollection+"/Hello%20world.txt",
1273                 "",
1274                 nil,
1275                 "",
1276                 http.StatusUnauthorized,
1277                 "Authorization tokens are not accepted here: .*\n",
1278         )
1279 }
1280
1281 func (s *IntegrationSuite) TestSpecialCharsInPath(c *check.C) {
1282         s.handler.Cluster.Services.WebDAVDownload.ExternalURL.Host = "download.example.com"
1283
1284         client := arvados.NewClientFromEnv()
1285         client.AuthToken = arvadostest.ActiveToken
1286         fs, err := (&arvados.Collection{}).FileSystem(client, nil)
1287         c.Assert(err, check.IsNil)
1288         path := `https:\\"odd' path chars`
1289         f, err := fs.OpenFile(path, os.O_CREATE, 0777)
1290         c.Assert(err, check.IsNil)
1291         f.Close()
1292         mtxt, err := fs.MarshalManifest(".")
1293         c.Assert(err, check.IsNil)
1294         var coll arvados.Collection
1295         err = client.RequestAndDecode(&coll, "POST", "arvados/v1/collections", nil, map[string]interface{}{
1296                 "collection": map[string]string{
1297                         "manifest_text": mtxt,
1298                 },
1299         })
1300         c.Assert(err, check.IsNil)
1301
1302         u, _ := url.Parse("http://download.example.com/c=" + coll.UUID + "/")
1303         req := &http.Request{
1304                 Method:     "GET",
1305                 Host:       u.Host,
1306                 URL:        u,
1307                 RequestURI: u.RequestURI(),
1308                 Header: http.Header{
1309                         "Authorization": {"Bearer " + client.AuthToken},
1310                 },
1311         }
1312         resp := httptest.NewRecorder()
1313         s.handler.ServeHTTP(resp, req)
1314         c.Check(resp.Code, check.Equals, http.StatusOK)
1315         doc, err := html.Parse(resp.Body)
1316         c.Assert(err, check.IsNil)
1317         pathHrefMap := getPathHrefMap(doc)
1318         c.Check(pathHrefMap, check.HasLen, 1) // the one leaf added to collection
1319         href, hasPath := pathHrefMap[path]
1320         c.Assert(hasPath, check.Equals, true) // the path is listed
1321         relUrl := mustParseURL(href)
1322         c.Check(relUrl.Path, check.Equals, "./"+path) // href can be decoded back to path
1323 }
1324
1325 func (s *IntegrationSuite) TestForwardSlashSubstitution(c *check.C) {
1326         arv := arvados.NewClientFromEnv()
1327         s.handler.Cluster.Services.WebDAVDownload.ExternalURL.Host = "download.example.com"
1328         s.handler.Cluster.Collections.ForwardSlashNameSubstitution = "{SOLIDUS}"
1329         name := "foo/bar/baz"
1330         nameShown := strings.Replace(name, "/", "{SOLIDUS}", -1)
1331
1332         client := arvados.NewClientFromEnv()
1333         client.AuthToken = arvadostest.ActiveToken
1334         fs, err := (&arvados.Collection{}).FileSystem(client, nil)
1335         c.Assert(err, check.IsNil)
1336         f, err := fs.OpenFile("filename", os.O_CREATE, 0777)
1337         c.Assert(err, check.IsNil)
1338         f.Close()
1339         mtxt, err := fs.MarshalManifest(".")
1340         c.Assert(err, check.IsNil)
1341         var coll arvados.Collection
1342         err = client.RequestAndDecode(&coll, "POST", "arvados/v1/collections", nil, map[string]interface{}{
1343                 "collection": map[string]string{
1344                         "manifest_text": mtxt,
1345                         "name":          name,
1346                         "owner_uuid":    arvadostest.AProjectUUID,
1347                 },
1348         })
1349         c.Assert(err, check.IsNil)
1350         defer arv.RequestAndDecode(&coll, "DELETE", "arvados/v1/collections/"+coll.UUID, nil, nil)
1351
1352         base := "http://download.example.com/by_id/" + coll.OwnerUUID + "/"
1353         for tryURL, expectedAnchorText := range map[string]string{
1354                 base:                   nameShown + "/",
1355                 base + nameShown + "/": "filename",
1356         } {
1357                 u, _ := url.Parse(tryURL)
1358                 req := &http.Request{
1359                         Method:     "GET",
1360                         Host:       u.Host,
1361                         URL:        u,
1362                         RequestURI: u.RequestURI(),
1363                         Header: http.Header{
1364                                 "Authorization": {"Bearer " + client.AuthToken},
1365                         },
1366                 }
1367                 resp := httptest.NewRecorder()
1368                 s.handler.ServeHTTP(resp, req)
1369                 c.Check(resp.Code, check.Equals, http.StatusOK)
1370                 doc, err := html.Parse(resp.Body)
1371                 c.Assert(err, check.IsNil) // valid HTML
1372                 pathHrefMap := getPathHrefMap(doc)
1373                 href, hasExpected := pathHrefMap[expectedAnchorText]
1374                 c.Assert(hasExpected, check.Equals, true) // has expected anchor text
1375                 c.Assert(href, check.Not(check.Equals), "")
1376                 relUrl := mustParseURL(href)
1377                 c.Check(relUrl.Path, check.Equals, "./"+expectedAnchorText) // decoded href maps back to the anchor text
1378         }
1379 }
1380
1381 // XHRs can't follow redirect-with-cookie so they rely on method=POST
1382 // and disposition=attachment (telling us it's acceptable to respond
1383 // with content instead of a redirect) and an Origin header that gets
1384 // added automatically by the browser (telling us it's desirable to do
1385 // so).
1386 func (s *IntegrationSuite) TestXHRNoRedirect(c *check.C) {
1387         u, _ := url.Parse("http://example.com/c=" + arvadostest.FooCollection + "/foo")
1388         req := &http.Request{
1389                 Method:     "POST",
1390                 Host:       u.Host,
1391                 URL:        u,
1392                 RequestURI: u.RequestURI(),
1393                 Header: http.Header{
1394                         "Origin":       {"https://origin.example"},
1395                         "Content-Type": {"application/x-www-form-urlencoded"},
1396                 },
1397                 Body: ioutil.NopCloser(strings.NewReader(url.Values{
1398                         "api_token":   {arvadostest.ActiveToken},
1399                         "disposition": {"attachment"},
1400                 }.Encode())),
1401         }
1402         resp := httptest.NewRecorder()
1403         s.handler.ServeHTTP(resp, req)
1404         c.Check(resp.Code, check.Equals, http.StatusOK)
1405         c.Check(resp.Body.String(), check.Equals, "foo")
1406         c.Check(resp.Header().Get("Access-Control-Allow-Origin"), check.Equals, "*")
1407
1408         // GET + Origin header is representative of both AJAX GET
1409         // requests and inline images via <IMG crossorigin="anonymous"
1410         // src="...">.
1411         u.RawQuery = "api_token=" + url.QueryEscape(arvadostest.ActiveTokenV2)
1412         req = &http.Request{
1413                 Method:     "GET",
1414                 Host:       u.Host,
1415                 URL:        u,
1416                 RequestURI: u.RequestURI(),
1417                 Header: http.Header{
1418                         "Origin": {"https://origin.example"},
1419                 },
1420         }
1421         resp = httptest.NewRecorder()
1422         s.handler.ServeHTTP(resp, req)
1423         c.Check(resp.Code, check.Equals, http.StatusOK)
1424         c.Check(resp.Body.String(), check.Equals, "foo")
1425         c.Check(resp.Header().Get("Access-Control-Allow-Origin"), check.Equals, "*")
1426 }
1427
1428 func (s *IntegrationSuite) testVhostRedirectTokenToCookie(c *check.C, method, hostPath, queryString string, reqHeader http.Header, reqBody string, expectStatus int, matchRespBody string) *httptest.ResponseRecorder {
1429         if reqHeader == nil {
1430                 reqHeader = http.Header{}
1431         }
1432         u, _ := url.Parse(`http://` + hostPath + queryString)
1433         c.Logf("requesting %s", u)
1434         req := &http.Request{
1435                 Method:     method,
1436                 Host:       u.Host,
1437                 URL:        u,
1438                 RequestURI: u.RequestURI(),
1439                 Header:     reqHeader,
1440                 Body:       ioutil.NopCloser(strings.NewReader(reqBody)),
1441         }
1442
1443         resp := httptest.NewRecorder()
1444         defer func() {
1445                 c.Check(resp.Code, check.Equals, expectStatus)
1446                 c.Check(resp.Body.String(), check.Matches, matchRespBody)
1447         }()
1448
1449         s.handler.ServeHTTP(resp, req)
1450         if resp.Code != http.StatusSeeOther {
1451                 attachment, _ := regexp.MatchString(`^attachment(;|$)`, resp.Header().Get("Content-Disposition"))
1452                 // Since we're not redirecting, check that any api_token in the URL is
1453                 // handled safely.
1454                 // If there is no token in the URL, then we're good.
1455                 // Otherwise, if the response code is an error, the body is expected to
1456                 // be static content, and nothing that might maliciously introspect the
1457                 // URL. It's considered safe and allowed.
1458                 // Otherwise, if the response content has attachment disposition,
1459                 // that's considered safe for all the reasons explained in the
1460                 // safeAttachment comment in handler.go.
1461                 c.Check(!u.Query().Has("api_token") || resp.Code >= 400 || attachment, check.Equals, true)
1462                 return resp
1463         }
1464
1465         loc, err := url.Parse(resp.Header().Get("Location"))
1466         c.Assert(err, check.IsNil)
1467         c.Check(loc.Scheme, check.Equals, u.Scheme)
1468         c.Check(loc.Host, check.Equals, u.Host)
1469         c.Check(loc.RawPath, check.Equals, u.RawPath)
1470         // If the response was a redirect, it should never include an API token.
1471         c.Check(loc.Query().Has("api_token"), check.Equals, false)
1472         c.Check(resp.Body.String(), check.Matches, `.*href="http://`+regexp.QuoteMeta(html.EscapeString(hostPath))+`(\?[^"]*)?".*`)
1473         cookies := (&http.Response{Header: resp.Header()}).Cookies()
1474
1475         c.Logf("following redirect to %s", u)
1476         req = &http.Request{
1477                 Method:     "GET",
1478                 Host:       loc.Host,
1479                 URL:        loc,
1480                 RequestURI: loc.RequestURI(),
1481                 Header:     reqHeader,
1482         }
1483         for _, c := range cookies {
1484                 req.AddCookie(c)
1485         }
1486
1487         resp = httptest.NewRecorder()
1488         s.handler.ServeHTTP(resp, req)
1489
1490         if resp.Code != http.StatusSeeOther {
1491                 c.Check(resp.Header().Get("Location"), check.Equals, "")
1492         }
1493         return resp
1494 }
1495
1496 func (s *IntegrationSuite) TestDirectoryListingWithAnonymousToken(c *check.C) {
1497         s.handler.Cluster.Users.AnonymousUserToken = arvadostest.AnonymousToken
1498         s.testDirectoryListing(c)
1499 }
1500
1501 func (s *IntegrationSuite) TestDirectoryListingWithNoAnonymousToken(c *check.C) {
1502         s.handler.Cluster.Users.AnonymousUserToken = ""
1503         s.testDirectoryListing(c)
1504 }
1505
1506 func (s *IntegrationSuite) testDirectoryListing(c *check.C) {
1507         // The "ownership cycle" test fixtures are reachable from the
1508         // "filter group without filters" group, causing webdav's
1509         // walkfs to recurse indefinitely. Avoid that by deleting one
1510         // of the bogus fixtures.
1511         arv := arvados.NewClientFromEnv()
1512         err := arv.RequestAndDecode(nil, "DELETE", "arvados/v1/groups/zzzzz-j7d0g-cx2al9cqkmsf1hs", nil, nil)
1513         if err != nil {
1514                 c.Assert(err, check.FitsTypeOf, &arvados.TransactionError{})
1515                 c.Check(err.(*arvados.TransactionError).StatusCode, check.Equals, 404)
1516         }
1517
1518         s.handler.Cluster.Services.WebDAVDownload.ExternalURL.Host = "download.example.com"
1519         authHeader := http.Header{
1520                 "Authorization": {"Bearer " + arvadostest.ActiveToken},
1521         }
1522         for _, trial := range []struct {
1523                 uri      string
1524                 header   http.Header
1525                 expect   []string
1526                 redirect string
1527                 cutDirs  int
1528         }{
1529                 {
1530                         uri:     strings.Replace(arvadostest.FooAndBarFilesInDirPDH, "+", "-", -1) + ".example.com/",
1531                         header:  authHeader,
1532                         expect:  []string{"dir1/foo", "dir1/bar"},
1533                         cutDirs: 0,
1534                 },
1535                 {
1536                         uri:     strings.Replace(arvadostest.FooAndBarFilesInDirPDH, "+", "-", -1) + ".example.com/dir1/",
1537                         header:  authHeader,
1538                         expect:  []string{"foo", "bar"},
1539                         cutDirs: 1,
1540                 },
1541                 {
1542                         // URLs of this form ignore authHeader, and
1543                         // FooAndBarFilesInDirUUID isn't public, so
1544                         // this returns 401.
1545                         uri:    "download.example.com/collections/" + arvadostest.FooAndBarFilesInDirUUID + "/",
1546                         header: authHeader,
1547                         expect: nil,
1548                 },
1549                 {
1550                         uri:     "download.example.com/users/active/foo_file_in_dir/",
1551                         header:  authHeader,
1552                         expect:  []string{"dir1/"},
1553                         cutDirs: 3,
1554                 },
1555                 {
1556                         uri:     "download.example.com/users/active/foo_file_in_dir/dir1/",
1557                         header:  authHeader,
1558                         expect:  []string{"bar"},
1559                         cutDirs: 4,
1560                 },
1561                 {
1562                         uri:     "download.example.com/",
1563                         header:  authHeader,
1564                         expect:  []string{"users/"},
1565                         cutDirs: 0,
1566                 },
1567                 {
1568                         uri:      "download.example.com/users",
1569                         header:   authHeader,
1570                         redirect: "/users/",
1571                         expect:   []string{"active/"},
1572                         cutDirs:  1,
1573                 },
1574                 {
1575                         uri:     "download.example.com/users/",
1576                         header:  authHeader,
1577                         expect:  []string{"active/"},
1578                         cutDirs: 1,
1579                 },
1580                 {
1581                         uri:      "download.example.com/users/active",
1582                         header:   authHeader,
1583                         redirect: "/users/active/",
1584                         expect:   []string{"foo_file_in_dir/"},
1585                         cutDirs:  2,
1586                 },
1587                 {
1588                         uri:     "download.example.com/users/active/",
1589                         header:  authHeader,
1590                         expect:  []string{"foo_file_in_dir/"},
1591                         cutDirs: 2,
1592                 },
1593                 {
1594                         uri:     "collections.example.com/collections/download/" + arvadostest.FooAndBarFilesInDirUUID + "/" + arvadostest.ActiveToken + "/",
1595                         header:  nil,
1596                         expect:  []string{"dir1/foo", "dir1/bar"},
1597                         cutDirs: 4,
1598                 },
1599                 {
1600                         uri:     "collections.example.com/c=" + arvadostest.FooAndBarFilesInDirUUID + "/t=" + arvadostest.ActiveToken + "/",
1601                         header:  nil,
1602                         expect:  []string{"dir1/foo", "dir1/bar"},
1603                         cutDirs: 2,
1604                 },
1605                 {
1606                         uri:     "collections.example.com/c=" + arvadostest.FooAndBarFilesInDirUUID + "/t=" + arvadostest.ActiveToken,
1607                         header:  nil,
1608                         expect:  []string{"dir1/foo", "dir1/bar"},
1609                         cutDirs: 2,
1610                 },
1611                 {
1612                         uri:     "download.example.com/c=" + arvadostest.FooAndBarFilesInDirUUID,
1613                         header:  authHeader,
1614                         expect:  []string{"dir1/foo", "dir1/bar"},
1615                         cutDirs: 1,
1616                 },
1617                 {
1618                         uri:      "download.example.com/c=" + arvadostest.FooAndBarFilesInDirUUID + "/dir1",
1619                         header:   authHeader,
1620                         redirect: "/c=" + arvadostest.FooAndBarFilesInDirUUID + "/dir1/",
1621                         expect:   []string{"foo", "bar"},
1622                         cutDirs:  2,
1623                 },
1624                 {
1625                         uri:     "download.example.com/c=" + arvadostest.FooAndBarFilesInDirUUID + "/_/dir1/",
1626                         header:  authHeader,
1627                         expect:  []string{"foo", "bar"},
1628                         cutDirs: 3,
1629                 },
1630                 {
1631                         uri:      arvadostest.FooAndBarFilesInDirUUID + ".example.com/dir1?api_token=" + arvadostest.ActiveToken,
1632                         header:   authHeader,
1633                         redirect: "/dir1/",
1634                         expect:   []string{"foo", "bar"},
1635                         cutDirs:  1,
1636                 },
1637                 {
1638                         uri:    "collections.example.com/c=" + arvadostest.FooAndBarFilesInDirUUID + "/theperthcountyconspiracydoesnotexist/",
1639                         header: authHeader,
1640                         expect: nil,
1641                 },
1642                 {
1643                         uri:     "download.example.com/c=" + arvadostest.WazVersion1Collection,
1644                         header:  authHeader,
1645                         expect:  []string{"waz"},
1646                         cutDirs: 1,
1647                 },
1648                 {
1649                         uri:     "download.example.com/by_id/" + arvadostest.WazVersion1Collection,
1650                         header:  authHeader,
1651                         expect:  []string{"waz"},
1652                         cutDirs: 2,
1653                 },
1654                 {
1655                         uri:     "download.example.com/users/active/This filter group/",
1656                         header:  authHeader,
1657                         expect:  []string{"A Subproject/"},
1658                         cutDirs: 3,
1659                 },
1660                 {
1661                         uri:     "download.example.com/users/active/This filter group/A Subproject",
1662                         header:  authHeader,
1663                         expect:  []string{"baz_file/"},
1664                         cutDirs: 4,
1665                 },
1666                 {
1667                         uri:     "download.example.com/by_id/" + arvadostest.AFilterGroupUUID,
1668                         header:  authHeader,
1669                         expect:  []string{"A Subproject/"},
1670                         cutDirs: 2,
1671                 },
1672                 {
1673                         uri:     "download.example.com/by_id/" + arvadostest.AFilterGroupUUID + "/A Subproject",
1674                         header:  authHeader,
1675                         expect:  []string{"baz_file/"},
1676                         cutDirs: 3,
1677                 },
1678         } {
1679                 comment := check.Commentf("HTML: %q redir %q => %q", trial.uri, trial.redirect, trial.expect)
1680                 resp := httptest.NewRecorder()
1681                 u := mustParseURL("//" + trial.uri)
1682                 req := &http.Request{
1683                         Method:     "GET",
1684                         Host:       u.Host,
1685                         URL:        u,
1686                         RequestURI: u.RequestURI(),
1687                         Header:     copyHeader(trial.header),
1688                 }
1689                 s.handler.ServeHTTP(resp, req)
1690                 var cookies []*http.Cookie
1691                 for resp.Code == http.StatusSeeOther {
1692                         u, _ := req.URL.Parse(resp.Header().Get("Location"))
1693                         req = &http.Request{
1694                                 Method:     "GET",
1695                                 Host:       u.Host,
1696                                 URL:        u,
1697                                 RequestURI: u.RequestURI(),
1698                                 Header:     copyHeader(trial.header),
1699                         }
1700                         cookies = append(cookies, (&http.Response{Header: resp.Header()}).Cookies()...)
1701                         for _, c := range cookies {
1702                                 req.AddCookie(c)
1703                         }
1704                         resp = httptest.NewRecorder()
1705                         s.handler.ServeHTTP(resp, req)
1706                 }
1707                 if trial.redirect != "" {
1708                         c.Check(req.URL.Path, check.Equals, trial.redirect, comment)
1709                 }
1710                 if trial.expect == nil {
1711                         c.Check(resp.Code, check.Equals, http.StatusUnauthorized, comment)
1712                 } else {
1713                         c.Check(resp.Code, check.Equals, http.StatusOK, comment)
1714                         listingPageDoc, err := html.Parse(resp.Body)
1715                         c.Check(err, check.IsNil, comment) // valid HTML document
1716                         pathHrefMap := getPathHrefMap(listingPageDoc)
1717                         c.Assert(pathHrefMap, check.Not(check.HasLen), 0, comment)
1718                         for _, e := range trial.expect {
1719                                 href, hasE := pathHrefMap[e]
1720                                 c.Check(hasE, check.Equals, true, comment) // expected path is listed
1721                                 relUrl := mustParseURL(href)
1722                                 c.Check(relUrl.Path, check.Equals, "./"+e, comment) // href can be decoded back to path
1723                         }
1724                         wgetCommand := getWgetExamplePre(listingPageDoc)
1725                         wgetExpected := regexp.MustCompile(`^\$ wget .*--cut-dirs=(\d+) .*'(https?://[^']+)'$`)
1726                         wgetMatchGroups := wgetExpected.FindStringSubmatch(wgetCommand)
1727                         c.Assert(wgetMatchGroups, check.NotNil)                                     // wget command matches
1728                         c.Check(wgetMatchGroups[1], check.Equals, fmt.Sprintf("%d", trial.cutDirs)) // correct level of cut dirs in wget command
1729                         printedUrl := mustParseURL(wgetMatchGroups[2])
1730                         c.Check(printedUrl.Host, check.Equals, req.URL.Host)
1731                         c.Check(printedUrl.Path, check.Equals, req.URL.Path) // URL arg in wget command can be decoded to the right path
1732                 }
1733
1734                 comment = check.Commentf("WebDAV: %q => %q", trial.uri, trial.expect)
1735                 req = &http.Request{
1736                         Method:     "OPTIONS",
1737                         Host:       u.Host,
1738                         URL:        u,
1739                         RequestURI: u.RequestURI(),
1740                         Header:     copyHeader(trial.header),
1741                         Body:       ioutil.NopCloser(&bytes.Buffer{}),
1742                 }
1743                 resp = httptest.NewRecorder()
1744                 s.handler.ServeHTTP(resp, req)
1745                 if trial.expect == nil {
1746                         c.Check(resp.Code, check.Equals, http.StatusUnauthorized, comment)
1747                 } else {
1748                         c.Check(resp.Code, check.Equals, http.StatusOK, comment)
1749                 }
1750
1751                 req = &http.Request{
1752                         Method:     "PROPFIND",
1753                         Host:       u.Host,
1754                         URL:        u,
1755                         RequestURI: u.RequestURI(),
1756                         Header:     copyHeader(trial.header),
1757                         Body:       ioutil.NopCloser(&bytes.Buffer{}),
1758                 }
1759                 resp = httptest.NewRecorder()
1760                 s.handler.ServeHTTP(resp, req)
1761                 // This check avoids logging a big XML document in the
1762                 // event webdav throws a 500 error after sending
1763                 // headers for a 207.
1764                 if !c.Check(strings.HasSuffix(resp.Body.String(), "Internal Server Error"), check.Equals, false) {
1765                         continue
1766                 }
1767                 if trial.expect == nil {
1768                         c.Check(resp.Code, check.Equals, http.StatusUnauthorized, comment)
1769                 } else {
1770                         c.Check(resp.Code, check.Equals, http.StatusMultiStatus, comment)
1771                         for _, e := range trial.expect {
1772                                 if strings.HasSuffix(e, "/") {
1773                                         e = filepath.Join(u.Path, e) + "/"
1774                                 } else {
1775                                         e = filepath.Join(u.Path, e)
1776                                 }
1777                                 e = strings.Replace(e, " ", "%20", -1)
1778                                 c.Check(resp.Body.String(), check.Matches, `(?ms).*<D:href>`+e+`</D:href>.*`, comment)
1779                         }
1780                 }
1781         }
1782 }
1783
1784 // Shallow-traverse the HTML document, gathering the nodes satisfying the
1785 // predicate function in the output slice. If a node matches the predicate,
1786 // none of its children will be visited.
1787 func getNodes(document *html.Node, predicate func(*html.Node) bool) []*html.Node {
1788         var acc []*html.Node
1789         var traverse func(*html.Node, []*html.Node) []*html.Node
1790         traverse = func(root *html.Node, sofar []*html.Node) []*html.Node {
1791                 if root == nil {
1792                         return sofar
1793                 }
1794                 if predicate(root) {
1795                         return append(sofar, root)
1796                 }
1797                 for cur := root.FirstChild; cur != nil; cur = cur.NextSibling {
1798                         sofar = traverse(cur, sofar)
1799                 }
1800                 return sofar
1801         }
1802         return traverse(document, acc)
1803 }
1804
1805 // Returns true if a node has the attribute targetAttr with the given value
1806 func matchesAttributeValue(node *html.Node, targetAttr string, value string) bool {
1807         for _, attr := range node.Attr {
1808                 if attr.Key == targetAttr && attr.Val == value {
1809                         return true
1810                 }
1811         }
1812         return false
1813 }
1814
1815 // Concatenate the content of text-node children of node; only direct
1816 // children are visited, and any non-text children are skipped.
1817 func getNodeText(node *html.Node) string {
1818         var recv strings.Builder
1819         for c := node.FirstChild; c != nil; c = c.NextSibling {
1820                 if c.Type == html.TextNode {
1821                         recv.WriteString(c.Data)
1822                 }
1823         }
1824         return recv.String()
1825 }
1826
1827 // Returns a map from the directory listing item string (a path) to the href
1828 // value of its <a> tag (an encoded relative URL)
1829 func getPathHrefMap(document *html.Node) map[string]string {
1830         isItemATag := func(node *html.Node) bool {
1831                 return node.Type == html.ElementNode && node.Data == "a" && matchesAttributeValue(node, "class", "item")
1832         }
1833         aTags := getNodes(document, isItemATag)
1834         output := make(map[string]string)
1835         for _, elem := range aTags {
1836                 textContent := getNodeText(elem)
1837                 for _, attr := range elem.Attr {
1838                         if attr.Key == "href" {
1839                                 output[textContent] = attr.Val
1840                                 break
1841                         }
1842                 }
1843         }
1844         return output
1845 }
1846
1847 func getWgetExamplePre(document *html.Node) string {
1848         isWgetPre := func(node *html.Node) bool {
1849                 return node.Type == html.ElementNode && matchesAttributeValue(node, "id", "wget-example")
1850         }
1851         elements := getNodes(document, isWgetPre)
1852         if len(elements) != 1 {
1853                 return ""
1854         }
1855         return getNodeText(elements[0])
1856 }
1857
1858 func (s *IntegrationSuite) TestDeleteLastFile(c *check.C) {
1859         arv := arvados.NewClientFromEnv()
1860         var newCollection arvados.Collection
1861         err := arv.RequestAndDecode(&newCollection, "POST", "arvados/v1/collections", nil, map[string]interface{}{
1862                 "collection": map[string]string{
1863                         "owner_uuid":    arvadostest.ActiveUserUUID,
1864                         "manifest_text": ". acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:foo.txt 0:3:bar.txt\n",
1865                         "name":          "keep-web test collection",
1866                 },
1867                 "ensure_unique_name": true,
1868         })
1869         c.Assert(err, check.IsNil)
1870         defer arv.RequestAndDecode(&newCollection, "DELETE", "arvados/v1/collections/"+newCollection.UUID, nil, nil)
1871
1872         var updated arvados.Collection
1873         for _, fnm := range []string{"foo.txt", "bar.txt"} {
1874                 s.handler.Cluster.Services.WebDAVDownload.ExternalURL.Host = "example.com"
1875                 u, _ := url.Parse("http://example.com/c=" + newCollection.UUID + "/" + fnm)
1876                 req := &http.Request{
1877                         Method:     "DELETE",
1878                         Host:       u.Host,
1879                         URL:        u,
1880                         RequestURI: u.RequestURI(),
1881                         Header: http.Header{
1882                                 "Authorization": {"Bearer " + arvadostest.ActiveToken},
1883                         },
1884                 }
1885                 resp := httptest.NewRecorder()
1886                 s.handler.ServeHTTP(resp, req)
1887                 c.Check(resp.Code, check.Equals, http.StatusNoContent)
1888
1889                 updated = arvados.Collection{}
1890                 err = arv.RequestAndDecode(&updated, "GET", "arvados/v1/collections/"+newCollection.UUID, nil, nil)
1891                 c.Check(err, check.IsNil)
1892                 c.Check(updated.ManifestText, check.Not(check.Matches), `(?ms).*\Q`+fnm+`\E.*`)
1893                 c.Logf("updated manifest_text %q", updated.ManifestText)
1894         }
1895         c.Check(updated.ManifestText, check.Equals, "")
1896 }
1897
1898 func (s *IntegrationSuite) TestFileContentType(c *check.C) {
1899         s.handler.Cluster.Services.WebDAVDownload.ExternalURL.Host = "download.example.com"
1900
1901         client := arvados.NewClientFromEnv()
1902         client.AuthToken = arvadostest.ActiveToken
1903         arv, err := arvadosclient.New(client)
1904         c.Assert(err, check.Equals, nil)
1905         kc, err := keepclient.MakeKeepClient(arv)
1906         c.Assert(err, check.Equals, nil)
1907
1908         fs, err := (&arvados.Collection{}).FileSystem(client, kc)
1909         c.Assert(err, check.IsNil)
1910
1911         trials := []struct {
1912                 filename    string
1913                 content     string
1914                 contentType string
1915         }{
1916                 {"picture.txt", "BMX bikes are small this year\n", "text/plain; charset=utf-8"},
1917                 {"picture.bmp", "BMX bikes are small this year\n", "image/(x-ms-)?bmp"},
1918                 {"picture.jpg", "BMX bikes are small this year\n", "image/jpeg"},
1919                 {"picture1", "BMX bikes are small this year\n", "image/bmp"},            // content sniff; "BM" is the magic signature for .bmp
1920                 {"picture2", "Cars are small this year\n", "text/plain; charset=utf-8"}, // content sniff
1921         }
1922         for _, trial := range trials {
1923                 f, err := fs.OpenFile(trial.filename, os.O_CREATE|os.O_WRONLY, 0777)
1924                 c.Assert(err, check.IsNil)
1925                 _, err = f.Write([]byte(trial.content))
1926                 c.Assert(err, check.IsNil)
1927                 c.Assert(f.Close(), check.IsNil)
1928         }
1929         mtxt, err := fs.MarshalManifest(".")
1930         c.Assert(err, check.IsNil)
1931         var coll arvados.Collection
1932         err = client.RequestAndDecode(&coll, "POST", "arvados/v1/collections", nil, map[string]interface{}{
1933                 "collection": map[string]string{
1934                         "manifest_text": mtxt,
1935                 },
1936         })
1937         c.Assert(err, check.IsNil)
1938
1939         for _, trial := range trials {
1940                 u, _ := url.Parse("http://download.example.com/by_id/" + coll.UUID + "/" + trial.filename)
1941                 req := &http.Request{
1942                         Method:     "GET",
1943                         Host:       u.Host,
1944                         URL:        u,
1945                         RequestURI: u.RequestURI(),
1946                         Header: http.Header{
1947                                 "Authorization": {"Bearer " + client.AuthToken},
1948                         },
1949                 }
1950                 resp := httptest.NewRecorder()
1951                 s.handler.ServeHTTP(resp, req)
1952                 c.Check(resp.Code, check.Equals, http.StatusOK)
1953                 c.Check(resp.Header().Get("Content-Type"), check.Matches, trial.contentType)
1954                 c.Check(resp.Body.String(), check.Equals, trial.content)
1955         }
1956 }
1957
1958 func (s *IntegrationSuite) TestCacheSize(c *check.C) {
1959         req, err := http.NewRequest("GET", "http://"+arvadostest.FooCollection+".example.com/foo", nil)
1960         req.Header.Set("Authorization", "Bearer "+arvadostest.ActiveTokenV2)
1961         c.Assert(err, check.IsNil)
1962         resp := httptest.NewRecorder()
1963         s.handler.ServeHTTP(resp, req)
1964         c.Assert(resp.Code, check.Equals, http.StatusOK)
1965         c.Check(s.handler.Cache.sessions[arvadostest.ActiveTokenV2].client.DiskCacheSize.Percent(), check.Equals, int64(10))
1966 }
1967
1968 // Writing to a collection shouldn't affect its entry in the
1969 // PDH-to-manifest cache.
1970 func (s *IntegrationSuite) TestCacheWriteCollectionSamePDH(c *check.C) {
1971         arv, err := arvadosclient.MakeArvadosClient()
1972         c.Assert(err, check.Equals, nil)
1973         arv.ApiToken = arvadostest.ActiveToken
1974
1975         u := mustParseURL("http://x.example/testfile")
1976         req := &http.Request{
1977                 Method:     "GET",
1978                 Host:       u.Host,
1979                 URL:        u,
1980                 RequestURI: u.RequestURI(),
1981                 Header:     http.Header{"Authorization": {"Bearer " + arv.ApiToken}},
1982         }
1983
1984         checkWithID := func(id string, status int) {
1985                 req.URL.Host = strings.Replace(id, "+", "-", -1) + ".example"
1986                 req.Host = req.URL.Host
1987                 resp := httptest.NewRecorder()
1988                 s.handler.ServeHTTP(resp, req)
1989                 c.Check(resp.Code, check.Equals, status)
1990         }
1991
1992         var colls [2]arvados.Collection
1993         for i := range colls {
1994                 err := arv.Create("collections",
1995                         map[string]interface{}{
1996                                 "ensure_unique_name": true,
1997                                 "collection": map[string]interface{}{
1998                                         "name": "test collection",
1999                                 },
2000                         }, &colls[i])
2001                 c.Assert(err, check.Equals, nil)
2002         }
2003
2004         // Populate cache with empty collection
2005         checkWithID(colls[0].PortableDataHash, http.StatusNotFound)
2006
2007         // write a file to colls[0]
2008         reqPut := *req
2009         reqPut.Method = "PUT"
2010         reqPut.URL.Host = colls[0].UUID + ".example"
2011         reqPut.Host = req.URL.Host
2012         reqPut.Body = ioutil.NopCloser(bytes.NewBufferString("testdata"))
2013         resp := httptest.NewRecorder()
2014         s.handler.ServeHTTP(resp, &reqPut)
2015         c.Check(resp.Code, check.Equals, http.StatusCreated)
2016
2017         // new file should not appear in colls[1]
2018         checkWithID(colls[1].PortableDataHash, http.StatusNotFound)
2019         checkWithID(colls[1].UUID, http.StatusNotFound)
2020
2021         checkWithID(colls[0].UUID, http.StatusOK)
2022 }
2023
// copyHeader returns a new, non-nil http.Header holding the same
// keys as h, where each value slice is a separate copy so that
// modifying the result never touches h.
func copyHeader(h http.Header) http.Header {
	dup := make(http.Header, len(h))
	for name, values := range h {
		// Appending to a nil slice allocates fresh backing storage
		// (and yields nil when values is empty).
		var cloned []string
		cloned = append(cloned, values...)
		dup[name] = cloned
	}
	return dup
}
2031
2032 func (s *IntegrationSuite) checkUploadDownloadRequest(c *check.C, req *http.Request,
2033         successCode int, direction string, perm bool, userUuid, collectionUuid, collectionPDH, filepath string) {
2034
2035         client := arvados.NewClientFromEnv()
2036         client.AuthToken = arvadostest.AdminToken
2037         var logentries arvados.LogList
2038         limit1 := 1
2039         err := client.RequestAndDecode(&logentries, "GET", "arvados/v1/logs", nil,
2040                 arvados.ResourceListParams{
2041                         Limit: &limit1,
2042                         Order: "created_at desc"})
2043         c.Check(err, check.IsNil)
2044         c.Check(logentries.Items, check.HasLen, 1)
2045         lastLogId := logentries.Items[0].ID
2046         c.Logf("lastLogId: %d", lastLogId)
2047
2048         var logbuf bytes.Buffer
2049         logger := logrus.New()
2050         logger.Out = &logbuf
2051         resp := httptest.NewRecorder()
2052         req = req.WithContext(ctxlog.Context(context.Background(), logger))
2053         s.handler.ServeHTTP(resp, req)
2054
2055         if perm {
2056                 c.Check(resp.Result().StatusCode, check.Equals, successCode)
2057                 c.Check(logbuf.String(), check.Matches, `(?ms).*msg="File `+direction+`".*`)
2058                 c.Check(logbuf.String(), check.Not(check.Matches), `(?ms).*level=error.*`)
2059
2060                 deadline := time.Now().Add(time.Second)
2061                 for {
2062                         c.Assert(time.Now().After(deadline), check.Equals, false, check.Commentf("timed out waiting for log entry"))
2063                         logentries = arvados.LogList{}
2064                         err = client.RequestAndDecode(&logentries, "GET", "arvados/v1/logs", nil,
2065                                 arvados.ResourceListParams{
2066                                         Filters: []arvados.Filter{
2067                                                 {Attr: "event_type", Operator: "=", Operand: "file_" + direction},
2068                                                 {Attr: "object_uuid", Operator: "=", Operand: userUuid},
2069                                         },
2070                                         Limit: &limit1,
2071                                         Order: "created_at desc",
2072                                 })
2073                         c.Assert(err, check.IsNil)
2074                         if len(logentries.Items) > 0 &&
2075                                 logentries.Items[0].ID > lastLogId &&
2076                                 logentries.Items[0].ObjectUUID == userUuid &&
2077                                 logentries.Items[0].Properties["collection_uuid"] == collectionUuid &&
2078                                 (collectionPDH == "" || logentries.Items[0].Properties["portable_data_hash"] == collectionPDH) &&
2079                                 logentries.Items[0].Properties["collection_file_path"] == filepath {
2080                                 break
2081                         }
2082                         c.Logf("logentries.Items: %+v", logentries.Items)
2083                         time.Sleep(50 * time.Millisecond)
2084                 }
2085         } else {
2086                 c.Check(resp.Result().StatusCode, check.Equals, http.StatusForbidden)
2087                 c.Check(logbuf.String(), check.Equals, "")
2088         }
2089 }
2090
2091 func (s *IntegrationSuite) TestDownloadLoggingPermission(c *check.C) {
2092         u := mustParseURL("http://" + arvadostest.FooCollection + ".keep-web.example/foo")
2093
2094         s.handler.Cluster.Collections.TrustAllContent = true
2095         s.handler.Cluster.Collections.WebDAVLogDownloadInterval = arvados.Duration(0)
2096
2097         for _, adminperm := range []bool{true, false} {
2098                 for _, userperm := range []bool{true, false} {
2099                         s.handler.Cluster.Collections.WebDAVPermission.Admin.Download = adminperm
2100                         s.handler.Cluster.Collections.WebDAVPermission.User.Download = userperm
2101
2102                         // Test admin permission
2103                         req := &http.Request{
2104                                 Method:     "GET",
2105                                 Host:       u.Host,
2106                                 URL:        u,
2107                                 RequestURI: u.RequestURI(),
2108                                 Header: http.Header{
2109                                         "Authorization": {"Bearer " + arvadostest.AdminToken},
2110                                 },
2111                         }
2112                         s.checkUploadDownloadRequest(c, req, http.StatusOK, "download", adminperm,
2113                                 arvadostest.AdminUserUUID, arvadostest.FooCollection, arvadostest.FooCollectionPDH, "foo")
2114
2115                         // Test user permission
2116                         req = &http.Request{
2117                                 Method:     "GET",
2118                                 Host:       u.Host,
2119                                 URL:        u,
2120                                 RequestURI: u.RequestURI(),
2121                                 Header: http.Header{
2122                                         "Authorization": {"Bearer " + arvadostest.ActiveToken},
2123                                 },
2124                         }
2125                         s.checkUploadDownloadRequest(c, req, http.StatusOK, "download", userperm,
2126                                 arvadostest.ActiveUserUUID, arvadostest.FooCollection, arvadostest.FooCollectionPDH, "foo")
2127                 }
2128         }
2129
2130         s.handler.Cluster.Collections.WebDAVPermission.User.Download = true
2131
2132         for _, tryurl := range []string{"http://" + arvadostest.MultilevelCollection1 + ".keep-web.example/dir1/subdir/file1",
2133                 "http://keep-web/users/active/multilevel_collection_1/dir1/subdir/file1"} {
2134
2135                 u = mustParseURL(tryurl)
2136                 req := &http.Request{
2137                         Method:     "GET",
2138                         Host:       u.Host,
2139                         URL:        u,
2140                         RequestURI: u.RequestURI(),
2141                         Header: http.Header{
2142                                 "Authorization": {"Bearer " + arvadostest.ActiveToken},
2143                         },
2144                 }
2145                 s.checkUploadDownloadRequest(c, req, http.StatusOK, "download", true,
2146                         arvadostest.ActiveUserUUID, arvadostest.MultilevelCollection1, arvadostest.MultilevelCollection1PDH, "dir1/subdir/file1")
2147         }
2148
2149         u = mustParseURL("http://" + strings.Replace(arvadostest.FooCollectionPDH, "+", "-", 1) + ".keep-web.example/foo")
2150         req := &http.Request{
2151                 Method:     "GET",
2152                 Host:       u.Host,
2153                 URL:        u,
2154                 RequestURI: u.RequestURI(),
2155                 Header: http.Header{
2156                         "Authorization": {"Bearer " + arvadostest.ActiveToken},
2157                 },
2158         }
2159         s.checkUploadDownloadRequest(c, req, http.StatusOK, "download", true,
2160                 arvadostest.ActiveUserUUID, "", arvadostest.FooCollectionPDH, "foo")
2161 }
2162
2163 func (s *IntegrationSuite) TestUploadLoggingPermission(c *check.C) {
2164         for _, adminperm := range []bool{true, false} {
2165                 for _, userperm := range []bool{true, false} {
2166
2167                         arv := arvados.NewClientFromEnv()
2168                         arv.AuthToken = arvadostest.ActiveToken
2169
2170                         var coll arvados.Collection
2171                         err := arv.RequestAndDecode(&coll,
2172                                 "POST",
2173                                 "/arvados/v1/collections",
2174                                 nil,
2175                                 map[string]interface{}{
2176                                         "ensure_unique_name": true,
2177                                         "collection": map[string]interface{}{
2178                                                 "name": "test collection",
2179                                         },
2180                                 })
2181                         c.Assert(err, check.Equals, nil)
2182
2183                         u := mustParseURL("http://" + coll.UUID + ".keep-web.example/bar")
2184
2185                         s.handler.Cluster.Collections.WebDAVPermission.Admin.Upload = adminperm
2186                         s.handler.Cluster.Collections.WebDAVPermission.User.Upload = userperm
2187
2188                         // Test admin permission
2189                         req := &http.Request{
2190                                 Method:     "PUT",
2191                                 Host:       u.Host,
2192                                 URL:        u,
2193                                 RequestURI: u.RequestURI(),
2194                                 Header: http.Header{
2195                                         "Authorization": {"Bearer " + arvadostest.AdminToken},
2196                                 },
2197                                 Body: io.NopCloser(bytes.NewReader([]byte("bar"))),
2198                         }
2199                         s.checkUploadDownloadRequest(c, req, http.StatusCreated, "upload", adminperm,
2200                                 arvadostest.AdminUserUUID, coll.UUID, "", "bar")
2201
2202                         // Test user permission
2203                         req = &http.Request{
2204                                 Method:     "PUT",
2205                                 Host:       u.Host,
2206                                 URL:        u,
2207                                 RequestURI: u.RequestURI(),
2208                                 Header: http.Header{
2209                                         "Authorization": {"Bearer " + arvadostest.ActiveToken},
2210                                 },
2211                                 Body: io.NopCloser(bytes.NewReader([]byte("bar"))),
2212                         }
2213                         s.checkUploadDownloadRequest(c, req, http.StatusCreated, "upload", userperm,
2214                                 arvadostest.ActiveUserUUID, coll.UUID, "", "bar")
2215                 }
2216         }
2217 }
2218
2219 func (s *IntegrationSuite) serveAndLogRequests(c *check.C, reqs *map[*http.Request]int) *bytes.Buffer {
2220         logbuf, ctx := newLoggerAndContext()
2221         var wg sync.WaitGroup
2222         for req, expectStatus := range *reqs {
2223                 req := req.WithContext(ctx)
2224                 expectStatus := expectStatus
2225                 wg.Add(1)
2226                 go func() {
2227                         defer wg.Done()
2228                         resp := httptest.NewRecorder()
2229                         s.handler.ServeHTTP(resp, req)
2230                         c.Check(resp.Result().StatusCode, check.Equals, expectStatus)
2231                 }()
2232         }
2233         wg.Wait()
2234         return logbuf
2235 }
2236
2237 func countLogMatches(c *check.C, logbuf *bytes.Buffer, pattern string, matchCount int) bool {
2238         search, err := regexp.Compile(pattern)
2239         if !c.Check(err, check.IsNil, check.Commentf("failed to compile regexp: %v", err)) {
2240                 return false
2241         }
2242         matches := search.FindAll(logbuf.Bytes(), -1)
2243         return c.Check(matches, check.HasLen, matchCount,
2244                 check.Commentf("%d matching log messages: %+v", len(matches), matches))
2245 }
2246
2247 func (s *IntegrationSuite) TestLogThrottling(c *check.C) {
2248         s.handler.Cluster.Collections.WebDAVLogDownloadInterval = arvados.Duration(time.Hour)
2249         fooURL := "http://" + arvadostest.FooCollection + ".keep-web.example/foo"
2250         req := newRequest("GET", fooURL)
2251         req.Header.Set("Authorization", "Bearer "+arvadostest.ActiveToken)
2252         pattern := `\bmsg="File download".* collection_file_path=foo\b`
2253
2254         // All these requests get byte zero and should be logged.
2255         reqs := make(map[*http.Request]int)
2256         reqs[req] = http.StatusOK
2257         for _, byterange := range []string{"0-2", "0-1", "0-", "-3"} {
2258                 req := req.Clone(context.Background())
2259                 req.Header.Set("Range", "bytes="+byterange)
2260                 reqs[req] = http.StatusPartialContent
2261         }
2262         logbuf := s.serveAndLogRequests(c, &reqs)
2263         countLogMatches(c, logbuf, pattern, len(reqs))
2264
2265         // None of these requests get byte zero so they should all be throttled
2266         // (now that we've made at least one request for byte zero).
2267         reqs = make(map[*http.Request]int)
2268         for _, byterange := range []string{"1-2", "1-", "2-", "-1", "-2"} {
2269                 req := req.Clone(context.Background())
2270                 req.Header.Set("Range", "bytes="+byterange)
2271                 reqs[req] = http.StatusPartialContent
2272         }
2273         logbuf = s.serveAndLogRequests(c, &reqs)
2274         countLogMatches(c, logbuf, pattern, 0)
2275 }
2276
2277 func (s *IntegrationSuite) TestLogThrottleInterval(c *check.C) {
2278         s.handler.Cluster.Collections.WebDAVLogDownloadInterval = arvados.Duration(time.Nanosecond)
2279         logbuf, ctx := newLoggerAndContext()
2280         req := newRequest("GET", "http://"+arvadostest.FooCollection+".keep-web.example/foo")
2281         req.Header.Set("Authorization", "Bearer "+arvadostest.ActiveToken)
2282         req = req.WithContext(ctx)
2283
2284         re := regexp.MustCompile(`\bmsg="File download".* collection_file_path=foo\b`)
2285         for expected := 1; expected < 4; expected++ {
2286                 time.Sleep(2 * time.Nanosecond)
2287                 resp := httptest.NewRecorder()
2288                 s.handler.ServeHTTP(resp, req)
2289                 c.Assert(resp.Result().StatusCode, check.Equals, http.StatusOK)
2290                 matches := re.FindAll(logbuf.Bytes(), -1)
2291                 c.Assert(matches, check.HasLen, expected,
2292                         check.Commentf("%d matching log messages: %+v", len(matches), matches))
2293         }
2294 }
2295
2296 func (s *IntegrationSuite) TestLogThrottleDifferentTokens(c *check.C) {
2297         s.handler.Cluster.Collections.WebDAVLogDownloadInterval = arvados.Duration(time.Hour)
2298         req := newRequest("GET", "http://"+arvadostest.FooCollection+".keep-web.example/foo")
2299         reqs := make(map[*http.Request]int)
2300         for _, token := range []string{arvadostest.ActiveToken, arvadostest.AdminToken} {
2301                 req := req.Clone(context.Background())
2302                 req.Header.Set("Authorization", "Bearer "+token)
2303                 reqs[req] = http.StatusOK
2304         }
2305         logbuf := s.serveAndLogRequests(c, &reqs)
2306         countLogMatches(c, logbuf, `\bmsg="File download".* collection_file_path=foo\b`, len(reqs))
2307 }
2308
2309 func (s *IntegrationSuite) TestLogThrottleDifferentFiles(c *check.C) {
2310         s.handler.Cluster.Collections.WebDAVLogDownloadInterval = arvados.Duration(time.Hour)
2311         baseURL := "http://" + arvadostest.MultilevelCollection1 + ".keep-web.example/"
2312         reqs := make(map[*http.Request]int)
2313         for _, filename := range []string{"file1", "file2", "file3"} {
2314                 req := newRequest("GET", baseURL+filename)
2315                 req.Header.Set("Authorization", "Bearer "+arvadostest.ActiveToken)
2316                 reqs[req] = http.StatusOK
2317         }
2318         logbuf := s.serveAndLogRequests(c, &reqs)
2319         countLogMatches(c, logbuf, `\bmsg="File download".* collection_uuid=`+arvadostest.MultilevelCollection1+`\b`, len(reqs))
2320 }
2321
2322 func (s *IntegrationSuite) TestLogThrottleDifferentSources(c *check.C) {
2323         s.handler.Cluster.Collections.WebDAVLogDownloadInterval = arvados.Duration(time.Hour)
2324         req := newRequest("GET", "http://"+arvadostest.FooCollection+".keep-web.example/foo")
2325         req.Header.Set("Authorization", "Bearer "+arvadostest.ActiveToken)
2326         reqs := make(map[*http.Request]int)
2327         reqs[req] = http.StatusOK
2328         for _, xff := range []string{"10.22.33.44", "100::123"} {
2329                 req := req.Clone(context.Background())
2330                 req.Header.Set("X-Forwarded-For", xff)
2331                 reqs[req] = http.StatusOK
2332         }
2333         logbuf := s.serveAndLogRequests(c, &reqs)
2334         countLogMatches(c, logbuf, `\bmsg="File download".* collection_file_path=foo\b`, len(reqs))
2335 }
2336
2337 func (s *IntegrationSuite) TestConcurrentWrites(c *check.C) {
2338         s.handler.Cluster.Collections.WebDAVCache.TTL = arvados.Duration(time.Second * 2)
2339         client := arvados.NewClientFromEnv()
2340         client.AuthToken = arvadostest.ActiveTokenV2
2341
2342         // Each file we upload will consist of some unique content
2343         // followed by 2 MiB of filler content.
2344         filler := "."
2345         for i := 0; i < 21; i++ {
2346                 filler += filler
2347         }
2348
2349         // Start small, and increase concurrency (2^2, 4^2, ...)
2350         // only until hitting failure. Avoids unnecessarily long
2351         // failure reports.
2352         for n := 2; n < 16 && !c.Failed(); n = n * 2 {
2353                 c.Logf("%s: n=%d", c.TestName(), n)
2354
2355                 var coll arvados.Collection
2356                 err := client.RequestAndDecode(&coll, "POST", "arvados/v1/collections", nil, nil)
2357                 c.Assert(err, check.IsNil)
2358                 defer client.RequestAndDecode(&coll, "DELETE", "arvados/v1/collections/"+coll.UUID, nil, nil)
2359
2360                 var wg sync.WaitGroup
2361                 for i := 0; i < n && !c.Failed(); i++ {
2362                         i := i
2363                         wg.Add(1)
2364                         go func() {
2365                                 defer wg.Done()
2366                                 _, resp := s.do("MKCOL", s.collectionURL(coll.UUID, fmt.Sprintf("i=%d", i)), client.AuthToken, nil, nil)
2367                                 c.Assert(resp.StatusCode, check.Equals, http.StatusCreated)
2368                                 for j := 0; j < n && !c.Failed(); j++ {
2369                                         j := j
2370                                         wg.Add(1)
2371                                         go func() {
2372                                                 defer wg.Done()
2373                                                 content := fmt.Sprintf("i=%d/j=%d", i, j)
2374                                                 _, resp := s.do("PUT", s.collectionURL(coll.UUID, content), client.AuthToken, nil, []byte(content+filler))
2375                                                 c.Check(resp.StatusCode, check.Equals, http.StatusCreated, check.Commentf("%s", content))
2376
2377                                                 time.Sleep(time.Second)
2378
2379                                                 _, resp = s.do("GET", s.collectionURL(coll.UUID, content), client.AuthToken, nil, nil)
2380                                                 c.Check(resp.StatusCode, check.Equals, http.StatusOK, check.Commentf("%s", content))
2381                                                 body, _ := io.ReadAll(resp.Body)
2382                                                 c.Check(strings.TrimSuffix(string(body), filler), check.Equals, content)
2383                                         }()
2384                                 }
2385                         }()
2386                 }
2387                 wg.Wait()
2388                 for i := 0; i < n; i++ {
2389                         _, resp := s.do("PROPFIND", s.collectionURL(coll.UUID, fmt.Sprintf("i=%d", i)), client.AuthToken, nil, nil)
2390                         c.Assert(resp.StatusCode, check.Equals, http.StatusMultiStatus)
2391                 }
2392         }
2393 }
2394
2395 func (s *IntegrationSuite) TestRepack(c *check.C) {
2396         client := arvados.NewClientFromEnv()
2397         client.AuthToken = arvadostest.ActiveTokenV2
2398
2399         // Each file we upload will consist of some unique content
2400         // followed by 1 MiB of filler content.
2401         filler := "."
2402         for i := 0; i < 20; i++ {
2403                 filler += filler
2404         }
2405
2406         var coll arvados.Collection
2407         err := client.RequestAndDecode(&coll, "POST", "arvados/v1/collections", nil, nil)
2408         c.Assert(err, check.IsNil)
2409         defer client.RequestAndDecode(&coll, "DELETE", "arvados/v1/collections/"+coll.UUID, nil, nil)
2410
2411         countblocks := func() int {
2412                 var current arvados.Collection
2413                 err = client.RequestAndDecode(&current, "GET", "arvados/v1/collections/"+coll.UUID, nil, nil)
2414                 c.Assert(err, check.IsNil)
2415                 block := map[string]bool{}
2416                 for _, hash := range regexp.MustCompile(` [0-9a-f]{32}`).FindAllString(current.ManifestText, -1) {
2417                         block[hash] = true
2418                 }
2419                 return len(block)
2420         }
2421
2422         throttle := make(chan bool, 8) // len(throttle) is max upload concurrency
2423         n := 5                         // nested loop below will write n^2 + 1 files
2424         var nfiles atomic.Int64
2425         var totalsize atomic.Int64
2426         var wg sync.WaitGroup
2427         for i := 0; i < n && !c.Failed(); i++ {
2428                 i := i
2429                 wg.Add(1)
2430                 go func() {
2431                         defer wg.Done()
2432                         throttle <- true
2433                         _, resp := s.do("MKCOL", s.collectionURL(coll.UUID, fmt.Sprintf("i=%d", i)), client.AuthToken, nil, nil)
2434                         <-throttle
2435                         c.Assert(resp.StatusCode, check.Equals, http.StatusCreated)
2436
2437                         for j := 0; j < n && !c.Failed(); j++ {
2438                                 j := j
2439                                 wg.Add(1)
2440                                 go func() {
2441                                         defer wg.Done()
2442                                         content := fmt.Sprintf("i=%d/j=%d", i, j)
2443                                         throttle <- true
2444                                         _, resp := s.do("PUT", s.collectionURL(coll.UUID, content), client.AuthToken, nil, []byte(content+filler))
2445                                         <-throttle
2446                                         c.Check(resp.StatusCode, check.Equals, http.StatusCreated, check.Commentf("%s", content))
2447                                         totalsize.Add(int64(len(content) + len(filler)))
2448                                         c.Logf("after writing %d files, manifest has %d blocks", nfiles.Add(1), countblocks())
2449                                 }()
2450                         }
2451                 }()
2452         }
2453         wg.Wait()
2454
2455         content := "lastfile"
2456         _, resp := s.do("PUT", s.collectionURL(coll.UUID, content), client.AuthToken, nil, []byte(content+filler))
2457         c.Check(resp.StatusCode, check.Equals, http.StatusCreated, check.Commentf("%s", content))
2458         nfiles.Add(1)
2459
2460         // Check that all files can still be retrieved
2461         for i := 0; i < n && !c.Failed(); i++ {
2462                 i := i
2463                 for j := 0; j < n && !c.Failed(); j++ {
2464                         j := j
2465                         wg.Add(1)
2466                         go func() {
2467                                 defer wg.Done()
2468                                 path := fmt.Sprintf("i=%d/j=%d", i, j)
2469
2470                                 _, resp := s.do("GET", s.collectionURL(coll.UUID, path), client.AuthToken, nil, nil)
2471                                 c.Check(resp.StatusCode, check.Equals, http.StatusOK, check.Commentf("%s", content))
2472                                 size, _ := io.Copy(io.Discard, resp.Body)
2473                                 c.Check(int(size), check.Equals, len(path)+len(filler))
2474                         }()
2475                 }
2476         }
2477         wg.Wait()
2478
2479         // Check that the final manifest has been repacked so average
2480         // block size is at least double the "small file" size
2481         nblocks := countblocks()
2482         c.Logf("nblocks == %d", nblocks)
2483         c.Logf("nfiles == %d", nfiles.Load())
2484         c.Check(nblocks < int(nfiles.Load()), check.Equals, true)
2485         c.Logf("totalsize == %d", totalsize.Load())
2486         meanblocksize := int(totalsize.Load()) / nblocks
2487         c.Logf("meanblocksize == %d", meanblocksize)
2488         minblocksize := 2 * int(totalsize.Load()) / int(nfiles.Load())
2489         c.Logf("expecting minblocksize %d", minblocksize)
2490         c.Check(meanblocksize >= minblocksize, check.Equals, true)
2491 }
2492
2493 func (s *IntegrationSuite) TestDepthHeader(c *check.C) {
2494         s.handler.Cluster.Collections.WebDAVCache.TTL = arvados.Duration(time.Second * 2)
2495         client := arvados.NewClientFromEnv()
2496         client.AuthToken = arvadostest.ActiveTokenV2
2497
2498         var coll arvados.Collection
2499         err := client.RequestAndDecode(&coll, "POST", "arvados/v1/collections", nil, nil)
2500         c.Assert(err, check.IsNil)
2501         defer client.RequestAndDecode(&coll, "DELETE", "arvados/v1/collections/"+coll.UUID, nil, nil)
2502         base := "http://" + coll.UUID + ".collections.example.com/"
2503
2504         for _, trial := range []struct {
2505                 method      string
2506                 path        string
2507                 destination string
2508                 depth       string
2509                 expectCode  int // 0 means expect 2xx
2510         }{
2511                 // setup...
2512                 {method: "MKCOL", path: "dir"},
2513                 {method: "PUT", path: "dir/file"},
2514                 {method: "MKCOL", path: "dir/dir2"},
2515                 // delete with no depth = OK
2516                 {method: "DELETE", path: "dir/dir2", depth: ""},
2517                 // delete with depth other than infinity = fail
2518                 {method: "DELETE", path: "dir", depth: "0", expectCode: 400},
2519                 {method: "DELETE", path: "dir", depth: "1", expectCode: 400},
2520                 // delete with depth infinity = OK
2521                 {method: "DELETE", path: "dir", depth: "infinity"},
2522
2523                 // setup...
2524                 {method: "MKCOL", path: "dir"},
2525                 {method: "PUT", path: "dir/file"},
2526                 {method: "MKCOL", path: "dir/dir2"},
2527                 // move with depth other than infinity = fail
2528                 {method: "MOVE", path: "dir", destination: "moved", depth: "0", expectCode: 400},
2529                 {method: "MOVE", path: "dir", destination: "moved", depth: "1", expectCode: 400},
2530                 // move with depth infinity = OK
2531                 {method: "MOVE", path: "dir", destination: "moved", depth: "infinity"},
2532                 {method: "DELETE", path: "moved"},
2533
2534                 // setup...
2535                 {method: "MKCOL", path: "dir"},
2536                 {method: "PUT", path: "dir/file"},
2537                 {method: "MKCOL", path: "dir/dir2"},
2538                 // copy with depth 0 = create empty destination dir
2539                 {method: "COPY", path: "dir/", destination: "copied-empty/", depth: "0"},
2540                 {method: "DELETE", path: "copied-empty/file", expectCode: 404},
2541                 {method: "DELETE", path: "copied-empty"},
2542                 // copy with depth 0 = create empty destination dir
2543                 // (destination dir has no trailing slash this time)
2544                 {method: "COPY", path: "dir/", destination: "copied-empty-noslash", depth: "0"},
2545                 {method: "DELETE", path: "copied-empty-noslash/file", expectCode: 404},
2546                 {method: "DELETE", path: "copied-empty-noslash"},
2547                 // copy with depth 0 = create empty destination dir
2548                 // (source dir has no trailing slash this time)
2549                 {method: "COPY", path: "dir", destination: "copied-empty-noslash", depth: "0"},
2550                 {method: "DELETE", path: "copied-empty-noslash/file", expectCode: 404},
2551                 {method: "DELETE", path: "copied-empty-noslash"},
2552                 // copy with depth 1 = fail
2553                 {method: "COPY", path: "dir", destination: "copied", depth: "1", expectCode: 400},
2554                 // copy with depth infinity = copy entire subtree
2555                 {method: "COPY", path: "dir/", destination: "copied", depth: "infinity"},
2556                 {method: "DELETE", path: "copied/file"},
2557                 {method: "DELETE", path: "copied"},
2558                 // copy with depth infinity = copy entire subtree
2559                 // (source dir has no trailing slash this time)
2560                 {method: "COPY", path: "dir", destination: "copied", depth: "infinity"},
2561                 {method: "DELETE", path: "copied/file"},
2562                 {method: "DELETE", path: "copied"},
2563                 // cleanup
2564                 {method: "DELETE", path: "dir"},
2565         } {
2566                 c.Logf("trial %+v", trial)
2567                 resp := httptest.NewRecorder()
2568                 req, err := http.NewRequest(trial.method, base+trial.path, strings.NewReader(""))
2569                 c.Assert(err, check.IsNil)
2570                 req.Header.Set("Authorization", "Bearer "+client.AuthToken)
2571                 if trial.destination != "" {
2572                         req.Header.Set("Destination", base+trial.destination)
2573                 }
2574                 if trial.depth != "" {
2575                         req.Header.Set("Depth", trial.depth)
2576                 }
2577                 s.handler.ServeHTTP(resp, req)
2578                 if trial.expectCode != 0 {
2579                         c.Assert(resp.Code, check.Equals, trial.expectCode)
2580                 } else {
2581                         c.Assert(resp.Code >= 200, check.Equals, true, check.Commentf("got code %d", resp.Code))
2582                         c.Assert(resp.Code < 300, check.Equals, true, check.Commentf("got code %d", resp.Code))
2583                 }
2584                 c.Logf("resp.Body: %q", resp.Body.String())
2585         }
2586 }