21901: Refactor common functions out of log throttle integration tests
[arvados.git] / services / keep-web / handler.go
1 // Copyright (C) The Arvados Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: AGPL-3.0
4
5 package keepweb
6
7 import (
8         "encoding/json"
9         "errors"
10         "fmt"
11         "html"
12         "html/template"
13         "io"
14         "net"
15         "net/http"
16         "net/url"
17         "os"
18         "slices"
19         "sort"
20         "strconv"
21         "strings"
22         "sync"
23         "time"
24
25         "git.arvados.org/arvados.git/lib/cmd"
26         "git.arvados.org/arvados.git/lib/webdavfs"
27         "git.arvados.org/arvados.git/sdk/go/arvados"
28         "git.arvados.org/arvados.git/sdk/go/arvadosclient"
29         "git.arvados.org/arvados.git/sdk/go/auth"
30         "git.arvados.org/arvados.git/sdk/go/ctxlog"
31         "git.arvados.org/arvados.git/sdk/go/httpserver"
32         "github.com/sirupsen/logrus"
33         "golang.org/x/net/webdav"
34 )
35
36 type handler struct {
37         Cache   cache
38         Cluster *arvados.Cluster
39         metrics *metrics
40
41         lockMtx    sync.Mutex
42         lock       map[string]*sync.RWMutex
43         lockTidied time.Time
44
45         fileEventLogs         map[fileEventLog]time.Time
46         fileEventLogsMtx      sync.Mutex
47         fileEventLogsNextTidy time.Time
48
49         s3SecretCache         map[string]*cachedS3Secret
50         s3SecretCacheMtx      sync.Mutex
51         s3SecretCacheNextTidy time.Time
52 }
53
var (
	// urlPDHDecoder rewrites " " and "-" to "+", undoing the
	// substitutions a portable data hash may have picked up when
	// it was embedded in a URL.
	urlPDHDecoder = strings.NewReplacer(" ", "+", "-", "+")

	// Response bodies used for 404 and 401 responses.
	notFoundMessage     = "Not Found"
	unauthorizedMessage = "401 Unauthorized\n\nA valid Arvados token must be provided to access this resource."
)
58
59 // parseCollectionIDFromURL returns a UUID or PDH if s is a UUID or a
60 // PDH (even if it is a PDH with "+" replaced by " " or "-");
61 // otherwise "".
62 func parseCollectionIDFromURL(s string) string {
63         if arvadosclient.UUIDMatch(s) {
64                 return s
65         }
66         if pdh := urlPDHDecoder.Replace(s); arvadosclient.PDHMatch(pdh) {
67                 return pdh
68         }
69         return ""
70 }
71
72 func (h *handler) serveStatus(w http.ResponseWriter, r *http.Request) {
73         json.NewEncoder(w).Encode(struct{ Version string }{cmd.Version.String()})
74 }
75
// errorWithHTTPStatus is satisfied by values (in this file, errors
// matched with errors.As) that can report the HTTP status code that
// should be sent to the client.
type errorWithHTTPStatus interface {
	// HTTPStatus returns the HTTP response status code
	// associated with the value.
	HTTPStatus() int
}
79
80 // updateOnSuccess wraps httpserver.ResponseWriter. If the handler
81 // sends an HTTP header indicating success, updateOnSuccess first
82 // calls the provided update func. If the update func fails, an error
83 // response is sent (using the error's HTTP status or 500 if none),
84 // and the status code and body sent by the handler are ignored (all
85 // response writes return the update error).
86 type updateOnSuccess struct {
87         httpserver.ResponseWriter
88         logger     logrus.FieldLogger
89         update     func() error
90         sentHeader bool
91         err        error
92 }
93
94 func (uos *updateOnSuccess) Write(p []byte) (int, error) {
95         if !uos.sentHeader {
96                 uos.WriteHeader(http.StatusOK)
97         }
98         if uos.err != nil {
99                 return 0, uos.err
100         }
101         return uos.ResponseWriter.Write(p)
102 }
103
104 func (uos *updateOnSuccess) WriteHeader(code int) {
105         if !uos.sentHeader {
106                 uos.sentHeader = true
107                 if code >= 200 && code < 400 {
108                         if uos.err = uos.update(); uos.err != nil {
109                                 code := http.StatusInternalServerError
110                                 if he := errorWithHTTPStatus(nil); errors.As(uos.err, &he) {
111                                         code = he.HTTPStatus()
112                                 }
113                                 uos.logger.WithError(uos.err).Errorf("update() returned %T error, changing response to HTTP %d", uos.err, code)
114                                 http.Error(uos.ResponseWriter, uos.err.Error(), code)
115                                 return
116                         }
117                 }
118         }
119         uos.ResponseWriter.WriteHeader(code)
120 }
121
var (
	// corsAllowHeadersHeader is a comma-separated list of
	// request header names, formatted for use as a header value.
	corsAllowHeadersHeader = "Authorization, Content-Type, Range, " +
		// WebDAV request headers:
		"Depth, Destination, If, Lock-Token, Overwrite, Timeout, Cache-Control"

	// writeMethod is the set of request methods that modify
	// content.
	writeMethod = map[string]bool{
		"COPY":            true,
		http.MethodDelete: true,
		"LOCK":            true,
		"MKCOL":           true,
		"MOVE":            true,
		"PROPPATCH":       true,
		http.MethodPut:    true,
		"RMCOL":           true,
		"UNLOCK":          true,
	}

	// webdavMethod is the set of WebDAV request methods accepted
	// by the handler: every write method plus OPTIONS and
	// PROPFIND.
	webdavMethod = map[string]bool{
		"COPY":             true,
		http.MethodDelete:  true,
		"LOCK":             true,
		"MKCOL":            true,
		"MOVE":             true,
		http.MethodOptions: true,
		"PROPFIND":         true,
		"PROPPATCH":        true,
		http.MethodPut:     true,
		"RMCOL":            true,
		"UNLOCK":           true,
	}

	// browserMethod is the set of methods used for ordinary
	// page views and form submissions.
	browserMethod = map[string]bool{
		http.MethodGet:  true,
		http.MethodHead: true,
		http.MethodPost: true,
	}

	// siteFSDir lists the top-level directory names that are
	// served with siteFS.
	siteFSDir = map[string]bool{
		"":      true, // root directory
		"by_id": true,
		"users": true,
	}
)
164
// stripDefaultPort returns host in lower case, with the port removed
// if that port is 80 or 443. Any other port is kept.
//
// Ports 80 and 443 are both stripped regardless of scheme, so they
// are treated as the same vhost; that is considered acceptable.
func stripDefaultPort(host string) string {
	parsed := url.URL{Host: host}
	switch parsed.Port() {
	case "80", "443":
		host = parsed.Hostname()
	}
	return strings.ToLower(host)
}
174
175 // CheckHealth implements service.Handler.
176 func (h *handler) CheckHealth() error {
177         return nil
178 }
179
180 // Done implements service.Handler.
181 func (h *handler) Done() <-chan struct{} {
182         return nil
183 }
184
185 // ServeHTTP implements http.Handler.
186 func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
187         if xfp := r.Header.Get("X-Forwarded-Proto"); xfp != "" && xfp != "http" {
188                 r.URL.Scheme = xfp
189         }
190
191         wbuffer := newWriteBuffer(wOrig, int(h.Cluster.Collections.WebDAVOutputBuffer))
192         defer wbuffer.Close()
193         w := httpserver.WrapResponseWriter(responseWriter{
194                 Writer:         wbuffer,
195                 ResponseWriter: wOrig,
196         })
197
198         if r.Method == "OPTIONS" && ServeCORSPreflight(w, r.Header) {
199                 return
200         }
201
202         if !browserMethod[r.Method] && !webdavMethod[r.Method] {
203                 w.WriteHeader(http.StatusMethodNotAllowed)
204                 return
205         }
206
207         if r.Header.Get("Origin") != "" {
208                 // Allow simple cross-origin requests without user
209                 // credentials ("user credentials" as defined by CORS,
210                 // i.e., cookies, HTTP authentication, and client-side
211                 // SSL certificates. See
212                 // http://www.w3.org/TR/cors/#user-credentials).
213                 w.Header().Set("Access-Control-Allow-Origin", "*")
214                 w.Header().Set("Access-Control-Expose-Headers", "Content-Range")
215         }
216
217         if h.serveS3(w, r) {
218                 return
219         }
220
221         webdavPrefix := ""
222         arvPath := r.URL.Path
223         if prefix := r.Header.Get("X-Webdav-Prefix"); prefix != "" {
224                 // Enable a proxy (e.g., container log handler in
225                 // controller) to satisfy a request for path
226                 // "/foo/bar/baz.txt" using content from
227                 // "//abc123-4.internal/bar/baz.txt", by adding a
228                 // request header "X-Webdav-Prefix: /foo"
229                 if !strings.HasPrefix(arvPath, prefix) {
230                         http.Error(w, "X-Webdav-Prefix header is not a prefix of the requested path", http.StatusBadRequest)
231                         return
232                 }
233                 arvPath = r.URL.Path[len(prefix):]
234                 if arvPath == "" {
235                         arvPath = "/"
236                 }
237                 w.Header().Set("Vary", "X-Webdav-Prefix, "+w.Header().Get("Vary"))
238                 webdavPrefix = prefix
239         }
240         pathParts := strings.Split(arvPath[1:], "/")
241
242         var stripParts int
243         var collectionID string
244         var tokens []string
245         var reqTokens []string
246         var pathToken bool
247         var attachment bool
248         var useSiteFS bool
249         credentialsOK := h.Cluster.Collections.TrustAllContent
250         reasonNotAcceptingCredentials := ""
251
252         if r.Host != "" && stripDefaultPort(r.Host) == stripDefaultPort(h.Cluster.Services.WebDAVDownload.ExternalURL.Host) {
253                 credentialsOK = true
254                 attachment = true
255         } else if r.FormValue("disposition") == "attachment" {
256                 attachment = true
257         }
258
259         if !credentialsOK {
260                 reasonNotAcceptingCredentials = fmt.Sprintf("vhost %q does not specify a single collection ID or match Services.WebDAVDownload.ExternalURL %q, and Collections.TrustAllContent is false",
261                         r.Host, h.Cluster.Services.WebDAVDownload.ExternalURL)
262         }
263
264         if collectionID = arvados.CollectionIDFromDNSName(r.Host); collectionID != "" {
265                 // http://ID.collections.example/PATH...
266                 credentialsOK = true
267         } else if r.URL.Path == "/status.json" {
268                 h.serveStatus(w, r)
269                 return
270         } else if siteFSDir[pathParts[0]] {
271                 useSiteFS = true
272         } else if len(pathParts) >= 1 && strings.HasPrefix(pathParts[0], "c=") {
273                 // /c=ID[/PATH...]
274                 collectionID = parseCollectionIDFromURL(pathParts[0][2:])
275                 stripParts = 1
276         } else if len(pathParts) >= 2 && pathParts[0] == "collections" {
277                 if len(pathParts) >= 4 && pathParts[1] == "download" {
278                         // /collections/download/ID/TOKEN/PATH...
279                         collectionID = parseCollectionIDFromURL(pathParts[2])
280                         tokens = []string{pathParts[3]}
281                         stripParts = 4
282                         pathToken = true
283                 } else {
284                         // /collections/ID/PATH...
285                         collectionID = parseCollectionIDFromURL(pathParts[1])
286                         stripParts = 2
287                         // This path is only meant to work for public
288                         // data. Tokens provided with the request are
289                         // ignored.
290                         credentialsOK = false
291                         reasonNotAcceptingCredentials = "the '/collections/UUID/PATH' form only works for public data"
292                 }
293         }
294
295         forceReload := false
296         if cc := r.Header.Get("Cache-Control"); strings.Contains(cc, "no-cache") || strings.Contains(cc, "must-revalidate") {
297                 forceReload = true
298         }
299
300         if credentialsOK {
301                 reqTokens = auth.CredentialsFromRequest(r).Tokens
302         }
303
304         r.ParseForm()
305         origin := r.Header.Get("Origin")
306         cors := origin != "" && !strings.HasSuffix(origin, "://"+r.Host)
307         safeAjax := cors && (r.Method == http.MethodGet || r.Method == http.MethodHead)
308         // Important distinction: safeAttachment checks whether api_token exists
309         // as a query parameter. haveFormTokens checks whether api_token exists
310         // as request form data *or* a query parameter. Different checks are
311         // necessary because both the request disposition and the location of
312         // the API token affect whether or not the request needs to be
313         // redirected. The different branch comments below explain further.
314         safeAttachment := attachment && !r.URL.Query().Has("api_token")
315         if formTokens, haveFormTokens := r.Form["api_token"]; !haveFormTokens {
316                 // No token to use or redact.
317         } else if safeAjax || safeAttachment {
318                 // If this is a cross-origin request, the URL won't
319                 // appear in the browser's address bar, so
320                 // substituting a clipboard-safe URL is pointless.
321                 // Redirect-with-cookie wouldn't work anyway, because
322                 // it's not safe to allow third-party use of our
323                 // cookie.
324                 //
325                 // If we're supplying an attachment, we don't need to
326                 // convert POST to GET to avoid the "really resubmit
327                 // form?" problem, so provided the token isn't
328                 // embedded in the URL, there's no reason to do
329                 // redirect-with-cookie in this case either.
330                 for _, tok := range formTokens {
331                         reqTokens = append(reqTokens, tok)
332                 }
333         } else if browserMethod[r.Method] {
334                 // If this is a page view, and the client provided a
335                 // token via query string or POST body, we must put
336                 // the token in an HttpOnly cookie, and redirect to an
337                 // equivalent URL with the query param redacted and
338                 // method = GET.
339                 h.seeOtherWithCookie(w, r, "", credentialsOK)
340                 return
341         }
342
343         targetPath := pathParts[stripParts:]
344         if tokens == nil && len(targetPath) > 0 && strings.HasPrefix(targetPath[0], "t=") {
345                 // http://ID.example/t=TOKEN/PATH...
346                 // /c=ID/t=TOKEN/PATH...
347                 //
348                 // This form must only be used to pass scoped tokens
349                 // that give permission for a single collection. See
350                 // FormValue case above.
351                 tokens = []string{targetPath[0][2:]}
352                 pathToken = true
353                 targetPath = targetPath[1:]
354                 stripParts++
355         }
356
357         fsprefix := ""
358         if useSiteFS {
359                 if writeMethod[r.Method] {
360                         http.Error(w, webdavfs.ErrReadOnly.Error(), http.StatusMethodNotAllowed)
361                         return
362                 }
363                 if len(reqTokens) == 0 {
364                         w.Header().Add("WWW-Authenticate", "Basic realm=\"collections\"")
365                         http.Error(w, unauthorizedMessage, http.StatusUnauthorized)
366                         return
367                 }
368                 tokens = reqTokens
369         } else if collectionID == "" {
370                 http.Error(w, notFoundMessage, http.StatusNotFound)
371                 return
372         } else {
373                 fsprefix = "by_id/" + collectionID + "/"
374         }
375
376         if src := r.Header.Get("X-Webdav-Source"); strings.HasPrefix(src, "/") && !strings.Contains(src, "//") && !strings.Contains(src, "/../") {
377                 fsprefix += src[1:]
378         }
379
380         if tokens == nil {
381                 tokens = reqTokens
382                 if h.Cluster.Users.AnonymousUserToken != "" {
383                         tokens = append(tokens, h.Cluster.Users.AnonymousUserToken)
384                 }
385         }
386
387         if len(targetPath) > 0 && targetPath[0] == "_" {
388                 // If a collection has a directory called "t=foo" or
389                 // "_", it can be served at
390                 // //collections.example/_/t=foo/ or
391                 // //collections.example/_/_/ respectively:
392                 // //collections.example/t=foo/ won't work because
393                 // t=foo will be interpreted as a token "foo".
394                 targetPath = targetPath[1:]
395                 stripParts++
396         }
397
398         dirOpenMode := os.O_RDONLY
399         if writeMethod[r.Method] {
400                 dirOpenMode = os.O_RDWR
401         }
402
403         var tokenValid bool
404         var tokenScopeProblem bool
405         var token string
406         var tokenUser *arvados.User
407         var sessionFS arvados.CustomFileSystem
408         var session *cachedSession
409         var collectionDir arvados.File
410         for _, token = range tokens {
411                 var statusErr errorWithHTTPStatus
412                 fs, sess, user, err := h.Cache.GetSession(token)
413                 if errors.As(err, &statusErr) && statusErr.HTTPStatus() == http.StatusUnauthorized {
414                         // bad token
415                         continue
416                 } else if err != nil {
417                         http.Error(w, "cache error: "+err.Error(), http.StatusInternalServerError)
418                         return
419                 }
420                 if token != h.Cluster.Users.AnonymousUserToken {
421                         tokenValid = true
422                 }
423                 f, err := fs.OpenFile(fsprefix, dirOpenMode, 0)
424                 if errors.As(err, &statusErr) &&
425                         statusErr.HTTPStatus() == http.StatusForbidden &&
426                         token != h.Cluster.Users.AnonymousUserToken {
427                         // collection id is outside scope of supplied
428                         // token
429                         tokenScopeProblem = true
430                         sess.Release()
431                         continue
432                 } else if os.IsNotExist(err) {
433                         // collection does not exist or is not
434                         // readable using this token
435                         sess.Release()
436                         continue
437                 } else if err != nil {
438                         http.Error(w, err.Error(), http.StatusInternalServerError)
439                         sess.Release()
440                         return
441                 }
442                 defer f.Close()
443
444                 collectionDir, sessionFS, session, tokenUser = f, fs, sess, user
445                 break
446         }
447
448         // releaseSession() is equivalent to session.Release() except
449         // that it's a no-op if (1) session is nil, or (2) it has
450         // already been called.
451         //
452         // This way, we can do a defer call here to ensure it gets
453         // called in all code paths, and also call it inline (see
454         // below) in the cases where we want to release the lock
455         // before returning.
456         releaseSession := func() {}
457         if session != nil {
458                 var releaseSessionOnce sync.Once
459                 releaseSession = func() { releaseSessionOnce.Do(func() { session.Release() }) }
460         }
461         defer releaseSession()
462
463         if forceReload && collectionDir != nil {
464                 err := collectionDir.Sync()
465                 if err != nil {
466                         if he := errorWithHTTPStatus(nil); errors.As(err, &he) {
467                                 http.Error(w, err.Error(), he.HTTPStatus())
468                         } else {
469                                 http.Error(w, err.Error(), http.StatusInternalServerError)
470                         }
471                         return
472                 }
473         }
474         if session == nil {
475                 if pathToken {
476                         // The URL is a "secret sharing link" that
477                         // didn't work out.  Asking the client for
478                         // additional credentials would just be
479                         // confusing.
480                         http.Error(w, notFoundMessage, http.StatusNotFound)
481                         return
482                 }
483                 if tokenValid {
484                         // The client provided valid token(s), but the
485                         // collection was not found.
486                         http.Error(w, notFoundMessage, http.StatusNotFound)
487                         return
488                 }
489                 if tokenScopeProblem {
490                         // The client provided a valid token but
491                         // fetching a collection returned 401, which
492                         // means the token scope doesn't permit
493                         // fetching that collection.
494                         http.Error(w, notFoundMessage, http.StatusForbidden)
495                         return
496                 }
497                 // The client's token was invalid (e.g., expired), or
498                 // the client didn't even provide one.  Redirect to
499                 // workbench2's login-and-redirect-to-download url if
500                 // this is a browser navigation request. (The redirect
501                 // flow can't preserve the original method if it's not
502                 // GET, and doesn't make sense if the UA is a
503                 // command-line tool, is trying to load an inline
504                 // image, etc.; in these cases, there's nothing we can
505                 // do, so return 401 unauthorized.)
506                 //
507                 // Note Sec-Fetch-Mode is sent by all non-EOL
508                 // browsers, except Safari.
509                 // https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Sec-Fetch-Mode
510                 //
511                 // TODO(TC): This response would be confusing to
512                 // someone trying (anonymously) to download public
513                 // data that has been deleted.  Allow a referrer to
514                 // provide this context somehow?
515                 if r.Method == http.MethodGet && r.Header.Get("Sec-Fetch-Mode") == "navigate" {
516                         target := url.URL(h.Cluster.Services.Workbench2.ExternalURL)
517                         redirkey := "redirectToPreview"
518                         if attachment {
519                                 redirkey = "redirectToDownload"
520                         }
521                         callback := "/c=" + collectionID + "/" + strings.Join(targetPath, "/")
522                         query := url.Values{redirkey: {callback}}
523                         queryString := query.Encode()
524                         // Note: Encode (and QueryEscape function) turns space
525                         // into plus sign (+) rather than %20 (the plus sign
526                         // becomes %2B); that is the rule for web forms data
527                         // sent in URL query part via GET, but we're not
528                         // emulating forms here. Client JS APIs
529                         // (URLSearchParam#get, decodeURIComponent) will
530                         // decode %20, but while the former also expects the
531                         // form-specific encoding, the latter doesn't.
532                         // Encode() almost encodes everything; RFC3986 sec. 3.4
533                         // says "it is sometimes better for usability" to not
534                         // encode / and ? when passing URI reference in query.
535                         // This is also legal according to WHATWG URL spec and
536                         // can be desirable for debugging webapp.
537                         // We can let slash / appear in the encoded query, and
538                         // equality-sign = too, but exempting ? is not very
539                         // useful.
540                         // Plus-sign, hash, and ampersand are never exempt.
541                         r := strings.NewReplacer("+", "%20", "%2F", "/", "%3D", "=")
542                         target.RawQuery = r.Replace(queryString)
543                         w.Header().Add("Location", target.String())
544                         w.WriteHeader(http.StatusSeeOther)
545                         return
546                 }
547                 if !credentialsOK {
548                         http.Error(w, fmt.Sprintf("Authorization tokens are not accepted here: %v, and no anonymous user token is configured.", reasonNotAcceptingCredentials), http.StatusUnauthorized)
549                         return
550                 }
551                 // If none of the above cases apply, suggest the
552                 // user-agent (which is either a non-browser agent
553                 // like wget, or a browser that can't redirect through
554                 // a login flow) prompt the user for credentials.
555                 w.Header().Add("WWW-Authenticate", "Basic realm=\"collections\"")
556                 http.Error(w, unauthorizedMessage, http.StatusUnauthorized)
557                 return
558         }
559
560         if r.Method == http.MethodGet || r.Method == http.MethodHead {
561                 targetfnm := fsprefix + strings.Join(pathParts[stripParts:], "/")
562                 if fi, err := sessionFS.Stat(targetfnm); err == nil && fi.IsDir() {
563                         releaseSession() // because we won't be writing anything
564                         if !strings.HasSuffix(r.URL.Path, "/") {
565                                 h.seeOtherWithCookie(w, r, r.URL.Path+"/", credentialsOK)
566                         } else {
567                                 h.serveDirectory(w, r, fi.Name(), sessionFS, targetfnm, !useSiteFS)
568                         }
569                         return
570                 }
571         }
572
573         var basename string
574         if len(targetPath) > 0 {
575                 basename = targetPath[len(targetPath)-1]
576         }
577         if arvadosclient.PDHMatch(collectionID) && writeMethod[r.Method] {
578                 http.Error(w, webdavfs.ErrReadOnly.Error(), http.StatusMethodNotAllowed)
579                 return
580         }
581         if !h.userPermittedToUploadOrDownload(r.Method, tokenUser) {
582                 http.Error(w, "Not permitted", http.StatusForbidden)
583                 return
584         }
585         h.logUploadOrDownload(r, session.arvadosclient, sessionFS, fsprefix+strings.Join(targetPath, "/"), nil, tokenUser)
586
587         writing := writeMethod[r.Method]
588         locker := h.collectionLock(collectionID, writing)
589         defer locker.Unlock()
590
591         if writing {
592                 // Save the collection only if/when all
593                 // webdav->filesystem operations succeed --
594                 // and send a 500 error if the modified
595                 // collection can't be saved.
596                 //
597                 // Perform the write in a separate sitefs, so
598                 // concurrent read operations on the same
599                 // collection see the previous saved
600                 // state. After the write succeeds and the
601                 // collection record is updated, we reset the
602                 // session so the updates are visible in
603                 // subsequent read requests.
604                 client := session.client.WithRequestID(r.Header.Get("X-Request-Id"))
605                 sessionFS = client.SiteFileSystem(session.keepclient)
606                 writingDir, err := sessionFS.OpenFile(fsprefix, os.O_RDONLY, 0)
607                 if err != nil {
608                         http.Error(w, err.Error(), http.StatusInternalServerError)
609                         return
610                 }
611                 defer writingDir.Close()
612                 w = &updateOnSuccess{
613                         ResponseWriter: w,
614                         logger:         ctxlog.FromContext(r.Context()),
615                         update: func() error {
616                                 err := writingDir.Sync()
617                                 var te arvados.TransactionError
618                                 if errors.As(err, &te) {
619                                         err = te
620                                 }
621                                 if err != nil {
622                                         return err
623                                 }
624                                 // Sync the changes to the persistent
625                                 // sessionfs for this token.
626                                 snap, err := writingDir.Snapshot()
627                                 if err != nil {
628                                         return err
629                                 }
630                                 collectionDir.Splice(snap)
631                                 return nil
632                         }}
633         } else {
634                 // When writing, we need to block session renewal
635                 // until we're finished, in order to guarantee the
636                 // effect of the write is visible in future responses.
637                 // But if we're not writing, we can release the lock
638                 // early.  This enables us to keep renewing sessions
639                 // and processing more requests even if a slow client
640                 // takes a long time to download a large file.
641                 releaseSession()
642         }
643         if r.Method == http.MethodGet {
644                 applyContentDispositionHdr(w, r, basename, attachment)
645         }
646         if webdavPrefix == "" {
647                 webdavPrefix = "/" + strings.Join(pathParts[:stripParts], "/")
648         }
649         wh := &webdav.Handler{
650                 Prefix: webdavPrefix,
651                 FileSystem: &webdavfs.FS{
652                         FileSystem:    sessionFS,
653                         Prefix:        fsprefix,
654                         Writing:       writeMethod[r.Method],
655                         AlwaysReadEOF: r.Method == "PROPFIND",
656                 },
657                 LockSystem: webdavfs.NoLockSystem,
658                 Logger: func(r *http.Request, err error) {
659                         if err != nil && !os.IsNotExist(err) {
660                                 ctxlog.FromContext(r.Context()).WithError(err).Error("error reported by webdav handler")
661                         }
662                 },
663         }
664         h.metrics.track(wh, w, r)
665         if r.Method == http.MethodGet && w.WroteStatus() == http.StatusOK {
666                 wrote := int64(w.WroteBodyBytes())
667                 fnm := strings.Join(pathParts[stripParts:], "/")
668                 fi, err := wh.FileSystem.Stat(r.Context(), fnm)
669                 if err == nil && fi.Size() != wrote {
670                         var n int
671                         f, err := wh.FileSystem.OpenFile(r.Context(), fnm, os.O_RDONLY, 0)
672                         if err == nil {
673                                 n, err = f.Read(make([]byte, 1024))
674                                 f.Close()
675                         }
676                         ctxlog.FromContext(r.Context()).Errorf("stat.Size()==%d but only wrote %d bytes; read(1024) returns %d, %v", fi.Size(), wrote, n, err)
677                 }
678         }
679 }
680
// dirListingTemplate is the HTML page template used by serveDirectory
// to render a directory listing.
//
// The template expects the following data:
//
//   - .CollectionName: used as the page title and heading.
//   - .StripParts: value for wget's --cut-dirs option in the example
//     command.
//   - .QuotedUrlForWget: URL argument for the example wget command.
//   - .Files: list of entries, each providing .IsDir, .Href, .Name,
//     and .Size. If empty, a "no files" notice is shown instead of
//     the list.
//
// It also requires a template function named "nbsp", which is
// applied to the formatted size column of each entry.
var dirListingTemplate = `<!DOCTYPE HTML>
<HTML><HEAD>
  <META name="robots" content="NOINDEX">
  <TITLE>{{ .CollectionName }}</TITLE>
  <STYLE type="text/css">
    body {
      margin: 1.5em;
    }
    pre {
      background-color: #D9EDF7;
      border-radius: .25em;
      padding: .75em;
      overflow: auto;
    }
    .footer p {
      font-size: 82%;
    }
    hr {
      border: 1px solid #808080;
    }
    ul {
      padding: 0;
    }
    ul li {
      font-family: monospace;
      list-style: none;
    }
  </STYLE>
</HEAD>
<BODY>

<H1>{{ .CollectionName }}</H1>

<P>This collection of data files is being shared with you through
Arvados.  You can download individual files listed below.  To download
the entire directory tree with <CODE>wget</CODE>, try:</P>

<PRE id="wget-example">$ wget --mirror --no-parent --no-host --cut-dirs={{ .StripParts }} {{ .QuotedUrlForWget }}</PRE>

<H2>File Listing</H2>

{{if .Files}}
<UL>
{{range .Files}}
{{if .IsDir }}
  <LI>{{" " | printf "%15s  " | nbsp}}<A class="item" href="{{ .Href }}/">{{ .Name }}/</A></LI>
{{else}}
  <LI>{{.Size | printf "%15d  " | nbsp}}<A class="item" href="{{ .Href }}">{{ .Name }}</A></LI>
{{end}}
{{end}}
</UL>
{{else}}
<P>(No files; this collection is empty.)</P>
{{end}}

<HR>
<DIV class="footer">
  <P>
    About Arvados:
    Arvados is a free and open source software bioinformatics platform.
    To learn more, visit arvados.org.
    Arvados is not responsible for the files listed on this page.
  </P>
</DIV>

</BODY>
</HTML>
`
749
// fileListEnt describes one row of an HTML directory listing.
type fileListEnt struct {
	// Name is the path shown to the user; Href is the link target
	// for that entry.
	Name, Href string
	// Size is the entry's size as reported by its FileInfo.
	Size int64
	// IsDir is true if the entry is a directory.
	IsDir bool
}
756
// relativeHref converts a filesystem path like `foo/"bar baz"` to an
// escaped (percent-encoded) relative path like
// `./foo/%22bar%20baz%22`.
//
// The result may still contain html-unsafe characters like '&'.
// Those are expected to be handled separately by the HTML templating
// engine as needed.
func relativeHref(path string) string {
	return "./" + (&url.URL{Path: path}).EscapedPath()
}
766
// makeQuotedUrlForWget returns a shell-quoted URL suitable for
// pasting to a command line ("wget ...") to repeat the given HTTP
// request.
//
// The scheme is taken from the X-Forwarded-Proto header if it is
// "http" or "https" (i.e., the protocol reported by a load balancer
// or proxy). Otherwise it is "https" if the request arrived over
// TLS, and "http" if not.
//
// The result is wrapped in single quotes. Any single quote inside
// the URL is replaced with %27 so it cannot terminate the shell
// quoting. This applies to the host part as well as the path,
// because r.Host is supplied by the client.
func makeQuotedUrlForWget(r *http.Request) string {
	scheme := r.Header.Get("X-Forwarded-Proto")
	if scheme != "http" && scheme != "https" {
		if r.TLS != nil {
			scheme = "https"
		} else {
			scheme = "http"
		}
	}
	// An escaped path may still contain single quote chars, and
	// so may the Host header.
	rawURL := fmt.Sprintf("%s://%s%s", scheme, r.Host, r.URL.EscapedPath())
	return "'" + strings.ReplaceAll(rawURL, "'", "%27") + "'"
}
784
785 func (h *handler) serveDirectory(w http.ResponseWriter, r *http.Request, collectionName string, fs http.FileSystem, base string, recurse bool) {
786         var files []fileListEnt
787         var walk func(string) error
788         if !strings.HasSuffix(base, "/") {
789                 base = base + "/"
790         }
791         walk = func(path string) error {
792                 dirname := base + path
793                 if dirname != "/" {
794                         dirname = strings.TrimSuffix(dirname, "/")
795                 }
796                 d, err := fs.Open(dirname)
797                 if err != nil {
798                         return err
799                 }
800                 ents, err := d.Readdir(-1)
801                 if err != nil {
802                         return err
803                 }
804                 for _, ent := range ents {
805                         if recurse && ent.IsDir() {
806                                 err = walk(path + ent.Name() + "/")
807                                 if err != nil {
808                                         return err
809                                 }
810                         } else {
811                                 listingName := path + ent.Name()
812                                 files = append(files, fileListEnt{
813                                         Name:  listingName,
814                                         Href:  relativeHref(listingName),
815                                         Size:  ent.Size(),
816                                         IsDir: ent.IsDir(),
817                                 })
818                         }
819                 }
820                 return nil
821         }
822         if err := walk(""); err != nil {
823                 http.Error(w, "error getting directory listing: "+err.Error(), http.StatusInternalServerError)
824                 return
825         }
826
827         funcs := template.FuncMap{
828                 "nbsp": func(s string) template.HTML {
829                         return template.HTML(strings.Replace(s, " ", "&nbsp;", -1))
830                 },
831         }
832         tmpl, err := template.New("dir").Funcs(funcs).Parse(dirListingTemplate)
833         if err != nil {
834                 http.Error(w, "error parsing template: "+err.Error(), http.StatusInternalServerError)
835                 return
836         }
837         sort.Slice(files, func(i, j int) bool {
838                 return files[i].Name < files[j].Name
839         })
840         w.WriteHeader(http.StatusOK)
841         tmpl.Execute(w, map[string]interface{}{
842                 "CollectionName":   collectionName,
843                 "Files":            files,
844                 "Request":          r,
845                 "StripParts":       strings.Count(strings.TrimRight(r.URL.Path, "/"), "/"),
846                 "QuotedUrlForWget": makeQuotedUrlForWget(r),
847         })
848 }
849
// applyContentDispositionHdr sets the Content-Disposition response
// header for a file download.
//
// The disposition type is "attachment" if isAttachment is true,
// otherwise "inline". If the request URI contains a query string, a
// filename parameter is appended. No header is set when the result
// would be a bare "inline".
func applyContentDispositionHdr(w http.ResponseWriter, r *http.Request, filename string, isAttachment bool) {
	hasQuery := strings.ContainsRune(r.RequestURI, '?')
	if !isAttachment && !hasQuery {
		// Plain "inline" is the default behavior, so there is
		// nothing to send.
		return
	}
	value := "inline"
	if isAttachment {
		value = "attachment"
	}
	if hasQuery {
		// Help the UA realize that the filename is just
		// "filename.txt", not
		// "filename.txt?disposition=attachment".
		//
		// TODO(TC): Follow advice at RFC 6266 appendix D
		value += "; filename=" + strconv.QuoteToASCII(filename)
	}
	w.Header().Set("Content-Disposition", value)
}
867
868 func (h *handler) seeOtherWithCookie(w http.ResponseWriter, r *http.Request, location string, credentialsOK bool) {
869         if formTokens, haveFormTokens := r.Form["api_token"]; haveFormTokens {
870                 if !credentialsOK {
871                         // It is not safe to copy the provided token
872                         // into a cookie unless the current vhost
873                         // (origin) serves only a single collection or
874                         // we are in TrustAllContent mode.
875                         http.Error(w, "cannot serve inline content at this URL (possible configuration error; see https://doc.arvados.org/install/install-keep-web.html#dns)", http.StatusBadRequest)
876                         return
877                 }
878
879                 // The HttpOnly flag is necessary to prevent
880                 // JavaScript code (included in, or loaded by, a page
881                 // in the collection being served) from employing the
882                 // user's token beyond reading other files in the same
883                 // domain, i.e., same collection.
884                 //
885                 // The 303 redirect is necessary in the case of a GET
886                 // request to avoid exposing the token in the Location
887                 // bar, and in the case of a POST request to avoid
888                 // raising warnings when the user refreshes the
889                 // resulting page.
890                 for _, tok := range formTokens {
891                         if tok == "" {
892                                 continue
893                         }
894                         http.SetCookie(w, &http.Cookie{
895                                 Name:     "arvados_api_token",
896                                 Value:    auth.EncodeTokenCookie([]byte(tok)),
897                                 Path:     "/",
898                                 HttpOnly: true,
899                                 SameSite: http.SameSiteLaxMode,
900                         })
901                         break
902                 }
903         }
904
905         // Propagate query parameters (except api_token) from
906         // the original request.
907         redirQuery := r.URL.Query()
908         redirQuery.Del("api_token")
909
910         u := r.URL
911         if location != "" {
912                 newu, err := u.Parse(location)
913                 if err != nil {
914                         http.Error(w, "error resolving redirect target: "+err.Error(), http.StatusInternalServerError)
915                         return
916                 }
917                 u = newu
918         }
919         redir := (&url.URL{
920                 Scheme:   r.URL.Scheme,
921                 Host:     r.Host,
922                 Path:     u.Path,
923                 RawQuery: redirQuery.Encode(),
924         }).String()
925
926         w.Header().Add("Location", redir)
927         w.WriteHeader(http.StatusSeeOther)
928         io.WriteString(w, `<A href="`)
929         io.WriteString(w, html.EscapeString(redir))
930         io.WriteString(w, `">Continue</A>`)
931 }
932
933 func (h *handler) userPermittedToUploadOrDownload(method string, tokenUser *arvados.User) bool {
934         var permitDownload bool
935         var permitUpload bool
936         if tokenUser != nil && tokenUser.IsAdmin {
937                 permitUpload = h.Cluster.Collections.WebDAVPermission.Admin.Upload
938                 permitDownload = h.Cluster.Collections.WebDAVPermission.Admin.Download
939         } else {
940                 permitUpload = h.Cluster.Collections.WebDAVPermission.User.Upload
941                 permitDownload = h.Cluster.Collections.WebDAVPermission.User.Download
942         }
943         if (method == "PUT" || method == "POST") && !permitUpload {
944                 // Disallow operations that upload new files.
945                 // Permit webdav operations that move existing files around.
946                 return false
947         } else if method == "GET" && !permitDownload {
948                 // Disallow downloading file contents.
949                 // Permit webdav operations like PROPFIND that retrieve metadata
950                 // but not file contents.
951                 return false
952         }
953         return true
954 }
955
// fileEventLog holds the details of one file upload or download
// event. All fields are strings, so values are comparable and can
// be used as map keys.
type fileEventLog struct {
	// Path of the HTTP request, and the kind of event
	// ("file_upload" or "file_download").
	requestPath, eventType string
	// User on whose behalf the request was made.
	userUUID, userFullName string
	// Collection being accessed, and the file path within it.
	collUUID, collPDH, collFilePath string
	// Address and token of the client making the request.
	clientAddr, clientToken string
}
967
968 func newFileEventLog(
969         h *handler,
970         r *http.Request,
971         filepath string,
972         collection *arvados.Collection,
973         user *arvados.User,
974         token string,
975 ) *fileEventLog {
976         var eventType string
977         switch r.Method {
978         case "POST", "PUT":
979                 eventType = "file_upload"
980         case "GET":
981                 eventType = "file_download"
982         default:
983                 return nil
984         }
985
986         // We want to log the address of the proxy closest to keep-web—the last
987         // value in the X-Forwarded-For list—or the client address if there is no
988         // valid proxy.
989         var clientAddr string
990         // 1. Build a slice of proxy addresses from X-Forwarded-For.
991         xff := strings.Join(r.Header.Values("X-Forwarded-For"), ",")
992         addrs := strings.Split(xff, ",")
993         // 2. Reverse the slice so it's in our most preferred order for logging.
994         slices.Reverse(addrs)
995         // 3. Append the client address to that slice.
996         if addr, _, err := net.SplitHostPort(r.RemoteAddr); err == nil {
997                 addrs = append(addrs, addr)
998         }
999         // 4. Use the first valid address in the slice.
1000         for _, addr := range addrs {
1001                 if ip := net.ParseIP(strings.TrimSpace(addr)); ip != nil {
1002                         clientAddr = ip.String()
1003                         break
1004                 }
1005         }
1006
1007         ev := &fileEventLog{
1008                 requestPath: r.URL.Path,
1009                 eventType:   eventType,
1010                 clientAddr:  clientAddr,
1011                 clientToken: token,
1012         }
1013
1014         if user != nil {
1015                 ev.userUUID = user.UUID
1016                 ev.userFullName = user.FullName
1017         } else {
1018                 ev.userUUID = fmt.Sprintf("%s-tpzed-anonymouspublic", h.Cluster.ClusterID)
1019         }
1020
1021         if collection != nil {
1022                 ev.collFilePath = filepath
1023                 // h.determineCollection populates the collection_uuid
1024                 // prop with the PDH, if this collection is being
1025                 // accessed via PDH. For logging, we use a different
1026                 // field depending on whether it's a UUID or PDH.
1027                 if len(collection.UUID) > 32 {
1028                         ev.collPDH = collection.UUID
1029                 } else {
1030                         ev.collPDH = collection.PortableDataHash
1031                         ev.collUUID = collection.UUID
1032                 }
1033         }
1034
1035         return ev
1036 }
1037
1038 func (ev *fileEventLog) shouldLogPDH() bool {
1039         return ev.eventType == "file_download" && ev.collPDH != ""
1040 }
1041
1042 func (ev *fileEventLog) asDict() arvadosclient.Dict {
1043         props := arvadosclient.Dict{
1044                 "reqPath":              ev.requestPath,
1045                 "collection_uuid":      ev.collUUID,
1046                 "collection_file_path": ev.collFilePath,
1047         }
1048         if ev.shouldLogPDH() {
1049                 props["portable_data_hash"] = ev.collPDH
1050         }
1051         return arvadosclient.Dict{
1052                 "object_uuid": ev.userUUID,
1053                 "event_type":  ev.eventType,
1054                 "properties":  props,
1055         }
1056 }
1057
1058 func (ev *fileEventLog) asFields() logrus.Fields {
1059         fields := logrus.Fields{
1060                 "collection_file_path": ev.collFilePath,
1061                 "collection_uuid":      ev.collUUID,
1062                 "user_uuid":            ev.userUUID,
1063         }
1064         if ev.shouldLogPDH() {
1065                 fields["portable_data_hash"] = ev.collPDH
1066         }
1067         if !strings.HasSuffix(ev.userUUID, "-tpzed-anonymouspublic") {
1068                 fields["user_full_name"] = ev.userFullName
1069         }
1070         return fields
1071 }
1072
1073 func (h *handler) shouldLogEvent(event *fileEventLog, t time.Time) bool {
1074         if event == nil {
1075                 return false
1076         } else if event.eventType != "file_download" ||
1077                 h.Cluster.Collections.WebDAVLogDownloadInterval == 0 {
1078                 return true
1079         }
1080         td := h.Cluster.Collections.WebDAVLogDownloadInterval.Duration()
1081         cutoff := t.Add(-td)
1082         ev := *event
1083         h.fileEventLogsMtx.Lock()
1084         defer h.fileEventLogsMtx.Unlock()
1085         if h.fileEventLogs == nil {
1086                 h.fileEventLogs = make(map[fileEventLog]time.Time)
1087         }
1088         shouldLog := h.fileEventLogs[ev].Before(cutoff)
1089         if shouldLog {
1090                 h.fileEventLogs[ev] = t
1091         }
1092         if t.After(h.fileEventLogsNextTidy) {
1093                 for key, logTime := range h.fileEventLogs {
1094                         if logTime.Before(cutoff) {
1095                                 delete(h.fileEventLogs, key)
1096                         }
1097                 }
1098                 h.fileEventLogsNextTidy = t.Add(td)
1099         }
1100         return shouldLog
1101 }
1102
1103 func (h *handler) logUploadOrDownload(
1104         r *http.Request,
1105         client *arvadosclient.ArvadosClient,
1106         fs arvados.CustomFileSystem,
1107         filepath string,
1108         collection *arvados.Collection,
1109         user *arvados.User,
1110 ) {
1111         if collection == nil && fs != nil {
1112                 collection, filepath = h.determineCollection(fs, filepath)
1113         }
1114         event := newFileEventLog(h, r, filepath, collection, user, client.ApiToken)
1115         if !h.shouldLogEvent(event, time.Now()) {
1116                 return
1117         }
1118         log := ctxlog.FromContext(r.Context()).WithFields(event.asFields())
1119         log.Info(strings.Replace(event.eventType, "file_", "File ", 1))
1120         if h.Cluster.Collections.WebDAVLogEvents {
1121                 go func() {
1122                         logReq := arvadosclient.Dict{"log": event.asDict()}
1123                         err := client.Create("logs", logReq, nil)
1124                         if err != nil {
1125                                 log.WithError(err).Errorf("Failed to create %s log event on API server", event.eventType)
1126                         }
1127                 }()
1128         }
1129 }
1130
1131 func (h *handler) determineCollection(fs arvados.CustomFileSystem, path string) (*arvados.Collection, string) {
1132         target := strings.TrimSuffix(path, "/")
1133         for cut := len(target); cut >= 0; cut = strings.LastIndexByte(target, '/') {
1134                 target = target[:cut]
1135                 fi, err := fs.Stat(target)
1136                 if os.IsNotExist(err) {
1137                         // creating a new file/dir, or download
1138                         // destined to fail
1139                         continue
1140                 } else if err != nil {
1141                         return nil, ""
1142                 }
1143                 switch src := fi.Sys().(type) {
1144                 case *arvados.Collection:
1145                         return src, strings.TrimPrefix(path[len(target):], "/")
1146                 case *arvados.Group:
1147                         return nil, ""
1148                 default:
1149                         if _, ok := src.(error); ok {
1150                                 return nil, ""
1151                         }
1152                 }
1153         }
1154         return nil, ""
1155 }
1156
// lockTidyInterval is the minimum time between the cleanup passes
// in which collectionLock discards locks that are not in use.
var lockTidyInterval = 10 * time.Minute
1158
1159 // Lock the specified collection for reading or writing. Caller must
1160 // call Unlock() on the returned Locker when the operation is
1161 // finished.
1162 func (h *handler) collectionLock(collectionID string, writing bool) sync.Locker {
1163         h.lockMtx.Lock()
1164         defer h.lockMtx.Unlock()
1165         if time.Since(h.lockTidied) > lockTidyInterval {
1166                 // Periodically delete all locks that aren't in use.
1167                 h.lockTidied = time.Now()
1168                 for id, locker := range h.lock {
1169                         if locker.TryLock() {
1170                                 locker.Unlock()
1171                                 delete(h.lock, id)
1172                         }
1173                 }
1174         }
1175         locker := h.lock[collectionID]
1176         if locker == nil {
1177                 locker = new(sync.RWMutex)
1178                 if h.lock == nil {
1179                         h.lock = map[string]*sync.RWMutex{}
1180                 }
1181                 h.lock[collectionID] = locker
1182         }
1183         if writing {
1184                 locker.Lock()
1185                 return locker
1186         } else {
1187                 locker.RLock()
1188                 return locker.RLocker()
1189         }
1190 }
1191
1192 func ServeCORSPreflight(w http.ResponseWriter, header http.Header) bool {
1193         method := header.Get("Access-Control-Request-Method")
1194         if method == "" {
1195                 return false
1196         }
1197         if !browserMethod[method] && !webdavMethod[method] {
1198                 w.WriteHeader(http.StatusMethodNotAllowed)
1199                 return true
1200         }
1201         w.Header().Set("Access-Control-Allow-Headers", corsAllowHeadersHeader)
1202         w.Header().Set("Access-Control-Allow-Methods", "COPY, DELETE, GET, LOCK, MKCOL, MOVE, OPTIONS, POST, PROPFIND, PROPPATCH, PUT, RMCOL, UNLOCK")
1203         w.Header().Set("Access-Control-Allow-Origin", "*")
1204         w.Header().Set("Access-Control-Max-Age", "86400")
1205         return true
1206 }