21901: Log all keep-web GET requests that request the first byte
[arvados.git] / services / keep-web / handler.go
1 // Copyright (C) The Arvados Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: AGPL-3.0
4
5 package keepweb
6
7 import (
8         "encoding/json"
9         "errors"
10         "fmt"
11         "html"
12         "html/template"
13         "io"
14         "net"
15         "net/http"
16         "net/url"
17         "os"
18         "path"
19         "slices"
20         "sort"
21         "strconv"
22         "strings"
23         "sync"
24         "time"
25
26         "git.arvados.org/arvados.git/lib/cmd"
27         "git.arvados.org/arvados.git/lib/webdavfs"
28         "git.arvados.org/arvados.git/sdk/go/arvados"
29         "git.arvados.org/arvados.git/sdk/go/arvadosclient"
30         "git.arvados.org/arvados.git/sdk/go/auth"
31         "git.arvados.org/arvados.git/sdk/go/ctxlog"
32         "git.arvados.org/arvados.git/sdk/go/httpserver"
33         "github.com/gotd/contrib/http_range"
34         "github.com/sirupsen/logrus"
35         "golang.org/x/net/webdav"
36 )
37
// handler is the keep-web request handler. Its ServeHTTP method
// implements http.Handler, and CheckHealth/Done implement
// service.Handler.
type handler struct {
	// Cache supplies per-token sessions (see Cache.GetSession in
	// ServeHTTP). Cluster is the cluster configuration consulted
	// throughout request handling. metrics wraps the final webdav
	// handler call (see metrics.track in ServeHTTP).
	Cache   cache
	Cluster *arvados.Cluster
	metrics *metrics

	// NOTE(review): presumably the per-collection locks handed
	// out by collectionLock (defined elsewhere in this package),
	// guarded by lockMtx, with lockTidied recording the last
	// cleanup time -- confirm against collectionLock.
	lockMtx    sync.Mutex
	lock       map[string]*sync.RWMutex
	lockTidied time.Time

	// NOTE(review): presumably bookkeeping for recently logged
	// file events, guarded by fileEventLogsMtx and tidied
	// periodically -- the code using these fields is not in this
	// part of the file.
	fileEventLogs         map[fileEventLog]time.Time
	fileEventLogsMtx      sync.Mutex
	fileEventLogsNextTidy time.Time

	// NOTE(review): presumably a cache of S3 secrets used by
	// serveS3, guarded by s3SecretCacheMtx and tidied
	// periodically -- confirm against the S3 code.
	s3SecretCache         map[string]*cachedS3Secret
	s3SecretCacheMtx      sync.Mutex
	s3SecretCacheNextTidy time.Time
}
55
var (
	// urlPDHDecoder rewrites " " and "-" as "+", turning a
	// URL-mangled portable data hash back into its canonical
	// form (see parseCollectionIDFromURL).
	urlPDHDecoder = strings.NewReplacer(" ", "+", "-", "+")

	// Response bodies sent with 404 and 401 responses,
	// respectively.
	notFoundMessage     = "Not Found"
	unauthorizedMessage = "401 Unauthorized\n\nA valid Arvados token must be provided to access this resource."
)
60
61 // parseCollectionIDFromURL returns a UUID or PDH if s is a UUID or a
62 // PDH (even if it is a PDH with "+" replaced by " " or "-");
63 // otherwise "".
64 func parseCollectionIDFromURL(s string) string {
65         if arvadosclient.UUIDMatch(s) {
66                 return s
67         }
68         if pdh := urlPDHDecoder.Replace(s); arvadosclient.PDHMatch(pdh) {
69                 return pdh
70         }
71         return ""
72 }
73
74 func (h *handler) serveStatus(w http.ResponseWriter, r *http.Request) {
75         json.NewEncoder(w).Encode(struct{ Version string }{cmd.Version.String()})
76 }
77
// errorWithHTTPStatus is satisfied by errors that carry their own
// HTTP status code. It is used as an errors.As target in this file
// to choose a response status, or to recognize specific failures
// (e.g., 401 and 403), instead of defaulting to 500.
type errorWithHTTPStatus interface {
	// HTTPStatus returns the HTTP status code associated with
	// the error.
	HTTPStatus() int
}
81
// updateOnSuccess wraps httpserver.ResponseWriter. If the handler
// sends an HTTP header indicating success, updateOnSuccess first
// calls the provided update func. If the update func fails, an error
// response is sent (using the error's HTTP status or 500 if none),
// and the status code and body sent by the handler are ignored (all
// response writes return the update error).
//
// Fields: logger receives a message when update fails; update is
// the func called before the first success (2xx/3xx) header is sent;
// sentHeader records whether WriteHeader has already been called;
// err holds the result of the update call, and is returned by every
// subsequent Write if non-nil.
type updateOnSuccess struct {
	httpserver.ResponseWriter
	logger     logrus.FieldLogger
	update     func() error
	sentHeader bool
	err        error
}
95
96 func (uos *updateOnSuccess) Write(p []byte) (int, error) {
97         if !uos.sentHeader {
98                 uos.WriteHeader(http.StatusOK)
99         }
100         if uos.err != nil {
101                 return 0, uos.err
102         }
103         return uos.ResponseWriter.Write(p)
104 }
105
106 func (uos *updateOnSuccess) WriteHeader(code int) {
107         if !uos.sentHeader {
108                 uos.sentHeader = true
109                 if code >= 200 && code < 400 {
110                         if uos.err = uos.update(); uos.err != nil {
111                                 code := http.StatusInternalServerError
112                                 if he := errorWithHTTPStatus(nil); errors.As(uos.err, &he) {
113                                         code = he.HTTPStatus()
114                                 }
115                                 uos.logger.WithError(uos.err).Errorf("update() returned %T error, changing response to HTTP %d", uos.err, code)
116                                 http.Error(uos.ResponseWriter, uos.err.Error(), code)
117                                 return
118                         }
119                 }
120         }
121         uos.ResponseWriter.WriteHeader(code)
122 }
123
var (
	// corsAllowHeadersHeader is a comma-separated list of request
	// header names: the general-purpose ones first, followed by
	// the WebDAV request headers.
	corsAllowHeadersHeader = strings.Join([]string{
		"Authorization",
		"Content-Type",
		"Range",
		// WebDAV request headers:
		"Depth",
		"Destination",
		"If",
		"Lock-Token",
		"Overwrite",
		"Timeout",
		"Cache-Control",
	}, ", ")

	// writeMethod is the set of request methods that modify
	// collection content.
	writeMethod = map[string]bool{
		"COPY":      true,
		"DELETE":    true,
		"LOCK":      true,
		"MKCOL":     true,
		"MOVE":      true,
		"PROPPATCH": true,
		"PUT":       true,
		"RMCOL":     true,
		"UNLOCK":    true,
	}

	// webdavMethod is the set of accepted WebDAV methods: every
	// write method, plus the read-only OPTIONS and PROPFIND.
	webdavMethod = map[string]bool{
		"COPY":      true,
		"DELETE":    true,
		"LOCK":      true,
		"MKCOL":     true,
		"MOVE":      true,
		"OPTIONS":   true,
		"PROPFIND":  true,
		"PROPPATCH": true,
		"PUT":       true,
		"RMCOL":     true,
		"UNLOCK":    true,
	}

	// browserMethod is the set of accepted methods used by web
	// browsers for page views and form submissions.
	browserMethod = map[string]bool{
		"GET":  true,
		"HEAD": true,
		"POST": true,
	}

	// siteFSDir lists the top-level path components that are
	// served from the site filesystem rather than from a single
	// collection.
	siteFSDir = map[string]bool{
		"":      true, // root directory
		"by_id": true,
		"users": true,
	}
)
166
// stripDefaultPort returns host in lower case, with a trailing ":80"
// or ":443" port removed, so that vhosts can be compared without
// regard to case or an explicit default port.
//
// Ports 80 and 443 are treated as interchangeable regardless of the
// request scheme, i.e., "example.com:80" and "example.com:443" are
// considered the same vhost.
//
// Only the port suffix is removed: a bracketed IPv6 literal keeps
// its brackets, so "[::1]:443" and "[::1]" both normalize to "[::1]"
// and compare equal.
func stripDefaultPort(host string) string {
	u := &url.URL{Host: host}
	if p := u.Port(); p == "80" || p == "443" {
		// Trim the suffix rather than using u.Hostname(),
		// which would also strip the square brackets from an
		// IPv6 literal.
		host = strings.TrimSuffix(host, ":"+p)
	}
	return strings.ToLower(host)
}
176
// CheckHealth implements service.Handler.
//
// It always returns nil: no health checks are performed here.
func (h *handler) CheckHealth() error {
	return nil
}
181
// Done implements service.Handler.
//
// It returns a nil channel. A receive from a nil channel blocks
// forever, so the handler never signals that it is done.
func (h *handler) Done() <-chan struct{} {
	return nil
}
186
187 // ServeHTTP implements http.Handler.
188 func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
189         if xfp := r.Header.Get("X-Forwarded-Proto"); xfp != "" && xfp != "http" {
190                 r.URL.Scheme = xfp
191         }
192
193         wbuffer := newWriteBuffer(wOrig, int(h.Cluster.Collections.WebDAVOutputBuffer))
194         defer wbuffer.Close()
195         w := httpserver.WrapResponseWriter(responseWriter{
196                 Writer:         wbuffer,
197                 ResponseWriter: wOrig,
198         })
199
200         if r.Method == "OPTIONS" && ServeCORSPreflight(w, r.Header) {
201                 return
202         }
203
204         if !browserMethod[r.Method] && !webdavMethod[r.Method] {
205                 w.WriteHeader(http.StatusMethodNotAllowed)
206                 return
207         }
208
209         if r.Header.Get("Origin") != "" {
210                 // Allow simple cross-origin requests without user
211                 // credentials ("user credentials" as defined by CORS,
212                 // i.e., cookies, HTTP authentication, and client-side
213                 // SSL certificates. See
214                 // http://www.w3.org/TR/cors/#user-credentials).
215                 w.Header().Set("Access-Control-Allow-Origin", "*")
216                 w.Header().Set("Access-Control-Expose-Headers", "Content-Range")
217         }
218
219         if h.serveS3(w, r) {
220                 return
221         }
222
223         webdavPrefix := ""
224         arvPath := r.URL.Path
225         if prefix := r.Header.Get("X-Webdav-Prefix"); prefix != "" {
226                 // Enable a proxy (e.g., container log handler in
227                 // controller) to satisfy a request for path
228                 // "/foo/bar/baz.txt" using content from
229                 // "//abc123-4.internal/bar/baz.txt", by adding a
230                 // request header "X-Webdav-Prefix: /foo"
231                 if !strings.HasPrefix(arvPath, prefix) {
232                         http.Error(w, "X-Webdav-Prefix header is not a prefix of the requested path", http.StatusBadRequest)
233                         return
234                 }
235                 arvPath = r.URL.Path[len(prefix):]
236                 if arvPath == "" {
237                         arvPath = "/"
238                 }
239                 w.Header().Set("Vary", "X-Webdav-Prefix, "+w.Header().Get("Vary"))
240                 webdavPrefix = prefix
241         }
242         pathParts := strings.Split(arvPath[1:], "/")
243
244         var stripParts int
245         var collectionID string
246         var tokens []string
247         var reqTokens []string
248         var pathToken bool
249         var attachment bool
250         var useSiteFS bool
251         credentialsOK := h.Cluster.Collections.TrustAllContent
252         reasonNotAcceptingCredentials := ""
253
254         if r.Host != "" && stripDefaultPort(r.Host) == stripDefaultPort(h.Cluster.Services.WebDAVDownload.ExternalURL.Host) {
255                 credentialsOK = true
256                 attachment = true
257         } else if r.FormValue("disposition") == "attachment" {
258                 attachment = true
259         }
260
261         if !credentialsOK {
262                 reasonNotAcceptingCredentials = fmt.Sprintf("vhost %q does not specify a single collection ID or match Services.WebDAVDownload.ExternalURL %q, and Collections.TrustAllContent is false",
263                         r.Host, h.Cluster.Services.WebDAVDownload.ExternalURL)
264         }
265
266         if collectionID = arvados.CollectionIDFromDNSName(r.Host); collectionID != "" {
267                 // http://ID.collections.example/PATH...
268                 credentialsOK = true
269         } else if r.URL.Path == "/status.json" {
270                 h.serveStatus(w, r)
271                 return
272         } else if siteFSDir[pathParts[0]] {
273                 useSiteFS = true
274         } else if len(pathParts) >= 1 && strings.HasPrefix(pathParts[0], "c=") {
275                 // /c=ID[/PATH...]
276                 collectionID = parseCollectionIDFromURL(pathParts[0][2:])
277                 stripParts = 1
278         } else if len(pathParts) >= 2 && pathParts[0] == "collections" {
279                 if len(pathParts) >= 4 && pathParts[1] == "download" {
280                         // /collections/download/ID/TOKEN/PATH...
281                         collectionID = parseCollectionIDFromURL(pathParts[2])
282                         tokens = []string{pathParts[3]}
283                         stripParts = 4
284                         pathToken = true
285                 } else {
286                         // /collections/ID/PATH...
287                         collectionID = parseCollectionIDFromURL(pathParts[1])
288                         stripParts = 2
289                         // This path is only meant to work for public
290                         // data. Tokens provided with the request are
291                         // ignored.
292                         credentialsOK = false
293                         reasonNotAcceptingCredentials = "the '/collections/UUID/PATH' form only works for public data"
294                 }
295         }
296
297         forceReload := false
298         if cc := r.Header.Get("Cache-Control"); strings.Contains(cc, "no-cache") || strings.Contains(cc, "must-revalidate") {
299                 forceReload = true
300         }
301
302         if credentialsOK {
303                 reqTokens = auth.CredentialsFromRequest(r).Tokens
304         }
305
306         r.ParseForm()
307         origin := r.Header.Get("Origin")
308         cors := origin != "" && !strings.HasSuffix(origin, "://"+r.Host)
309         safeAjax := cors && (r.Method == http.MethodGet || r.Method == http.MethodHead)
310         // Important distinction: safeAttachment checks whether api_token exists
311         // as a query parameter. haveFormTokens checks whether api_token exists
312         // as request form data *or* a query parameter. Different checks are
313         // necessary because both the request disposition and the location of
314         // the API token affect whether or not the request needs to be
315         // redirected. The different branch comments below explain further.
316         safeAttachment := attachment && !r.URL.Query().Has("api_token")
317         if formTokens, haveFormTokens := r.Form["api_token"]; !haveFormTokens {
318                 // No token to use or redact.
319         } else if safeAjax || safeAttachment {
320                 // If this is a cross-origin request, the URL won't
321                 // appear in the browser's address bar, so
322                 // substituting a clipboard-safe URL is pointless.
323                 // Redirect-with-cookie wouldn't work anyway, because
324                 // it's not safe to allow third-party use of our
325                 // cookie.
326                 //
327                 // If we're supplying an attachment, we don't need to
328                 // convert POST to GET to avoid the "really resubmit
329                 // form?" problem, so provided the token isn't
330                 // embedded in the URL, there's no reason to do
331                 // redirect-with-cookie in this case either.
332                 for _, tok := range formTokens {
333                         reqTokens = append(reqTokens, tok)
334                 }
335         } else if browserMethod[r.Method] {
336                 // If this is a page view, and the client provided a
337                 // token via query string or POST body, we must put
338                 // the token in an HttpOnly cookie, and redirect to an
339                 // equivalent URL with the query param redacted and
340                 // method = GET.
341                 h.seeOtherWithCookie(w, r, "", credentialsOK)
342                 return
343         }
344
345         targetPath := pathParts[stripParts:]
346         if tokens == nil && len(targetPath) > 0 && strings.HasPrefix(targetPath[0], "t=") {
347                 // http://ID.example/t=TOKEN/PATH...
348                 // /c=ID/t=TOKEN/PATH...
349                 //
350                 // This form must only be used to pass scoped tokens
351                 // that give permission for a single collection. See
352                 // FormValue case above.
353                 tokens = []string{targetPath[0][2:]}
354                 pathToken = true
355                 targetPath = targetPath[1:]
356                 stripParts++
357         }
358
359         fsprefix := ""
360         if useSiteFS {
361                 if writeMethod[r.Method] {
362                         http.Error(w, webdavfs.ErrReadOnly.Error(), http.StatusMethodNotAllowed)
363                         return
364                 }
365                 if len(reqTokens) == 0 {
366                         w.Header().Add("WWW-Authenticate", "Basic realm=\"collections\"")
367                         http.Error(w, unauthorizedMessage, http.StatusUnauthorized)
368                         return
369                 }
370                 tokens = reqTokens
371         } else if collectionID == "" {
372                 http.Error(w, notFoundMessage, http.StatusNotFound)
373                 return
374         } else {
375                 fsprefix = "by_id/" + collectionID + "/"
376         }
377
378         if src := r.Header.Get("X-Webdav-Source"); strings.HasPrefix(src, "/") && !strings.Contains(src, "//") && !strings.Contains(src, "/../") {
379                 fsprefix += src[1:]
380         }
381
382         if tokens == nil {
383                 tokens = reqTokens
384                 if h.Cluster.Users.AnonymousUserToken != "" {
385                         tokens = append(tokens, h.Cluster.Users.AnonymousUserToken)
386                 }
387         }
388
389         if len(targetPath) > 0 && targetPath[0] == "_" {
390                 // If a collection has a directory called "t=foo" or
391                 // "_", it can be served at
392                 // //collections.example/_/t=foo/ or
393                 // //collections.example/_/_/ respectively:
394                 // //collections.example/t=foo/ won't work because
395                 // t=foo will be interpreted as a token "foo".
396                 targetPath = targetPath[1:]
397                 stripParts++
398         }
399
400         dirOpenMode := os.O_RDONLY
401         if writeMethod[r.Method] {
402                 dirOpenMode = os.O_RDWR
403         }
404
405         var tokenValid bool
406         var tokenScopeProblem bool
407         var token string
408         var tokenUser *arvados.User
409         var sessionFS arvados.CustomFileSystem
410         var session *cachedSession
411         var collectionDir arvados.File
412         for _, token = range tokens {
413                 var statusErr errorWithHTTPStatus
414                 fs, sess, user, err := h.Cache.GetSession(token)
415                 if errors.As(err, &statusErr) && statusErr.HTTPStatus() == http.StatusUnauthorized {
416                         // bad token
417                         continue
418                 } else if err != nil {
419                         http.Error(w, "cache error: "+err.Error(), http.StatusInternalServerError)
420                         return
421                 }
422                 if token != h.Cluster.Users.AnonymousUserToken {
423                         tokenValid = true
424                 }
425                 f, err := fs.OpenFile(fsprefix, dirOpenMode, 0)
426                 if errors.As(err, &statusErr) &&
427                         statusErr.HTTPStatus() == http.StatusForbidden &&
428                         token != h.Cluster.Users.AnonymousUserToken {
429                         // collection id is outside scope of supplied
430                         // token
431                         tokenScopeProblem = true
432                         sess.Release()
433                         continue
434                 } else if os.IsNotExist(err) {
435                         // collection does not exist or is not
436                         // readable using this token
437                         sess.Release()
438                         continue
439                 } else if err != nil {
440                         http.Error(w, err.Error(), http.StatusInternalServerError)
441                         sess.Release()
442                         return
443                 }
444                 defer f.Close()
445
446                 collectionDir, sessionFS, session, tokenUser = f, fs, sess, user
447                 break
448         }
449
450         // releaseSession() is equivalent to session.Release() except
451         // that it's a no-op if (1) session is nil, or (2) it has
452         // already been called.
453         //
454         // This way, we can do a defer call here to ensure it gets
455         // called in all code paths, and also call it inline (see
456         // below) in the cases where we want to release the lock
457         // before returning.
458         releaseSession := func() {}
459         if session != nil {
460                 var releaseSessionOnce sync.Once
461                 releaseSession = func() { releaseSessionOnce.Do(func() { session.Release() }) }
462         }
463         defer releaseSession()
464
465         if forceReload && collectionDir != nil {
466                 err := collectionDir.Sync()
467                 if err != nil {
468                         if he := errorWithHTTPStatus(nil); errors.As(err, &he) {
469                                 http.Error(w, err.Error(), he.HTTPStatus())
470                         } else {
471                                 http.Error(w, err.Error(), http.StatusInternalServerError)
472                         }
473                         return
474                 }
475         }
476         if session == nil {
477                 if pathToken {
478                         // The URL is a "secret sharing link" that
479                         // didn't work out.  Asking the client for
480                         // additional credentials would just be
481                         // confusing.
482                         http.Error(w, notFoundMessage, http.StatusNotFound)
483                         return
484                 }
485                 if tokenValid {
486                         // The client provided valid token(s), but the
487                         // collection was not found.
488                         http.Error(w, notFoundMessage, http.StatusNotFound)
489                         return
490                 }
491                 if tokenScopeProblem {
492                         // The client provided a valid token but
493                         // fetching a collection returned 401, which
494                         // means the token scope doesn't permit
495                         // fetching that collection.
496                         http.Error(w, notFoundMessage, http.StatusForbidden)
497                         return
498                 }
499                 // The client's token was invalid (e.g., expired), or
500                 // the client didn't even provide one.  Redirect to
501                 // workbench2's login-and-redirect-to-download url if
502                 // this is a browser navigation request. (The redirect
503                 // flow can't preserve the original method if it's not
504                 // GET, and doesn't make sense if the UA is a
505                 // command-line tool, is trying to load an inline
506                 // image, etc.; in these cases, there's nothing we can
507                 // do, so return 401 unauthorized.)
508                 //
509                 // Note Sec-Fetch-Mode is sent by all non-EOL
510                 // browsers, except Safari.
511                 // https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Sec-Fetch-Mode
512                 //
513                 // TODO(TC): This response would be confusing to
514                 // someone trying (anonymously) to download public
515                 // data that has been deleted.  Allow a referrer to
516                 // provide this context somehow?
517                 if r.Method == http.MethodGet && r.Header.Get("Sec-Fetch-Mode") == "navigate" {
518                         target := url.URL(h.Cluster.Services.Workbench2.ExternalURL)
519                         redirkey := "redirectToPreview"
520                         if attachment {
521                                 redirkey = "redirectToDownload"
522                         }
523                         callback := "/c=" + collectionID + "/" + strings.Join(targetPath, "/")
524                         query := url.Values{redirkey: {callback}}
525                         queryString := query.Encode()
526                         // Note: Encode (and QueryEscape function) turns space
527                         // into plus sign (+) rather than %20 (the plus sign
528                         // becomes %2B); that is the rule for web forms data
529                         // sent in URL query part via GET, but we're not
530                         // emulating forms here. Client JS APIs
531                         // (URLSearchParam#get, decodeURIComponent) will
532                         // decode %20, but while the former also expects the
533                         // form-specific encoding, the latter doesn't.
534                         // Encode() almost encodes everything; RFC3986 sec. 3.4
535                         // says "it is sometimes better for usability" to not
536                         // encode / and ? when passing URI reference in query.
537                         // This is also legal according to WHATWG URL spec and
538                         // can be desirable for debugging webapp.
539                         // We can let slash / appear in the encoded query, and
540                         // equality-sign = too, but exempting ? is not very
541                         // useful.
542                         // Plus-sign, hash, and ampersand are never exempt.
543                         r := strings.NewReplacer("+", "%20", "%2F", "/", "%3D", "=")
544                         target.RawQuery = r.Replace(queryString)
545                         w.Header().Add("Location", target.String())
546                         w.WriteHeader(http.StatusSeeOther)
547                         return
548                 }
549                 if !credentialsOK {
550                         http.Error(w, fmt.Sprintf("Authorization tokens are not accepted here: %v, and no anonymous user token is configured.", reasonNotAcceptingCredentials), http.StatusUnauthorized)
551                         return
552                 }
553                 // If none of the above cases apply, suggest the
554                 // user-agent (which is either a non-browser agent
555                 // like wget, or a browser that can't redirect through
556                 // a login flow) prompt the user for credentials.
557                 w.Header().Add("WWW-Authenticate", "Basic realm=\"collections\"")
558                 http.Error(w, unauthorizedMessage, http.StatusUnauthorized)
559                 return
560         }
561
562         if r.Method == http.MethodGet || r.Method == http.MethodHead {
563                 targetfnm := fsprefix + strings.Join(pathParts[stripParts:], "/")
564                 if fi, err := sessionFS.Stat(targetfnm); err == nil && fi.IsDir() {
565                         releaseSession() // because we won't be writing anything
566                         if !strings.HasSuffix(r.URL.Path, "/") {
567                                 h.seeOtherWithCookie(w, r, r.URL.Path+"/", credentialsOK)
568                         } else {
569                                 h.serveDirectory(w, r, fi.Name(), sessionFS, targetfnm, !useSiteFS)
570                         }
571                         return
572                 }
573         }
574
575         var basename string
576         if len(targetPath) > 0 {
577                 basename = targetPath[len(targetPath)-1]
578         }
579         if arvadosclient.PDHMatch(collectionID) && writeMethod[r.Method] {
580                 http.Error(w, webdavfs.ErrReadOnly.Error(), http.StatusMethodNotAllowed)
581                 return
582         }
583         if !h.userPermittedToUploadOrDownload(r.Method, tokenUser) {
584                 http.Error(w, "Not permitted", http.StatusForbidden)
585                 return
586         }
587         h.logUploadOrDownload(r, session.arvadosclient, sessionFS, fsprefix+strings.Join(targetPath, "/"), nil, tokenUser)
588
589         writing := writeMethod[r.Method]
590         locker := h.collectionLock(collectionID, writing)
591         defer locker.Unlock()
592
593         if writing {
594                 // Save the collection only if/when all
595                 // webdav->filesystem operations succeed --
596                 // and send a 500 error if the modified
597                 // collection can't be saved.
598                 //
599                 // Perform the write in a separate sitefs, so
600                 // concurrent read operations on the same
601                 // collection see the previous saved
602                 // state. After the write succeeds and the
603                 // collection record is updated, we reset the
604                 // session so the updates are visible in
605                 // subsequent read requests.
606                 client := session.client.WithRequestID(r.Header.Get("X-Request-Id"))
607                 sessionFS = client.SiteFileSystem(session.keepclient)
608                 writingDir, err := sessionFS.OpenFile(fsprefix, os.O_RDONLY, 0)
609                 if err != nil {
610                         http.Error(w, err.Error(), http.StatusInternalServerError)
611                         return
612                 }
613                 defer writingDir.Close()
614                 w = &updateOnSuccess{
615                         ResponseWriter: w,
616                         logger:         ctxlog.FromContext(r.Context()),
617                         update: func() error {
618                                 err := writingDir.Sync()
619                                 var te arvados.TransactionError
620                                 if errors.As(err, &te) {
621                                         err = te
622                                 }
623                                 if err != nil {
624                                         return err
625                                 }
626                                 // Sync the changes to the persistent
627                                 // sessionfs for this token.
628                                 snap, err := writingDir.Snapshot()
629                                 if err != nil {
630                                         return err
631                                 }
632                                 collectionDir.Splice(snap)
633                                 return nil
634                         }}
635         } else {
636                 // When writing, we need to block session renewal
637                 // until we're finished, in order to guarantee the
638                 // effect of the write is visible in future responses.
639                 // But if we're not writing, we can release the lock
640                 // early.  This enables us to keep renewing sessions
641                 // and processing more requests even if a slow client
642                 // takes a long time to download a large file.
643                 releaseSession()
644         }
645         if r.Method == http.MethodGet {
646                 applyContentDispositionHdr(w, r, basename, attachment)
647         }
648         if webdavPrefix == "" {
649                 webdavPrefix = "/" + strings.Join(pathParts[:stripParts], "/")
650         }
651         wh := &webdav.Handler{
652                 Prefix: webdavPrefix,
653                 FileSystem: &webdavfs.FS{
654                         FileSystem:    sessionFS,
655                         Prefix:        fsprefix,
656                         Writing:       writeMethod[r.Method],
657                         AlwaysReadEOF: r.Method == "PROPFIND",
658                 },
659                 LockSystem: webdavfs.NoLockSystem,
660                 Logger: func(r *http.Request, err error) {
661                         if err != nil && !os.IsNotExist(err) {
662                                 ctxlog.FromContext(r.Context()).WithError(err).Error("error reported by webdav handler")
663                         }
664                 },
665         }
666         h.metrics.track(wh, w, r)
667         if r.Method == http.MethodGet && w.WroteStatus() == http.StatusOK {
668                 wrote := int64(w.WroteBodyBytes())
669                 fnm := strings.Join(pathParts[stripParts:], "/")
670                 fi, err := wh.FileSystem.Stat(r.Context(), fnm)
671                 if err == nil && fi.Size() != wrote {
672                         var n int
673                         f, err := wh.FileSystem.OpenFile(r.Context(), fnm, os.O_RDONLY, 0)
674                         if err == nil {
675                                 n, err = f.Read(make([]byte, 1024))
676                                 f.Close()
677                         }
678                         ctxlog.FromContext(r.Context()).Errorf("stat.Size()==%d but only wrote %d bytes; read(1024) returns %d, %v", fi.Size(), wrote, n, err)
679                 }
680         }
681 }
682
// dirListingTemplate is the html/template source for the directory
// listing page rendered by serveDirectory.
//
// It expects the following template data: CollectionName (page title
// and heading), Files (a list of entries with Name, Href, Size and
// IsDir), StripParts (the value for wget's --cut-dirs option) and
// QuotedUrlForWget (a shell-quoted URL for the wget example). It also
// requires a template function named "nbsp", which is used to keep
// the padding in the size column from being collapsed.
var dirListingTemplate = `<!DOCTYPE HTML>
<HTML><HEAD>
  <META name="robots" content="NOINDEX">
  <TITLE>{{ .CollectionName }}</TITLE>
  <STYLE type="text/css">
    body {
      margin: 1.5em;
    }
    pre {
      background-color: #D9EDF7;
      border-radius: .25em;
      padding: .75em;
      overflow: auto;
    }
    .footer p {
      font-size: 82%;
    }
    hr {
      border: 1px solid #808080;
    }
    ul {
      padding: 0;
    }
    ul li {
      font-family: monospace;
      list-style: none;
    }
  </STYLE>
</HEAD>
<BODY>

<H1>{{ .CollectionName }}</H1>

<P>This collection of data files is being shared with you through
Arvados.  You can download individual files listed below.  To download
the entire directory tree with <CODE>wget</CODE>, try:</P>

<PRE id="wget-example">$ wget --mirror --no-parent --no-host --cut-dirs={{ .StripParts }} {{ .QuotedUrlForWget }}</PRE>

<H2>File Listing</H2>

{{if .Files}}
<UL>
{{range .Files}}
{{if .IsDir }}
  <LI>{{" " | printf "%15s  " | nbsp}}<A class="item" href="{{ .Href }}/">{{ .Name }}/</A></LI>
{{else}}
  <LI>{{.Size | printf "%15d  " | nbsp}}<A class="item" href="{{ .Href }}">{{ .Name }}</A></LI>
{{end}}
{{end}}
</UL>
{{else}}
<P>(No files; this collection is empty.)</P>
{{end}}

<HR>
<DIV class="footer">
  <P>
    About Arvados:
    Arvados is a free and open source software bioinformatics platform.
    To learn more, visit arvados.org.
    Arvados is not responsible for the files listed on this page.
  </P>
</DIV>

</BODY>
</HTML>
`
751
// fileListEnt describes one entry (file or directory) shown in the
// HTML directory listing.
type fileListEnt struct {
	// Name is the path displayed to the user; Href is the
	// percent-encoded relative link target for the same entry.
	Name, Href string
	// Size is the size reported by the entry's FileInfo.
	Size int64
	// IsDir is true if the entry is a directory.
	IsDir bool
}
758
// relativeHref converts a slash-separated filesystem path such as
// `foo/"bar baz"` into a percent-encoded relative URL reference such
// as `./foo/%22bar%20baz%22`.
//
// Only URL escaping is applied: the result may still contain
// characters that are unsafe in HTML, like '&'. Those are expected to
// be escaped separately by the HTML templating engine.
func relativeHref(path string) string {
	escaped := (&url.URL{Path: path}).EscapedPath()
	return "./" + escaped
}
768
// makeQuotedUrlForWget returns a single-quoted URL that can be pasted
// into a shell command line ("wget ...") to repeat the request r.
//
// The scheme is taken from the X-Forwarded-Proto header when it is
// "http" or "https" (i.e., as reported by a load balancer or proxy);
// otherwise it is "https" if the request arrived over TLS and "http"
// if not.
func makeQuotedUrlForWget(r *http.Request) string {
	var scheme string
	switch forwarded := r.Header.Get("X-Forwarded-Proto"); {
	case forwarded == "http" || forwarded == "https":
		scheme = forwarded
	case r.TLS != nil:
		scheme = "https"
	default:
		scheme = "http"
	}
	// Even an escaped path can contain single quote characters,
	// which would terminate our shell quoting early, so encode
	// them as %27.
	escapedPath := strings.ReplaceAll(r.URL.EscapedPath(), "'", "%27")
	return "'" + scheme + "://" + r.Host + escapedPath + "'"
}
786
787 func (h *handler) serveDirectory(w http.ResponseWriter, r *http.Request, collectionName string, fs http.FileSystem, base string, recurse bool) {
788         var files []fileListEnt
789         var walk func(string) error
790         if !strings.HasSuffix(base, "/") {
791                 base = base + "/"
792         }
793         walk = func(path string) error {
794                 dirname := base + path
795                 if dirname != "/" {
796                         dirname = strings.TrimSuffix(dirname, "/")
797                 }
798                 d, err := fs.Open(dirname)
799                 if err != nil {
800                         return err
801                 }
802                 ents, err := d.Readdir(-1)
803                 if err != nil {
804                         return err
805                 }
806                 for _, ent := range ents {
807                         if recurse && ent.IsDir() {
808                                 err = walk(path + ent.Name() + "/")
809                                 if err != nil {
810                                         return err
811                                 }
812                         } else {
813                                 listingName := path + ent.Name()
814                                 files = append(files, fileListEnt{
815                                         Name:  listingName,
816                                         Href:  relativeHref(listingName),
817                                         Size:  ent.Size(),
818                                         IsDir: ent.IsDir(),
819                                 })
820                         }
821                 }
822                 return nil
823         }
824         if err := walk(""); err != nil {
825                 http.Error(w, "error getting directory listing: "+err.Error(), http.StatusInternalServerError)
826                 return
827         }
828
829         funcs := template.FuncMap{
830                 "nbsp": func(s string) template.HTML {
831                         return template.HTML(strings.Replace(s, " ", "&nbsp;", -1))
832                 },
833         }
834         tmpl, err := template.New("dir").Funcs(funcs).Parse(dirListingTemplate)
835         if err != nil {
836                 http.Error(w, "error parsing template: "+err.Error(), http.StatusInternalServerError)
837                 return
838         }
839         sort.Slice(files, func(i, j int) bool {
840                 return files[i].Name < files[j].Name
841         })
842         w.WriteHeader(http.StatusOK)
843         tmpl.Execute(w, map[string]interface{}{
844                 "CollectionName":   collectionName,
845                 "Files":            files,
846                 "Request":          r,
847                 "StripParts":       strings.Count(strings.TrimRight(r.URL.Path, "/"), "/"),
848                 "QuotedUrlForWget": makeQuotedUrlForWget(r),
849         })
850 }
851
// applyContentDispositionHdr sets the Content-Disposition response
// header on w when one is needed.
//
// The disposition is "attachment" if isAttachment is true, otherwise
// "inline". If the request URI contains a query string, a filename
// parameter is appended so the user agent understands that the
// filename is just "filename.txt", not
// "filename.txt?disposition=attachment". No header is set in the
// plain inline case (not an attachment, no query string).
func applyContentDispositionHdr(w http.ResponseWriter, r *http.Request, filename string, isAttachment bool) {
	hasQuery := strings.ContainsRune(r.RequestURI, '?')
	if !isAttachment && !hasQuery {
		return
	}
	value := "inline"
	if isAttachment {
		value = "attachment"
	}
	if hasQuery {
		// TODO(TC): Follow advice at RFC 6266 appendix D
		value += "; filename=" + strconv.QuoteToASCII(filename)
	}
	w.Header().Set("Content-Disposition", value)
}
869
870 func (h *handler) seeOtherWithCookie(w http.ResponseWriter, r *http.Request, location string, credentialsOK bool) {
871         if formTokens, haveFormTokens := r.Form["api_token"]; haveFormTokens {
872                 if !credentialsOK {
873                         // It is not safe to copy the provided token
874                         // into a cookie unless the current vhost
875                         // (origin) serves only a single collection or
876                         // we are in TrustAllContent mode.
877                         http.Error(w, "cannot serve inline content at this URL (possible configuration error; see https://doc.arvados.org/install/install-keep-web.html#dns)", http.StatusBadRequest)
878                         return
879                 }
880
881                 // The HttpOnly flag is necessary to prevent
882                 // JavaScript code (included in, or loaded by, a page
883                 // in the collection being served) from employing the
884                 // user's token beyond reading other files in the same
885                 // domain, i.e., same collection.
886                 //
887                 // The 303 redirect is necessary in the case of a GET
888                 // request to avoid exposing the token in the Location
889                 // bar, and in the case of a POST request to avoid
890                 // raising warnings when the user refreshes the
891                 // resulting page.
892                 for _, tok := range formTokens {
893                         if tok == "" {
894                                 continue
895                         }
896                         http.SetCookie(w, &http.Cookie{
897                                 Name:     "arvados_api_token",
898                                 Value:    auth.EncodeTokenCookie([]byte(tok)),
899                                 Path:     "/",
900                                 HttpOnly: true,
901                                 SameSite: http.SameSiteLaxMode,
902                         })
903                         break
904                 }
905         }
906
907         // Propagate query parameters (except api_token) from
908         // the original request.
909         redirQuery := r.URL.Query()
910         redirQuery.Del("api_token")
911
912         u := r.URL
913         if location != "" {
914                 newu, err := u.Parse(location)
915                 if err != nil {
916                         http.Error(w, "error resolving redirect target: "+err.Error(), http.StatusInternalServerError)
917                         return
918                 }
919                 u = newu
920         }
921         redir := (&url.URL{
922                 Scheme:   r.URL.Scheme,
923                 Host:     r.Host,
924                 Path:     u.Path,
925                 RawQuery: redirQuery.Encode(),
926         }).String()
927
928         w.Header().Add("Location", redir)
929         w.WriteHeader(http.StatusSeeOther)
930         io.WriteString(w, `<A href="`)
931         io.WriteString(w, html.EscapeString(redir))
932         io.WriteString(w, `">Continue</A>`)
933 }
934
935 func (h *handler) userPermittedToUploadOrDownload(method string, tokenUser *arvados.User) bool {
936         var permitDownload bool
937         var permitUpload bool
938         if tokenUser != nil && tokenUser.IsAdmin {
939                 permitUpload = h.Cluster.Collections.WebDAVPermission.Admin.Upload
940                 permitDownload = h.Cluster.Collections.WebDAVPermission.Admin.Download
941         } else {
942                 permitUpload = h.Cluster.Collections.WebDAVPermission.User.Upload
943                 permitDownload = h.Cluster.Collections.WebDAVPermission.User.Download
944         }
945         if (method == "PUT" || method == "POST") && !permitUpload {
946                 // Disallow operations that upload new files.
947                 // Permit webdav operations that move existing files around.
948                 return false
949         } else if method == "GET" && !permitDownload {
950                 // Disallow downloading file contents.
951                 // Permit webdav operations like PROPFIND that retrieve metadata
952                 // but not file contents.
953                 return false
954         }
955         return true
956 }
957
// fileEventLog holds the details of a single file upload or download
// event. It contains only string fields, so values are comparable and
// can be used as map keys (shouldLogEvent relies on this to remember
// when an identical event was last logged).
type fileEventLog struct {
	// requestPath is the URL path of the request; eventType is
	// "file_upload" or "file_download".
	requestPath, eventType string
	// userUUID and userFullName identify the requesting user.
	userUUID, userFullName string
	// collUUID, collPDH and collFilePath identify the collection
	// (by UUID and/or portable data hash) and the file within it.
	collUUID, collPDH, collFilePath string
	// clientAddr is the address chosen for the client; clientToken
	// is the token that accompanied the request.
	clientAddr, clientToken string
}
969
970 func newFileEventLog(
971         h *handler,
972         r *http.Request,
973         filepath string,
974         collection *arvados.Collection,
975         user *arvados.User,
976         token string,
977 ) *fileEventLog {
978         var eventType string
979         switch r.Method {
980         case "POST", "PUT":
981                 eventType = "file_upload"
982         case "GET":
983                 eventType = "file_download"
984         default:
985                 return nil
986         }
987
988         // We want to log the address of the proxy closest to keep-web—the last
989         // value in the X-Forwarded-For list—or the client address if there is no
990         // valid proxy.
991         var clientAddr string
992         // 1. Build a slice of proxy addresses from X-Forwarded-For.
993         xff := strings.Join(r.Header.Values("X-Forwarded-For"), ",")
994         addrs := strings.Split(xff, ",")
995         // 2. Reverse the slice so it's in our most preferred order for logging.
996         slices.Reverse(addrs)
997         // 3. Append the client address to that slice.
998         if addr, _, err := net.SplitHostPort(r.RemoteAddr); err == nil {
999                 addrs = append(addrs, addr)
1000         }
1001         // 4. Use the first valid address in the slice.
1002         for _, addr := range addrs {
1003                 if ip := net.ParseIP(strings.TrimSpace(addr)); ip != nil {
1004                         clientAddr = ip.String()
1005                         break
1006                 }
1007         }
1008
1009         ev := &fileEventLog{
1010                 requestPath: r.URL.Path,
1011                 eventType:   eventType,
1012                 clientAddr:  clientAddr,
1013                 clientToken: token,
1014         }
1015
1016         if user != nil {
1017                 ev.userUUID = user.UUID
1018                 ev.userFullName = user.FullName
1019         } else {
1020                 ev.userUUID = fmt.Sprintf("%s-tpzed-anonymouspublic", h.Cluster.ClusterID)
1021         }
1022
1023         if collection != nil {
1024                 ev.collFilePath = filepath
1025                 // h.determineCollection populates the collection_uuid
1026                 // prop with the PDH, if this collection is being
1027                 // accessed via PDH. For logging, we use a different
1028                 // field depending on whether it's a UUID or PDH.
1029                 if len(collection.UUID) > 32 {
1030                         ev.collPDH = collection.UUID
1031                 } else {
1032                         ev.collPDH = collection.PortableDataHash
1033                         ev.collUUID = collection.UUID
1034                 }
1035         }
1036
1037         return ev
1038 }
1039
1040 func (ev *fileEventLog) shouldLogPDH() bool {
1041         return ev.eventType == "file_download" && ev.collPDH != ""
1042 }
1043
1044 func (ev *fileEventLog) asDict() arvadosclient.Dict {
1045         props := arvadosclient.Dict{
1046                 "reqPath":              ev.requestPath,
1047                 "collection_uuid":      ev.collUUID,
1048                 "collection_file_path": ev.collFilePath,
1049         }
1050         if ev.shouldLogPDH() {
1051                 props["portable_data_hash"] = ev.collPDH
1052         }
1053         return arvadosclient.Dict{
1054                 "object_uuid": ev.userUUID,
1055                 "event_type":  ev.eventType,
1056                 "properties":  props,
1057         }
1058 }
1059
1060 func (ev *fileEventLog) asFields() logrus.Fields {
1061         fields := logrus.Fields{
1062                 "collection_file_path": ev.collFilePath,
1063                 "collection_uuid":      ev.collUUID,
1064                 "user_uuid":            ev.userUUID,
1065         }
1066         if ev.shouldLogPDH() {
1067                 fields["portable_data_hash"] = ev.collPDH
1068         }
1069         if !strings.HasSuffix(ev.userUUID, "-tpzed-anonymouspublic") {
1070                 fields["user_full_name"] = ev.userFullName
1071         }
1072         return fields
1073 }
1074
1075 func (h *handler) shouldLogEvent(
1076         event *fileEventLog,
1077         req *http.Request,
1078         fileInfo os.FileInfo,
1079         t time.Time,
1080 ) bool {
1081         if event == nil {
1082                 return false
1083         } else if event.eventType != "file_download" ||
1084                 h.Cluster.Collections.WebDAVLogDownloadInterval == 0 ||
1085                 fileInfo == nil {
1086                 return true
1087         }
1088         td := h.Cluster.Collections.WebDAVLogDownloadInterval.Duration()
1089         cutoff := t.Add(-td)
1090         ev := *event
1091         h.fileEventLogsMtx.Lock()
1092         defer h.fileEventLogsMtx.Unlock()
1093         if h.fileEventLogs == nil {
1094                 h.fileEventLogs = make(map[fileEventLog]time.Time)
1095         }
1096         shouldLog := h.fileEventLogs[ev].Before(cutoff)
1097         if !shouldLog {
1098                 // Go's http fs server evaluates http.Request.Header.Get("Range")
1099                 // (as of Go 1.22) so we should do the same.
1100                 // Don't worry about merging multiple headers, etc.
1101                 ranges, err := http_range.ParseRange(req.Header.Get("Range"), fileInfo.Size())
1102                 if ranges == nil || err != nil {
1103                         // The Range header was either empty or malformed.
1104                         // Err on the side of logging.
1105                         shouldLog = true
1106                 } else {
1107                         // Log this request only if it requested the first byte
1108                         // (our heuristic for "starting a new download").
1109                         for _, reqRange := range ranges {
1110                                 if reqRange.Start == 0 {
1111                                         shouldLog = true
1112                                         break
1113                                 }
1114                         }
1115                 }
1116         }
1117         if shouldLog {
1118                 h.fileEventLogs[ev] = t
1119         }
1120         if t.After(h.fileEventLogsNextTidy) {
1121                 for key, logTime := range h.fileEventLogs {
1122                         if logTime.Before(cutoff) {
1123                                 delete(h.fileEventLogs, key)
1124                         }
1125                 }
1126                 h.fileEventLogsNextTidy = t.Add(td)
1127         }
1128         return shouldLog
1129 }
1130
1131 func (h *handler) logUploadOrDownload(
1132         r *http.Request,
1133         client *arvadosclient.ArvadosClient,
1134         fs arvados.CustomFileSystem,
1135         filepath string,
1136         collection *arvados.Collection,
1137         user *arvados.User,
1138 ) {
1139         var fileInfo os.FileInfo
1140         if fs != nil {
1141                 if collection == nil {
1142                         collection, filepath = h.determineCollection(fs, filepath)
1143                 }
1144                 if collection != nil {
1145                         // It's okay to ignore this error because shouldLogEvent will
1146                         // always return true if fileInfo == nil.
1147                         fileInfo, _ = fs.Stat(path.Join("by_id", collection.UUID, filepath))
1148                 }
1149         }
1150         event := newFileEventLog(h, r, filepath, collection, user, client.ApiToken)
1151         if !h.shouldLogEvent(event, r, fileInfo, time.Now()) {
1152                 return
1153         }
1154         log := ctxlog.FromContext(r.Context()).WithFields(event.asFields())
1155         log.Info(strings.Replace(event.eventType, "file_", "File ", 1))
1156         if h.Cluster.Collections.WebDAVLogEvents {
1157                 go func() {
1158                         logReq := arvadosclient.Dict{"log": event.asDict()}
1159                         err := client.Create("logs", logReq, nil)
1160                         if err != nil {
1161                                 log.WithError(err).Errorf("Failed to create %s log event on API server", event.eventType)
1162                         }
1163                 }()
1164         }
1165 }
1166
1167 func (h *handler) determineCollection(fs arvados.CustomFileSystem, path string) (*arvados.Collection, string) {
1168         target := strings.TrimSuffix(path, "/")
1169         for cut := len(target); cut >= 0; cut = strings.LastIndexByte(target, '/') {
1170                 target = target[:cut]
1171                 fi, err := fs.Stat(target)
1172                 if os.IsNotExist(err) {
1173                         // creating a new file/dir, or download
1174                         // destined to fail
1175                         continue
1176                 } else if err != nil {
1177                         return nil, ""
1178                 }
1179                 switch src := fi.Sys().(type) {
1180                 case *arvados.Collection:
1181                         return src, strings.TrimPrefix(path[len(target):], "/")
1182                 case *arvados.Group:
1183                         return nil, ""
1184                 default:
1185                         if _, ok := src.(error); ok {
1186                                 return nil, ""
1187                         }
1188                 }
1189         }
1190         return nil, ""
1191 }
1192
// lockTidyInterval is the minimum time between the sweeps in which
// collectionLock deletes locks that aren't in use.
var lockTidyInterval = 10 * time.Minute
1194
1195 // Lock the specified collection for reading or writing. Caller must
1196 // call Unlock() on the returned Locker when the operation is
1197 // finished.
1198 func (h *handler) collectionLock(collectionID string, writing bool) sync.Locker {
1199         h.lockMtx.Lock()
1200         defer h.lockMtx.Unlock()
1201         if time.Since(h.lockTidied) > lockTidyInterval {
1202                 // Periodically delete all locks that aren't in use.
1203                 h.lockTidied = time.Now()
1204                 for id, locker := range h.lock {
1205                         if locker.TryLock() {
1206                                 locker.Unlock()
1207                                 delete(h.lock, id)
1208                         }
1209                 }
1210         }
1211         locker := h.lock[collectionID]
1212         if locker == nil {
1213                 locker = new(sync.RWMutex)
1214                 if h.lock == nil {
1215                         h.lock = map[string]*sync.RWMutex{}
1216                 }
1217                 h.lock[collectionID] = locker
1218         }
1219         if writing {
1220                 locker.Lock()
1221                 return locker
1222         } else {
1223                 locker.RLock()
1224                 return locker.RLocker()
1225         }
1226 }
1227
1228 func ServeCORSPreflight(w http.ResponseWriter, header http.Header) bool {
1229         method := header.Get("Access-Control-Request-Method")
1230         if method == "" {
1231                 return false
1232         }
1233         if !browserMethod[method] && !webdavMethod[method] {
1234                 w.WriteHeader(http.StatusMethodNotAllowed)
1235                 return true
1236         }
1237         w.Header().Set("Access-Control-Allow-Headers", corsAllowHeadersHeader)
1238         w.Header().Set("Access-Control-Allow-Methods", "COPY, DELETE, GET, LOCK, MKCOL, MOVE, OPTIONS, POST, PROPFIND, PROPPATCH, PUT, RMCOL, UNLOCK")
1239         w.Header().Set("Access-Control-Allow-Origin", "*")
1240         w.Header().Set("Access-Control-Max-Age", "86400")
1241         return true
1242 }