]> git.arvados.org - arvados.git/blob - services/keep-web/handler.go
Merge branch '22581-api-service-support' refs #22581
[arvados.git] / services / keep-web / handler.go
1 // Copyright (C) The Arvados Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: AGPL-3.0
4
5 package keepweb
6
7 import (
8         "context"
9         "encoding/json"
10         "errors"
11         "fmt"
12         "html"
13         "html/template"
14         "io"
15         "net"
16         "net/http"
17         "net/url"
18         "os"
19         "path"
20         "slices"
21         "sort"
22         "strconv"
23         "strings"
24         "sync"
25         "time"
26
27         "git.arvados.org/arvados.git/lib/cmd"
28         "git.arvados.org/arvados.git/lib/ctrlctx"
29         "git.arvados.org/arvados.git/lib/webdavfs"
30         "git.arvados.org/arvados.git/sdk/go/arvados"
31         "git.arvados.org/arvados.git/sdk/go/arvadosclient"
32         "git.arvados.org/arvados.git/sdk/go/auth"
33         "git.arvados.org/arvados.git/sdk/go/ctxlog"
34         "git.arvados.org/arvados.git/sdk/go/httpserver"
35         "github.com/gotd/contrib/http_range"
36         "github.com/sirupsen/logrus"
37         "golang.org/x/net/webdav"
38 )
39
// handler is the keep-web request handler. It implements http.Handler
// (ServeHTTP) and service.Handler (CheckHealth, Done).
type handler struct {
	// Cache provides per-token sessions (see Cache.GetSession in
	// ServeHTTP).
	Cache   cache
	// Cluster is the cluster configuration consulted while handling
	// requests (e.g., Collections, Services, Users, PostgreSQL).
	Cluster *arvados.Cluster
	// NOTE(review): metrics is not used in the visible part of this
	// file -- confirm where it is populated and read.
	metrics *metrics

	// NOTE(review): presumably fileEventLogs remembers recently
	// logged file events, guarded by fileEventLogsMtx and pruned
	// after fileEventLogsNextTidy -- confirm against the code that
	// uses these fields.
	fileEventLogs         map[fileEventLog]time.Time
	fileEventLogsMtx      sync.Mutex
	fileEventLogsNextTidy time.Time

	// NOTE(review): presumably s3SecretCache caches S3 secrets,
	// guarded by s3SecretCacheMtx and pruned after
	// s3SecretCacheNextTidy -- confirm against the S3 code.
	s3SecretCache         map[string]*cachedS3Secret
	s3SecretCacheMtx      sync.Mutex
	s3SecretCacheNextTidy time.Time

	// dbConnector is created on first use by getDBConnector, which
	// holds dbConnectorMtx while checking and assigning it.
	dbConnector    *ctrlctx.DBConnector
	dbConnectorMtx sync.Mutex

	// NOTE(review): repacking is not used in the visible part of
	// this file -- confirm its key/value types at the point of use.
	repacking sync.Map
}
58
// urlPDHDecoder maps " " and "-" to "+", restoring a portable data
// hash whose "+" was replaced by one of those characters in a URL
// (see parseCollectionIDFromURL).
var urlPDHDecoder = strings.NewReplacer(" ", "+", "-", "+")

// notFoundMessage is the response body ServeHTTP sends when the
// requested collection cannot be served with the supplied tokens.
var notFoundMessage = "Not Found"

// unauthorizedMessage is the response body ServeHTTP sends with a 401
// status when the client needs to supply credentials.
var unauthorizedMessage = "401 Unauthorized\n\nA valid Arvados token must be provided to access this resource."
63
64 // parseCollectionIDFromURL returns a UUID or PDH if s is a UUID or a
65 // PDH (even if it is a PDH with "+" replaced by " " or "-");
66 // otherwise "".
67 func parseCollectionIDFromURL(s string) string {
68         if arvadosclient.UUIDMatch(s) {
69                 return s
70         }
71         if pdh := urlPDHDecoder.Replace(s); arvadosclient.PDHMatch(pdh) {
72                 return pdh
73         }
74         return ""
75 }
76
77 func (h *handler) serveStatus(w http.ResponseWriter, r *http.Request) {
78         json.NewEncoder(w).Encode(struct{ Version string }{cmd.Version.String()})
79 }
80
// errorWithHTTPStatus is satisfied by errors that know which HTTP
// status code they correspond to. Code in this file uses it with
// errors.As to choose a response status for an error, and to
// recognize 401/403 errors while trying tokens.
type errorWithHTTPStatus interface {
	// HTTPStatus returns the HTTP status code for the error.
	HTTPStatus() int
}
84
// updateOnSuccess wraps httpserver.ResponseWriter. If the handler
// sends an HTTP header indicating success, updateOnSuccess first
// calls the provided update func. If the update func fails, an error
// response is sent (using the error's HTTP status or 500 if none),
// and the status code and body sent by the handler are ignored (all
// response writes return the update error).
type updateOnSuccess struct {
	// ResponseWriter is the wrapped writer that ultimately
	// receives the response.
	httpserver.ResponseWriter
	// logger receives an error-level entry when update fails.
	logger     logrus.FieldLogger
	// update is called on the first WriteHeader whose status code
	// is in the range 200-399.
	update     func() error
	// sentHeader is set by the first WriteHeader call (explicit,
	// or implied by Write).
	sentHeader bool
	// err is the result of update. While non-nil, Write returns it
	// instead of writing.
	err        error
}
98
99 func (uos *updateOnSuccess) Write(p []byte) (int, error) {
100         if !uos.sentHeader {
101                 uos.WriteHeader(http.StatusOK)
102         }
103         if uos.err != nil {
104                 return 0, uos.err
105         }
106         return uos.ResponseWriter.Write(p)
107 }
108
109 func (uos *updateOnSuccess) WriteHeader(code int) {
110         if !uos.sentHeader {
111                 uos.sentHeader = true
112                 if code >= 200 && code < 400 {
113                         if uos.err = uos.update(); uos.err != nil {
114                                 code := http.StatusInternalServerError
115                                 if he := errorWithHTTPStatus(nil); errors.As(uos.err, &he) {
116                                         code = he.HTTPStatus()
117                                 }
118                                 uos.logger.WithError(uos.err).Errorf("update() returned %T error, changing response to HTTP %d", uos.err, code)
119                                 http.Error(uos.ResponseWriter, uos.err.Error(), code)
120                                 return
121                         }
122                 }
123         }
124         uos.ResponseWriter.WriteHeader(code)
125 }
126
// corsAllowHeadersHeader is the comma-separated list of request
// header names built from the entries below.
var corsAllowHeadersHeader = strings.Join([]string{
	// General request headers:
	"Authorization",
	"Content-Type",
	"Range",
	// WebDAV request headers:
	"Depth",
	"Destination",
	"If",
	"Lock-Token",
	"Overwrite",
	"Timeout",
	"Cache-Control",
}, ", ")

// writeMethod is the set of request methods treated as write
// operations.
var writeMethod = map[string]bool{
	"COPY":      true,
	"DELETE":    true,
	"LOCK":      true,
	"MKCOL":     true,
	"MOVE":      true,
	"PROPPATCH": true,
	"PUT":       true,
	"UNLOCK":    true,
}

// webdavMethod is the set of request methods accepted as WebDAV
// requests.
var webdavMethod = map[string]bool{
	"COPY":      true,
	"DELETE":    true,
	"LOCK":      true,
	"MKCOL":     true,
	"MOVE":      true,
	"OPTIONS":   true,
	"PROPFIND":  true,
	"PROPPATCH": true,
	"PUT":       true,
	"RMCOL":     true,
	"UNLOCK":    true,
}

// browserMethod is the set of request methods accepted as ordinary
// browser requests.
var browserMethod = map[string]bool{
	"GET":  true,
	"HEAD": true,
	"POST": true,
}

// siteFSDir is the set of top-level directory names that are served
// with siteFS. The empty name stands for the root directory.
var siteFSDir = map[string]bool{
	"":      true,
	"by_id": true,
	"users": true,
}
168
// stripDefaultPort returns host in lower case, with a trailing ":80"
// or ":443" port removed, so that host strings naming the same vhost
// compare equal whether or not a default port was given.
//
// Brackets around an IPv6 literal are always preserved, so
// "[::1]:443" and "[::1]" both yield "[::1]".
func stripDefaultPort(host string) string {
	// Will consider port 80 and port 443 to be the same vhost.  I think that's fine.
	u := &url.URL{Host: host}
	if p := u.Port(); p == "80" || p == "443" {
		// Trim the port textually instead of using u.Hostname():
		// Hostname() also strips IPv6 brackets, which would make
		// the result differ from the no-port form of the same
		// address.
		host = strings.TrimSuffix(host, ":"+p)
	}
	return strings.ToLower(host)
}
178
// CheckHealth implements service.Handler. It performs no checks and
// always returns nil.
func (h *handler) CheckHealth() error {
	return nil
}
183
// Done implements service.Handler. It returns a nil channel; a
// receive from a nil channel blocks forever, so a caller waiting on
// it is never signaled.
func (h *handler) Done() <-chan struct{} {
	return nil
}
188
189 func (h *handler) getDBConnector() *ctrlctx.DBConnector {
190         h.dbConnectorMtx.Lock()
191         defer h.dbConnectorMtx.Unlock()
192         if h.dbConnector == nil {
193                 h.dbConnector = &ctrlctx.DBConnector{PostgreSQL: h.Cluster.PostgreSQL}
194         }
195         return h.dbConnector
196 }
197
198 // ServeHTTP implements http.Handler.
199 func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
200         if xfp := r.Header.Get("X-Forwarded-Proto"); xfp != "" && xfp != "http" {
201                 r.URL.Scheme = xfp
202         }
203
204         httpserver.SetResponseLogFields(r.Context(), logrus.Fields{
205                 "webdavDepth":       r.Header.Get("Depth"),
206                 "webdavDestination": r.Header.Get("Destination"),
207                 "webdavOverwrite":   r.Header.Get("Overwrite"),
208         })
209
210         wbuffer := newWriteBuffer(wOrig, int(h.Cluster.Collections.WebDAVOutputBuffer))
211         defer wbuffer.Close()
212         w := httpserver.WrapResponseWriter(responseWriter{
213                 Writer:         wbuffer,
214                 ResponseWriter: wOrig,
215         })
216
217         if r.Method == "OPTIONS" && ServeCORSPreflight(w, r.Header) {
218                 return
219         }
220
221         if !browserMethod[r.Method] && !webdavMethod[r.Method] {
222                 w.WriteHeader(http.StatusMethodNotAllowed)
223                 return
224         }
225
226         if r.Header.Get("Origin") != "" {
227                 // Allow simple cross-origin requests without user
228                 // credentials ("user credentials" as defined by CORS,
229                 // i.e., cookies, HTTP authentication, and client-side
230                 // SSL certificates. See
231                 // http://www.w3.org/TR/cors/#user-credentials).
232                 w.Header().Set("Access-Control-Allow-Origin", "*")
233                 w.Header().Set("Access-Control-Expose-Headers", "Content-Range")
234         }
235
236         if h.serveS3(w, r) {
237                 return
238         }
239
240         // webdavPrefix is the leading portion of r.URL.Path that
241         // should be ignored by the webdav handler, if any.
242         //
243         // req "/c={id}/..." -> webdavPrefix "/c={id}"
244         // req "/by_id/..." -> webdavPrefix ""
245         //
246         // Note: in the code immediately below, we set webdavPrefix
247         // only if it was explicitly set by the client. Otherwise, it
248         // gets set later, after checking the request path for cases
249         // like "/c={id}/...".
250         webdavPrefix := ""
251         arvPath := r.URL.Path
252         if prefix := r.Header.Get("X-Webdav-Prefix"); prefix != "" {
253                 // Enable a proxy (e.g., container log handler in
254                 // controller) to satisfy a request for path
255                 // "/foo/bar/baz.txt" using content from
256                 // "//abc123-4.internal/bar/baz.txt", by adding a
257                 // request header "X-Webdav-Prefix: /foo"
258                 if !strings.HasPrefix(arvPath, prefix) {
259                         http.Error(w, "X-Webdav-Prefix header is not a prefix of the requested path", http.StatusBadRequest)
260                         return
261                 }
262                 arvPath = r.URL.Path[len(prefix):]
263                 if arvPath == "" {
264                         arvPath = "/"
265                 }
266                 w.Header().Set("Vary", "X-Webdav-Prefix, "+w.Header().Get("Vary"))
267                 webdavPrefix = prefix
268         }
269         pathParts := strings.Split(arvPath[1:], "/")
270
271         var stripParts int
272         var collectionID string
273         var tokens []string
274         var reqTokens []string
275         var pathToken bool
276         var attachment bool
277         var useSiteFS bool
278         credentialsOK := h.Cluster.Collections.TrustAllContent
279         reasonNotAcceptingCredentials := ""
280
281         if r.Host != "" && stripDefaultPort(r.Host) == stripDefaultPort(h.Cluster.Services.WebDAVDownload.ExternalURL.Host) {
282                 credentialsOK = true
283                 attachment = true
284         } else if r.FormValue("disposition") == "attachment" {
285                 attachment = true
286         }
287
288         if !credentialsOK {
289                 reasonNotAcceptingCredentials = fmt.Sprintf("vhost %q does not specify a single collection ID or match Services.WebDAVDownload.ExternalURL %q, and Collections.TrustAllContent is false",
290                         r.Host, h.Cluster.Services.WebDAVDownload.ExternalURL)
291         }
292
293         if collectionID = arvados.CollectionIDFromDNSName(r.Host); collectionID != "" {
294                 // http://ID.collections.example/PATH...
295                 credentialsOK = true
296         } else if r.URL.Path == "/status.json" {
297                 h.serveStatus(w, r)
298                 return
299         } else if siteFSDir[pathParts[0]] {
300                 useSiteFS = true
301         } else if len(pathParts) >= 1 && strings.HasPrefix(pathParts[0], "c=") {
302                 // /c=ID[/PATH...]
303                 collectionID = parseCollectionIDFromURL(pathParts[0][2:])
304                 stripParts = 1
305         } else if len(pathParts) >= 2 && pathParts[0] == "collections" {
306                 if len(pathParts) >= 4 && pathParts[1] == "download" {
307                         // /collections/download/ID/TOKEN/PATH...
308                         collectionID = parseCollectionIDFromURL(pathParts[2])
309                         tokens = []string{pathParts[3]}
310                         stripParts = 4
311                         pathToken = true
312                 } else {
313                         // /collections/ID/PATH...
314                         collectionID = parseCollectionIDFromURL(pathParts[1])
315                         stripParts = 2
316                         // This path is only meant to work for public
317                         // data. Tokens provided with the request are
318                         // ignored.
319                         credentialsOK = false
320                         reasonNotAcceptingCredentials = "the '/collections/UUID/PATH' form only works for public data"
321                 }
322         }
323
324         forceReload := false
325         if cc := r.Header.Get("Cache-Control"); strings.Contains(cc, "no-cache") || strings.Contains(cc, "must-revalidate") {
326                 forceReload = true
327         }
328
329         if credentialsOK {
330                 reqTokens = auth.CredentialsFromRequest(r).Tokens
331         }
332
333         r.ParseForm()
334         origin := r.Header.Get("Origin")
335         cors := origin != "" && !strings.HasSuffix(origin, "://"+r.Host)
336         safeAjax := cors && (r.Method == http.MethodGet || r.Method == http.MethodHead)
337         // Important distinction: safeAttachment checks whether api_token exists
338         // as a query parameter. haveFormTokens checks whether api_token exists
339         // as request form data *or* a query parameter. Different checks are
340         // necessary because both the request disposition and the location of
341         // the API token affect whether or not the request needs to be
342         // redirected. The different branch comments below explain further.
343         safeAttachment := attachment && !r.URL.Query().Has("api_token")
344         if formTokens, haveFormTokens := r.Form["api_token"]; !haveFormTokens {
345                 // No token to use or redact.
346         } else if safeAjax || safeAttachment {
347                 // If this is a cross-origin request, the URL won't
348                 // appear in the browser's address bar, so
349                 // substituting a clipboard-safe URL is pointless.
350                 // Redirect-with-cookie wouldn't work anyway, because
351                 // it's not safe to allow third-party use of our
352                 // cookie.
353                 //
354                 // If we're supplying an attachment, we don't need to
355                 // convert POST to GET to avoid the "really resubmit
356                 // form?" problem, so provided the token isn't
357                 // embedded in the URL, there's no reason to do
358                 // redirect-with-cookie in this case either.
359                 for _, tok := range formTokens {
360                         reqTokens = append(reqTokens, tok)
361                 }
362         } else if browserMethod[r.Method] {
363                 // If this is a page view, and the client provided a
364                 // token via query string or POST body, we must put
365                 // the token in an HttpOnly cookie, and redirect to an
366                 // equivalent URL with the query param redacted and
367                 // method = GET.
368                 h.seeOtherWithCookie(w, r, "", credentialsOK)
369                 return
370         }
371
372         targetPath := pathParts[stripParts:]
373         if tokens == nil && len(targetPath) > 0 && strings.HasPrefix(targetPath[0], "t=") {
374                 // http://ID.example/t=TOKEN/PATH...
375                 // /c=ID/t=TOKEN/PATH...
376                 //
377                 // This form must only be used to pass scoped tokens
378                 // that give permission for a single collection. See
379                 // FormValue case above.
380                 tokens = []string{targetPath[0][2:]}
381                 pathToken = true
382                 targetPath = targetPath[1:]
383                 stripParts++
384         }
385
386         // fsprefix is the path from sitefs root to the sitefs
387         // directory (implicitly or explicitly) indicated by the
388         // leading / in the request path.
389         //
390         // Request "/by_id/..." -> fsprefix ""
391         // Request "/c={id}/..." -> fsprefix "/by_id/{id}/"
392         fsprefix := ""
393         if useSiteFS {
394                 if writeMethod[r.Method] {
395                         http.Error(w, webdavfs.ErrReadOnly.Error(), http.StatusMethodNotAllowed)
396                         return
397                 }
398                 if len(reqTokens) == 0 {
399                         w.Header().Add("WWW-Authenticate", "Basic realm=\"collections\"")
400                         http.Error(w, unauthorizedMessage, http.StatusUnauthorized)
401                         return
402                 }
403                 tokens = reqTokens
404         } else if collectionID == "" {
405                 http.Error(w, notFoundMessage, http.StatusNotFound)
406                 return
407         } else {
408                 fsprefix = "by_id/" + collectionID + "/"
409         }
410
411         if src := r.Header.Get("X-Webdav-Source"); strings.HasPrefix(src, "/") && !strings.Contains(src, "//") && !strings.Contains(src, "/../") {
412                 // Clients (specifically, the container log gateway)
413                 // use X-Webdav-Source to specify that although the
414                 // request path (and other webdav fields in the
415                 // request) refer to target "/abc", the intended
416                 // target is actually
417                 // "{x-webdav-source-value}/abc".
418                 //
419                 // This, combined with X-Webdav-Prefix, enables the
420                 // container log gateway to effectively alter the
421                 // target path when proxying a request, without
422                 // needing to rewrite all the other webdav
423                 // request/response fields that might mention the
424                 // target path.
425                 fsprefix += src[1:]
426         }
427
428         if tokens == nil {
429                 tokens = reqTokens
430                 if h.Cluster.Users.AnonymousUserToken != "" {
431                         tokens = append(tokens, h.Cluster.Users.AnonymousUserToken)
432                 }
433         }
434
435         if len(targetPath) > 0 && targetPath[0] == "_" {
436                 // If a collection has a directory called "t=foo" or
437                 // "_", it can be served at
438                 // //collections.example/_/t=foo/ or
439                 // //collections.example/_/_/ respectively:
440                 // //collections.example/t=foo/ won't work because
441                 // t=foo will be interpreted as a token "foo".
442                 targetPath = targetPath[1:]
443                 stripParts++
444         }
445
446         dirOpenMode := os.O_RDONLY
447         if writeMethod[r.Method] {
448                 dirOpenMode = os.O_RDWR
449         }
450
451         var tokenValid bool
452         var tokenScopeProblem bool
453         var token string
454         var tokenUser *arvados.User
455         var sessionFS arvados.CustomFileSystem
456         var targetFS arvados.FileSystem
457         var session *cachedSession
458         var collectionDir arvados.File
459         for _, token = range tokens {
460                 var statusErr errorWithHTTPStatus
461                 fs, sess, user, err := h.Cache.GetSession(token)
462                 if errors.As(err, &statusErr) && statusErr.HTTPStatus() == http.StatusUnauthorized {
463                         // bad token
464                         continue
465                 } else if err != nil {
466                         http.Error(w, "cache error: "+err.Error(), http.StatusInternalServerError)
467                         return
468                 }
469                 if token != h.Cluster.Users.AnonymousUserToken {
470                         tokenValid = true
471                 }
472                 f, err := fs.OpenFile(fsprefix, dirOpenMode, 0)
473                 if errors.As(err, &statusErr) &&
474                         statusErr.HTTPStatus() == http.StatusForbidden &&
475                         token != h.Cluster.Users.AnonymousUserToken {
476                         // collection id is outside scope of supplied
477                         // token
478                         tokenScopeProblem = true
479                         sess.Release()
480                         continue
481                 } else if os.IsNotExist(err) {
482                         // collection does not exist or is not
483                         // readable using this token
484                         sess.Release()
485                         continue
486                 } else if err != nil {
487                         http.Error(w, err.Error(), http.StatusInternalServerError)
488                         sess.Release()
489                         return
490                 }
491                 defer f.Close()
492
493                 collectionDir, sessionFS, session, tokenUser = f, fs, sess, user
494                 break
495         }
496
497         if session == nil {
498                 if pathToken {
499                         // The URL is a "secret sharing link" that
500                         // didn't work out.  Asking the client for
501                         // additional credentials would just be
502                         // confusing.
503                         http.Error(w, notFoundMessage, http.StatusNotFound)
504                         return
505                 }
506                 if tokenValid {
507                         // The client provided valid token(s), but the
508                         // collection was not found.
509                         http.Error(w, notFoundMessage, http.StatusNotFound)
510                         return
511                 }
512                 if tokenScopeProblem {
513                         // The client provided a valid token but
514                         // fetching a collection returned 403, which
515                         // means the token scope doesn't permit
516                         // fetching that collection.
517                         http.Error(w, notFoundMessage, http.StatusForbidden)
518                         return
519                 }
520                 // The client's token was invalid (e.g., expired), or
521                 // the client didn't even provide one.  Redirect to
522                 // workbench2's login-and-redirect-to-download url if
523                 // this is a browser navigation request. (The redirect
524                 // flow can't preserve the original method if it's not
525                 // GET, and doesn't make sense if the UA is a
526                 // command-line tool, is trying to load an inline
527                 // image, etc.; in these cases, there's nothing we can
528                 // do, so return 401 unauthorized.)
529                 //
530                 // Note Sec-Fetch-Mode is sent by all non-EOL
531                 // browsers, except Safari.
532                 // https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Sec-Fetch-Mode
533                 //
534                 // TODO(TC): This response would be confusing to
535                 // someone trying (anonymously) to download public
536                 // data that has been deleted.  Allow a referrer to
537                 // provide this context somehow?
538                 if r.Method == http.MethodGet && r.Header.Get("Sec-Fetch-Mode") == "navigate" {
539                         target := url.URL(h.Cluster.Services.Workbench2.ExternalURL)
540                         redirkey := "redirectToPreview"
541                         if attachment {
542                                 redirkey = "redirectToDownload"
543                         }
544                         callback := "/c=" + collectionID + "/" + strings.Join(targetPath, "/")
545                         query := url.Values{redirkey: {callback}}
546                         queryString := query.Encode()
547                         // Note: Encode (and QueryEscape function) turns space
548                         // into plus sign (+) rather than %20 (the plus sign
549                         // becomes %2B); that is the rule for web forms data
550                         // sent in URL query part via GET, but we're not
551                         // emulating forms here. Client JS APIs
552                         // (URLSearchParam#get, decodeURIComponent) will
553                         // decode %20, but while the former also expects the
554                         // form-specific encoding, the latter doesn't.
555                         // Encode() almost encodes everything; RFC 3986 3.4
556                         // says "it is sometimes better for usability" to not
557                         // encode / and ? when passing URI reference in query.
558                         // This is also legal according to WHATWG URL spec and
559                         // can be desirable for debugging webapp.
560                         // We can let slash / appear in the encoded query, and
561                         // equality-sign = too, but exempting ? is not very
562                         // useful.
563                         // Plus-sign, hash, and ampersand are never exempt.
564                         r := strings.NewReplacer("+", "%20", "%2F", "/", "%3D", "=")
565                         target.RawQuery = r.Replace(queryString)
566                         w.Header().Add("Location", target.String())
567                         w.WriteHeader(http.StatusSeeOther)
568                         return
569                 }
570                 if !credentialsOK {
571                         http.Error(w, fmt.Sprintf("Authorization tokens are not accepted here: %v, and no anonymous user token is configured.", reasonNotAcceptingCredentials), http.StatusUnauthorized)
572                         return
573                 }
574                 // If none of the above cases apply, suggest the
575                 // user-agent (which is either a non-browser agent
576                 // like wget, or a browser that can't redirect through
577                 // a login flow) prompt the user for credentials.
578                 w.Header().Add("WWW-Authenticate", "Basic realm=\"collections\"")
579                 http.Error(w, unauthorizedMessage, http.StatusUnauthorized)
580                 return
581         }
582
583         // The first call to releaseSession() calls session.Release(),
584         // then subsequent calls are no-ops.  This lets us use a defer
585         // call here to ensure it gets called in all code paths, and
586         // also call it inline (see below) in the cases where we want
587         // to release the lock before returning.
588         var releaseSessionOnce sync.Once
589         releaseSession := func() { releaseSessionOnce.Do(func() { session.Release() }) }
590         defer releaseSession()
591
592         colltarget := strings.Join(targetPath, "/")
593         colltarget = strings.TrimSuffix(colltarget, "/")
594         fstarget := fsprefix + colltarget
595         if !forceReload {
596                 need, err := h.needSync(r.Context(), sessionFS, fstarget)
597                 if err != nil {
598                         http.Error(w, err.Error(), http.StatusBadGateway)
599                         return
600                 }
601                 forceReload = need
602         }
603         if forceReload {
604                 err := collectionDir.Sync()
605                 if err != nil {
606                         if he := errorWithHTTPStatus(nil); errors.As(err, &he) {
607                                 http.Error(w, err.Error(), he.HTTPStatus())
608                         } else {
609                                 http.Error(w, err.Error(), http.StatusInternalServerError)
610                         }
611                         return
612                 }
613         }
614
615         if r.Method == http.MethodGet || r.Method == http.MethodHead {
616                 if fi, err := sessionFS.Stat(fstarget); err == nil && fi.IsDir() {
617                         releaseSession() // because we won't be writing anything
618                         if !strings.HasSuffix(r.URL.Path, "/") {
619                                 h.seeOtherWithCookie(w, r, r.URL.Path+"/", credentialsOK)
620                         } else {
621                                 h.serveDirectory(w, r, fi.Name(), sessionFS, fstarget, !useSiteFS)
622                         }
623                         return
624                 }
625         }
626
627         var basename string
628         if len(targetPath) > 0 {
629                 basename = targetPath[len(targetPath)-1]
630         }
631         if arvadosclient.PDHMatch(collectionID) && writeMethod[r.Method] {
632                 http.Error(w, webdavfs.ErrReadOnly.Error(), http.StatusMethodNotAllowed)
633                 return
634         }
635         if !h.userPermittedToUploadOrDownload(r.Method, tokenUser) {
636                 http.Error(w, "Not permitted", http.StatusForbidden)
637                 return
638         }
639         h.logUploadOrDownload(r, session.arvadosclient, sessionFS, fstarget, nil, tokenUser)
640
641         if webdavPrefix == "" && stripParts > 0 {
642                 webdavPrefix = "/" + strings.Join(pathParts[:stripParts], "/")
643         }
644
645         writing := writeMethod[r.Method]
646         if writing {
647                 // We implement write operations by writing to a
648                 // temporary collection, then applying the change to
649                 // the real collection using the replace_files option
650                 // in a collection update request.  This lets us do
651                 // the slow part (i.e., receive the file data from the
652                 // client and write it to Keep) without worrying about
653                 // side effects of other read/write operations.
654                 //
655                 // Collection update requests for a given collection
656                 // are serialized by the controller, so we don't need
657                 // to do any locking for that part either.
658
659                 // collprefix is the subdirectory in the target
660                 // collection which (according to X-Webdav-Source) we
661                 // should pretend is "/" for this request.
662                 collprefix := strings.TrimPrefix(fsprefix, "by_id/"+collectionID+"/")
663                 if len(collprefix) == len(fsprefix) {
664                         http.Error(w, "internal error: writing to anything other than /by_id/{collectionID}", http.StatusInternalServerError)
665                         return
666                 }
667
668                 // Create a temporary collection filesystem for webdav
669                 // to operate on.
670                 var tmpcoll arvados.Collection
671                 client := session.client.WithRequestID(r.Header.Get("X-Request-Id"))
672                 tmpfs, err := tmpcoll.FileSystem(client, session.keepclient)
673                 if err != nil {
674                         http.Error(w, err.Error(), http.StatusInternalServerError)
675                         return
676                 }
677                 snap, err := arvados.Snapshot(sessionFS, "by_id/"+collectionID+"/")
678                 if err != nil {
679                         http.Error(w, "snapshot: "+err.Error(), http.StatusInternalServerError)
680                         return
681                 }
682                 err = arvados.Splice(tmpfs, "/", snap)
683                 if err != nil {
684                         http.Error(w, "splice: "+err.Error(), http.StatusInternalServerError)
685                         return
686                 }
687
688                 targetFS = tmpfs
689                 fsprefix = collprefix
690                 replace := make(map[string]string)
691
692                 switch r.Method {
693                 case "COPY", "MOVE":
694                         dsttarget, err := copyMoveDestination(r, webdavPrefix)
695                         if err != nil {
696                                 http.Error(w, err.Error(), http.StatusBadRequest)
697                                 return
698                         }
699
700                         srcspec := "current/" + colltarget
701                         // RFC 4918 9.8.3: A COPY of "Depth: 0" only
702                         // instructs that the collection and its
703                         // properties, but not resources identified by
704                         // its internal member URLs, are to be copied.
705                         //
706                         // ...meaning we will be creating an empty
707                         // directory.
708                         //
709                         // RFC 4918 9.9.2: A client MUST NOT submit a
710                         // Depth header on a MOVE on a collection with
711                         // any value but "infinity".
712                         //
713                         // ...meaning we only need to consider this
714                         // case for COPY, not for MOVE.
715                         if fi, err := tmpfs.Stat(colltarget); err == nil && fi.IsDir() && r.Method == "COPY" && r.Header.Get("Depth") == "0" {
716                                 srcspec = "manifest_text/"
717                         }
718
719                         replace[strings.TrimSuffix(dsttarget, "/")] = srcspec
720                         if r.Method == "MOVE" {
721                                 replace["/"+colltarget] = ""
722                         }
723                 case "MKCOL":
724                         replace["/"+colltarget] = "manifest_text/"
725                 case "DELETE":
726                         if depth := r.Header.Get("Depth"); depth != "" && depth != "infinity" {
727                                 http.Error(w, "invalid depth header, see RFC 4918 9.6.1", http.StatusBadRequest)
728                                 return
729                         }
730                         replace["/"+colltarget] = ""
731                 case "PUT":
732                         // changes will be applied by updateOnSuccess
733                         // update func below
734                 case "LOCK", "UNLOCK", "PROPPATCH":
735                         // no changes
736                 default:
737                         http.Error(w, "method missing", http.StatusInternalServerError)
738                         return
739                 }
740
741                 // Save the collection only if/when all
742                 // webdav->filesystem operations succeed using our
743                 // temporary collection -- and send a 500 error if the
744                 // updates can't be saved.
745                 logger := ctxlog.FromContext(r.Context())
746                 w = &updateOnSuccess{
747                         ResponseWriter: w,
748                         logger:         logger,
749                         update: func() error {
750                                 var manifest string
751                                 var snap *arvados.Subtree
752                                 var err error
753                                 if r.Method == "PUT" {
754                                         snap, err = arvados.Snapshot(tmpfs, colltarget)
755                                         if err != nil {
756                                                 return fmt.Errorf("snapshot tmpfs: %w", err)
757                                         }
758                                         tmpfs, err = (&arvados.Collection{}).FileSystem(client, session.keepclient)
759                                         err = arvados.Splice(tmpfs, "file", snap)
760                                         if err != nil {
761                                                 return fmt.Errorf("splice tmpfs: %w", err)
762                                         }
763                                         manifest, err = tmpfs.MarshalManifest(".")
764                                         if err != nil {
765                                                 return fmt.Errorf("marshal tmpfs: %w", err)
766                                         }
767                                         replace["/"+colltarget] = "manifest_text/file"
768                                 } else if len(replace) == 0 {
769                                         return nil
770                                 }
771                                 var updated arvados.Collection
772                                 err = client.RequestAndDecode(&updated, "PATCH", "arvados/v1/collections/"+collectionID, nil, map[string]interface{}{
773                                         "replace_files": replace,
774                                         "collection":    map[string]interface{}{"manifest_text": manifest}})
775                                 var te arvados.TransactionError
776                                 if errors.As(err, &te) {
777                                         err = te
778                                 }
779                                 if err != nil {
780                                         return err
781                                 }
782                                 if r.Method == "PUT" {
783                                         h.repack(r.Context(), session, logger, &updated)
784                                 }
785                                 return nil
786                         }}
787         } else {
788                 // When writing, we need to block session renewal
789                 // until we're finished, in order to guarantee the
790                 // effect of the write is visible in future responses.
791                 // But if we're not writing, we can release the lock
792                 // early.  This enables us to keep renewing sessions
793                 // and processing more requests even if a slow client
794                 // takes a long time to download a large file.
795                 releaseSession()
796                 targetFS = sessionFS
797         }
798         if r.Method == http.MethodGet {
799                 applyContentDispositionHdr(w, r, basename, attachment)
800         }
801         wh := &webdav.Handler{
802                 Prefix: webdavPrefix,
803                 FileSystem: &webdavfs.FS{
804                         FileSystem:    targetFS,
805                         Prefix:        fsprefix,
806                         Writing:       writeMethod[r.Method],
807                         AlwaysReadEOF: r.Method == "PROPFIND",
808                 },
809                 LockSystem: webdavfs.NoLockSystem,
810                 Logger: func(r *http.Request, err error) {
811                         if err != nil && !os.IsNotExist(err) {
812                                 ctxlog.FromContext(r.Context()).WithError(err).Error("error reported by webdav handler")
813                         }
814                 },
815         }
816         h.metrics.track(wh, w, r)
817         if r.Method == http.MethodGet && w.WroteStatus() == http.StatusOK {
818                 wrote := int64(w.WroteBodyBytes())
819                 fi, err := wh.FileSystem.Stat(r.Context(), colltarget)
820                 if err == nil && fi.Size() != wrote {
821                         var n int
822                         f, err := wh.FileSystem.OpenFile(r.Context(), colltarget, os.O_RDONLY, 0)
823                         if err == nil {
824                                 n, err = f.Read(make([]byte, 1024))
825                                 f.Close()
826                         }
827                         ctxlog.FromContext(r.Context()).Errorf("stat.Size()==%d but only wrote %d bytes; read(1024) returns %d, %v", fi.Size(), wrote, n, err)
828                 }
829         }
830 }
831
832 // Repack the given collection after uploading a file.
833 func (h *handler) repack(ctx context.Context, session *cachedSession, logger logrus.FieldLogger, updated *arvados.Collection) {
834         if _, busy := h.repacking.LoadOrStore(updated.UUID, true); busy {
835                 // Another goroutine is already repacking the same
836                 // collection.
837                 return
838         }
839         defer h.repacking.Delete(updated.UUID)
840
841         // Repacking is best-effort, so we disable retries, and don't
842         // fail on errors.
843         client := *session.client
844         client.Timeout = 0
845         repackfs, err := updated.FileSystem(&client, session.keepclient)
846         if err != nil {
847                 logger.Warnf("setting up repackfs: %s", err)
848                 return
849         }
850         repacked, err := repackfs.Repack(ctx, arvados.RepackOptions{CachedOnly: true})
851         if err != nil {
852                 logger.Warnf("repack: %s", err)
853                 return
854         }
855         if repacked > 0 {
856                 err := repackfs.Sync()
857                 if err != nil {
858                         logger.Infof("sync repack: %s", err)
859                 }
860         }
861 }
862
// dirListingTemplate is the html/template source that serveDirectory
// parses to render a directory listing page.
//
// The template reads the following data fields: .CollectionName (page
// title and heading), .StripParts and .QuotedUrlForWget (the example
// wget command), and .Files, a list whose entries provide .IsDir,
// .Href, .Name and .Size. It also requires a template function named
// "nbsp" to be registered before parsing.
var dirListingTemplate = `<!DOCTYPE HTML>
<HTML><HEAD>
  <META name="robots" content="NOINDEX">
  <TITLE>{{ .CollectionName }}</TITLE>
  <STYLE type="text/css">
    body {
      margin: 1.5em;
    }
    pre {
      background-color: #D9EDF7;
      border-radius: .25em;
      padding: .75em;
      overflow: auto;
    }
    .footer p {
      font-size: 82%;
    }
    hr {
      border: 1px solid #808080;
    }
    ul {
      padding: 0;
    }
    ul li {
      font-family: monospace;
      list-style: none;
    }
  </STYLE>
</HEAD>
<BODY>

<H1>{{ .CollectionName }}</H1>

<P>This collection of data files is being shared with you through
Arvados.  You can download individual files listed below.  To download
the entire directory tree with <CODE>wget</CODE>, try:</P>

<PRE id="wget-example">$ wget --mirror --no-parent --no-host --cut-dirs={{ .StripParts }} {{ .QuotedUrlForWget }}</PRE>

<H2>File Listing</H2>

{{if .Files}}
<UL>
{{range .Files}}
{{if .IsDir }}
  <LI>{{" " | printf "%15s  " | nbsp}}<A class="item" href="{{ .Href }}/">{{ .Name }}/</A></LI>
{{else}}
  <LI>{{.Size | printf "%15d  " | nbsp}}<A class="item" href="{{ .Href }}">{{ .Name }}</A></LI>
{{end}}
{{end}}
</UL>
{{else}}
<P>(No files; this collection is empty.)</P>
{{end}}

<HR>
<DIV class="footer">
  <P>
    About Arvados:
    Arvados is a free and open source software bioinformatics platform.
    To learn more, visit arvados.org.
    Arvados is not responsible for the files listed on this page.
  </P>
</DIV>

</BODY>
</HTML>
`
931
// fileListEnt is one row of the directory listing rendered by
// serveDirectory.
type fileListEnt struct {
	// Name is the entry's path relative to the listed directory,
	// and Href is the percent-encoded relative link to it.
	Name, Href string
	// Size is the size reported by the entry's FileInfo.
	Size int64
	// IsDir is true when the entry is a directory.
	IsDir bool
}
938
// relativeHref converts a filesystem path like `foo/"bar baz"` to an
// escaped (percent-encoded) relative path like
// `./foo/%22bar%20baz%22`.
//
// The result can still contain characters that are unsafe in HTML,
// such as '&'. Escaping those is left to the HTML templating engine.
func relativeHref(path string) string {
	return "./" + (&url.URL{Path: path}).EscapedPath()
}
948
// makeQuotedUrlForWget returns the URL of the given HTTP request,
// wrapped in single quotes so it can be pasted to a shell command
// line ("wget ...") to repeat the request.
//
// The scheme is taken from the X-Forwarded-Proto header when that is
// "http" or "https"; otherwise it is "https" if the request arrived
// over TLS, and "http" if not.
func makeQuotedUrlForWget(r *http.Request) string {
	var scheme string
	switch proto := r.Header.Get("X-Forwarded-Proto"); {
	case proto == "http" || proto == "https":
		// use protocol reported by load balancer / proxy
		scheme = proto
	case r.TLS != nil:
		scheme = "https"
	default:
		scheme = "http"
	}
	// Even an escaped path can contain single quote characters,
	// which would terminate our shell quoting early. Encode them as
	// %27 so the whole URL stays inside one quoted word.
	escapedPath := strings.ReplaceAll(r.URL.EscapedPath(), "'", "%27")
	return "'" + scheme + "://" + r.Host + escapedPath + "'"
}
966
967 func (h *handler) serveDirectory(w http.ResponseWriter, r *http.Request, collectionName string, fs http.FileSystem, base string, recurse bool) {
968         var files []fileListEnt
969         var walk func(string) error
970         if !strings.HasSuffix(base, "/") {
971                 base = base + "/"
972         }
973         walk = func(path string) error {
974                 dirname := base + path
975                 if dirname != "/" {
976                         dirname = strings.TrimSuffix(dirname, "/")
977                 }
978                 d, err := fs.Open(dirname)
979                 if err != nil {
980                         return err
981                 }
982                 ents, err := d.Readdir(-1)
983                 if err != nil {
984                         return err
985                 }
986                 for _, ent := range ents {
987                         if recurse && ent.IsDir() {
988                                 err = walk(path + ent.Name() + "/")
989                                 if err != nil {
990                                         return err
991                                 }
992                         } else {
993                                 listingName := path + ent.Name()
994                                 files = append(files, fileListEnt{
995                                         Name:  listingName,
996                                         Href:  relativeHref(listingName),
997                                         Size:  ent.Size(),
998                                         IsDir: ent.IsDir(),
999                                 })
1000                         }
1001                 }
1002                 return nil
1003         }
1004         if err := walk(""); err != nil {
1005                 http.Error(w, "error getting directory listing: "+err.Error(), http.StatusInternalServerError)
1006                 return
1007         }
1008
1009         funcs := template.FuncMap{
1010                 "nbsp": func(s string) template.HTML {
1011                         return template.HTML(strings.Replace(s, " ", "&nbsp;", -1))
1012                 },
1013         }
1014         tmpl, err := template.New("dir").Funcs(funcs).Parse(dirListingTemplate)
1015         if err != nil {
1016                 http.Error(w, "error parsing template: "+err.Error(), http.StatusInternalServerError)
1017                 return
1018         }
1019         sort.Slice(files, func(i, j int) bool {
1020                 return files[i].Name < files[j].Name
1021         })
1022         w.WriteHeader(http.StatusOK)
1023         tmpl.Execute(w, map[string]interface{}{
1024                 "CollectionName":   collectionName,
1025                 "Files":            files,
1026                 "Request":          r,
1027                 "StripParts":       strings.Count(strings.TrimRight(r.URL.Path, "/"), "/"),
1028                 "QuotedUrlForWget": makeQuotedUrlForWget(r),
1029         })
1030 }
1031
// applyContentDispositionHdr sets the Content-Disposition response
// header for a file download.
//
// The disposition type is "attachment" if isAttachment is true and
// "inline" otherwise. When the request URI contains a query string, a
// filename parameter is appended. A bare "inline" disposition is
// never sent, because no header is needed in that case.
func applyContentDispositionHdr(w http.ResponseWriter, r *http.Request, filename string, isAttachment bool) {
	hasQuery := strings.Contains(r.RequestURI, "?")
	if !isAttachment && !hasQuery {
		return
	}
	value := "inline"
	if isAttachment {
		value = "attachment"
	}
	if hasQuery {
		// Help the UA realize that the filename is just
		// "filename.txt", not
		// "filename.txt?disposition=attachment".
		//
		// TODO(TC): Follow advice at RFC 6266 appendix D
		value += "; filename=" + strconv.QuoteToASCII(filename)
	}
	w.Header().Set("Content-Disposition", value)
}
1049
1050 func (h *handler) seeOtherWithCookie(w http.ResponseWriter, r *http.Request, location string, credentialsOK bool) {
1051         if formTokens, haveFormTokens := r.Form["api_token"]; haveFormTokens {
1052                 if !credentialsOK {
1053                         // It is not safe to copy the provided token
1054                         // into a cookie unless the current vhost
1055                         // (origin) serves only a single collection or
1056                         // we are in TrustAllContent mode.
1057                         http.Error(w, "cannot serve inline content at this URL (possible configuration error; see https://doc.arvados.org/install/install-keep-web.html#dns)", http.StatusBadRequest)
1058                         return
1059                 }
1060
1061                 // The HttpOnly flag is necessary to prevent
1062                 // JavaScript code (included in, or loaded by, a page
1063                 // in the collection being served) from employing the
1064                 // user's token beyond reading other files in the same
1065                 // domain, i.e., same collection.
1066                 //
1067                 // The 303 redirect is necessary in the case of a GET
1068                 // request to avoid exposing the token in the Location
1069                 // bar, and in the case of a POST request to avoid
1070                 // raising warnings when the user refreshes the
1071                 // resulting page.
1072                 for _, tok := range formTokens {
1073                         if tok == "" {
1074                                 continue
1075                         }
1076                         http.SetCookie(w, &http.Cookie{
1077                                 Name:     "arvados_api_token",
1078                                 Value:    auth.EncodeTokenCookie([]byte(tok)),
1079                                 Path:     "/",
1080                                 HttpOnly: true,
1081                                 SameSite: http.SameSiteLaxMode,
1082                         })
1083                         break
1084                 }
1085         }
1086
1087         // Propagate query parameters (except api_token) from
1088         // the original request.
1089         redirQuery := r.URL.Query()
1090         redirQuery.Del("api_token")
1091
1092         u := r.URL
1093         if location != "" {
1094                 newu, err := u.Parse(location)
1095                 if err != nil {
1096                         http.Error(w, "error resolving redirect target: "+err.Error(), http.StatusInternalServerError)
1097                         return
1098                 }
1099                 u = newu
1100         }
1101         redir := (&url.URL{
1102                 Scheme:   r.URL.Scheme,
1103                 Host:     r.Host,
1104                 Path:     u.Path,
1105                 RawQuery: redirQuery.Encode(),
1106         }).String()
1107
1108         w.Header().Add("Location", redir)
1109         w.WriteHeader(http.StatusSeeOther)
1110         io.WriteString(w, `<A href="`)
1111         io.WriteString(w, html.EscapeString(redir))
1112         io.WriteString(w, `">Continue</A>`)
1113 }
1114
1115 func (h *handler) userPermittedToUploadOrDownload(method string, tokenUser *arvados.User) bool {
1116         var permitDownload bool
1117         var permitUpload bool
1118         if tokenUser != nil && tokenUser.IsAdmin {
1119                 permitUpload = h.Cluster.Collections.WebDAVPermission.Admin.Upload
1120                 permitDownload = h.Cluster.Collections.WebDAVPermission.Admin.Download
1121         } else {
1122                 permitUpload = h.Cluster.Collections.WebDAVPermission.User.Upload
1123                 permitDownload = h.Cluster.Collections.WebDAVPermission.User.Download
1124         }
1125         if (method == "PUT" || method == "POST") && !permitUpload {
1126                 // Disallow operations that upload new files.
1127                 // Permit webdav operations that move existing files around.
1128                 return false
1129         } else if method == "GET" && !permitDownload {
1130                 // Disallow downloading file contents.
1131                 // Permit webdav operations like PROPFIND that retrieve metadata
1132                 // but not file contents.
1133                 return false
1134         }
1135         return true
1136 }
1137
// copyMoveDestination parses the request's Destination header and
// returns the destination path relative to the current collection,
// i.e., with webdavPrefix stripped off.
//
// An error is returned if the header is missing or cannot be parsed,
// if it names a host other than the request's host, or if its path is
// not located under webdavPrefix.
func copyMoveDestination(r *http.Request, webdavPrefix string) (string, error) {
	dst := r.Header.Get("Destination")
	if dst == "" {
		// RFC 4918 9.8 and 9.9 require a Destination header on
		// COPY and MOVE. Without this check an absent header
		// would parse as an empty URL and be treated as a valid,
		// empty destination path.
		return "", errors.New("missing Destination header")
	}
	dsturl, err := url.Parse(dst)
	if err != nil {
		return "", err
	}
	// Host names are case-insensitive, so compare accordingly.
	if dsturl.Host != "" && !strings.EqualFold(dsturl.Host, r.Host) {
		return "", errors.New("destination host mismatch")
	}
	if webdavPrefix == "" {
		return dsturl.Path, nil
	}
	if !strings.HasPrefix(dsturl.Path, webdavPrefix) {
		return "", errors.New("destination path not supported")
	}
	dsttarget := dsturl.Path[len(webdavPrefix):]
	// The prefix must end on a path segment boundary. Otherwise a
	// destination like "/c=abcdef/x" would be accepted under prefix
	// "/c=abc" and yield the bogus target "def/x".
	onBoundary := dsttarget == "" || dsttarget[0] == '/' || strings.HasSuffix(webdavPrefix, "/")
	if !onBoundary {
		return "", errors.New("destination path not supported")
	}
	return dsttarget, nil
}
1158
1159 // Check whether fstarget is in a collection whose PDH has changed
1160 // since it was last Sync()ed in sessionFS.
1161 //
1162 // If fstarget doesn't exist, but would be in such a collection if it
1163 // did exist, return true.
1164 func (h *handler) needSync(ctx context.Context, sessionFS arvados.CustomFileSystem, fstarget string) (bool, error) {
1165         collection, _ := h.determineCollection(sessionFS, fstarget)
1166         if collection == nil || len(collection.UUID) != 27 || !strings.HasPrefix(collection.UUID, h.Cluster.ClusterID) {
1167                 return false, nil
1168         }
1169         db, err := h.getDBConnector().GetDB(ctx)
1170         if err != nil {
1171                 return false, err
1172         }
1173         var currentPDH string
1174         err = db.QueryRowContext(ctx, `select portable_data_hash from collections where uuid=$1`, collection.UUID).Scan(&currentPDH)
1175         if err != nil {
1176                 return false, err
1177         }
1178         if currentPDH != collection.PortableDataHash {
1179                 return true, nil
1180         }
1181         return false, nil
1182 }
1183
// fileEventLog describes a single file upload or download event.
//
// All fields are strings, so values are comparable and can be used
// as map keys (shouldLogEvent relies on this).
type fileEventLog struct {
	// The request: its URL path, and "file_upload" or
	// "file_download".
	requestPath, eventType string
	// The user the request was made on behalf of.
	userUUID, userFullName string
	// The collection being accessed, and the file path within it.
	collUUID, collPDH, collFilePath string
	// The client's address and the token it presented.
	clientAddr, clientToken string
}
1195
1196 func newFileEventLog(
1197         h *handler,
1198         r *http.Request,
1199         filepath string,
1200         collection *arvados.Collection,
1201         user *arvados.User,
1202         token string,
1203 ) *fileEventLog {
1204         var eventType string
1205         switch r.Method {
1206         case "POST", "PUT":
1207                 eventType = "file_upload"
1208         case "GET":
1209                 eventType = "file_download"
1210         default:
1211                 return nil
1212         }
1213
1214         // We want to log the address of the proxy closest to keep-web—the last
1215         // value in the X-Forwarded-For list—or the client address if there is no
1216         // valid proxy.
1217         var clientAddr string
1218         // 1. Build a slice of proxy addresses from X-Forwarded-For.
1219         xff := strings.Join(r.Header.Values("X-Forwarded-For"), ",")
1220         addrs := strings.Split(xff, ",")
1221         // 2. Reverse the slice so it's in our most preferred order for logging.
1222         slices.Reverse(addrs)
1223         // 3. Append the client address to that slice.
1224         if addr, _, err := net.SplitHostPort(r.RemoteAddr); err == nil {
1225                 addrs = append(addrs, addr)
1226         }
1227         // 4. Use the first valid address in the slice.
1228         for _, addr := range addrs {
1229                 if ip := net.ParseIP(strings.TrimSpace(addr)); ip != nil {
1230                         clientAddr = ip.String()
1231                         break
1232                 }
1233         }
1234
1235         ev := &fileEventLog{
1236                 requestPath: r.URL.Path,
1237                 eventType:   eventType,
1238                 clientAddr:  clientAddr,
1239                 clientToken: token,
1240         }
1241
1242         if user != nil {
1243                 ev.userUUID = user.UUID
1244                 ev.userFullName = user.FullName
1245         } else {
1246                 ev.userUUID = fmt.Sprintf("%s-tpzed-anonymouspublic", h.Cluster.ClusterID)
1247         }
1248
1249         if collection != nil {
1250                 ev.collFilePath = filepath
1251                 // h.determineCollection populates the collection_uuid
1252                 // prop with the PDH, if this collection is being
1253                 // accessed via PDH. For logging, we use a different
1254                 // field depending on whether it's a UUID or PDH.
1255                 if len(collection.UUID) > 32 {
1256                         ev.collPDH = collection.UUID
1257                 } else {
1258                         ev.collPDH = collection.PortableDataHash
1259                         ev.collUUID = collection.UUID
1260                 }
1261         }
1262
1263         return ev
1264 }
1265
1266 func (ev *fileEventLog) shouldLogPDH() bool {
1267         return ev.eventType == "file_download" && ev.collPDH != ""
1268 }
1269
1270 func (ev *fileEventLog) asDict() arvadosclient.Dict {
1271         props := arvadosclient.Dict{
1272                 "reqPath":              ev.requestPath,
1273                 "collection_uuid":      ev.collUUID,
1274                 "collection_file_path": ev.collFilePath,
1275         }
1276         if ev.shouldLogPDH() {
1277                 props["portable_data_hash"] = ev.collPDH
1278         }
1279         return arvadosclient.Dict{
1280                 "object_uuid": ev.userUUID,
1281                 "event_type":  ev.eventType,
1282                 "properties":  props,
1283         }
1284 }
1285
1286 func (ev *fileEventLog) asFields() logrus.Fields {
1287         fields := logrus.Fields{
1288                 "collection_file_path": ev.collFilePath,
1289                 "collection_uuid":      ev.collUUID,
1290                 "user_uuid":            ev.userUUID,
1291         }
1292         if ev.shouldLogPDH() {
1293                 fields["portable_data_hash"] = ev.collPDH
1294         }
1295         if !strings.HasSuffix(ev.userUUID, "-tpzed-anonymouspublic") {
1296                 fields["user_full_name"] = ev.userFullName
1297         }
1298         return fields
1299 }
1300
1301 func (h *handler) shouldLogEvent(
1302         event *fileEventLog,
1303         req *http.Request,
1304         fileInfo os.FileInfo,
1305         t time.Time,
1306 ) bool {
1307         if event == nil {
1308                 return false
1309         } else if event.eventType != "file_download" ||
1310                 h.Cluster.Collections.WebDAVLogDownloadInterval == 0 ||
1311                 fileInfo == nil {
1312                 return true
1313         }
1314         td := h.Cluster.Collections.WebDAVLogDownloadInterval.Duration()
1315         cutoff := t.Add(-td)
1316         ev := *event
1317         h.fileEventLogsMtx.Lock()
1318         defer h.fileEventLogsMtx.Unlock()
1319         if h.fileEventLogs == nil {
1320                 h.fileEventLogs = make(map[fileEventLog]time.Time)
1321         }
1322         shouldLog := h.fileEventLogs[ev].Before(cutoff)
1323         if !shouldLog {
1324                 // Go's http fs server evaluates http.Request.Header.Get("Range")
1325                 // (as of Go 1.22) so we should do the same.
1326                 // Don't worry about merging multiple headers, etc.
1327                 ranges, err := http_range.ParseRange(req.Header.Get("Range"), fileInfo.Size())
1328                 if ranges == nil || err != nil {
1329                         // The Range header was either empty or malformed.
1330                         // Err on the side of logging.
1331                         shouldLog = true
1332                 } else {
1333                         // Log this request only if it requested the first byte
1334                         // (our heuristic for "starting a new download").
1335                         for _, reqRange := range ranges {
1336                                 if reqRange.Start == 0 {
1337                                         shouldLog = true
1338                                         break
1339                                 }
1340                         }
1341                 }
1342         }
1343         if shouldLog {
1344                 h.fileEventLogs[ev] = t
1345         }
1346         if t.After(h.fileEventLogsNextTidy) {
1347                 for key, logTime := range h.fileEventLogs {
1348                         if logTime.Before(cutoff) {
1349                                 delete(h.fileEventLogs, key)
1350                         }
1351                 }
1352                 h.fileEventLogsNextTidy = t.Add(td)
1353         }
1354         return shouldLog
1355 }
1356
1357 func (h *handler) logUploadOrDownload(
1358         r *http.Request,
1359         client *arvadosclient.ArvadosClient,
1360         fs arvados.CustomFileSystem,
1361         filepath string,
1362         collection *arvados.Collection,
1363         user *arvados.User,
1364 ) {
1365         var fileInfo os.FileInfo
1366         if fs != nil {
1367                 if collection == nil {
1368                         collection, filepath = h.determineCollection(fs, filepath)
1369                 }
1370                 if collection != nil {
1371                         // It's okay to ignore this error because shouldLogEvent will
1372                         // always return true if fileInfo == nil.
1373                         fileInfo, _ = fs.Stat(path.Join("by_id", collection.UUID, filepath))
1374                 }
1375         }
1376         event := newFileEventLog(h, r, filepath, collection, user, client.ApiToken)
1377         if !h.shouldLogEvent(event, r, fileInfo, time.Now()) {
1378                 return
1379         }
1380         log := ctxlog.FromContext(r.Context()).WithFields(event.asFields())
1381         log.Info(strings.Replace(event.eventType, "file_", "File ", 1))
1382         if h.Cluster.Collections.WebDAVLogEvents {
1383                 go func() {
1384                         logReq := arvadosclient.Dict{"log": event.asDict()}
1385                         err := client.Create("logs", logReq, nil)
1386                         if err != nil {
1387                                 log.WithError(err).Errorf("Failed to create %s log event on API server", event.eventType)
1388                         }
1389                 }()
1390         }
1391 }
1392
1393 func (h *handler) determineCollection(fs arvados.CustomFileSystem, path string) (*arvados.Collection, string) {
1394         target := strings.TrimSuffix(path, "/")
1395         for cut := len(target); cut >= 0; cut = strings.LastIndexByte(target, '/') {
1396                 target = target[:cut]
1397                 fi, err := fs.Stat(target)
1398                 if os.IsNotExist(err) {
1399                         // creating a new file/dir, or download
1400                         // destined to fail
1401                         continue
1402                 } else if err != nil {
1403                         return nil, ""
1404                 }
1405                 switch src := fi.Sys().(type) {
1406                 case *arvados.Collection:
1407                         return src, strings.TrimPrefix(path[len(target):], "/")
1408                 case *arvados.Group:
1409                         return nil, ""
1410                 default:
1411                         if _, ok := src.(error); ok {
1412                                 return nil, ""
1413                         }
1414                 }
1415         }
1416         return nil, ""
1417 }
1418
1419 func ServeCORSPreflight(w http.ResponseWriter, header http.Header) bool {
1420         method := header.Get("Access-Control-Request-Method")
1421         if method == "" {
1422                 return false
1423         }
1424         if !browserMethod[method] && !webdavMethod[method] {
1425                 w.WriteHeader(http.StatusMethodNotAllowed)
1426                 return true
1427         }
1428         w.Header().Set("Access-Control-Allow-Headers", corsAllowHeadersHeader)
1429         w.Header().Set("Access-Control-Allow-Methods", "COPY, DELETE, GET, LOCK, MKCOL, MOVE, OPTIONS, POST, PROPFIND, PROPPATCH, PUT, RMCOL, UNLOCK")
1430         w.Header().Set("Access-Control-Allow-Origin", "*")
1431         w.Header().Set("Access-Control-Max-Age", "86400")
1432         return true
1433 }