22159: removed DataTableItem type in favor of Resource type
[arvados.git] / services / keep-web / handler.go
1 // Copyright (C) The Arvados Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: AGPL-3.0
4
5 package keepweb
6
7 import (
8         "context"
9         "encoding/json"
10         "errors"
11         "fmt"
12         "html"
13         "html/template"
14         "io"
15         "net"
16         "net/http"
17         "net/url"
18         "os"
19         "path"
20         "slices"
21         "sort"
22         "strconv"
23         "strings"
24         "sync"
25         "time"
26
27         "git.arvados.org/arvados.git/lib/cmd"
28         "git.arvados.org/arvados.git/lib/ctrlctx"
29         "git.arvados.org/arvados.git/lib/webdavfs"
30         "git.arvados.org/arvados.git/sdk/go/arvados"
31         "git.arvados.org/arvados.git/sdk/go/arvadosclient"
32         "git.arvados.org/arvados.git/sdk/go/auth"
33         "git.arvados.org/arvados.git/sdk/go/ctxlog"
34         "git.arvados.org/arvados.git/sdk/go/httpserver"
35         "github.com/gotd/contrib/http_range"
36         "github.com/sirupsen/logrus"
37         "golang.org/x/net/webdav"
38 )
39
40 type handler struct {
41         Cache   cache
42         Cluster *arvados.Cluster
43         metrics *metrics
44
45         fileEventLogs         map[fileEventLog]time.Time
46         fileEventLogsMtx      sync.Mutex
47         fileEventLogsNextTidy time.Time
48
49         s3SecretCache         map[string]*cachedS3Secret
50         s3SecretCacheMtx      sync.Mutex
51         s3SecretCacheNextTidy time.Time
52
53         dbConnector    *ctrlctx.DBConnector
54         dbConnectorMtx sync.Mutex
55 }
56
var (
	// urlPDHDecoder rewrites " " and "-" to "+", undoing the
	// substitutions that may have been applied to the "+" in a
	// portable data hash when it was embedded in a URL.
	urlPDHDecoder = strings.NewReplacer(" ", "+", "-", "+")

	// notFoundMessage is the response body sent with "not found"
	// style errors.
	notFoundMessage = "Not Found"

	// unauthorizedMessage is the response body sent with 401
	// responses when no usable token was supplied.
	unauthorizedMessage = "401 Unauthorized\n\nA valid Arvados token must be provided to access this resource."
)
61
62 // parseCollectionIDFromURL returns a UUID or PDH if s is a UUID or a
63 // PDH (even if it is a PDH with "+" replaced by " " or "-");
64 // otherwise "".
65 func parseCollectionIDFromURL(s string) string {
66         if arvadosclient.UUIDMatch(s) {
67                 return s
68         }
69         if pdh := urlPDHDecoder.Replace(s); arvadosclient.PDHMatch(pdh) {
70                 return pdh
71         }
72         return ""
73 }
74
75 func (h *handler) serveStatus(w http.ResponseWriter, r *http.Request) {
76         json.NewEncoder(w).Encode(struct{ Version string }{cmd.Version.String()})
77 }
78
// errorWithHTTPStatus is satisfied by errors that know which HTTP
// status code should be used when reporting them to a client. It is
// used as an errors.As target to pick a response code.
type errorWithHTTPStatus interface {
	// HTTPStatus returns the HTTP status code for this error.
	HTTPStatus() int
}
82
83 // updateOnSuccess wraps httpserver.ResponseWriter. If the handler
84 // sends an HTTP header indicating success, updateOnSuccess first
85 // calls the provided update func. If the update func fails, an error
86 // response is sent (using the error's HTTP status or 500 if none),
87 // and the status code and body sent by the handler are ignored (all
88 // response writes return the update error).
89 type updateOnSuccess struct {
90         httpserver.ResponseWriter
91         logger     logrus.FieldLogger
92         update     func() error
93         sentHeader bool
94         err        error
95 }
96
97 func (uos *updateOnSuccess) Write(p []byte) (int, error) {
98         if !uos.sentHeader {
99                 uos.WriteHeader(http.StatusOK)
100         }
101         if uos.err != nil {
102                 return 0, uos.err
103         }
104         return uos.ResponseWriter.Write(p)
105 }
106
107 func (uos *updateOnSuccess) WriteHeader(code int) {
108         if !uos.sentHeader {
109                 uos.sentHeader = true
110                 if code >= 200 && code < 400 {
111                         if uos.err = uos.update(); uos.err != nil {
112                                 code := http.StatusInternalServerError
113                                 if he := errorWithHTTPStatus(nil); errors.As(uos.err, &he) {
114                                         code = he.HTTPStatus()
115                                 }
116                                 uos.logger.WithError(uos.err).Errorf("update() returned %T error, changing response to HTTP %d", uos.err, code)
117                                 http.Error(uos.ResponseWriter, uos.err.Error(), code)
118                                 return
119                         }
120                 }
121         }
122         uos.ResponseWriter.WriteHeader(code)
123 }
124
// corsAllowHeadersHeader is a comma-separated list of request header
// names: a few general-purpose headers followed by the WebDAV request
// headers.
var corsAllowHeadersHeader = strings.Join([]string{
	"Authorization",
	"Content-Type",
	"Range",
	// WebDAV request headers:
	"Depth",
	"Destination",
	"If",
	"Lock-Token",
	"Overwrite",
	"Timeout",
	"Cache-Control",
}, ", ")

// writeMethod is the set of HTTP methods treated as write
// operations.
var writeMethod = map[string]bool{
	"COPY":      true,
	"DELETE":    true,
	"LOCK":      true,
	"MKCOL":     true,
	"MOVE":      true,
	"PROPPATCH": true,
	"PUT":       true,
	"UNLOCK":    true,
}

// webdavMethod is the set of HTTP methods accepted as WebDAV
// requests.
var webdavMethod = map[string]bool{
	"COPY":      true,
	"DELETE":    true,
	"LOCK":      true,
	"MKCOL":     true,
	"MOVE":      true,
	"OPTIONS":   true,
	"PROPFIND":  true,
	"PROPPATCH": true,
	"PUT":       true,
	"RMCOL":     true,
	"UNLOCK":    true,
}

// browserMethod is the set of HTTP methods accepted as ordinary
// browser requests.
var browserMethod = map[string]bool{
	"GET":  true,
	"HEAD": true,
	"POST": true,
}

// siteFSDir lists the top-level directory names that are served
// with siteFS. The empty name stands for the root directory.
var siteFSDir = map[string]bool{
	"":      true,
	"by_id": true,
	"users": true,
}
166
// stripDefaultPort normalizes a "host" or "host:port" string so that
// two spellings of the same vhost compare equal: the result is lower
// case, and a trailing ":80" or ":443" is removed. Any other port is
// kept.
//
// Only the ":port" suffix is trimmed, so a bracketed IPv6 literal
// keeps its brackets ("[::1]:443" becomes "[::1]") and therefore
// still matches the port-less spelling "[::1]".
func stripDefaultPort(host string) string {
	// Will consider port 80 and port 443 to be the same vhost.  I think that's fine.
	u := &url.URL{Host: host}
	if p := u.Port(); p == "80" || p == "443" {
		// Do not use u.Hostname() here: it also strips the
		// square brackets from IPv6 literals, which would make
		// "[::1]:443" and "[::1]" normalize differently.
		host = strings.TrimSuffix(host, ":"+p)
	}
	return strings.ToLower(host)
}
176
177 // CheckHealth implements service.Handler.
178 func (h *handler) CheckHealth() error {
179         return nil
180 }
181
182 // Done implements service.Handler.
183 func (h *handler) Done() <-chan struct{} {
184         return nil
185 }
186
187 func (h *handler) getDBConnector() *ctrlctx.DBConnector {
188         h.dbConnectorMtx.Lock()
189         defer h.dbConnectorMtx.Unlock()
190         if h.dbConnector == nil {
191                 h.dbConnector = &ctrlctx.DBConnector{PostgreSQL: h.Cluster.PostgreSQL}
192         }
193         return h.dbConnector
194 }
195
196 // ServeHTTP implements http.Handler.
197 func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
198         if xfp := r.Header.Get("X-Forwarded-Proto"); xfp != "" && xfp != "http" {
199                 r.URL.Scheme = xfp
200         }
201
202         httpserver.SetResponseLogFields(r.Context(), logrus.Fields{
203                 "webdavDepth":       r.Header.Get("Depth"),
204                 "webdavDestination": r.Header.Get("Destination"),
205                 "webdavOverwrite":   r.Header.Get("Overwrite"),
206         })
207
208         wbuffer := newWriteBuffer(wOrig, int(h.Cluster.Collections.WebDAVOutputBuffer))
209         defer wbuffer.Close()
210         w := httpserver.WrapResponseWriter(responseWriter{
211                 Writer:         wbuffer,
212                 ResponseWriter: wOrig,
213         })
214
215         if r.Method == "OPTIONS" && ServeCORSPreflight(w, r.Header) {
216                 return
217         }
218
219         if !browserMethod[r.Method] && !webdavMethod[r.Method] {
220                 w.WriteHeader(http.StatusMethodNotAllowed)
221                 return
222         }
223
224         if r.Header.Get("Origin") != "" {
225                 // Allow simple cross-origin requests without user
226                 // credentials ("user credentials" as defined by CORS,
227                 // i.e., cookies, HTTP authentication, and client-side
228                 // SSL certificates. See
229                 // http://www.w3.org/TR/cors/#user-credentials).
230                 w.Header().Set("Access-Control-Allow-Origin", "*")
231                 w.Header().Set("Access-Control-Expose-Headers", "Content-Range")
232         }
233
234         if h.serveS3(w, r) {
235                 return
236         }
237
238         // webdavPrefix is the leading portion of r.URL.Path that
239         // should be ignored by the webdav handler, if any.
240         //
241         // req "/c={id}/..." -> webdavPrefix "/c={id}"
242         // req "/by_id/..." -> webdavPrefix ""
243         //
244         // Note: in the code immediately below, we set webdavPrefix
245         // only if it was explicitly set by the client. Otherwise, it
246         // gets set later, after checking the request path for cases
247         // like "/c={id}/...".
248         webdavPrefix := ""
249         arvPath := r.URL.Path
250         if prefix := r.Header.Get("X-Webdav-Prefix"); prefix != "" {
251                 // Enable a proxy (e.g., container log handler in
252                 // controller) to satisfy a request for path
253                 // "/foo/bar/baz.txt" using content from
254                 // "//abc123-4.internal/bar/baz.txt", by adding a
255                 // request header "X-Webdav-Prefix: /foo"
256                 if !strings.HasPrefix(arvPath, prefix) {
257                         http.Error(w, "X-Webdav-Prefix header is not a prefix of the requested path", http.StatusBadRequest)
258                         return
259                 }
260                 arvPath = r.URL.Path[len(prefix):]
261                 if arvPath == "" {
262                         arvPath = "/"
263                 }
264                 w.Header().Set("Vary", "X-Webdav-Prefix, "+w.Header().Get("Vary"))
265                 webdavPrefix = prefix
266         }
267         pathParts := strings.Split(arvPath[1:], "/")
268
269         var stripParts int
270         var collectionID string
271         var tokens []string
272         var reqTokens []string
273         var pathToken bool
274         var attachment bool
275         var useSiteFS bool
276         credentialsOK := h.Cluster.Collections.TrustAllContent
277         reasonNotAcceptingCredentials := ""
278
279         if r.Host != "" && stripDefaultPort(r.Host) == stripDefaultPort(h.Cluster.Services.WebDAVDownload.ExternalURL.Host) {
280                 credentialsOK = true
281                 attachment = true
282         } else if r.FormValue("disposition") == "attachment" {
283                 attachment = true
284         }
285
286         if !credentialsOK {
287                 reasonNotAcceptingCredentials = fmt.Sprintf("vhost %q does not specify a single collection ID or match Services.WebDAVDownload.ExternalURL %q, and Collections.TrustAllContent is false",
288                         r.Host, h.Cluster.Services.WebDAVDownload.ExternalURL)
289         }
290
291         if collectionID = arvados.CollectionIDFromDNSName(r.Host); collectionID != "" {
292                 // http://ID.collections.example/PATH...
293                 credentialsOK = true
294         } else if r.URL.Path == "/status.json" {
295                 h.serveStatus(w, r)
296                 return
297         } else if siteFSDir[pathParts[0]] {
298                 useSiteFS = true
299         } else if len(pathParts) >= 1 && strings.HasPrefix(pathParts[0], "c=") {
300                 // /c=ID[/PATH...]
301                 collectionID = parseCollectionIDFromURL(pathParts[0][2:])
302                 stripParts = 1
303         } else if len(pathParts) >= 2 && pathParts[0] == "collections" {
304                 if len(pathParts) >= 4 && pathParts[1] == "download" {
305                         // /collections/download/ID/TOKEN/PATH...
306                         collectionID = parseCollectionIDFromURL(pathParts[2])
307                         tokens = []string{pathParts[3]}
308                         stripParts = 4
309                         pathToken = true
310                 } else {
311                         // /collections/ID/PATH...
312                         collectionID = parseCollectionIDFromURL(pathParts[1])
313                         stripParts = 2
314                         // This path is only meant to work for public
315                         // data. Tokens provided with the request are
316                         // ignored.
317                         credentialsOK = false
318                         reasonNotAcceptingCredentials = "the '/collections/UUID/PATH' form only works for public data"
319                 }
320         }
321
322         forceReload := false
323         if cc := r.Header.Get("Cache-Control"); strings.Contains(cc, "no-cache") || strings.Contains(cc, "must-revalidate") {
324                 forceReload = true
325         }
326
327         if credentialsOK {
328                 reqTokens = auth.CredentialsFromRequest(r).Tokens
329         }
330
331         r.ParseForm()
332         origin := r.Header.Get("Origin")
333         cors := origin != "" && !strings.HasSuffix(origin, "://"+r.Host)
334         safeAjax := cors && (r.Method == http.MethodGet || r.Method == http.MethodHead)
335         // Important distinction: safeAttachment checks whether api_token exists
336         // as a query parameter. haveFormTokens checks whether api_token exists
337         // as request form data *or* a query parameter. Different checks are
338         // necessary because both the request disposition and the location of
339         // the API token affect whether or not the request needs to be
340         // redirected. The different branch comments below explain further.
341         safeAttachment := attachment && !r.URL.Query().Has("api_token")
342         if formTokens, haveFormTokens := r.Form["api_token"]; !haveFormTokens {
343                 // No token to use or redact.
344         } else if safeAjax || safeAttachment {
345                 // If this is a cross-origin request, the URL won't
346                 // appear in the browser's address bar, so
347                 // substituting a clipboard-safe URL is pointless.
348                 // Redirect-with-cookie wouldn't work anyway, because
349                 // it's not safe to allow third-party use of our
350                 // cookie.
351                 //
352                 // If we're supplying an attachment, we don't need to
353                 // convert POST to GET to avoid the "really resubmit
354                 // form?" problem, so provided the token isn't
355                 // embedded in the URL, there's no reason to do
356                 // redirect-with-cookie in this case either.
357                 for _, tok := range formTokens {
358                         reqTokens = append(reqTokens, tok)
359                 }
360         } else if browserMethod[r.Method] {
361                 // If this is a page view, and the client provided a
362                 // token via query string or POST body, we must put
363                 // the token in an HttpOnly cookie, and redirect to an
364                 // equivalent URL with the query param redacted and
365                 // method = GET.
366                 h.seeOtherWithCookie(w, r, "", credentialsOK)
367                 return
368         }
369
370         targetPath := pathParts[stripParts:]
371         if tokens == nil && len(targetPath) > 0 && strings.HasPrefix(targetPath[0], "t=") {
372                 // http://ID.example/t=TOKEN/PATH...
373                 // /c=ID/t=TOKEN/PATH...
374                 //
375                 // This form must only be used to pass scoped tokens
376                 // that give permission for a single collection. See
377                 // FormValue case above.
378                 tokens = []string{targetPath[0][2:]}
379                 pathToken = true
380                 targetPath = targetPath[1:]
381                 stripParts++
382         }
383
384         // fsprefix is the path from sitefs root to the sitefs
385         // directory (implicitly or explicitly) indicated by the
386         // leading / in the request path.
387         //
388         // Request "/by_id/..." -> fsprefix ""
389         // Request "/c={id}/..." -> fsprefix "/by_id/{id}/"
390         fsprefix := ""
391         if useSiteFS {
392                 if writeMethod[r.Method] {
393                         http.Error(w, webdavfs.ErrReadOnly.Error(), http.StatusMethodNotAllowed)
394                         return
395                 }
396                 if len(reqTokens) == 0 {
397                         w.Header().Add("WWW-Authenticate", "Basic realm=\"collections\"")
398                         http.Error(w, unauthorizedMessage, http.StatusUnauthorized)
399                         return
400                 }
401                 tokens = reqTokens
402         } else if collectionID == "" {
403                 http.Error(w, notFoundMessage, http.StatusNotFound)
404                 return
405         } else {
406                 fsprefix = "by_id/" + collectionID + "/"
407         }
408
409         if src := r.Header.Get("X-Webdav-Source"); strings.HasPrefix(src, "/") && !strings.Contains(src, "//") && !strings.Contains(src, "/../") {
410                 // Clients (specifically, the container log gateway)
411                 // use X-Webdav-Source to specify that although the
412                 // request path (and other webdav fields in the
413                 // request) refer to target "/abc", the intended
414                 // target is actually
415                 // "{x-webdav-source-value}/abc".
416                 //
417                 // This, combined with X-Webdav-Prefix, enables the
418                 // container log gateway to effectively alter the
419                 // target path when proxying a request, without
420                 // needing to rewrite all the other webdav
421                 // request/response fields that might mention the
422                 // target path.
423                 fsprefix += src[1:]
424         }
425
426         if tokens == nil {
427                 tokens = reqTokens
428                 if h.Cluster.Users.AnonymousUserToken != "" {
429                         tokens = append(tokens, h.Cluster.Users.AnonymousUserToken)
430                 }
431         }
432
433         if len(targetPath) > 0 && targetPath[0] == "_" {
434                 // If a collection has a directory called "t=foo" or
435                 // "_", it can be served at
436                 // //collections.example/_/t=foo/ or
437                 // //collections.example/_/_/ respectively:
438                 // //collections.example/t=foo/ won't work because
439                 // t=foo will be interpreted as a token "foo".
440                 targetPath = targetPath[1:]
441                 stripParts++
442         }
443
444         dirOpenMode := os.O_RDONLY
445         if writeMethod[r.Method] {
446                 dirOpenMode = os.O_RDWR
447         }
448
449         var tokenValid bool
450         var tokenScopeProblem bool
451         var token string
452         var tokenUser *arvados.User
453         var sessionFS arvados.CustomFileSystem
454         var targetFS arvados.FileSystem
455         var session *cachedSession
456         var collectionDir arvados.File
457         for _, token = range tokens {
458                 var statusErr errorWithHTTPStatus
459                 fs, sess, user, err := h.Cache.GetSession(token)
460                 if errors.As(err, &statusErr) && statusErr.HTTPStatus() == http.StatusUnauthorized {
461                         // bad token
462                         continue
463                 } else if err != nil {
464                         http.Error(w, "cache error: "+err.Error(), http.StatusInternalServerError)
465                         return
466                 }
467                 if token != h.Cluster.Users.AnonymousUserToken {
468                         tokenValid = true
469                 }
470                 f, err := fs.OpenFile(fsprefix, dirOpenMode, 0)
471                 if errors.As(err, &statusErr) &&
472                         statusErr.HTTPStatus() == http.StatusForbidden &&
473                         token != h.Cluster.Users.AnonymousUserToken {
474                         // collection id is outside scope of supplied
475                         // token
476                         tokenScopeProblem = true
477                         sess.Release()
478                         continue
479                 } else if os.IsNotExist(err) {
480                         // collection does not exist or is not
481                         // readable using this token
482                         sess.Release()
483                         continue
484                 } else if err != nil {
485                         http.Error(w, err.Error(), http.StatusInternalServerError)
486                         sess.Release()
487                         return
488                 }
489                 defer f.Close()
490
491                 collectionDir, sessionFS, session, tokenUser = f, fs, sess, user
492                 break
493         }
494
495         if session == nil {
496                 if pathToken {
497                         // The URL is a "secret sharing link" that
498                         // didn't work out.  Asking the client for
499                         // additional credentials would just be
500                         // confusing.
501                         http.Error(w, notFoundMessage, http.StatusNotFound)
502                         return
503                 }
504                 if tokenValid {
505                         // The client provided valid token(s), but the
506                         // collection was not found.
507                         http.Error(w, notFoundMessage, http.StatusNotFound)
508                         return
509                 }
510                 if tokenScopeProblem {
511                         // The client provided a valid token but
512                         // fetching a collection returned 401, which
513                         // means the token scope doesn't permit
514                         // fetching that collection.
515                         http.Error(w, notFoundMessage, http.StatusForbidden)
516                         return
517                 }
518                 // The client's token was invalid (e.g., expired), or
519                 // the client didn't even provide one.  Redirect to
520                 // workbench2's login-and-redirect-to-download url if
521                 // this is a browser navigation request. (The redirect
522                 // flow can't preserve the original method if it's not
523                 // GET, and doesn't make sense if the UA is a
524                 // command-line tool, is trying to load an inline
525                 // image, etc.; in these cases, there's nothing we can
526                 // do, so return 401 unauthorized.)
527                 //
528                 // Note Sec-Fetch-Mode is sent by all non-EOL
529                 // browsers, except Safari.
530                 // https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Sec-Fetch-Mode
531                 //
532                 // TODO(TC): This response would be confusing to
533                 // someone trying (anonymously) to download public
534                 // data that has been deleted.  Allow a referrer to
535                 // provide this context somehow?
536                 if r.Method == http.MethodGet && r.Header.Get("Sec-Fetch-Mode") == "navigate" {
537                         target := url.URL(h.Cluster.Services.Workbench2.ExternalURL)
538                         redirkey := "redirectToPreview"
539                         if attachment {
540                                 redirkey = "redirectToDownload"
541                         }
542                         callback := "/c=" + collectionID + "/" + strings.Join(targetPath, "/")
543                         query := url.Values{redirkey: {callback}}
544                         queryString := query.Encode()
545                         // Note: Encode (and QueryEscape function) turns space
546                         // into plus sign (+) rather than %20 (the plus sign
547                         // becomes %2B); that is the rule for web forms data
548                         // sent in URL query part via GET, but we're not
549                         // emulating forms here. Client JS APIs
550                         // (URLSearchParam#get, decodeURIComponent) will
551                         // decode %20, but while the former also expects the
552                         // form-specific encoding, the latter doesn't.
553                         // Encode() almost encodes everything; RFC 3986 3.4
554                         // says "it is sometimes better for usability" to not
555                         // encode / and ? when passing URI reference in query.
556                         // This is also legal according to WHATWG URL spec and
557                         // can be desirable for debugging webapp.
558                         // We can let slash / appear in the encoded query, and
559                         // equality-sign = too, but exempting ? is not very
560                         // useful.
561                         // Plus-sign, hash, and ampersand are never exempt.
562                         r := strings.NewReplacer("+", "%20", "%2F", "/", "%3D", "=")
563                         target.RawQuery = r.Replace(queryString)
564                         w.Header().Add("Location", target.String())
565                         w.WriteHeader(http.StatusSeeOther)
566                         return
567                 }
568                 if !credentialsOK {
569                         http.Error(w, fmt.Sprintf("Authorization tokens are not accepted here: %v, and no anonymous user token is configured.", reasonNotAcceptingCredentials), http.StatusUnauthorized)
570                         return
571                 }
572                 // If none of the above cases apply, suggest the
573                 // user-agent (which is either a non-browser agent
574                 // like wget, or a browser that can't redirect through
575                 // a login flow) prompt the user for credentials.
576                 w.Header().Add("WWW-Authenticate", "Basic realm=\"collections\"")
577                 http.Error(w, unauthorizedMessage, http.StatusUnauthorized)
578                 return
579         }
580
581         // The first call to releaseSession() calls session.Release(),
582         // then subsequent calls are no-ops.  This lets us use a defer
583         // call here to ensure it gets called in all code paths, and
584         // also call it inline (see below) in the cases where we want
585         // to release the lock before returning.
586         var releaseSessionOnce sync.Once
587         releaseSession := func() { releaseSessionOnce.Do(func() { session.Release() }) }
588         defer releaseSession()
589
590         colltarget := strings.Join(targetPath, "/")
591         colltarget = strings.TrimSuffix(colltarget, "/")
592         fstarget := fsprefix + colltarget
593         if !forceReload {
594                 need, err := h.needSync(r.Context(), sessionFS, fstarget)
595                 if err != nil {
596                         http.Error(w, err.Error(), http.StatusBadGateway)
597                         return
598                 }
599                 forceReload = need
600         }
601         if forceReload {
602                 err := collectionDir.Sync()
603                 if err != nil {
604                         if he := errorWithHTTPStatus(nil); errors.As(err, &he) {
605                                 http.Error(w, err.Error(), he.HTTPStatus())
606                         } else {
607                                 http.Error(w, err.Error(), http.StatusInternalServerError)
608                         }
609                         return
610                 }
611         }
612
613         if r.Method == http.MethodGet || r.Method == http.MethodHead {
614                 if fi, err := sessionFS.Stat(fstarget); err == nil && fi.IsDir() {
615                         releaseSession() // because we won't be writing anything
616                         if !strings.HasSuffix(r.URL.Path, "/") {
617                                 h.seeOtherWithCookie(w, r, r.URL.Path+"/", credentialsOK)
618                         } else {
619                                 h.serveDirectory(w, r, fi.Name(), sessionFS, fstarget, !useSiteFS)
620                         }
621                         return
622                 }
623         }
624
625         var basename string
626         if len(targetPath) > 0 {
627                 basename = targetPath[len(targetPath)-1]
628         }
629         if arvadosclient.PDHMatch(collectionID) && writeMethod[r.Method] {
630                 http.Error(w, webdavfs.ErrReadOnly.Error(), http.StatusMethodNotAllowed)
631                 return
632         }
633         if !h.userPermittedToUploadOrDownload(r.Method, tokenUser) {
634                 http.Error(w, "Not permitted", http.StatusForbidden)
635                 return
636         }
637         h.logUploadOrDownload(r, session.arvadosclient, sessionFS, fstarget, nil, tokenUser)
638
639         if webdavPrefix == "" && stripParts > 0 {
640                 webdavPrefix = "/" + strings.Join(pathParts[:stripParts], "/")
641         }
642
643         writing := writeMethod[r.Method]
644         if writing {
645                 // We implement write operations by writing to a
646                 // temporary collection, then applying the change to
647                 // the real collection using the replace_files option
648                 // in a collection update request.  This lets us do
649                 // the slow part (i.e., receive the file data from the
650                 // client and write it to Keep) without worrying about
651                 // side effects of other read/write operations.
652                 //
653                 // Collection update requests for a given collection
654                 // are serialized by the controller, so we don't need
655                 // to do any locking for that part either.
656
657                 // collprefix is the subdirectory in the target
658                 // collection which (according to X-Webdav-Source) we
659                 // should pretend is "/" for this request.
660                 collprefix := strings.TrimPrefix(fsprefix, "by_id/"+collectionID+"/")
661                 if len(collprefix) == len(fsprefix) {
662                         http.Error(w, "internal error: writing to anything other than /by_id/{collectionID}", http.StatusInternalServerError)
663                         return
664                 }
665
666                 // Create a temporary collection filesystem for webdav
667                 // to operate on.
668                 var tmpcoll arvados.Collection
669                 client := session.client.WithRequestID(r.Header.Get("X-Request-Id"))
670                 tmpfs, err := tmpcoll.FileSystem(client, session.keepclient)
671                 if err != nil {
672                         http.Error(w, err.Error(), http.StatusInternalServerError)
673                         return
674                 }
675                 snap, err := arvados.Snapshot(sessionFS, "by_id/"+collectionID+"/")
676                 if err != nil {
677                         http.Error(w, "snapshot: "+err.Error(), http.StatusInternalServerError)
678                         return
679                 }
680                 err = arvados.Splice(tmpfs, "/", snap)
681                 if err != nil {
682                         http.Error(w, "splice: "+err.Error(), http.StatusInternalServerError)
683                         return
684                 }
685
686                 targetFS = tmpfs
687                 fsprefix = collprefix
688                 replace := make(map[string]string)
689
690                 switch r.Method {
691                 case "COPY", "MOVE":
692                         dsttarget, err := copyMoveDestination(r, webdavPrefix)
693                         if err != nil {
694                                 http.Error(w, err.Error(), http.StatusBadRequest)
695                                 return
696                         }
697
698                         srcspec := "current/" + colltarget
699                         // RFC 4918 9.8.3: A COPY of "Depth: 0" only
700                         // instructs that the collection and its
701                         // properties, but not resources identified by
702                         // its internal member URLs, are to be copied.
703                         //
704                         // ...meaning we will be creating an empty
705                         // directory.
706                         //
707                         // RFC 4918 9.9.2: A client MUST NOT submit a
708                         // Depth header on a MOVE on a collection with
709                         // any value but "infinity".
710                         //
711                         // ...meaning we only need to consider this
712                         // case for COPY, not for MOVE.
713                         if fi, err := tmpfs.Stat(colltarget); err == nil && fi.IsDir() && r.Method == "COPY" && r.Header.Get("Depth") == "0" {
714                                 srcspec = "manifest_text/"
715                         }
716
717                         replace[strings.TrimSuffix(dsttarget, "/")] = srcspec
718                         if r.Method == "MOVE" {
719                                 replace["/"+colltarget] = ""
720                         }
721                 case "MKCOL":
722                         replace["/"+colltarget] = "manifest_text/"
723                 case "DELETE":
724                         if depth := r.Header.Get("Depth"); depth != "" && depth != "infinity" {
725                                 http.Error(w, "invalid depth header, see RFC 4918 9.6.1", http.StatusBadRequest)
726                                 return
727                         }
728                         replace["/"+colltarget] = ""
729                 case "PUT":
730                         // changes will be applied by updateOnSuccess
731                         // update func below
732                 case "LOCK", "UNLOCK", "PROPPATCH":
733                         // no changes
734                 default:
735                         http.Error(w, "method missing", http.StatusInternalServerError)
736                         return
737                 }
738
739                 // Save the collection only if/when all
740                 // webdav->filesystem operations succeed using our
741                 // temporary collection -- and send a 500 error if the
742                 // updates can't be saved.
743                 logger := ctxlog.FromContext(r.Context())
744                 w = &updateOnSuccess{
745                         ResponseWriter: w,
746                         logger:         logger,
747                         update: func() error {
748                                 var manifest string
749                                 var snap *arvados.Subtree
750                                 var err error
751                                 if r.Method == "PUT" {
752                                         snap, err = arvados.Snapshot(tmpfs, colltarget)
753                                         if err != nil {
754                                                 return fmt.Errorf("snapshot tmpfs: %w", err)
755                                         }
756                                         tmpfs, err = (&arvados.Collection{}).FileSystem(client, session.keepclient)
757                                         err = arvados.Splice(tmpfs, "file", snap)
758                                         if err != nil {
759                                                 return fmt.Errorf("splice tmpfs: %w", err)
760                                         }
761                                         manifest, err = tmpfs.MarshalManifest(".")
762                                         if err != nil {
763                                                 return fmt.Errorf("marshal tmpfs: %w", err)
764                                         }
765                                         replace["/"+colltarget] = "manifest_text/file"
766                                 } else if len(replace) == 0 {
767                                         return nil
768                                 }
769                                 err = client.RequestAndDecode(nil, "PATCH", "arvados/v1/collections/"+collectionID, nil, map[string]interface{}{
770                                         "replace_files": replace,
771                                         "collection":    map[string]interface{}{"manifest_text": manifest}})
772                                 var te arvados.TransactionError
773                                 if errors.As(err, &te) {
774                                         err = te
775                                 }
776                                 if err != nil {
777                                         return err
778                                 }
779                                 return nil
780                         }}
781         } else {
782                 // When writing, we need to block session renewal
783                 // until we're finished, in order to guarantee the
784                 // effect of the write is visible in future responses.
785                 // But if we're not writing, we can release the lock
786                 // early.  This enables us to keep renewing sessions
787                 // and processing more requests even if a slow client
788                 // takes a long time to download a large file.
789                 releaseSession()
790                 targetFS = sessionFS
791         }
792         if r.Method == http.MethodGet {
793                 applyContentDispositionHdr(w, r, basename, attachment)
794         }
795         wh := &webdav.Handler{
796                 Prefix: webdavPrefix,
797                 FileSystem: &webdavfs.FS{
798                         FileSystem:    targetFS,
799                         Prefix:        fsprefix,
800                         Writing:       writeMethod[r.Method],
801                         AlwaysReadEOF: r.Method == "PROPFIND",
802                 },
803                 LockSystem: webdavfs.NoLockSystem,
804                 Logger: func(r *http.Request, err error) {
805                         if err != nil && !os.IsNotExist(err) {
806                                 ctxlog.FromContext(r.Context()).WithError(err).Error("error reported by webdav handler")
807                         }
808                 },
809         }
810         h.metrics.track(wh, w, r)
811         if r.Method == http.MethodGet && w.WroteStatus() == http.StatusOK {
812                 wrote := int64(w.WroteBodyBytes())
813                 fi, err := wh.FileSystem.Stat(r.Context(), colltarget)
814                 if err == nil && fi.Size() != wrote {
815                         var n int
816                         f, err := wh.FileSystem.OpenFile(r.Context(), colltarget, os.O_RDONLY, 0)
817                         if err == nil {
818                                 n, err = f.Read(make([]byte, 1024))
819                                 f.Close()
820                         }
821                         ctxlog.FromContext(r.Context()).Errorf("stat.Size()==%d but only wrote %d bytes; read(1024) returns %d, %v", fi.Size(), wrote, n, err)
822                 }
823         }
824 }
825
// dirListingTemplate is the html/template source for the HTML
// directory listing page rendered by serveDirectory.
//
// It is executed with a map that provides:
//
//   - CollectionName: used as the page title and top-level heading.
//   - StripParts: value substituted into wget's --cut-dirs option in
//     the example command.
//   - QuotedUrlForWget: the URL argument of the example wget command.
//   - Files: the entries to list. Each entry must provide Name, Href,
//     Size and IsDir. Directories are shown with a trailing "/" and a
//     blank size column; when Files is empty a "no files" notice is
//     shown instead of the list.
//
// The template also requires a function named "nbsp" to be registered
// in its FuncMap; it is applied to the fixed-width size column.
826 var dirListingTemplate = `<!DOCTYPE HTML>
827 <HTML><HEAD>
828   <META name="robots" content="NOINDEX">
829   <TITLE>{{ .CollectionName }}</TITLE>
830   <STYLE type="text/css">
831     body {
832       margin: 1.5em;
833     }
834     pre {
835       background-color: #D9EDF7;
836       border-radius: .25em;
837       padding: .75em;
838       overflow: auto;
839     }
840     .footer p {
841       font-size: 82%;
842     }
843     hr {
844       border: 1px solid #808080;
845     }
846     ul {
847       padding: 0;
848     }
849     ul li {
850       font-family: monospace;
851       list-style: none;
852     }
853   </STYLE>
854 </HEAD>
855 <BODY>
856
857 <H1>{{ .CollectionName }}</H1>
858
859 <P>This collection of data files is being shared with you through
860 Arvados.  You can download individual files listed below.  To download
861 the entire directory tree with <CODE>wget</CODE>, try:</P>
862
863 <PRE id="wget-example">$ wget --mirror --no-parent --no-host --cut-dirs={{ .StripParts }} {{ .QuotedUrlForWget }}</PRE>
864
865 <H2>File Listing</H2>
866
867 {{if .Files}}
868 <UL>
869 {{range .Files}}
870 {{if .IsDir }}
871   <LI>{{" " | printf "%15s  " | nbsp}}<A class="item" href="{{ .Href }}/">{{ .Name }}/</A></LI>
872 {{else}}
873   <LI>{{.Size | printf "%15d  " | nbsp}}<A class="item" href="{{ .Href }}">{{ .Name }}</A></LI>
874 {{end}}
875 {{end}}
876 </UL>
877 {{else}}
878 <P>(No files; this collection is empty.)</P>
879 {{end}}
880
881 <HR>
882 <DIV class="footer">
883   <P>
884     About Arvados:
885     Arvados is a free and open source software bioinformatics platform.
886     To learn more, visit arvados.org.
887     Arvados is not responsible for the files listed on this page.
888   </P>
889 </DIV>
890
891 </BODY>
892 </HTML>
893 `
894
// fileListEnt is one row of the HTML directory listing rendered from
// dirListingTemplate.
type fileListEnt struct {
	// Name is the entry's path relative to the listed directory, as
	// shown to the user. Href is the corresponding percent-encoded
	// relative link target.
	Name, Href string
	// Size is the entry's size in bytes as reported by its FileInfo.
	Size int64
	// IsDir is true if the entry is a directory.
	IsDir bool
}
901
// relativeHref converts a filesystem path such as `foo/"bar baz"`
// into a percent-encoded relative link such as
// `./foo/%22bar%20baz%22`.
//
// The result is escaped for use as a URL path only. It can still
// contain characters that are unsafe in HTML (e.g., '&'); escaping
// those is left to the HTML templating engine.
func relativeHref(path string) string {
	escaped := (&url.URL{Path: path}).EscapedPath()
	return "./" + escaped
}
911
// makeQuotedUrlForWget returns the URL of the given request wrapped
// in single quotes, suitable for pasting into a shell command line
// ("wget ...") in order to repeat the request.
//
// The scheme is taken from the X-Forwarded-Proto header when it is
// "http" or "https" (i.e., as reported by a load balancer or proxy).
// Otherwise it is "https" if the request arrived over TLS, else
// "http".
func makeQuotedUrlForWget(r *http.Request) string {
	proto := r.Header.Get("X-Forwarded-Proto")
	switch {
	case proto == "http" || proto == "https":
		// Keep the protocol reported by the proxy.
	case r.TLS != nil:
		proto = "https"
	default:
		proto = "http"
	}
	// The escaped path can legitimately contain single quotes,
	// which would terminate our shell quoting early. Encode them
	// as %27 so the whole URL stays inside one quoted word.
	quoteSafePath := strings.Replace(r.URL.EscapedPath(), "'", "%27", -1)
	return fmt.Sprintf("'%s://%s%s'", proto, r.Host, quoteSafePath)
}
929
930 func (h *handler) serveDirectory(w http.ResponseWriter, r *http.Request, collectionName string, fs http.FileSystem, base string, recurse bool) {
931         var files []fileListEnt
932         var walk func(string) error
933         if !strings.HasSuffix(base, "/") {
934                 base = base + "/"
935         }
936         walk = func(path string) error {
937                 dirname := base + path
938                 if dirname != "/" {
939                         dirname = strings.TrimSuffix(dirname, "/")
940                 }
941                 d, err := fs.Open(dirname)
942                 if err != nil {
943                         return err
944                 }
945                 ents, err := d.Readdir(-1)
946                 if err != nil {
947                         return err
948                 }
949                 for _, ent := range ents {
950                         if recurse && ent.IsDir() {
951                                 err = walk(path + ent.Name() + "/")
952                                 if err != nil {
953                                         return err
954                                 }
955                         } else {
956                                 listingName := path + ent.Name()
957                                 files = append(files, fileListEnt{
958                                         Name:  listingName,
959                                         Href:  relativeHref(listingName),
960                                         Size:  ent.Size(),
961                                         IsDir: ent.IsDir(),
962                                 })
963                         }
964                 }
965                 return nil
966         }
967         if err := walk(""); err != nil {
968                 http.Error(w, "error getting directory listing: "+err.Error(), http.StatusInternalServerError)
969                 return
970         }
971
972         funcs := template.FuncMap{
973                 "nbsp": func(s string) template.HTML {
974                         return template.HTML(strings.Replace(s, " ", "&nbsp;", -1))
975                 },
976         }
977         tmpl, err := template.New("dir").Funcs(funcs).Parse(dirListingTemplate)
978         if err != nil {
979                 http.Error(w, "error parsing template: "+err.Error(), http.StatusInternalServerError)
980                 return
981         }
982         sort.Slice(files, func(i, j int) bool {
983                 return files[i].Name < files[j].Name
984         })
985         w.WriteHeader(http.StatusOK)
986         tmpl.Execute(w, map[string]interface{}{
987                 "CollectionName":   collectionName,
988                 "Files":            files,
989                 "Request":          r,
990                 "StripParts":       strings.Count(strings.TrimRight(r.URL.Path, "/"), "/"),
991                 "QuotedUrlForWget": makeQuotedUrlForWget(r),
992         })
993 }
994
// applyContentDispositionHdr sets the Content-Disposition response
// header when one is needed.
//
// The disposition type is "attachment" if isAttachment is true and
// "inline" otherwise. If the request URI contains a query string, a
// filename parameter is appended so the user agent saves the file as
// "filename.txt" rather than "filename.txt?disposition=attachment".
// A bare "inline" disposition is not sent at all.
func applyContentDispositionHdr(w http.ResponseWriter, r *http.Request, filename string, isAttachment bool) {
	hasQuery := strings.ContainsRune(r.RequestURI, '?')
	if !isAttachment && !hasQuery {
		// The value would be a bare "inline": omit the header.
		return
	}
	value := "inline"
	if isAttachment {
		value = "attachment"
	}
	if hasQuery {
		// TODO(TC): Follow advice at RFC 6266 appendix D
		value += "; filename=" + strconv.QuoteToASCII(filename)
	}
	w.Header().Set("Content-Disposition", value)
}
1012
1013 func (h *handler) seeOtherWithCookie(w http.ResponseWriter, r *http.Request, location string, credentialsOK bool) {
1014         if formTokens, haveFormTokens := r.Form["api_token"]; haveFormTokens {
1015                 if !credentialsOK {
1016                         // It is not safe to copy the provided token
1017                         // into a cookie unless the current vhost
1018                         // (origin) serves only a single collection or
1019                         // we are in TrustAllContent mode.
1020                         http.Error(w, "cannot serve inline content at this URL (possible configuration error; see https://doc.arvados.org/install/install-keep-web.html#dns)", http.StatusBadRequest)
1021                         return
1022                 }
1023
1024                 // The HttpOnly flag is necessary to prevent
1025                 // JavaScript code (included in, or loaded by, a page
1026                 // in the collection being served) from employing the
1027                 // user's token beyond reading other files in the same
1028                 // domain, i.e., same collection.
1029                 //
1030                 // The 303 redirect is necessary in the case of a GET
1031                 // request to avoid exposing the token in the Location
1032                 // bar, and in the case of a POST request to avoid
1033                 // raising warnings when the user refreshes the
1034                 // resulting page.
1035                 for _, tok := range formTokens {
1036                         if tok == "" {
1037                                 continue
1038                         }
1039                         http.SetCookie(w, &http.Cookie{
1040                                 Name:     "arvados_api_token",
1041                                 Value:    auth.EncodeTokenCookie([]byte(tok)),
1042                                 Path:     "/",
1043                                 HttpOnly: true,
1044                                 SameSite: http.SameSiteLaxMode,
1045                         })
1046                         break
1047                 }
1048         }
1049
1050         // Propagate query parameters (except api_token) from
1051         // the original request.
1052         redirQuery := r.URL.Query()
1053         redirQuery.Del("api_token")
1054
1055         u := r.URL
1056         if location != "" {
1057                 newu, err := u.Parse(location)
1058                 if err != nil {
1059                         http.Error(w, "error resolving redirect target: "+err.Error(), http.StatusInternalServerError)
1060                         return
1061                 }
1062                 u = newu
1063         }
1064         redir := (&url.URL{
1065                 Scheme:   r.URL.Scheme,
1066                 Host:     r.Host,
1067                 Path:     u.Path,
1068                 RawQuery: redirQuery.Encode(),
1069         }).String()
1070
1071         w.Header().Add("Location", redir)
1072         w.WriteHeader(http.StatusSeeOther)
1073         io.WriteString(w, `<A href="`)
1074         io.WriteString(w, html.EscapeString(redir))
1075         io.WriteString(w, `">Continue</A>`)
1076 }
1077
1078 func (h *handler) userPermittedToUploadOrDownload(method string, tokenUser *arvados.User) bool {
1079         var permitDownload bool
1080         var permitUpload bool
1081         if tokenUser != nil && tokenUser.IsAdmin {
1082                 permitUpload = h.Cluster.Collections.WebDAVPermission.Admin.Upload
1083                 permitDownload = h.Cluster.Collections.WebDAVPermission.Admin.Download
1084         } else {
1085                 permitUpload = h.Cluster.Collections.WebDAVPermission.User.Upload
1086                 permitDownload = h.Cluster.Collections.WebDAVPermission.User.Download
1087         }
1088         if (method == "PUT" || method == "POST") && !permitUpload {
1089                 // Disallow operations that upload new files.
1090                 // Permit webdav operations that move existing files around.
1091                 return false
1092         } else if method == "GET" && !permitDownload {
1093                 // Disallow downloading file contents.
1094                 // Permit webdav operations like PROPFIND that retrieve metadata
1095                 // but not file contents.
1096                 return false
1097         }
1098         return true
1099 }
1100
// copyMoveDestination parses the request's Destination header and
// returns the destination path relative to the current collection,
// i.e., with webdavPrefix stripped off.
//
// If webdavPrefix is empty, the destination path is returned as is.
//
// An error is returned if the header is missing or unparseable, if
// it names a host other than the one in the request, or if the
// destination path is not located under webdavPrefix. The prefix
// must match on a path segment boundary: with prefix "/c=abc", the
// destination "/c=abc/foo" yields "/foo", but "/c=abcdef/foo" is
// rejected rather than being treated as "def/foo" inside the current
// collection.
func copyMoveDestination(r *http.Request, webdavPrefix string) (string, error) {
	hdr := r.Header.Get("Destination")
	if hdr == "" {
		// RFC 4918 requires a Destination header on COPY and
		// MOVE. Don't treat its absence as "the root".
		return "", errors.New("missing Destination header")
	}
	dsturl, err := url.Parse(hdr)
	if err != nil {
		return "", err
	}
	if dsturl.Host != "" && dsturl.Host != r.Host {
		return "", errors.New("destination host mismatch")
	}
	if webdavPrefix == "" {
		return dsturl.Path, nil
	}
	dsttarget, ok := strings.CutPrefix(dsturl.Path, webdavPrefix)
	if !ok {
		return "", errors.New("destination path not supported")
	}
	if dsttarget != "" && dsttarget[0] != '/' && !strings.HasSuffix(webdavPrefix, "/") {
		// The prefix matched only part of a path segment, so
		// the destination is really somewhere else.
		return "", errors.New("destination path not supported")
	}
	return dsttarget, nil
}
1121
1122 // Check whether fstarget is in a collection whose PDH has changed
1123 // since it was last Sync()ed in sessionFS.
1124 //
1125 // If fstarget doesn't exist, but would be in such a collection if it
1126 // did exist, return true.
1127 func (h *handler) needSync(ctx context.Context, sessionFS arvados.CustomFileSystem, fstarget string) (bool, error) {
1128         collection, _ := h.determineCollection(sessionFS, fstarget)
1129         if collection == nil || len(collection.UUID) != 27 || !strings.HasPrefix(collection.UUID, h.Cluster.ClusterID) {
1130                 return false, nil
1131         }
1132         db, err := h.getDBConnector().GetDB(ctx)
1133         if err != nil {
1134                 return false, err
1135         }
1136         var currentPDH string
1137         err = db.QueryRowContext(ctx, `select portable_data_hash from collections where uuid=$1`, collection.UUID).Scan(&currentPDH)
1138         if err != nil {
1139                 return false, err
1140         }
1141         if currentPDH != collection.PortableDataHash {
1142                 return true, nil
1143         }
1144         return false, nil
1145 }
1146
// fileEventLog describes a single file upload or download for
// logging purposes. All fields are plain strings, so values are
// comparable and can be used as map keys.
type fileEventLog struct {
	// requestPath is the URL path of the request. eventType is
	// "file_upload" or "file_download".
	requestPath, eventType string
	// Identity of the user making the request.
	userUUID, userFullName string
	// The collection being accessed, and the path of the file
	// within that collection.
	collUUID, collPDH, collFilePath string
	// Address and token of the client that made the request.
	clientAddr, clientToken string
}
1158
1159 func newFileEventLog(
1160         h *handler,
1161         r *http.Request,
1162         filepath string,
1163         collection *arvados.Collection,
1164         user *arvados.User,
1165         token string,
1166 ) *fileEventLog {
1167         var eventType string
1168         switch r.Method {
1169         case "POST", "PUT":
1170                 eventType = "file_upload"
1171         case "GET":
1172                 eventType = "file_download"
1173         default:
1174                 return nil
1175         }
1176
1177         // We want to log the address of the proxy closest to keep-web—the last
1178         // value in the X-Forwarded-For list—or the client address if there is no
1179         // valid proxy.
1180         var clientAddr string
1181         // 1. Build a slice of proxy addresses from X-Forwarded-For.
1182         xff := strings.Join(r.Header.Values("X-Forwarded-For"), ",")
1183         addrs := strings.Split(xff, ",")
1184         // 2. Reverse the slice so it's in our most preferred order for logging.
1185         slices.Reverse(addrs)
1186         // 3. Append the client address to that slice.
1187         if addr, _, err := net.SplitHostPort(r.RemoteAddr); err == nil {
1188                 addrs = append(addrs, addr)
1189         }
1190         // 4. Use the first valid address in the slice.
1191         for _, addr := range addrs {
1192                 if ip := net.ParseIP(strings.TrimSpace(addr)); ip != nil {
1193                         clientAddr = ip.String()
1194                         break
1195                 }
1196         }
1197
1198         ev := &fileEventLog{
1199                 requestPath: r.URL.Path,
1200                 eventType:   eventType,
1201                 clientAddr:  clientAddr,
1202                 clientToken: token,
1203         }
1204
1205         if user != nil {
1206                 ev.userUUID = user.UUID
1207                 ev.userFullName = user.FullName
1208         } else {
1209                 ev.userUUID = fmt.Sprintf("%s-tpzed-anonymouspublic", h.Cluster.ClusterID)
1210         }
1211
1212         if collection != nil {
1213                 ev.collFilePath = filepath
1214                 // h.determineCollection populates the collection_uuid
1215                 // prop with the PDH, if this collection is being
1216                 // accessed via PDH. For logging, we use a different
1217                 // field depending on whether it's a UUID or PDH.
1218                 if len(collection.UUID) > 32 {
1219                         ev.collPDH = collection.UUID
1220                 } else {
1221                         ev.collPDH = collection.PortableDataHash
1222                         ev.collUUID = collection.UUID
1223                 }
1224         }
1225
1226         return ev
1227 }
1228
1229 func (ev *fileEventLog) shouldLogPDH() bool {
1230         return ev.eventType == "file_download" && ev.collPDH != ""
1231 }
1232
1233 func (ev *fileEventLog) asDict() arvadosclient.Dict {
1234         props := arvadosclient.Dict{
1235                 "reqPath":              ev.requestPath,
1236                 "collection_uuid":      ev.collUUID,
1237                 "collection_file_path": ev.collFilePath,
1238         }
1239         if ev.shouldLogPDH() {
1240                 props["portable_data_hash"] = ev.collPDH
1241         }
1242         return arvadosclient.Dict{
1243                 "object_uuid": ev.userUUID,
1244                 "event_type":  ev.eventType,
1245                 "properties":  props,
1246         }
1247 }
1248
1249 func (ev *fileEventLog) asFields() logrus.Fields {
1250         fields := logrus.Fields{
1251                 "collection_file_path": ev.collFilePath,
1252                 "collection_uuid":      ev.collUUID,
1253                 "user_uuid":            ev.userUUID,
1254         }
1255         if ev.shouldLogPDH() {
1256                 fields["portable_data_hash"] = ev.collPDH
1257         }
1258         if !strings.HasSuffix(ev.userUUID, "-tpzed-anonymouspublic") {
1259                 fields["user_full_name"] = ev.userFullName
1260         }
1261         return fields
1262 }
1263
1264 func (h *handler) shouldLogEvent(
1265         event *fileEventLog,
1266         req *http.Request,
1267         fileInfo os.FileInfo,
1268         t time.Time,
1269 ) bool {
1270         if event == nil {
1271                 return false
1272         } else if event.eventType != "file_download" ||
1273                 h.Cluster.Collections.WebDAVLogDownloadInterval == 0 ||
1274                 fileInfo == nil {
1275                 return true
1276         }
1277         td := h.Cluster.Collections.WebDAVLogDownloadInterval.Duration()
1278         cutoff := t.Add(-td)
1279         ev := *event
1280         h.fileEventLogsMtx.Lock()
1281         defer h.fileEventLogsMtx.Unlock()
1282         if h.fileEventLogs == nil {
1283                 h.fileEventLogs = make(map[fileEventLog]time.Time)
1284         }
1285         shouldLog := h.fileEventLogs[ev].Before(cutoff)
1286         if !shouldLog {
1287                 // Go's http fs server evaluates http.Request.Header.Get("Range")
1288                 // (as of Go 1.22) so we should do the same.
1289                 // Don't worry about merging multiple headers, etc.
1290                 ranges, err := http_range.ParseRange(req.Header.Get("Range"), fileInfo.Size())
1291                 if ranges == nil || err != nil {
1292                         // The Range header was either empty or malformed.
1293                         // Err on the side of logging.
1294                         shouldLog = true
1295                 } else {
1296                         // Log this request only if it requested the first byte
1297                         // (our heuristic for "starting a new download").
1298                         for _, reqRange := range ranges {
1299                                 if reqRange.Start == 0 {
1300                                         shouldLog = true
1301                                         break
1302                                 }
1303                         }
1304                 }
1305         }
1306         if shouldLog {
1307                 h.fileEventLogs[ev] = t
1308         }
1309         if t.After(h.fileEventLogsNextTidy) {
1310                 for key, logTime := range h.fileEventLogs {
1311                         if logTime.Before(cutoff) {
1312                                 delete(h.fileEventLogs, key)
1313                         }
1314                 }
1315                 h.fileEventLogsNextTidy = t.Add(td)
1316         }
1317         return shouldLog
1318 }
1319
1320 func (h *handler) logUploadOrDownload(
1321         r *http.Request,
1322         client *arvadosclient.ArvadosClient,
1323         fs arvados.CustomFileSystem,
1324         filepath string,
1325         collection *arvados.Collection,
1326         user *arvados.User,
1327 ) {
1328         var fileInfo os.FileInfo
1329         if fs != nil {
1330                 if collection == nil {
1331                         collection, filepath = h.determineCollection(fs, filepath)
1332                 }
1333                 if collection != nil {
1334                         // It's okay to ignore this error because shouldLogEvent will
1335                         // always return true if fileInfo == nil.
1336                         fileInfo, _ = fs.Stat(path.Join("by_id", collection.UUID, filepath))
1337                 }
1338         }
1339         event := newFileEventLog(h, r, filepath, collection, user, client.ApiToken)
1340         if !h.shouldLogEvent(event, r, fileInfo, time.Now()) {
1341                 return
1342         }
1343         log := ctxlog.FromContext(r.Context()).WithFields(event.asFields())
1344         log.Info(strings.Replace(event.eventType, "file_", "File ", 1))
1345         if h.Cluster.Collections.WebDAVLogEvents {
1346                 go func() {
1347                         logReq := arvadosclient.Dict{"log": event.asDict()}
1348                         err := client.Create("logs", logReq, nil)
1349                         if err != nil {
1350                                 log.WithError(err).Errorf("Failed to create %s log event on API server", event.eventType)
1351                         }
1352                 }()
1353         }
1354 }
1355
1356 func (h *handler) determineCollection(fs arvados.CustomFileSystem, path string) (*arvados.Collection, string) {
1357         target := strings.TrimSuffix(path, "/")
1358         for cut := len(target); cut >= 0; cut = strings.LastIndexByte(target, '/') {
1359                 target = target[:cut]
1360                 fi, err := fs.Stat(target)
1361                 if os.IsNotExist(err) {
1362                         // creating a new file/dir, or download
1363                         // destined to fail
1364                         continue
1365                 } else if err != nil {
1366                         return nil, ""
1367                 }
1368                 switch src := fi.Sys().(type) {
1369                 case *arvados.Collection:
1370                         return src, strings.TrimPrefix(path[len(target):], "/")
1371                 case *arvados.Group:
1372                         return nil, ""
1373                 default:
1374                         if _, ok := src.(error); ok {
1375                                 return nil, ""
1376                         }
1377                 }
1378         }
1379         return nil, ""
1380 }
1381
1382 func ServeCORSPreflight(w http.ResponseWriter, header http.Header) bool {
1383         method := header.Get("Access-Control-Request-Method")
1384         if method == "" {
1385                 return false
1386         }
1387         if !browserMethod[method] && !webdavMethod[method] {
1388                 w.WriteHeader(http.StatusMethodNotAllowed)
1389                 return true
1390         }
1391         w.Header().Set("Access-Control-Allow-Headers", corsAllowHeadersHeader)
1392         w.Header().Set("Access-Control-Allow-Methods", "COPY, DELETE, GET, LOCK, MKCOL, MOVE, OPTIONS, POST, PROPFIND, PROPPATCH, PUT, RMCOL, UNLOCK")
1393         w.Header().Set("Access-Control-Allow-Origin", "*")
1394         w.Header().Set("Access-Control-Max-Age", "86400")
1395         return true
1396 }