]> git.arvados.org - arvados.git/blob - services/keep-web/handler.go
22819: Fix message
[arvados.git] / services / keep-web / handler.go
1 // Copyright (C) The Arvados Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: AGPL-3.0
4
5 package keepweb
6
7 import (
8         "context"
9         "encoding/json"
10         "errors"
11         "fmt"
12         "html"
13         "html/template"
14         "io"
15         "mime"
16         "net"
17         "net/http"
18         "net/url"
19         "os"
20         "path"
21         "slices"
22         "sort"
23         "strconv"
24         "strings"
25         "sync"
26         "time"
27
28         "git.arvados.org/arvados.git/lib/cmd"
29         "git.arvados.org/arvados.git/lib/ctrlctx"
30         "git.arvados.org/arvados.git/lib/webdavfs"
31         "git.arvados.org/arvados.git/sdk/go/arvados"
32         "git.arvados.org/arvados.git/sdk/go/arvadosclient"
33         "git.arvados.org/arvados.git/sdk/go/auth"
34         "git.arvados.org/arvados.git/sdk/go/ctxlog"
35         "git.arvados.org/arvados.git/sdk/go/httpserver"
36         "github.com/gotd/contrib/http_range"
37         "github.com/sirupsen/logrus"
38         "golang.org/x/net/webdav"
39 )
40
// handler is the keep-web request handler; its ServeHTTP method
// implements http.Handler.
type handler struct {
	// Cache supplies per-token sessions (ServeHTTP calls
	// Cache.GetSession for each candidate token). Cluster is the
	// cluster configuration consulted while handling requests.
	// NOTE(review): metrics is not referenced in this part of the
	// file -- confirm its role where it is populated.
	Cache   cache
	Cluster *arvados.Cluster
	metrics *metrics

	// NOTE(review): presumably a record of recently logged file
	// events, with its own mutex and next-cleanup time -- confirm
	// against the code that uses these fields.
	fileEventLogs         map[fileEventLog]time.Time
	fileEventLogsMtx      sync.Mutex
	fileEventLogsNextTidy time.Time

	// NOTE(review): presumably a cache of S3 secrets, with its
	// own mutex and next-cleanup time -- confirm against the S3
	// request handling code.
	s3SecretCache         map[string]*cachedS3Secret
	s3SecretCacheMtx      sync.Mutex
	s3SecretCacheNextTidy time.Time

	// dbConnector is created on first use by getDBConnector,
	// which holds dbConnectorMtx while checking and assigning it.
	dbConnector    *ctrlctx.DBConnector
	dbConnectorMtx sync.Mutex

	// NOTE(review): repacking is not referenced in this part of
	// the file; presumably it tracks in-progress repacking
	// operations -- confirm.
	repacking sync.Map
}
59
// urlPDHDecoder rewrites every " " and "-" to "+", so that a portable
// data hash whose "+" was replaced by one of those characters in a
// URL can be restored before matching (see parseCollectionIDFromURL).
var urlPDHDecoder = strings.NewReplacer(" ", "+", "-", "+")

// notFoundMessage is the response body used with the "not found"
// responses in ServeHTTP (it is also sent with the 403 response for a
// token scope problem).
var notFoundMessage = "Not Found"

// unauthorizedMessage is the response body used with 401 responses
// that ask the client to supply a token.
var unauthorizedMessage = "401 Unauthorized\n\nA valid Arvados token must be provided to access this resource."
64
65 // parseCollectionIDFromURL returns a UUID or PDH if s is a UUID or a
66 // PDH (even if it is a PDH with "+" replaced by " " or "-");
67 // otherwise "".
68 func parseCollectionIDFromURL(s string) string {
69         if arvadosclient.UUIDMatch(s) {
70                 return s
71         }
72         if pdh := urlPDHDecoder.Replace(s); arvadosclient.PDHMatch(pdh) {
73                 return pdh
74         }
75         return ""
76 }
77
78 func (h *handler) serveStatus(w http.ResponseWriter, r *http.Request) {
79         json.NewEncoder(w).Encode(struct{ Version string }{cmd.Version.String()})
80 }
81
// errorWithHTTPStatus is implemented by errors that carry an HTTP
// status code. Code in this file uses errors.As with this interface
// to pick the status code for an error response, or to recognize
// specific failures such as 401 and 403.
type errorWithHTTPStatus interface {
	// HTTPStatus returns the HTTP status code associated with
	// the error.
	HTTPStatus() int
}
85
// updateOnSuccess wraps httpserver.ResponseWriter. If the handler
// sends an HTTP header indicating success, updateOnSuccess first
// calls the provided update func. If the update func fails, an error
// response is sent (using the error's HTTP status or 500 if none),
// and the status code and body sent by the handler are ignored (all
// response writes return the update error).
//
// "Success" means a status code in the range 200-399 on the first
// WriteHeader call (or the implicit 200 triggered by the first
// Write).
type updateOnSuccess struct {
	httpserver.ResponseWriter
	logger     logrus.FieldLogger // receives an error entry when update fails
	update     func() error       // called at most once, before the first success header is sent
	sentHeader bool               // true once WriteHeader has been called
	err        error              // result of update; when non-nil, Write returns it
}
99
100 func (uos *updateOnSuccess) Write(p []byte) (int, error) {
101         if !uos.sentHeader {
102                 uos.WriteHeader(http.StatusOK)
103         }
104         if uos.err != nil {
105                 return 0, uos.err
106         }
107         return uos.ResponseWriter.Write(p)
108 }
109
110 func (uos *updateOnSuccess) WriteHeader(code int) {
111         if !uos.sentHeader {
112                 uos.sentHeader = true
113                 if code >= 200 && code < 400 {
114                         if uos.err = uos.update(); uos.err != nil {
115                                 code := http.StatusInternalServerError
116                                 if he := errorWithHTTPStatus(nil); errors.As(uos.err, &he) {
117                                         code = he.HTTPStatus()
118                                 }
119                                 uos.logger.WithError(uos.err).Errorf("update() returned %T error, changing response to HTTP %d", uos.err, code)
120                                 http.Error(uos.ResponseWriter, uos.err.Error(), code)
121                                 return
122                         }
123                 }
124         }
125         uos.ResponseWriter.WriteHeader(code)
126 }
127
var (
	// corsAllowHeadersHeader is a comma-separated list of request
	// header names.
	// NOTE(review): presumably sent as the value of
	// Access-Control-Allow-Headers in CORS preflight responses;
	// it is not referenced in this part of the file -- confirm.
	corsAllowHeadersHeader = strings.Join([]string{
		"Authorization", "Content-Type", "Range",
		// WebDAV request headers:
		"Depth", "Destination", "If", "Lock-Token", "Overwrite", "Timeout", "Cache-Control",
	}, ", ")
	// writeMethod is the set of request methods treated as write
	// operations. ServeHTTP rejects them for siteFS paths and for
	// collections addressed by PDH, and opens the target
	// directory read-write (instead of read-only) for them.
	writeMethod = map[string]bool{
		"COPY":      true,
		"DELETE":    true,
		"LOCK":      true,
		"MKCOL":     true,
		"MOVE":      true,
		"PROPPATCH": true,
		"PUT":       true,
		"UNLOCK":    true,
	}
	// webdavMethod is the set of WebDAV request methods accepted
	// by ServeHTTP. A request whose method is in neither
	// webdavMethod nor browserMethod gets a 405 response.
	webdavMethod = map[string]bool{
		"COPY":      true,
		"DELETE":    true,
		"LOCK":      true,
		"MKCOL":     true,
		"MOVE":      true,
		"OPTIONS":   true,
		"PROPFIND":  true,
		"PROPPATCH": true,
		"PUT":       true,
		"RMCOL":     true,
		"UNLOCK":    true,
	}
	// browserMethod is the set of request methods treated as
	// page views. For these, when an api_token form/query value
	// cannot be used directly, ServeHTTP responds via
	// seeOtherWithCookie instead of serving content.
	browserMethod = map[string]bool{
		"GET":  true,
		"HEAD": true,
		"POST": true,
	}
	// top-level dirs to serve with siteFS
	siteFSDir = map[string]bool{
		"":      true, // root directory
		"by_id": true,
		"users": true,
	}
)
169
// stripDefaultPort normalizes a "host" or "host:port" string so two
// values can be compared as virtual host names: the result is
// lowercased, and a trailing ":80" or ":443" is removed.
//
// Ports 80 and 443 are both stripped regardless of scheme, so the
// same host on port 80 and on port 443 is considered the same vhost.
//
// Only the ":port" suffix is removed. In particular, a bracketed IPv6
// literal keeps its brackets, so "[::1]:443" and "[::1]" normalize to
// the same string.
func stripDefaultPort(host string) string {
	u := &url.URL{Host: host}
	if p := u.Port(); p == "80" || p == "443" {
		// Port() only reports a port when host ends in ":"+p,
		// so trimming the suffix leaves exactly the host part.
		host = strings.TrimSuffix(host, ":"+p)
	}
	return strings.ToLower(host)
}
179
// CheckHealth implements service.Handler.
//
// It performs no checks and always returns nil.
func (h *handler) CheckHealth() error {
	return nil
}
184
// Done implements service.Handler.
//
// It returns a nil channel, so a receive on the returned channel
// never completes.
func (h *handler) Done() <-chan struct{} {
	return nil
}
189
190 // Close releases the active database connection, if any.
191 //
192 // Currently Close() is not part of the service.Handler interface.
193 // However, it is used by the test suite to avoid accumulating
194 // database connections when starting up lots of keep-web
195 // servers/handlers.
196 func (h *handler) Close() {
197         h.getDBConnector().Close()
198 }
199
200 func (h *handler) getDBConnector() *ctrlctx.DBConnector {
201         h.dbConnectorMtx.Lock()
202         defer h.dbConnectorMtx.Unlock()
203         if h.dbConnector == nil {
204                 h.dbConnector = &ctrlctx.DBConnector{PostgreSQL: h.Cluster.PostgreSQL}
205         }
206         return h.dbConnector
207 }
208
209 // ServeHTTP implements http.Handler.
210 func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
211         if xfp := r.Header.Get("X-Forwarded-Proto"); xfp != "" && xfp != "http" {
212                 r.URL.Scheme = xfp
213         }
214
215         httpserver.SetResponseLogFields(r.Context(), logrus.Fields{
216                 "webdavDepth":       r.Header.Get("Depth"),
217                 "webdavDestination": r.Header.Get("Destination"),
218                 "webdavOverwrite":   r.Header.Get("Overwrite"),
219         })
220
221         wbuffer := newWriteBuffer(wOrig, int(h.Cluster.Collections.WebDAVOutputBuffer))
222         defer wbuffer.Close()
223         w := httpserver.WrapResponseWriter(responseWriter{
224                 Writer:         wbuffer,
225                 ResponseWriter: wOrig,
226         })
227
228         if r.Method == "OPTIONS" && ServeCORSPreflight(w, r.Header) {
229                 return
230         }
231
232         if !browserMethod[r.Method] && !webdavMethod[r.Method] {
233                 w.WriteHeader(http.StatusMethodNotAllowed)
234                 return
235         }
236
237         if r.Header.Get("Origin") != "" {
238                 // Allow simple cross-origin requests without user
239                 // credentials ("user credentials" as defined by CORS,
240                 // i.e., cookies, HTTP authentication, and client-side
241                 // SSL certificates. See
242                 // http://www.w3.org/TR/cors/#user-credentials).
243                 w.Header().Set("Access-Control-Allow-Origin", "*")
244                 w.Header().Set("Access-Control-Expose-Headers", "Content-Range")
245         }
246
247         if h.serveS3(w, r) {
248                 return
249         }
250
251         // webdavPrefix is the leading portion of r.URL.Path that
252         // should be ignored by the webdav handler, if any.
253         //
254         // req "/c={id}/..." -> webdavPrefix "/c={id}"
255         // req "/by_id/..." -> webdavPrefix ""
256         //
257         // Note: in the code immediately below, we set webdavPrefix
258         // only if it was explicitly set by the client. Otherwise, it
259         // gets set later, after checking the request path for cases
260         // like "/c={id}/...".
261         webdavPrefix := ""
262         arvPath := r.URL.Path
263         if prefix := r.Header.Get("X-Webdav-Prefix"); prefix != "" {
264                 // Enable a proxy (e.g., container log handler in
265                 // controller) to satisfy a request for path
266                 // "/foo/bar/baz.txt" using content from
267                 // "//abc123-4.internal/bar/baz.txt", by adding a
268                 // request header "X-Webdav-Prefix: /foo"
269                 if !strings.HasPrefix(arvPath, prefix) {
270                         http.Error(w, "X-Webdav-Prefix header is not a prefix of the requested path", http.StatusBadRequest)
271                         return
272                 }
273                 arvPath = r.URL.Path[len(prefix):]
274                 if arvPath == "" {
275                         arvPath = "/"
276                 }
277                 w.Header().Set("Vary", "X-Webdav-Prefix, "+w.Header().Get("Vary"))
278                 webdavPrefix = prefix
279         }
280         pathParts := strings.Split(arvPath[1:], "/")
281
282         var stripParts int
283         var collectionID string
284         var tokens []string
285         var reqTokens []string
286         var pathToken bool
287         var attachment bool
288         var useSiteFS bool
289         credentialsOK := h.Cluster.Collections.TrustAllContent
290         reasonNotAcceptingCredentials := ""
291
292         if r.Host != "" && stripDefaultPort(r.Host) == stripDefaultPort(h.Cluster.Services.WebDAVDownload.ExternalURL.Host) {
293                 credentialsOK = true
294                 attachment = true
295         } else if r.FormValue("disposition") == "attachment" {
296                 attachment = true
297         }
298
299         if !credentialsOK {
300                 reasonNotAcceptingCredentials = fmt.Sprintf("vhost %q does not specify a single collection ID or match Services.WebDAVDownload.ExternalURL %q, and Collections.TrustAllContent is false",
301                         r.Host, h.Cluster.Services.WebDAVDownload.ExternalURL)
302         }
303
304         if collectionID = arvados.CollectionIDFromDNSName(r.Host); collectionID != "" {
305                 // http://ID.collections.example/PATH...
306                 credentialsOK = true
307         } else if r.URL.Path == "/status.json" {
308                 h.serveStatus(w, r)
309                 return
310         } else if siteFSDir[pathParts[0]] {
311                 useSiteFS = true
312         } else if len(pathParts) >= 1 && strings.HasPrefix(pathParts[0], "c=") {
313                 // /c=ID[/PATH...]
314                 collectionID = parseCollectionIDFromURL(pathParts[0][2:])
315                 stripParts = 1
316         } else if len(pathParts) >= 2 && pathParts[0] == "collections" {
317                 if len(pathParts) >= 4 && pathParts[1] == "download" {
318                         // /collections/download/ID/TOKEN/PATH...
319                         collectionID = parseCollectionIDFromURL(pathParts[2])
320                         tokens = []string{pathParts[3]}
321                         stripParts = 4
322                         pathToken = true
323                 } else {
324                         // /collections/ID/PATH...
325                         collectionID = parseCollectionIDFromURL(pathParts[1])
326                         stripParts = 2
327                         // This path is only meant to work for public
328                         // data. Tokens provided with the request are
329                         // ignored.
330                         credentialsOK = false
331                         reasonNotAcceptingCredentials = "the '/collections/UUID/PATH' form only works for public data"
332                 }
333         }
334
335         forceReload := false
336         if cc := r.Header.Get("Cache-Control"); strings.Contains(cc, "no-cache") || strings.Contains(cc, "must-revalidate") {
337                 forceReload = true
338         }
339
340         if credentialsOK {
341                 reqTokens = auth.CredentialsFromRequest(r).Tokens
342         }
343
344         r.ParseForm()
345         origin := r.Header.Get("Origin")
346         cors := origin != "" && !strings.HasSuffix(origin, "://"+r.Host)
347         safeAjax := cors && (r.Method == http.MethodGet || r.Method == http.MethodHead)
348         // Important distinction: safeAttachment checks whether api_token exists
349         // as a query parameter. haveFormTokens checks whether api_token exists
350         // as request form data *or* a query parameter. Different checks are
351         // necessary because both the request disposition and the location of
352         // the API token affect whether or not the request needs to be
353         // redirected. The different branch comments below explain further.
354         safeAttachment := attachment && !r.URL.Query().Has("api_token")
355         if formTokens, haveFormTokens := r.Form["api_token"]; !haveFormTokens {
356                 // No token to use or redact.
357         } else if safeAjax || safeAttachment {
358                 // If this is a cross-origin request, the URL won't
359                 // appear in the browser's address bar, so
360                 // substituting a clipboard-safe URL is pointless.
361                 // Redirect-with-cookie wouldn't work anyway, because
362                 // it's not safe to allow third-party use of our
363                 // cookie.
364                 //
365                 // If we're supplying an attachment, we don't need to
366                 // convert POST to GET to avoid the "really resubmit
367                 // form?" problem, so provided the token isn't
368                 // embedded in the URL, there's no reason to do
369                 // redirect-with-cookie in this case either.
370                 for _, tok := range formTokens {
371                         reqTokens = append(reqTokens, tok)
372                 }
373         } else if browserMethod[r.Method] {
374                 // If this is a page view, and the client provided a
375                 // token via query string or POST body, we must put
376                 // the token in an HttpOnly cookie, and redirect to an
377                 // equivalent URL with the query param redacted and
378                 // method = GET.
379                 h.seeOtherWithCookie(w, r, "", credentialsOK)
380                 return
381         }
382
383         targetPath := pathParts[stripParts:]
384         if tokens == nil && len(targetPath) > 0 && strings.HasPrefix(targetPath[0], "t=") {
385                 // http://ID.example/t=TOKEN/PATH...
386                 // /c=ID/t=TOKEN/PATH...
387                 //
388                 // This form must only be used to pass scoped tokens
389                 // that give permission for a single collection. See
390                 // FormValue case above.
391                 tokens = []string{targetPath[0][2:]}
392                 pathToken = true
393                 targetPath = targetPath[1:]
394                 stripParts++
395         }
396
397         // fsprefix is the path from sitefs root to the sitefs
398         // directory (implicitly or explicitly) indicated by the
399         // leading / in the request path.
400         //
401         // Request "/by_id/..." -> fsprefix ""
402         // Request "/c={id}/..." -> fsprefix "/by_id/{id}/"
403         fsprefix := ""
404         if useSiteFS {
405                 if writeMethod[r.Method] {
406                         http.Error(w, webdavfs.ErrReadOnly.Error(), http.StatusMethodNotAllowed)
407                         return
408                 }
409                 if len(reqTokens) == 0 {
410                         w.Header().Add("WWW-Authenticate", "Basic realm=\"collections\"")
411                         http.Error(w, unauthorizedMessage, http.StatusUnauthorized)
412                         return
413                 }
414                 tokens = reqTokens
415         } else if collectionID == "" {
416                 http.Error(w, notFoundMessage, http.StatusNotFound)
417                 return
418         } else {
419                 fsprefix = "by_id/" + collectionID + "/"
420         }
421
422         if src := r.Header.Get("X-Webdav-Source"); strings.HasPrefix(src, "/") && !strings.Contains(src, "//") && !strings.Contains(src, "/../") {
423                 // Clients (specifically, the container log gateway)
424                 // use X-Webdav-Source to specify that although the
425                 // request path (and other webdav fields in the
426                 // request) refer to target "/abc", the intended
427                 // target is actually
428                 // "{x-webdav-source-value}/abc".
429                 //
430                 // This, combined with X-Webdav-Prefix, enables the
431                 // container log gateway to effectively alter the
432                 // target path when proxying a request, without
433                 // needing to rewrite all the other webdav
434                 // request/response fields that might mention the
435                 // target path.
436                 fsprefix += src[1:]
437         }
438
439         if tokens == nil {
440                 tokens = reqTokens
441                 if h.Cluster.Users.AnonymousUserToken != "" {
442                         tokens = append(tokens, h.Cluster.Users.AnonymousUserToken)
443                 }
444         }
445
446         if len(targetPath) > 0 && targetPath[0] == "_" {
447                 // If a collection has a directory called "t=foo" or
448                 // "_", it can be served at
449                 // //collections.example/_/t=foo/ or
450                 // //collections.example/_/_/ respectively:
451                 // //collections.example/t=foo/ won't work because
452                 // t=foo will be interpreted as a token "foo".
453                 targetPath = targetPath[1:]
454                 stripParts++
455         }
456
457         dirOpenMode := os.O_RDONLY
458         if writeMethod[r.Method] {
459                 dirOpenMode = os.O_RDWR
460         }
461
462         var tokenValid bool
463         var tokenScopeProblem bool
464         var token string
465         var tokenUser *arvados.User
466         var sessionFS arvados.CustomFileSystem
467         var targetFS arvados.FileSystem
468         var session *cachedSession
469         var collectionDir arvados.File
470         for _, token = range tokens {
471                 var statusErr errorWithHTTPStatus
472                 fs, sess, user, err := h.Cache.GetSession(token)
473                 if errors.As(err, &statusErr) && statusErr.HTTPStatus() == http.StatusUnauthorized {
474                         // bad token
475                         continue
476                 } else if err != nil {
477                         http.Error(w, "cache error: "+err.Error(), http.StatusInternalServerError)
478                         return
479                 }
480                 if token != h.Cluster.Users.AnonymousUserToken {
481                         tokenValid = true
482                 }
483                 f, err := fs.OpenFile(fsprefix, dirOpenMode, 0)
484                 if errors.As(err, &statusErr) &&
485                         statusErr.HTTPStatus() == http.StatusForbidden &&
486                         token != h.Cluster.Users.AnonymousUserToken {
487                         // collection id is outside scope of supplied
488                         // token
489                         tokenScopeProblem = true
490                         sess.Release()
491                         continue
492                 } else if os.IsNotExist(err) {
493                         // collection does not exist or is not
494                         // readable using this token
495                         sess.Release()
496                         continue
497                 } else if err != nil {
498                         http.Error(w, err.Error(), http.StatusInternalServerError)
499                         sess.Release()
500                         return
501                 }
502                 defer f.Close()
503
504                 collectionDir, sessionFS, session, tokenUser = f, fs, sess, user
505                 break
506         }
507
508         if session == nil {
509                 if pathToken {
510                         // The URL is a "secret sharing link" that
511                         // didn't work out.  Asking the client for
512                         // additional credentials would just be
513                         // confusing.
514                         http.Error(w, notFoundMessage, http.StatusNotFound)
515                         return
516                 }
517                 if tokenValid {
518                         // The client provided valid token(s), but the
519                         // collection was not found.
520                         http.Error(w, notFoundMessage, http.StatusNotFound)
521                         return
522                 }
523                 if tokenScopeProblem {
524                         // The client provided a valid token but
525                         // opening the collection returned 403, which
526                         // means the token scope doesn't permit
527                         // fetching that collection.
528                         http.Error(w, notFoundMessage, http.StatusForbidden)
529                         return
530                 }
531                 // The client's token was invalid (e.g., expired), or
532                 // the client didn't even provide one.  Redirect to
533                 // workbench2's login-and-redirect-to-download url if
534                 // this is a browser navigation request. (The redirect
535                 // flow can't preserve the original method if it's not
536                 // GET, and doesn't make sense if the UA is a
537                 // command-line tool, is trying to load an inline
538                 // image, etc.; in these cases, there's nothing we can
539                 // do, so return 401 unauthorized.)
540                 //
541                 // Note Sec-Fetch-Mode is sent by all non-EOL
542                 // browsers, except Safari.
543                 // https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Sec-Fetch-Mode
544                 //
545                 // TODO(TC): This response would be confusing to
546                 // someone trying (anonymously) to download public
547                 // data that has been deleted.  Allow a referrer to
548                 // provide this context somehow?
549                 if r.Method == http.MethodGet && r.Header.Get("Sec-Fetch-Mode") == "navigate" {
550                         target := url.URL(h.Cluster.Services.Workbench2.ExternalURL)
551                         redirkey := "redirectToPreview"
552                         if attachment {
553                                 redirkey = "redirectToDownload"
554                         }
555                         callback := "/c=" + collectionID + "/" + strings.Join(targetPath, "/")
556                         query := url.Values{redirkey: {callback}}
557                         queryString := query.Encode()
558                         // Note: Encode (and QueryEscape function) turns space
559                         // into plus sign (+) rather than %20 (the plus sign
560                         // becomes %2B); that is the rule for web forms data
561                         // sent in URL query part via GET, but we're not
562                         // emulating forms here. Client JS APIs
563                         // (URLSearchParam#get, decodeURIComponent) will
564                         // decode %20, but while the former also expects the
565                         // form-specific encoding, the latter doesn't.
566                         // Encode() almost encodes everything; RFC 3986 3.4
567                         // says "it is sometimes better for usability" to not
568                         // encode / and ? when passing URI reference in query.
569                         // This is also legal according to WHATWG URL spec and
570                         // can be desirable for debugging webapp.
571                         // We can let slash / appear in the encoded query, and
572                         // equality-sign = too, but exempting ? is not very
573                         // useful.
574                         // Plus-sign, hash, and ampersand are never exempt.
575                         r := strings.NewReplacer("+", "%20", "%2F", "/", "%3D", "=")
576                         target.RawQuery = r.Replace(queryString)
577                         w.Header().Add("Location", target.String())
578                         w.WriteHeader(http.StatusSeeOther)
579                         return
580                 }
581                 if !credentialsOK {
582                         http.Error(w, fmt.Sprintf("Authorization tokens are not accepted here: %v, and no anonymous user token is configured.", reasonNotAcceptingCredentials), http.StatusUnauthorized)
583                         return
584                 }
585                 // If none of the above cases apply, suggest the
586                 // user-agent (which is either a non-browser agent
587                 // like wget, or a browser that can't redirect through
588                 // a login flow) prompt the user for credentials.
589                 w.Header().Add("WWW-Authenticate", "Basic realm=\"collections\"")
590                 http.Error(w, unauthorizedMessage, http.StatusUnauthorized)
591                 return
592         }
593
594         // The first call to releaseSession() calls session.Release(),
595         // then subsequent calls are no-ops.  This lets us use a defer
596         // call here to ensure it gets called in all code paths, and
597         // also call it inline (see below) in the cases where we want
598         // to release the lock before returning.
599         var releaseSessionOnce sync.Once
600         releaseSession := func() { releaseSessionOnce.Do(func() { session.Release() }) }
601         defer releaseSession()
602
603         colltarget := strings.Join(targetPath, "/")
604         colltarget = strings.TrimSuffix(colltarget, "/")
605         fstarget := fsprefix + colltarget
606         if !forceReload {
607                 need, err := h.needSync(r.Context(), sessionFS, fstarget)
608                 if err != nil {
609                         http.Error(w, err.Error(), http.StatusBadGateway)
610                         return
611                 }
612                 forceReload = need
613         }
614         if forceReload {
615                 err := collectionDir.Sync()
616                 if err != nil {
617                         if he := errorWithHTTPStatus(nil); errors.As(err, &he) {
618                                 http.Error(w, err.Error(), he.HTTPStatus())
619                         } else {
620                                 http.Error(w, err.Error(), http.StatusInternalServerError)
621                         }
622                         return
623                 }
624         }
625
626         if accept := strings.Split(r.Header.Get("Accept"), ","); len(accept) == 1 {
627                 mediatype, _, err := mime.ParseMediaType(accept[0])
628                 if err == nil && mediatype == "application/zip" {
629                         releaseSession()
630                         h.serveZip(w, r, session, sessionFS, fstarget, tokenUser)
631                         return
632                 }
633         }
634         if r.Method == http.MethodGet || r.Method == http.MethodHead {
635                 if fi, err := sessionFS.Stat(fstarget); err == nil && fi.IsDir() {
636                         releaseSession() // because we won't be writing anything
637                         if !strings.HasSuffix(r.URL.Path, "/") {
638                                 h.seeOtherWithCookie(w, r, r.URL.Path+"/", credentialsOK)
639                         } else {
640                                 h.serveDirectory(w, r, fi.Name(), sessionFS, fstarget, !useSiteFS)
641                         }
642                         return
643                 }
644         }
645
646         var basename string
647         if len(targetPath) > 0 {
648                 basename = targetPath[len(targetPath)-1]
649         }
650         if arvadosclient.PDHMatch(collectionID) && writeMethod[r.Method] {
651                 http.Error(w, webdavfs.ErrReadOnly.Error(), http.StatusMethodNotAllowed)
652                 return
653         }
654         if !h.userPermittedToUploadOrDownload(r.Method, tokenUser) {
655                 http.Error(w, "Not permitted", http.StatusForbidden)
656                 return
657         }
658         h.logUploadOrDownload(r, session.arvadosclient, sessionFS, fstarget, 1, nil, tokenUser)
659
660         if webdavPrefix == "" && stripParts > 0 {
661                 webdavPrefix = "/" + strings.Join(pathParts[:stripParts], "/")
662         }
663
664         writing := writeMethod[r.Method]
665         if writing {
666                 // We implement write operations by writing to a
667                 // temporary collection, then applying the change to
668                 // the real collection using the replace_files option
669                 // in a collection update request.  This lets us do
670                 // the slow part (i.e., receive the file data from the
671                 // client and write it to Keep) without worrying about
672                 // side effects of other read/write operations.
673                 //
674                 // Collection update requests for a given collection
675                 // are serialized by the controller, so we don't need
676                 // to do any locking for that part either.
677
678                 // collprefix is the subdirectory in the target
679                 // collection which (according to X-Webdav-Source) we
680                 // should pretend is "/" for this request.
681                 collprefix := strings.TrimPrefix(fsprefix, "by_id/"+collectionID+"/")
682                 if len(collprefix) == len(fsprefix) {
683                         http.Error(w, "internal error: writing to anything other than /by_id/{collectionID}", http.StatusInternalServerError)
684                         return
685                 }
686
687                 // Create a temporary collection filesystem for webdav
688                 // to operate on.
689                 var tmpcoll arvados.Collection
690                 client := session.client.WithRequestID(r.Header.Get("X-Request-Id"))
691                 tmpfs, err := tmpcoll.FileSystem(client, session.keepclient)
692                 if err != nil {
693                         http.Error(w, err.Error(), http.StatusInternalServerError)
694                         return
695                 }
696                 snap, err := arvados.Snapshot(sessionFS, "by_id/"+collectionID+"/")
697                 if err != nil {
698                         http.Error(w, "snapshot: "+err.Error(), http.StatusInternalServerError)
699                         return
700                 }
701                 err = arvados.Splice(tmpfs, "/", snap)
702                 if err != nil {
703                         http.Error(w, "splice: "+err.Error(), http.StatusInternalServerError)
704                         return
705                 }
706
707                 targetFS = tmpfs
708                 fsprefix = collprefix
709                 replace := make(map[string]string)
710
711                 switch r.Method {
712                 case "COPY", "MOVE":
713                         dsttarget, err := copyMoveDestination(r, webdavPrefix)
714                         if err != nil {
715                                 http.Error(w, err.Error(), http.StatusBadRequest)
716                                 return
717                         }
718
719                         srcspec := "current/" + colltarget
720                         // RFC 4918 9.8.3: A COPY of "Depth: 0" only
721                         // instructs that the collection and its
722                         // properties, but not resources identified by
723                         // its internal member URLs, are to be copied.
724                         //
725                         // ...meaning we will be creating an empty
726                         // directory.
727                         //
728                         // RFC 4918 9.9.2: A client MUST NOT submit a
729                         // Depth header on a MOVE on a collection with
730                         // any value but "infinity".
731                         //
732                         // ...meaning we only need to consider this
733                         // case for COPY, not for MOVE.
734                         if fi, err := tmpfs.Stat(colltarget); err == nil && fi.IsDir() && r.Method == "COPY" && r.Header.Get("Depth") == "0" {
735                                 srcspec = "manifest_text/"
736                         }
737
738                         replace[strings.TrimSuffix(dsttarget, "/")] = srcspec
739                         if r.Method == "MOVE" {
740                                 replace["/"+colltarget] = ""
741                         }
742                 case "MKCOL":
743                         replace["/"+colltarget] = "manifest_text/"
744                 case "DELETE":
745                         if depth := r.Header.Get("Depth"); depth != "" && depth != "infinity" {
746                                 http.Error(w, "invalid depth header, see RFC 4918 9.6.1", http.StatusBadRequest)
747                                 return
748                         }
749                         replace["/"+colltarget] = ""
750                 case "PUT":
751                         // changes will be applied by updateOnSuccess
752                         // update func below
753                 case "LOCK", "UNLOCK", "PROPPATCH":
754                         // no changes
755                 default:
756                         http.Error(w, "method missing", http.StatusInternalServerError)
757                         return
758                 }
759
760                 // Save the collection only if/when all
761                 // webdav->filesystem operations succeed using our
762                 // temporary collection -- and send a 500 error if the
763                 // updates can't be saved.
764                 logger := ctxlog.FromContext(r.Context())
765                 w = &updateOnSuccess{
766                         ResponseWriter: w,
767                         logger:         logger,
768                         update: func() error {
769                                 var manifest string
770                                 var snap *arvados.Subtree
771                                 var err error
772                                 if r.Method == "PUT" {
773                                         snap, err = arvados.Snapshot(tmpfs, colltarget)
774                                         if err != nil {
775                                                 return fmt.Errorf("snapshot tmpfs: %w", err)
776                                         }
777                                         tmpfs, err = (&arvados.Collection{}).FileSystem(client, session.keepclient)
778                                         err = arvados.Splice(tmpfs, "file", snap)
779                                         if err != nil {
780                                                 return fmt.Errorf("splice tmpfs: %w", err)
781                                         }
782                                         manifest, err = tmpfs.MarshalManifest(".")
783                                         if err != nil {
784                                                 return fmt.Errorf("marshal tmpfs: %w", err)
785                                         }
786                                         replace["/"+colltarget] = "manifest_text/file"
787                                 } else if len(replace) == 0 {
788                                         return nil
789                                 }
790                                 var updated arvados.Collection
791                                 err = client.RequestAndDecode(&updated, "PATCH", "arvados/v1/collections/"+collectionID, nil, map[string]interface{}{
792                                         "replace_files": replace,
793                                         "collection":    map[string]interface{}{"manifest_text": manifest}})
794                                 var te arvados.TransactionError
795                                 if errors.As(err, &te) {
796                                         err = te
797                                 }
798                                 if err != nil {
799                                         return err
800                                 }
801                                 if r.Method == "PUT" {
802                                         h.repack(r.Context(), session, logger, &updated)
803                                 }
804                                 return nil
805                         }}
806         } else {
807                 // When writing, we need to block session renewal
808                 // until we're finished, in order to guarantee the
809                 // effect of the write is visible in future responses.
810                 // But if we're not writing, we can release the lock
811                 // early.  This enables us to keep renewing sessions
812                 // and processing more requests even if a slow client
813                 // takes a long time to download a large file.
814                 releaseSession()
815                 targetFS = sessionFS
816         }
817         if r.Method == http.MethodGet {
818                 applyContentDispositionHdr(w, r, basename, attachment)
819         }
820         wh := &webdav.Handler{
821                 Prefix: webdavPrefix,
822                 FileSystem: &webdavfs.FS{
823                         FileSystem:    targetFS,
824                         Prefix:        fsprefix,
825                         Writing:       writeMethod[r.Method],
826                         AlwaysReadEOF: r.Method == "PROPFIND",
827                 },
828                 LockSystem: webdavfs.NoLockSystem,
829                 Logger: func(r *http.Request, err error) {
830                         if err != nil && !os.IsNotExist(err) {
831                                 ctxlog.FromContext(r.Context()).WithError(err).Error("error reported by webdav handler")
832                         }
833                 },
834         }
835         h.metrics.track(wh, w, r)
836         if r.Method == http.MethodGet && w.WroteStatus() == http.StatusOK {
837                 wrote := int64(w.WroteBodyBytes())
838                 fi, err := wh.FileSystem.Stat(r.Context(), colltarget)
839                 if err == nil && fi.Size() != wrote {
840                         var n int
841                         f, err := wh.FileSystem.OpenFile(r.Context(), colltarget, os.O_RDONLY, 0)
842                         if err == nil {
843                                 n, err = f.Read(make([]byte, 1024))
844                                 f.Close()
845                         }
846                         ctxlog.FromContext(r.Context()).Errorf("stat.Size()==%d but only wrote %d bytes; read(1024) returns %d, %v", fi.Size(), wrote, n, err)
847                 }
848         }
849 }
850
851 // Repack the given collection after uploading a file.
852 func (h *handler) repack(ctx context.Context, session *cachedSession, logger logrus.FieldLogger, updated *arvados.Collection) {
853         if _, busy := h.repacking.LoadOrStore(updated.UUID, true); busy {
854                 // Another goroutine is already repacking the same
855                 // collection.
856                 return
857         }
858         defer h.repacking.Delete(updated.UUID)
859
860         // Repacking is best-effort, so we disable retries, and don't
861         // fail on errors.
862         client := *session.client
863         client.Timeout = 0
864         repackfs, err := updated.FileSystem(&client, session.keepclient)
865         if err != nil {
866                 logger.Warnf("setting up repackfs: %s", err)
867                 return
868         }
869         repacked, err := repackfs.Repack(ctx, arvados.RepackOptions{CachedOnly: true})
870         if err != nil {
871                 logger.Warnf("repack: %s", err)
872                 return
873         }
874         if repacked > 0 {
875                 err := repackfs.Sync()
876                 if err != nil {
877                         logger.Infof("sync repack: %s", err)
878                 }
879         }
880 }
881
// dirListingTemplate is the html/template source that serveDirectory
// renders as a directory listing page.
//
// It reads .CollectionName, .StripParts, .QuotedUrlForWget and
// .Files from the template data; each entry of .Files supplies
// .IsDir, .Href, .Name and .Size. It also requires a template
// function named "nbsp" to be registered before parsing.
var dirListingTemplate = `<!DOCTYPE HTML>
<HTML><HEAD>
  <META name="robots" content="NOINDEX">
  <TITLE>{{ .CollectionName }}</TITLE>
  <STYLE type="text/css">
    body {
      margin: 1.5em;
    }
    pre {
      background-color: #D9EDF7;
      border-radius: .25em;
      padding: .75em;
      overflow: auto;
    }
    .footer p {
      font-size: 82%;
    }
    hr {
      border: 1px solid #808080;
    }
    ul {
      padding: 0;
    }
    ul li {
      font-family: monospace;
      list-style: none;
    }
  </STYLE>
</HEAD>
<BODY>

<H1>{{ .CollectionName }}</H1>

<P>This collection of data files is being shared with you through
Arvados.  You can download individual files listed below.  To download
the entire directory tree with <CODE>wget</CODE>, try:</P>

<PRE id="wget-example">$ wget --mirror --no-parent --no-host --cut-dirs={{ .StripParts }} {{ .QuotedUrlForWget }}</PRE>

<H2>File Listing</H2>

{{if .Files}}
<UL>
{{range .Files}}
{{if .IsDir }}
  <LI>{{" " | printf "%15s  " | nbsp}}<A class="item" href="{{ .Href }}/">{{ .Name }}/</A></LI>
{{else}}
  <LI>{{.Size | printf "%15d  " | nbsp}}<A class="item" href="{{ .Href }}">{{ .Name }}</A></LI>
{{end}}
{{end}}
</UL>
{{else}}
<P>(No files; this collection is empty.)</P>
{{end}}

<HR>
<DIV class="footer">
  <P>
    About Arvados:
    Arvados is a free and open source software bioinformatics platform.
    To learn more, visit arvados.org.
    Arvados is not responsible for the files listed on this page.
  </P>
</DIV>

</BODY>
</HTML>
`
950
// fileListEnt is a single row of an HTML directory listing.
type fileListEnt struct {
	// Name is the path shown to the user; Href is the
	// percent-encoded relative link target for that path.
	Name, Href string
	// Size is the size reported by the entry's FileInfo.
	Size int64
	// IsDir is true when the entry is a directory.
	IsDir bool
}
957
// relativeHref converts a filesystem path such as `foo/"bar baz"`
// into a percent-encoded relative link such as
// `./foo/%22bar%20baz%22`.
//
// The result can still contain characters that are unsafe in HTML
// (for example '&'); escaping those is left to the HTML templating
// engine.
func relativeHref(path string) string {
	return "./" + (&url.URL{Path: path}).EscapedPath()
}
967
// makeQuotedUrlForWget returns the URL of the given request wrapped
// in single quotes, ready to paste after "wget" on a shell command
// line.
//
// The scheme is taken from X-Forwarded-Proto when that header is
// "http" or "https"; otherwise it is "https" if the request arrived
// over TLS and "http" if not.
func makeQuotedUrlForWget(r *http.Request) string {
	var scheme string
	switch proto := r.Header.Get("X-Forwarded-Proto"); {
	case proto == "http" || proto == "https":
		// Trust the protocol reported by the load balancer / proxy.
		scheme = proto
	case r.TLS != nil:
		scheme = "https"
	default:
		scheme = "http"
	}
	// The escaped path can legitimately still contain single
	// quotes, which would terminate our shell quoting early, so
	// rewrite them as %27.
	escaped := strings.ReplaceAll(r.URL.EscapedPath(), "'", "%27")
	return fmt.Sprintf("'%s://%s%s'", scheme, r.Host, escaped)
}
985
986 func (h *handler) serveDirectory(w http.ResponseWriter, r *http.Request, collectionName string, fs http.FileSystem, base string, recurse bool) {
987         var files []fileListEnt
988         var walk func(string) error
989         if !strings.HasSuffix(base, "/") {
990                 base = base + "/"
991         }
992         walk = func(path string) error {
993                 dirname := base + path
994                 if dirname != "/" {
995                         dirname = strings.TrimSuffix(dirname, "/")
996                 }
997                 d, err := fs.Open(dirname)
998                 if err != nil {
999                         return err
1000                 }
1001                 ents, err := d.Readdir(-1)
1002                 if err != nil {
1003                         return err
1004                 }
1005                 for _, ent := range ents {
1006                         if recurse && ent.IsDir() {
1007                                 err = walk(path + ent.Name() + "/")
1008                                 if err != nil {
1009                                         return err
1010                                 }
1011                         } else {
1012                                 listingName := path + ent.Name()
1013                                 files = append(files, fileListEnt{
1014                                         Name:  listingName,
1015                                         Href:  relativeHref(listingName),
1016                                         Size:  ent.Size(),
1017                                         IsDir: ent.IsDir(),
1018                                 })
1019                         }
1020                 }
1021                 return nil
1022         }
1023         if err := walk(""); err != nil {
1024                 http.Error(w, "error getting directory listing: "+err.Error(), http.StatusInternalServerError)
1025                 return
1026         }
1027
1028         funcs := template.FuncMap{
1029                 "nbsp": func(s string) template.HTML {
1030                         return template.HTML(strings.Replace(s, " ", "&nbsp;", -1))
1031                 },
1032         }
1033         tmpl, err := template.New("dir").Funcs(funcs).Parse(dirListingTemplate)
1034         if err != nil {
1035                 http.Error(w, "error parsing template: "+err.Error(), http.StatusInternalServerError)
1036                 return
1037         }
1038         sort.Slice(files, func(i, j int) bool {
1039                 return files[i].Name < files[j].Name
1040         })
1041         w.WriteHeader(http.StatusOK)
1042         tmpl.Execute(w, map[string]interface{}{
1043                 "CollectionName":   collectionName,
1044                 "Files":            files,
1045                 "Request":          r,
1046                 "StripParts":       strings.Count(strings.TrimRight(r.URL.Path, "/"), "/"),
1047                 "QuotedUrlForWget": makeQuotedUrlForWget(r),
1048         })
1049 }
1050
// applyContentDispositionHdr sets the Content-Disposition response
// header when one is needed.
//
// The disposition type is "attachment" if isAttachment is true and
// "inline" otherwise. A filename parameter is appended only when the
// request URI contains a query string. A bare "inline" is never sent,
// so in that case the header is left untouched.
func applyContentDispositionHdr(w http.ResponseWriter, r *http.Request, filename string, isAttachment bool) {
	hasQuery := strings.Contains(r.RequestURI, "?")
	if !isAttachment && !hasQuery {
		return
	}

	value := "inline"
	if isAttachment {
		value = "attachment"
	}
	if hasQuery {
		// Help the UA realize that the filename is just
		// "filename.txt", not
		// "filename.txt?disposition=attachment".
		//
		// TODO(TC): Follow advice at RFC 6266 appendix D
		value += "; filename=" + strconv.QuoteToASCII(filename)
	}
	w.Header().Set("Content-Disposition", value)
}
1068
1069 func (h *handler) seeOtherWithCookie(w http.ResponseWriter, r *http.Request, location string, credentialsOK bool) {
1070         if formTokens, haveFormTokens := r.Form["api_token"]; haveFormTokens {
1071                 if !credentialsOK {
1072                         // It is not safe to copy the provided token
1073                         // into a cookie unless the current vhost
1074                         // (origin) serves only a single collection or
1075                         // we are in TrustAllContent mode.
1076                         http.Error(w, "cannot serve inline content at this URL (possible configuration error; see https://doc.arvados.org/install/install-keep-web.html#dns)", http.StatusBadRequest)
1077                         return
1078                 }
1079
1080                 // The HttpOnly flag is necessary to prevent
1081                 // JavaScript code (included in, or loaded by, a page
1082                 // in the collection being served) from employing the
1083                 // user's token beyond reading other files in the same
1084                 // domain, i.e., same collection.
1085                 //
1086                 // The 303 redirect is necessary in the case of a GET
1087                 // request to avoid exposing the token in the Location
1088                 // bar, and in the case of a POST request to avoid
1089                 // raising warnings when the user refreshes the
1090                 // resulting page.
1091                 for _, tok := range formTokens {
1092                         if tok == "" {
1093                                 continue
1094                         }
1095                         http.SetCookie(w, &http.Cookie{
1096                                 Name:     "arvados_api_token",
1097                                 Value:    auth.EncodeTokenCookie([]byte(tok)),
1098                                 Path:     "/",
1099                                 HttpOnly: true,
1100                                 SameSite: http.SameSiteLaxMode,
1101                         })
1102                         break
1103                 }
1104         }
1105
1106         // Propagate query parameters (except api_token) from
1107         // the original request.
1108         redirQuery := r.URL.Query()
1109         redirQuery.Del("api_token")
1110
1111         u := r.URL
1112         if location != "" {
1113                 newu, err := u.Parse(location)
1114                 if err != nil {
1115                         http.Error(w, "error resolving redirect target: "+err.Error(), http.StatusInternalServerError)
1116                         return
1117                 }
1118                 u = newu
1119         }
1120         redir := (&url.URL{
1121                 Scheme:   r.URL.Scheme,
1122                 Host:     r.Host,
1123                 Path:     u.Path,
1124                 RawQuery: redirQuery.Encode(),
1125         }).String()
1126
1127         w.Header().Add("Location", redir)
1128         w.WriteHeader(http.StatusSeeOther)
1129         io.WriteString(w, `<A href="`)
1130         io.WriteString(w, html.EscapeString(redir))
1131         io.WriteString(w, `">Continue</A>`)
1132 }
1133
1134 func (h *handler) userPermittedToUploadOrDownload(method string, tokenUser *arvados.User) bool {
1135         var permitDownload bool
1136         var permitUpload bool
1137         if tokenUser != nil && tokenUser.IsAdmin {
1138                 permitUpload = h.Cluster.Collections.WebDAVPermission.Admin.Upload
1139                 permitDownload = h.Cluster.Collections.WebDAVPermission.Admin.Download
1140         } else {
1141                 permitUpload = h.Cluster.Collections.WebDAVPermission.User.Upload
1142                 permitDownload = h.Cluster.Collections.WebDAVPermission.User.Download
1143         }
1144         if (method == "PUT" || method == "POST") && !permitUpload {
1145                 // Disallow operations that upload new files.
1146                 // Permit webdav operations that move existing files around.
1147                 return false
1148         } else if method == "GET" && !permitDownload {
1149                 // Disallow downloading file contents.
1150                 // Permit webdav operations like PROPFIND that retrieve metadata
1151                 // but not file contents.
1152                 return false
1153         }
1154         return true
1155 }
1156
// copyMoveDestination extracts the target path of a COPY or MOVE
// request from its Destination header, expressed relative to the
// current collection, i.e., with webdavPrefix removed from the front.
//
// An error is returned if the header is not a parseable URL, if it
// names a host other than the request's own, or if its path does not
// start with a non-empty webdavPrefix.
func copyMoveDestination(r *http.Request, webdavPrefix string) (string, error) {
	dst, err := url.Parse(r.Header.Get("Destination"))
	switch {
	case err != nil:
		return "", err
	case dst.Host != "" && dst.Host != r.Host:
		return "", errors.New("destination host mismatch")
	case webdavPrefix == "":
		// Nothing to strip.
		return dst.Path, nil
	}
	relative, hasPrefix := strings.CutPrefix(dst.Path, webdavPrefix)
	if !hasPrefix {
		return "", errors.New("destination path not supported")
	}
	return relative, nil
}
1177
1178 // Check whether fstarget is in a collection whose PDH has changed
1179 // since it was last Sync()ed in sessionFS.
1180 //
1181 // If fstarget doesn't exist, but would be in such a collection if it
1182 // did exist, return true.
1183 func (h *handler) needSync(ctx context.Context, sessionFS arvados.CustomFileSystem, fstarget string) (bool, error) {
1184         collection, _ := h.determineCollection(sessionFS, fstarget)
1185         if collection == nil || len(collection.UUID) != 27 || !strings.HasPrefix(collection.UUID, h.Cluster.ClusterID) {
1186                 return false, nil
1187         }
1188         db, err := h.getDBConnector().GetDB(ctx)
1189         if err != nil {
1190                 return false, err
1191         }
1192         var currentPDH string
1193         err = db.QueryRowContext(ctx, `select portable_data_hash from collections where uuid=$1`, collection.UUID).Scan(&currentPDH)
1194         if err != nil {
1195                 return false, err
1196         }
1197         if currentPDH != collection.PortableDataHash {
1198                 return true, nil
1199         }
1200         return false, nil
1201 }
1202
// fileEventLog describes one file upload or download event. It has
// only comparable fields, so values can be used as map keys.
type fileEventLog struct {
	// URL path of the request, and "file_upload" or
	// "file_download".
	requestPath, eventType string
	// Acting user.
	userUUID, userFullName string
	// Collection involved, and the file's path.
	collUUID, collPDH, collFilePath string
	// Number of files covered by this event.
	fileCount int
	// Client IP address and the API token used for the request.
	clientAddr, clientToken string
}
1215
1216 func newFileEventLog(
1217         h *handler,
1218         r *http.Request,
1219         filepath string,
1220         fileCount int,
1221         collection *arvados.Collection,
1222         user *arvados.User,
1223         token string,
1224 ) *fileEventLog {
1225         var eventType string
1226         switch r.Method {
1227         case "POST", "PUT":
1228                 eventType = "file_upload"
1229         case "GET":
1230                 eventType = "file_download"
1231         default:
1232                 return nil
1233         }
1234
1235         // We want to log the address of the proxy closest to keep-web—the last
1236         // value in the X-Forwarded-For list—or the client address if there is no
1237         // valid proxy.
1238         var clientAddr string
1239         // 1. Build a slice of proxy addresses from X-Forwarded-For.
1240         xff := strings.Join(r.Header.Values("X-Forwarded-For"), ",")
1241         addrs := strings.Split(xff, ",")
1242         // 2. Reverse the slice so it's in our most preferred order for logging.
1243         slices.Reverse(addrs)
1244         // 3. Append the client address to that slice.
1245         if addr, _, err := net.SplitHostPort(r.RemoteAddr); err == nil {
1246                 addrs = append(addrs, addr)
1247         }
1248         // 4. Use the first valid address in the slice.
1249         for _, addr := range addrs {
1250                 if ip := net.ParseIP(strings.TrimSpace(addr)); ip != nil {
1251                         clientAddr = ip.String()
1252                         break
1253                 }
1254         }
1255
1256         ev := &fileEventLog{
1257                 requestPath: r.URL.Path,
1258                 eventType:   eventType,
1259                 clientAddr:  clientAddr,
1260                 clientToken: token,
1261                 fileCount:   fileCount,
1262         }
1263
1264         if user != nil {
1265                 ev.userUUID = user.UUID
1266                 ev.userFullName = user.FullName
1267         } else {
1268                 ev.userUUID = fmt.Sprintf("%s-tpzed-anonymouspublic", h.Cluster.ClusterID)
1269         }
1270
1271         if collection != nil {
1272                 ev.collFilePath = filepath
1273                 // h.determineCollection populates the collection_uuid
1274                 // prop with the PDH, if this collection is being
1275                 // accessed via PDH. For logging, we use a different
1276                 // field depending on whether it's a UUID or PDH.
1277                 if len(collection.UUID) > 32 {
1278                         ev.collPDH = collection.UUID
1279                 } else {
1280                         ev.collPDH = collection.PortableDataHash
1281                         ev.collUUID = collection.UUID
1282                 }
1283         }
1284
1285         return ev
1286 }
1287
1288 func (ev *fileEventLog) shouldLogPDH() bool {
1289         return ev.eventType == "file_download" && ev.collPDH != ""
1290 }
1291
1292 func (ev *fileEventLog) asDict() arvadosclient.Dict {
1293         props := arvadosclient.Dict{
1294                 "reqPath":              ev.requestPath,
1295                 "collection_uuid":      ev.collUUID,
1296                 "collection_file_path": ev.collFilePath,
1297                 "file_count":           ev.fileCount,
1298         }
1299         if ev.shouldLogPDH() {
1300                 props["portable_data_hash"] = ev.collPDH
1301         }
1302         return arvadosclient.Dict{
1303                 "object_uuid": ev.userUUID,
1304                 "event_type":  ev.eventType,
1305                 "properties":  props,
1306         }
1307 }
1308
1309 func (ev *fileEventLog) asFields() logrus.Fields {
1310         fields := logrus.Fields{
1311                 "collection_file_path": ev.collFilePath,
1312                 "collection_uuid":      ev.collUUID,
1313                 "user_uuid":            ev.userUUID,
1314                 "file_count":           ev.fileCount,
1315         }
1316         if ev.shouldLogPDH() {
1317                 fields["portable_data_hash"] = ev.collPDH
1318         }
1319         if !strings.HasSuffix(ev.userUUID, "-tpzed-anonymouspublic") {
1320                 fields["user_full_name"] = ev.userFullName
1321         }
1322         return fields
1323 }
1324
1325 func (h *handler) shouldLogEvent(
1326         event *fileEventLog,
1327         req *http.Request,
1328         fileInfo os.FileInfo,
1329         t time.Time,
1330 ) bool {
1331         if event == nil {
1332                 return false
1333         } else if event.eventType != "file_download" ||
1334                 h.Cluster.Collections.WebDAVLogDownloadInterval == 0 ||
1335                 fileInfo == nil {
1336                 return true
1337         }
1338         td := h.Cluster.Collections.WebDAVLogDownloadInterval.Duration()
1339         cutoff := t.Add(-td)
1340         ev := *event
1341         h.fileEventLogsMtx.Lock()
1342         defer h.fileEventLogsMtx.Unlock()
1343         if h.fileEventLogs == nil {
1344                 h.fileEventLogs = make(map[fileEventLog]time.Time)
1345         }
1346         shouldLog := h.fileEventLogs[ev].Before(cutoff)
1347         if !shouldLog {
1348                 // Go's http fs server evaluates http.Request.Header.Get("Range")
1349                 // (as of Go 1.22) so we should do the same.
1350                 // Don't worry about merging multiple headers, etc.
1351                 ranges, err := http_range.ParseRange(req.Header.Get("Range"), fileInfo.Size())
1352                 if ranges == nil || err != nil {
1353                         // The Range header was either empty or malformed.
1354                         // Err on the side of logging.
1355                         shouldLog = true
1356                 } else {
1357                         // Log this request only if it requested the first byte
1358                         // (our heuristic for "starting a new download").
1359                         for _, reqRange := range ranges {
1360                                 if reqRange.Start == 0 {
1361                                         shouldLog = true
1362                                         break
1363                                 }
1364                         }
1365                 }
1366         }
1367         if shouldLog {
1368                 h.fileEventLogs[ev] = t
1369         }
1370         if t.After(h.fileEventLogsNextTidy) {
1371                 for key, logTime := range h.fileEventLogs {
1372                         if logTime.Before(cutoff) {
1373                                 delete(h.fileEventLogs, key)
1374                         }
1375                 }
1376                 h.fileEventLogsNextTidy = t.Add(td)
1377         }
1378         return shouldLog
1379 }
1380
1381 func (h *handler) logUploadOrDownload(
1382         r *http.Request,
1383         client *arvadosclient.ArvadosClient,
1384         fs arvados.CustomFileSystem,
1385         filepath string,
1386         fileCount int,
1387         collection *arvados.Collection,
1388         user *arvados.User,
1389 ) {
1390         var fileInfo os.FileInfo
1391         if fs != nil {
1392                 if collection == nil {
1393                         collection, filepath = h.determineCollection(fs, filepath)
1394                 }
1395                 if collection != nil {
1396                         // It's okay to ignore this error because shouldLogEvent will
1397                         // always return true if fileInfo == nil.
1398                         fileInfo, _ = fs.Stat(path.Join("by_id", collection.UUID, filepath))
1399                 }
1400         }
1401         event := newFileEventLog(h, r, filepath, fileCount, collection, user, client.ApiToken)
1402         if !h.shouldLogEvent(event, r, fileInfo, time.Now()) {
1403                 return
1404         }
1405         log := ctxlog.FromContext(r.Context()).WithFields(event.asFields())
1406         log.Info(strings.Replace(event.eventType, "file_", "File ", 1))
1407         if h.Cluster.Collections.WebDAVLogEvents {
1408                 go func() {
1409                         logReq := arvadosclient.Dict{"log": event.asDict()}
1410                         err := client.Create("logs", logReq, nil)
1411                         if err != nil {
1412                                 log.WithError(err).Errorf("Failed to create %s log event on API server", event.eventType)
1413                         }
1414                 }()
1415         }
1416 }
1417
1418 func (h *handler) determineCollection(fs arvados.CustomFileSystem, path string) (*arvados.Collection, string) {
1419         target := strings.TrimSuffix(path, "/")
1420         for cut := len(target); cut >= 0; cut = strings.LastIndexByte(target, '/') {
1421                 target = target[:cut]
1422                 fi, err := fs.Stat(target)
1423                 if os.IsNotExist(err) {
1424                         // creating a new file/dir, or download
1425                         // destined to fail
1426                         continue
1427                 } else if err != nil {
1428                         return nil, ""
1429                 }
1430                 switch src := fi.Sys().(type) {
1431                 case *arvados.Collection:
1432                         return src, strings.TrimPrefix(path[len(target):], "/")
1433                 case *arvados.Group:
1434                         return nil, ""
1435                 default:
1436                         if _, ok := src.(error); ok {
1437                                 return nil, ""
1438                         }
1439                 }
1440         }
1441         return nil, ""
1442 }
1443
1444 func ServeCORSPreflight(w http.ResponseWriter, header http.Header) bool {
1445         method := header.Get("Access-Control-Request-Method")
1446         if method == "" {
1447                 return false
1448         }
1449         if !browserMethod[method] && !webdavMethod[method] {
1450                 w.WriteHeader(http.StatusMethodNotAllowed)
1451                 return true
1452         }
1453         w.Header().Set("Access-Control-Allow-Headers", corsAllowHeadersHeader)
1454         w.Header().Set("Access-Control-Allow-Methods", "COPY, DELETE, GET, LOCK, MKCOL, MOVE, OPTIONS, POST, PROPFIND, PROPPATCH, PUT, RMCOL, UNLOCK")
1455         w.Header().Set("Access-Control-Allow-Origin", "*")
1456         w.Header().Set("Access-Control-Max-Age", "86400")
1457         return true
1458 }