20846: Merge branch '19213-ubuntu2204-support' into 20846-ubuntu2204
[arvados.git] / services / keep-web / handler.go
1 // Copyright (C) The Arvados Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: AGPL-3.0
4
5 package keepweb
6
7 import (
8         "encoding/json"
9         "errors"
10         "fmt"
11         "html"
12         "html/template"
13         "io"
14         "net/http"
15         "net/url"
16         "os"
17         "sort"
18         "strconv"
19         "strings"
20         "sync"
21         "time"
22
23         "git.arvados.org/arvados.git/lib/cmd"
24         "git.arvados.org/arvados.git/lib/webdavfs"
25         "git.arvados.org/arvados.git/sdk/go/arvados"
26         "git.arvados.org/arvados.git/sdk/go/arvadosclient"
27         "git.arvados.org/arvados.git/sdk/go/auth"
28         "git.arvados.org/arvados.git/sdk/go/ctxlog"
29         "git.arvados.org/arvados.git/sdk/go/httpserver"
30         "git.arvados.org/arvados.git/sdk/go/keepclient"
31         "github.com/sirupsen/logrus"
32         "golang.org/x/net/webdav"
33 )
34
35 type handler struct {
36         Cache     cache
37         Cluster   *arvados.Cluster
38         setupOnce sync.Once
39
40         lockMtx    sync.Mutex
41         lock       map[string]*sync.RWMutex
42         lockTidied time.Time
43 }
44
var (
	// urlPDHDecoder undoes URL-safe spellings of a portable data
	// hash by turning every " " and "-" back into "+".
	urlPDHDecoder = strings.NewReplacer(" ", "+", "-", "+")

	// notFoundMessage is the response body used for 404 (and
	// scope-related 403) responses.
	notFoundMessage = "Not Found"

	// unauthorizedMessage is the response body used when the
	// client must supply credentials.
	unauthorizedMessage = "401 Unauthorized\n\nA valid Arvados token must be provided to access this resource."
)
49
50 // parseCollectionIDFromURL returns a UUID or PDH if s is a UUID or a
51 // PDH (even if it is a PDH with "+" replaced by " " or "-");
52 // otherwise "".
53 func parseCollectionIDFromURL(s string) string {
54         if arvadosclient.UUIDMatch(s) {
55                 return s
56         }
57         if pdh := urlPDHDecoder.Replace(s); arvadosclient.PDHMatch(pdh) {
58                 return pdh
59         }
60         return ""
61 }
62
63 func (h *handler) setup() {
64         keepclient.DefaultBlockCache.MaxBlocks = h.Cluster.Collections.WebDAVCache.MaxBlockEntries
65 }
66
67 func (h *handler) serveStatus(w http.ResponseWriter, r *http.Request) {
68         json.NewEncoder(w).Encode(struct{ Version string }{cmd.Version.String()})
69 }
70
// errorWithHTTPStatus is satisfied by errors that know which HTTP
// status code should be reported for them. It is used as an
// errors.As target to translate internal errors into responses.
type errorWithHTTPStatus interface {
	// HTTPStatus returns the HTTP status code for this error.
	HTTPStatus() int
}
74
75 // updateOnSuccess wraps httpserver.ResponseWriter. If the handler
76 // sends an HTTP header indicating success, updateOnSuccess first
77 // calls the provided update func. If the update func fails, an error
78 // response is sent (using the error's HTTP status or 500 if none),
79 // and the status code and body sent by the handler are ignored (all
80 // response writes return the update error).
81 type updateOnSuccess struct {
82         httpserver.ResponseWriter
83         logger     logrus.FieldLogger
84         update     func() error
85         sentHeader bool
86         err        error
87 }
88
89 func (uos *updateOnSuccess) Write(p []byte) (int, error) {
90         if !uos.sentHeader {
91                 uos.WriteHeader(http.StatusOK)
92         }
93         if uos.err != nil {
94                 return 0, uos.err
95         }
96         return uos.ResponseWriter.Write(p)
97 }
98
99 func (uos *updateOnSuccess) WriteHeader(code int) {
100         if !uos.sentHeader {
101                 uos.sentHeader = true
102                 if code >= 200 && code < 400 {
103                         if uos.err = uos.update(); uos.err != nil {
104                                 code := http.StatusInternalServerError
105                                 if he := errorWithHTTPStatus(nil); errors.As(uos.err, &he) {
106                                         code = he.HTTPStatus()
107                                 }
108                                 uos.logger.WithError(uos.err).Errorf("update() returned %T error, changing response to HTTP %d", uos.err, code)
109                                 http.Error(uos.ResponseWriter, uos.err.Error(), code)
110                                 return
111                         }
112                 }
113         }
114         uos.ResponseWriter.WriteHeader(code)
115 }
116
var (
	// corsAllowHeadersHeader is the comma-separated list of request
	// header names this service advertises for cross-origin use.
	// NOTE(review): presumably sent as Access-Control-Allow-Headers
	// by ServeCORSPreflight, which is defined elsewhere -- confirm.
	corsAllowHeadersHeader = strings.Join([]string{
		"Authorization",
		"Content-Type",
		"Range",
		// WebDAV request headers:
		"Depth",
		"Destination",
		"If",
		"Lock-Token",
		"Overwrite",
		"Timeout",
		"Cache-Control",
	}, ", ")

	// writeMethod is the set of request methods treated as
	// modifying a collection. ServeHTTP refuses them on read-only
	// targets and opens the collection read-write for them.
	writeMethod = map[string]bool{
		"COPY":      true,
		"DELETE":    true,
		"LOCK":      true,
		"MKCOL":     true,
		"MOVE":      true,
		"PROPPATCH": true,
		"PUT":       true,
		"RMCOL":     true,
		"UNLOCK":    true,
	}

	// webdavMethod is the set of WebDAV request methods accepted
	// by ServeHTTP (in addition to browserMethod).
	webdavMethod = map[string]bool{
		"COPY":      true,
		"DELETE":    true,
		"LOCK":      true,
		"MKCOL":     true,
		"MOVE":      true,
		"OPTIONS":   true,
		"PROPFIND":  true,
		"PROPPATCH": true,
		"PUT":       true,
		"RMCOL":     true,
		"UNLOCK":    true,
	}

	// browserMethod is the set of methods a web browser uses for
	// page views and form submissions.
	browserMethod = map[string]bool{
		"GET":  true,
		"HEAD": true,
		"POST": true,
	}

	// siteFSDir lists the top-level path components that are
	// served from the site filesystem rather than from a single
	// collection.
	siteFSDir = map[string]bool{
		"":      true, // root directory
		"by_id": true,
		"users": true,
	}
)
159
// stripDefaultPort returns host in lower case, with a trailing ":80"
// or ":443" port removed, so that two spellings of the same virtual
// host compare equal.
//
// Ports 80 and 443 are both treated as "default" regardless of
// scheme, so they are considered to be the same vhost.
//
// Only the port suffix is removed: the brackets around an IPv6
// literal are kept, so "[::1]:443" and "[::1]" both normalize to
// "[::1]".
func stripDefaultPort(host string) string {
	u := &url.URL{Host: host}
	if p := u.Port(); p == "80" || p == "443" {
		host = strings.TrimSuffix(host, ":"+p)
	}
	return strings.ToLower(host)
}
169
170 // CheckHealth implements service.Handler.
171 func (h *handler) CheckHealth() error {
172         return nil
173 }
174
175 // Done implements service.Handler.
176 func (h *handler) Done() <-chan struct{} {
177         return nil
178 }
179
180 // ServeHTTP implements http.Handler.
181 func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
182         h.setupOnce.Do(h.setup)
183
184         if xfp := r.Header.Get("X-Forwarded-Proto"); xfp != "" && xfp != "http" {
185                 r.URL.Scheme = xfp
186         }
187
188         w := httpserver.WrapResponseWriter(wOrig)
189
190         if r.Method == "OPTIONS" && ServeCORSPreflight(w, r.Header) {
191                 return
192         }
193
194         if !browserMethod[r.Method] && !webdavMethod[r.Method] {
195                 w.WriteHeader(http.StatusMethodNotAllowed)
196                 return
197         }
198
199         if r.Header.Get("Origin") != "" {
200                 // Allow simple cross-origin requests without user
201                 // credentials ("user credentials" as defined by CORS,
202                 // i.e., cookies, HTTP authentication, and client-side
203                 // SSL certificates. See
204                 // http://www.w3.org/TR/cors/#user-credentials).
205                 w.Header().Set("Access-Control-Allow-Origin", "*")
206                 w.Header().Set("Access-Control-Expose-Headers", "Content-Range")
207         }
208
209         if h.serveS3(w, r) {
210                 return
211         }
212
213         webdavPrefix := ""
214         arvPath := r.URL.Path
215         if prefix := r.Header.Get("X-Webdav-Prefix"); prefix != "" {
216                 // Enable a proxy (e.g., container log handler in
217                 // controller) to satisfy a request for path
218                 // "/foo/bar/baz.txt" using content from
219                 // "//abc123-4.internal/bar/baz.txt", by adding a
220                 // request header "X-Webdav-Prefix: /foo"
221                 if !strings.HasPrefix(arvPath, prefix) {
222                         http.Error(w, "X-Webdav-Prefix header is not a prefix of the requested path", http.StatusBadRequest)
223                         return
224                 }
225                 arvPath = r.URL.Path[len(prefix):]
226                 if arvPath == "" {
227                         arvPath = "/"
228                 }
229                 w.Header().Set("Vary", "X-Webdav-Prefix, "+w.Header().Get("Vary"))
230                 webdavPrefix = prefix
231         }
232         pathParts := strings.Split(arvPath[1:], "/")
233
234         var stripParts int
235         var collectionID string
236         var tokens []string
237         var reqTokens []string
238         var pathToken bool
239         var attachment bool
240         var useSiteFS bool
241         credentialsOK := h.Cluster.Collections.TrustAllContent
242         reasonNotAcceptingCredentials := ""
243
244         if r.Host != "" && stripDefaultPort(r.Host) == stripDefaultPort(h.Cluster.Services.WebDAVDownload.ExternalURL.Host) {
245                 credentialsOK = true
246                 attachment = true
247         } else if r.FormValue("disposition") == "attachment" {
248                 attachment = true
249         }
250
251         if !credentialsOK {
252                 reasonNotAcceptingCredentials = fmt.Sprintf("vhost %q does not specify a single collection ID or match Services.WebDAVDownload.ExternalURL %q, and Collections.TrustAllContent is false",
253                         r.Host, h.Cluster.Services.WebDAVDownload.ExternalURL)
254         }
255
256         if collectionID = arvados.CollectionIDFromDNSName(r.Host); collectionID != "" {
257                 // http://ID.collections.example/PATH...
258                 credentialsOK = true
259         } else if r.URL.Path == "/status.json" {
260                 h.serveStatus(w, r)
261                 return
262         } else if siteFSDir[pathParts[0]] {
263                 useSiteFS = true
264         } else if len(pathParts) >= 1 && strings.HasPrefix(pathParts[0], "c=") {
265                 // /c=ID[/PATH...]
266                 collectionID = parseCollectionIDFromURL(pathParts[0][2:])
267                 stripParts = 1
268         } else if len(pathParts) >= 2 && pathParts[0] == "collections" {
269                 if len(pathParts) >= 4 && pathParts[1] == "download" {
270                         // /collections/download/ID/TOKEN/PATH...
271                         collectionID = parseCollectionIDFromURL(pathParts[2])
272                         tokens = []string{pathParts[3]}
273                         stripParts = 4
274                         pathToken = true
275                 } else {
276                         // /collections/ID/PATH...
277                         collectionID = parseCollectionIDFromURL(pathParts[1])
278                         stripParts = 2
279                         // This path is only meant to work for public
280                         // data. Tokens provided with the request are
281                         // ignored.
282                         credentialsOK = false
283                         reasonNotAcceptingCredentials = "the '/collections/UUID/PATH' form only works for public data"
284                 }
285         }
286
287         forceReload := false
288         if cc := r.Header.Get("Cache-Control"); strings.Contains(cc, "no-cache") || strings.Contains(cc, "must-revalidate") {
289                 forceReload = true
290         }
291
292         if credentialsOK {
293                 reqTokens = auth.CredentialsFromRequest(r).Tokens
294         }
295
296         r.ParseForm()
297         origin := r.Header.Get("Origin")
298         cors := origin != "" && !strings.HasSuffix(origin, "://"+r.Host)
299         safeAjax := cors && (r.Method == http.MethodGet || r.Method == http.MethodHead)
300         // Important distinction: safeAttachment checks whether api_token exists
301         // as a query parameter. haveFormTokens checks whether api_token exists
302         // as request form data *or* a query parameter. Different checks are
303         // necessary because both the request disposition and the location of
304         // the API token affect whether or not the request needs to be
305         // redirected. The different branch comments below explain further.
306         safeAttachment := attachment && !r.URL.Query().Has("api_token")
307         if formTokens, haveFormTokens := r.Form["api_token"]; !haveFormTokens {
308                 // No token to use or redact.
309         } else if safeAjax || safeAttachment {
310                 // If this is a cross-origin request, the URL won't
311                 // appear in the browser's address bar, so
312                 // substituting a clipboard-safe URL is pointless.
313                 // Redirect-with-cookie wouldn't work anyway, because
314                 // it's not safe to allow third-party use of our
315                 // cookie.
316                 //
317                 // If we're supplying an attachment, we don't need to
318                 // convert POST to GET to avoid the "really resubmit
319                 // form?" problem, so provided the token isn't
320                 // embedded in the URL, there's no reason to do
321                 // redirect-with-cookie in this case either.
322                 for _, tok := range formTokens {
323                         reqTokens = append(reqTokens, tok)
324                 }
325         } else if browserMethod[r.Method] {
326                 // If this is a page view, and the client provided a
327                 // token via query string or POST body, we must put
328                 // the token in an HttpOnly cookie, and redirect to an
329                 // equivalent URL with the query param redacted and
330                 // method = GET.
331                 h.seeOtherWithCookie(w, r, "", credentialsOK)
332                 return
333         }
334
335         targetPath := pathParts[stripParts:]
336         if tokens == nil && len(targetPath) > 0 && strings.HasPrefix(targetPath[0], "t=") {
337                 // http://ID.example/t=TOKEN/PATH...
338                 // /c=ID/t=TOKEN/PATH...
339                 //
340                 // This form must only be used to pass scoped tokens
341                 // that give permission for a single collection. See
342                 // FormValue case above.
343                 tokens = []string{targetPath[0][2:]}
344                 pathToken = true
345                 targetPath = targetPath[1:]
346                 stripParts++
347         }
348
349         fsprefix := ""
350         if useSiteFS {
351                 if writeMethod[r.Method] {
352                         http.Error(w, webdavfs.ErrReadOnly.Error(), http.StatusMethodNotAllowed)
353                         return
354                 }
355                 if len(reqTokens) == 0 {
356                         w.Header().Add("WWW-Authenticate", "Basic realm=\"collections\"")
357                         http.Error(w, unauthorizedMessage, http.StatusUnauthorized)
358                         return
359                 }
360                 tokens = reqTokens
361         } else if collectionID == "" {
362                 http.Error(w, notFoundMessage, http.StatusNotFound)
363                 return
364         } else {
365                 fsprefix = "by_id/" + collectionID + "/"
366         }
367
368         if src := r.Header.Get("X-Webdav-Source"); strings.HasPrefix(src, "/") && !strings.Contains(src, "//") && !strings.Contains(src, "/../") {
369                 fsprefix += src[1:]
370         }
371
372         if tokens == nil {
373                 tokens = reqTokens
374                 if h.Cluster.Users.AnonymousUserToken != "" {
375                         tokens = append(tokens, h.Cluster.Users.AnonymousUserToken)
376                 }
377         }
378
379         if len(targetPath) > 0 && targetPath[0] == "_" {
380                 // If a collection has a directory called "t=foo" or
381                 // "_", it can be served at
382                 // //collections.example/_/t=foo/ or
383                 // //collections.example/_/_/ respectively:
384                 // //collections.example/t=foo/ won't work because
385                 // t=foo will be interpreted as a token "foo".
386                 targetPath = targetPath[1:]
387                 stripParts++
388         }
389
390         dirOpenMode := os.O_RDONLY
391         if writeMethod[r.Method] {
392                 dirOpenMode = os.O_RDWR
393         }
394
395         var tokenValid bool
396         var tokenScopeProblem bool
397         var token string
398         var tokenUser *arvados.User
399         var sessionFS arvados.CustomFileSystem
400         var session *cachedSession
401         var collectionDir arvados.File
402         for _, token = range tokens {
403                 var statusErr errorWithHTTPStatus
404                 fs, sess, user, err := h.Cache.GetSession(token)
405                 if errors.As(err, &statusErr) && statusErr.HTTPStatus() == http.StatusUnauthorized {
406                         // bad token
407                         continue
408                 } else if err != nil {
409                         http.Error(w, "cache error: "+err.Error(), http.StatusInternalServerError)
410                         return
411                 }
412                 if token != h.Cluster.Users.AnonymousUserToken {
413                         tokenValid = true
414                 }
415                 f, err := fs.OpenFile(fsprefix, dirOpenMode, 0)
416                 if errors.As(err, &statusErr) &&
417                         statusErr.HTTPStatus() == http.StatusForbidden &&
418                         token != h.Cluster.Users.AnonymousUserToken {
419                         // collection id is outside scope of supplied
420                         // token
421                         tokenScopeProblem = true
422                         sess.Release()
423                         continue
424                 } else if os.IsNotExist(err) {
425                         // collection does not exist or is not
426                         // readable using this token
427                         sess.Release()
428                         continue
429                 } else if err != nil {
430                         http.Error(w, err.Error(), http.StatusInternalServerError)
431                         sess.Release()
432                         return
433                 }
434                 defer f.Close()
435                 defer sess.Release()
436
437                 collectionDir, sessionFS, session, tokenUser = f, fs, sess, user
438                 break
439         }
440         if forceReload && collectionDir != nil {
441                 err := collectionDir.Sync()
442                 if err != nil {
443                         if he := errorWithHTTPStatus(nil); errors.As(err, &he) {
444                                 http.Error(w, err.Error(), he.HTTPStatus())
445                         } else {
446                                 http.Error(w, err.Error(), http.StatusInternalServerError)
447                         }
448                         return
449                 }
450         }
451         if session == nil {
452                 if pathToken {
453                         // The URL is a "secret sharing link" that
454                         // didn't work out.  Asking the client for
455                         // additional credentials would just be
456                         // confusing.
457                         http.Error(w, notFoundMessage, http.StatusNotFound)
458                         return
459                 }
460                 if tokenValid {
461                         // The client provided valid token(s), but the
462                         // collection was not found.
463                         http.Error(w, notFoundMessage, http.StatusNotFound)
464                         return
465                 }
466                 if tokenScopeProblem {
467                         // The client provided a valid token but
468                         // fetching a collection returned 401, which
469                         // means the token scope doesn't permit
470                         // fetching that collection.
471                         http.Error(w, notFoundMessage, http.StatusForbidden)
472                         return
473                 }
474                 // The client's token was invalid (e.g., expired), or
475                 // the client didn't even provide one.  Redirect to
476                 // workbench2's login-and-redirect-to-download url if
477                 // this is a browser navigation request. (The redirect
478                 // flow can't preserve the original method if it's not
479                 // GET, and doesn't make sense if the UA is a
480                 // command-line tool, is trying to load an inline
481                 // image, etc.; in these cases, there's nothing we can
482                 // do, so return 401 unauthorized.)
483                 //
484                 // Note Sec-Fetch-Mode is sent by all non-EOL
485                 // browsers, except Safari.
486                 // https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Sec-Fetch-Mode
487                 //
488                 // TODO(TC): This response would be confusing to
489                 // someone trying (anonymously) to download public
490                 // data that has been deleted.  Allow a referrer to
491                 // provide this context somehow?
492                 if r.Method == http.MethodGet && r.Header.Get("Sec-Fetch-Mode") == "navigate" {
493                         target := url.URL(h.Cluster.Services.Workbench2.ExternalURL)
494                         redirkey := "redirectToPreview"
495                         if attachment {
496                                 redirkey = "redirectToDownload"
497                         }
498                         callback := "/c=" + collectionID + "/" + strings.Join(targetPath, "/")
499                         // target.RawQuery = url.Values{redirkey:
500                         // {target}}.Encode() would be the obvious
501                         // thing to do here, but wb2 doesn't decode
502                         // this as a query param -- it takes
503                         // everything after "${redirkey}=" as the
504                         // target URL. If we encode "/" as "%2F" etc.,
505                         // the redirect won't work.
506                         target.RawQuery = redirkey + "=" + callback
507                         w.Header().Add("Location", target.String())
508                         w.WriteHeader(http.StatusSeeOther)
509                         return
510                 }
511                 if !credentialsOK {
512                         http.Error(w, fmt.Sprintf("Authorization tokens are not accepted here: %v, and no anonymous user token is configured.", reasonNotAcceptingCredentials), http.StatusUnauthorized)
513                         return
514                 }
515                 // If none of the above cases apply, suggest the
516                 // user-agent (which is either a non-browser agent
517                 // like wget, or a browser that can't redirect through
518                 // a login flow) prompt the user for credentials.
519                 w.Header().Add("WWW-Authenticate", "Basic realm=\"collections\"")
520                 http.Error(w, unauthorizedMessage, http.StatusUnauthorized)
521                 return
522         }
523
524         if r.Method == http.MethodGet || r.Method == http.MethodHead {
525                 targetfnm := fsprefix + strings.Join(pathParts[stripParts:], "/")
526                 if fi, err := sessionFS.Stat(targetfnm); err == nil && fi.IsDir() {
527                         if !strings.HasSuffix(r.URL.Path, "/") {
528                                 h.seeOtherWithCookie(w, r, r.URL.Path+"/", credentialsOK)
529                         } else {
530                                 h.serveDirectory(w, r, fi.Name(), sessionFS, targetfnm, !useSiteFS)
531                         }
532                         return
533                 }
534         }
535
536         var basename string
537         if len(targetPath) > 0 {
538                 basename = targetPath[len(targetPath)-1]
539         }
540         if arvadosclient.PDHMatch(collectionID) && writeMethod[r.Method] {
541                 http.Error(w, webdavfs.ErrReadOnly.Error(), http.StatusMethodNotAllowed)
542                 return
543         }
544         if !h.userPermittedToUploadOrDownload(r.Method, tokenUser) {
545                 http.Error(w, "Not permitted", http.StatusForbidden)
546                 return
547         }
548         h.logUploadOrDownload(r, session.arvadosclient, sessionFS, fsprefix+strings.Join(targetPath, "/"), nil, tokenUser)
549
550         writing := writeMethod[r.Method]
551         locker := h.collectionLock(collectionID, writing)
552         defer locker.Unlock()
553
554         if writing {
555                 // Save the collection only if/when all
556                 // webdav->filesystem operations succeed --
557                 // and send a 500 error if the modified
558                 // collection can't be saved.
559                 //
560                 // Perform the write in a separate sitefs, so
561                 // concurrent read operations on the same
562                 // collection see the previous saved
563                 // state. After the write succeeds and the
564                 // collection record is updated, we reset the
565                 // session so the updates are visible in
566                 // subsequent read requests.
567                 client := session.client.WithRequestID(r.Header.Get("X-Request-Id"))
568                 sessionFS = client.SiteFileSystem(session.keepclient)
569                 writingDir, err := sessionFS.OpenFile(fsprefix, os.O_RDONLY, 0)
570                 if err != nil {
571                         http.Error(w, err.Error(), http.StatusInternalServerError)
572                         return
573                 }
574                 defer writingDir.Close()
575                 w = &updateOnSuccess{
576                         ResponseWriter: w,
577                         logger:         ctxlog.FromContext(r.Context()),
578                         update: func() error {
579                                 err := writingDir.Sync()
580                                 var te arvados.TransactionError
581                                 if errors.As(err, &te) {
582                                         err = te
583                                 }
584                                 if err != nil {
585                                         return err
586                                 }
587                                 // Sync the changes to the persistent
588                                 // sessionfs for this token.
589                                 snap, err := writingDir.Snapshot()
590                                 if err != nil {
591                                         return err
592                                 }
593                                 collectionDir.Splice(snap)
594                                 return nil
595                         }}
596         }
597         if r.Method == http.MethodGet {
598                 applyContentDispositionHdr(w, r, basename, attachment)
599         }
600         if webdavPrefix == "" {
601                 webdavPrefix = "/" + strings.Join(pathParts[:stripParts], "/")
602         }
603         wh := webdav.Handler{
604                 Prefix: webdavPrefix,
605                 FileSystem: &webdavfs.FS{
606                         FileSystem:    sessionFS,
607                         Prefix:        fsprefix,
608                         Writing:       writeMethod[r.Method],
609                         AlwaysReadEOF: r.Method == "PROPFIND",
610                 },
611                 LockSystem: webdavfs.NoLockSystem,
612                 Logger: func(r *http.Request, err error) {
613                         if err != nil && !os.IsNotExist(err) {
614                                 ctxlog.FromContext(r.Context()).WithError(err).Error("error reported by webdav handler")
615                         }
616                 },
617         }
618         wh.ServeHTTP(w, r)
619         if r.Method == http.MethodGet && w.WroteStatus() == http.StatusOK {
620                 wrote := int64(w.WroteBodyBytes())
621                 fnm := strings.Join(pathParts[stripParts:], "/")
622                 fi, err := wh.FileSystem.Stat(r.Context(), fnm)
623                 if err == nil && fi.Size() != wrote {
624                         var n int
625                         f, err := wh.FileSystem.OpenFile(r.Context(), fnm, os.O_RDONLY, 0)
626                         if err == nil {
627                                 n, err = f.Read(make([]byte, 1024))
628                                 f.Close()
629                         }
630                         ctxlog.FromContext(r.Context()).Errorf("stat.Size()==%d but only wrote %d bytes; read(1024) returns %d, %v", fi.Size(), wrote, n, err)
631                 }
632         }
633 }
634
// dirListingTemplate is the html/template source for the directory
// listing page rendered by serveDirectory. It refers to the fields
// .CollectionName, .StripParts, .Request and .Files (each file entry
// having .Name, .Size and .IsDir), and relies on a template function
// named "nbsp" being registered before it is parsed.
var dirListingTemplate = `<!DOCTYPE HTML>
<HTML><HEAD>
  <META name="robots" content="NOINDEX">
  <TITLE>{{ .CollectionName }}</TITLE>
  <STYLE type="text/css">
    body {
      margin: 1.5em;
    }
    pre {
      background-color: #D9EDF7;
      border-radius: .25em;
      padding: .75em;
      overflow: auto;
    }
    .footer p {
      font-size: 82%;
    }
    ul {
      padding: 0;
    }
    ul li {
      font-family: monospace;
      list-style: none;
    }
  </STYLE>
</HEAD>
<BODY>

<H1>{{ .CollectionName }}</H1>

<P>This collection of data files is being shared with you through
Arvados.  You can download individual files listed below.  To download
the entire directory tree with wget, try:</P>

<PRE>$ wget --mirror --no-parent --no-host --cut-dirs={{ .StripParts }} https://{{ .Request.Host }}{{ .Request.URL.Path }}</PRE>

<H2>File Listing</H2>

{{if .Files}}
<UL>
{{range .Files}}
{{if .IsDir }}
  <LI>{{" " | printf "%15s  " | nbsp}}<A href="{{print "./" .Name}}/">{{.Name}}/</A></LI>
{{else}}
  <LI>{{.Size | printf "%15d  " | nbsp}}<A href="{{print "./" .Name}}">{{.Name}}</A></LI>
{{end}}
{{end}}
</UL>
{{else}}
<P>(No files; this collection is empty.)</P>
{{end}}

<HR noshade>
<DIV class="footer">
  <P>
    About Arvados:
    Arvados is a free and open source software bioinformatics platform.
    To learn more, visit arvados.org.
    Arvados is not responsible for the files listed on this page.
  </P>
</DIV>

</BODY>
`
699
// fileListEnt is one row of a directory listing, as passed to
// dirListingTemplate.
type fileListEnt struct {
	// Name is the entry's path relative to the listed directory.
	Name string
	// Size is the entry's size in bytes, as reported by its
	// FileInfo.
	Size int64
	// IsDir is true if the entry is a directory.
	IsDir bool
}
705
706 func (h *handler) serveDirectory(w http.ResponseWriter, r *http.Request, collectionName string, fs http.FileSystem, base string, recurse bool) {
707         var files []fileListEnt
708         var walk func(string) error
709         if !strings.HasSuffix(base, "/") {
710                 base = base + "/"
711         }
712         walk = func(path string) error {
713                 dirname := base + path
714                 if dirname != "/" {
715                         dirname = strings.TrimSuffix(dirname, "/")
716                 }
717                 d, err := fs.Open(dirname)
718                 if err != nil {
719                         return err
720                 }
721                 ents, err := d.Readdir(-1)
722                 if err != nil {
723                         return err
724                 }
725                 for _, ent := range ents {
726                         if recurse && ent.IsDir() {
727                                 err = walk(path + ent.Name() + "/")
728                                 if err != nil {
729                                         return err
730                                 }
731                         } else {
732                                 files = append(files, fileListEnt{
733                                         Name:  path + ent.Name(),
734                                         Size:  ent.Size(),
735                                         IsDir: ent.IsDir(),
736                                 })
737                         }
738                 }
739                 return nil
740         }
741         if err := walk(""); err != nil {
742                 http.Error(w, "error getting directory listing: "+err.Error(), http.StatusInternalServerError)
743                 return
744         }
745
746         funcs := template.FuncMap{
747                 "nbsp": func(s string) template.HTML {
748                         return template.HTML(strings.Replace(s, " ", "&nbsp;", -1))
749                 },
750         }
751         tmpl, err := template.New("dir").Funcs(funcs).Parse(dirListingTemplate)
752         if err != nil {
753                 http.Error(w, "error parsing template: "+err.Error(), http.StatusInternalServerError)
754                 return
755         }
756         sort.Slice(files, func(i, j int) bool {
757                 return files[i].Name < files[j].Name
758         })
759         w.WriteHeader(http.StatusOK)
760         tmpl.Execute(w, map[string]interface{}{
761                 "CollectionName": collectionName,
762                 "Files":          files,
763                 "Request":        r,
764                 "StripParts":     strings.Count(strings.TrimRight(r.URL.Path, "/"), "/"),
765         })
766 }
767
// applyContentDispositionHdr sets the Content-Disposition response
// header for a file being served.
//
// The disposition type is "attachment" if isAttachment is true and
// "inline" otherwise. When the request URI contains a query string, a
// filename parameter is appended. The header is left unset when its
// value would be just "inline".
func applyContentDispositionHdr(w http.ResponseWriter, r *http.Request, filename string, isAttachment bool) {
	hasQuery := strings.IndexByte(r.RequestURI, '?') >= 0
	if !isAttachment && !hasQuery {
		// Nothing to say beyond a bare "inline": send no header.
		return
	}

	value := "inline"
	if isAttachment {
		value = "attachment"
	}
	if hasQuery {
		// Help the UA realize that the filename is just
		// "filename.txt", not
		// "filename.txt?disposition=attachment".
		//
		// TODO(TC): Follow advice at RFC 6266 appendix D
		value = value + "; filename=" + strconv.QuoteToASCII(filename)
	}
	w.Header().Set("Content-Disposition", value)
}
785
786 func (h *handler) seeOtherWithCookie(w http.ResponseWriter, r *http.Request, location string, credentialsOK bool) {
787         if formTokens, haveFormTokens := r.Form["api_token"]; haveFormTokens {
788                 if !credentialsOK {
789                         // It is not safe to copy the provided token
790                         // into a cookie unless the current vhost
791                         // (origin) serves only a single collection or
792                         // we are in TrustAllContent mode.
793                         http.Error(w, "cannot serve inline content at this URL (possible configuration error; see https://doc.arvados.org/install/install-keep-web.html#dns)", http.StatusBadRequest)
794                         return
795                 }
796
797                 // The HttpOnly flag is necessary to prevent
798                 // JavaScript code (included in, or loaded by, a page
799                 // in the collection being served) from employing the
800                 // user's token beyond reading other files in the same
801                 // domain, i.e., same collection.
802                 //
803                 // The 303 redirect is necessary in the case of a GET
804                 // request to avoid exposing the token in the Location
805                 // bar, and in the case of a POST request to avoid
806                 // raising warnings when the user refreshes the
807                 // resulting page.
808                 for _, tok := range formTokens {
809                         if tok == "" {
810                                 continue
811                         }
812                         http.SetCookie(w, &http.Cookie{
813                                 Name:     "arvados_api_token",
814                                 Value:    auth.EncodeTokenCookie([]byte(tok)),
815                                 Path:     "/",
816                                 HttpOnly: true,
817                                 SameSite: http.SameSiteLaxMode,
818                         })
819                         break
820                 }
821         }
822
823         // Propagate query parameters (except api_token) from
824         // the original request.
825         redirQuery := r.URL.Query()
826         redirQuery.Del("api_token")
827
828         u := r.URL
829         if location != "" {
830                 newu, err := u.Parse(location)
831                 if err != nil {
832                         http.Error(w, "error resolving redirect target: "+err.Error(), http.StatusInternalServerError)
833                         return
834                 }
835                 u = newu
836         }
837         redir := (&url.URL{
838                 Scheme:   r.URL.Scheme,
839                 Host:     r.Host,
840                 Path:     u.Path,
841                 RawQuery: redirQuery.Encode(),
842         }).String()
843
844         w.Header().Add("Location", redir)
845         w.WriteHeader(http.StatusSeeOther)
846         io.WriteString(w, `<A href="`)
847         io.WriteString(w, html.EscapeString(redir))
848         io.WriteString(w, `">Continue</A>`)
849 }
850
851 func (h *handler) userPermittedToUploadOrDownload(method string, tokenUser *arvados.User) bool {
852         var permitDownload bool
853         var permitUpload bool
854         if tokenUser != nil && tokenUser.IsAdmin {
855                 permitUpload = h.Cluster.Collections.WebDAVPermission.Admin.Upload
856                 permitDownload = h.Cluster.Collections.WebDAVPermission.Admin.Download
857         } else {
858                 permitUpload = h.Cluster.Collections.WebDAVPermission.User.Upload
859                 permitDownload = h.Cluster.Collections.WebDAVPermission.User.Download
860         }
861         if (method == "PUT" || method == "POST") && !permitUpload {
862                 // Disallow operations that upload new files.
863                 // Permit webdav operations that move existing files around.
864                 return false
865         } else if method == "GET" && !permitDownload {
866                 // Disallow downloading file contents.
867                 // Permit webdav operations like PROPFIND that retrieve metadata
868                 // but not file contents.
869                 return false
870         }
871         return true
872 }
873
874 func (h *handler) logUploadOrDownload(
875         r *http.Request,
876         client *arvadosclient.ArvadosClient,
877         fs arvados.CustomFileSystem,
878         filepath string,
879         collection *arvados.Collection,
880         user *arvados.User) {
881
882         log := ctxlog.FromContext(r.Context())
883         props := make(map[string]string)
884         props["reqPath"] = r.URL.Path
885         var useruuid string
886         if user != nil {
887                 log = log.WithField("user_uuid", user.UUID).
888                         WithField("user_full_name", user.FullName)
889                 useruuid = user.UUID
890         } else {
891                 useruuid = fmt.Sprintf("%s-tpzed-anonymouspublic", h.Cluster.ClusterID)
892         }
893         if collection == nil && fs != nil {
894                 collection, filepath = h.determineCollection(fs, filepath)
895         }
896         if collection != nil {
897                 log = log.WithField("collection_file_path", filepath)
898                 props["collection_file_path"] = filepath
899                 // h.determineCollection populates the collection_uuid
900                 // prop with the PDH, if this collection is being
901                 // accessed via PDH. For logging, we use a different
902                 // field depending on whether it's a UUID or PDH.
903                 if len(collection.UUID) > 32 {
904                         log = log.WithField("portable_data_hash", collection.UUID)
905                         props["portable_data_hash"] = collection.UUID
906                 } else {
907                         log = log.WithField("collection_uuid", collection.UUID)
908                         props["collection_uuid"] = collection.UUID
909                 }
910         }
911         if r.Method == "PUT" || r.Method == "POST" {
912                 log.Info("File upload")
913                 if h.Cluster.Collections.WebDAVLogEvents {
914                         go func() {
915                                 lr := arvadosclient.Dict{"log": arvadosclient.Dict{
916                                         "object_uuid": useruuid,
917                                         "event_type":  "file_upload",
918                                         "properties":  props}}
919                                 err := client.Create("logs", lr, nil)
920                                 if err != nil {
921                                         log.WithError(err).Error("Failed to create upload log event on API server")
922                                 }
923                         }()
924                 }
925         } else if r.Method == "GET" {
926                 if collection != nil && collection.PortableDataHash != "" {
927                         log = log.WithField("portable_data_hash", collection.PortableDataHash)
928                         props["portable_data_hash"] = collection.PortableDataHash
929                 }
930                 log.Info("File download")
931                 if h.Cluster.Collections.WebDAVLogEvents {
932                         go func() {
933                                 lr := arvadosclient.Dict{"log": arvadosclient.Dict{
934                                         "object_uuid": useruuid,
935                                         "event_type":  "file_download",
936                                         "properties":  props}}
937                                 err := client.Create("logs", lr, nil)
938                                 if err != nil {
939                                         log.WithError(err).Error("Failed to create download log event on API server")
940                                 }
941                         }()
942                 }
943         }
944 }
945
946 func (h *handler) determineCollection(fs arvados.CustomFileSystem, path string) (*arvados.Collection, string) {
947         target := strings.TrimSuffix(path, "/")
948         for cut := len(target); cut >= 0; cut = strings.LastIndexByte(target, '/') {
949                 target = target[:cut]
950                 fi, err := fs.Stat(target)
951                 if os.IsNotExist(err) {
952                         // creating a new file/dir, or download
953                         // destined to fail
954                         continue
955                 } else if err != nil {
956                         return nil, ""
957                 }
958                 switch src := fi.Sys().(type) {
959                 case *arvados.Collection:
960                         return src, strings.TrimPrefix(path[len(target):], "/")
961                 case *arvados.Group:
962                         return nil, ""
963                 default:
964                         if _, ok := src.(error); ok {
965                                 return nil, ""
966                         }
967                 }
968         }
969         return nil, ""
970 }
971
972 var lockTidyInterval = time.Minute * 10
973
974 // Lock the specified collection for reading or writing. Caller must
975 // call Unlock() on the returned Locker when the operation is
976 // finished.
977 func (h *handler) collectionLock(collectionID string, writing bool) sync.Locker {
978         h.lockMtx.Lock()
979         defer h.lockMtx.Unlock()
980         if time.Since(h.lockTidied) > lockTidyInterval {
981                 // Periodically delete all locks that aren't in use.
982                 h.lockTidied = time.Now()
983                 for id, locker := range h.lock {
984                         if locker.TryLock() {
985                                 locker.Unlock()
986                                 delete(h.lock, id)
987                         }
988                 }
989         }
990         locker := h.lock[collectionID]
991         if locker == nil {
992                 locker = new(sync.RWMutex)
993                 if h.lock == nil {
994                         h.lock = map[string]*sync.RWMutex{}
995                 }
996                 h.lock[collectionID] = locker
997         }
998         if writing {
999                 locker.Lock()
1000                 return locker
1001         } else {
1002                 locker.RLock()
1003                 return locker.RLocker()
1004         }
1005 }
1006
1007 func ServeCORSPreflight(w http.ResponseWriter, header http.Header) bool {
1008         method := header.Get("Access-Control-Request-Method")
1009         if method == "" {
1010                 return false
1011         }
1012         if !browserMethod[method] && !webdavMethod[method] {
1013                 w.WriteHeader(http.StatusMethodNotAllowed)
1014                 return true
1015         }
1016         w.Header().Set("Access-Control-Allow-Headers", corsAllowHeadersHeader)
1017         w.Header().Set("Access-Control-Allow-Methods", "COPY, DELETE, GET, LOCK, MKCOL, MOVE, OPTIONS, POST, PROPFIND, PROPPATCH, PUT, RMCOL, UNLOCK")
1018         w.Header().Set("Access-Control-Allow-Origin", "*")
1019         w.Header().Set("Access-Control-Max-Age", "86400")
1020         return true
1021 }