21811: Merge branch 'main' into 21811-side-favorites-test
[arvados.git] / services / keep-web / handler.go
1 // Copyright (C) The Arvados Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: AGPL-3.0
4
5 package keepweb
6
7 import (
8         "encoding/json"
9         "errors"
10         "fmt"
11         "html"
12         "html/template"
13         "io"
14         "net/http"
15         "net/url"
16         "os"
17         "sort"
18         "strconv"
19         "strings"
20         "sync"
21         "time"
22
23         "git.arvados.org/arvados.git/lib/cmd"
24         "git.arvados.org/arvados.git/lib/webdavfs"
25         "git.arvados.org/arvados.git/sdk/go/arvados"
26         "git.arvados.org/arvados.git/sdk/go/arvadosclient"
27         "git.arvados.org/arvados.git/sdk/go/auth"
28         "git.arvados.org/arvados.git/sdk/go/ctxlog"
29         "git.arvados.org/arvados.git/sdk/go/httpserver"
30         "github.com/sirupsen/logrus"
31         "golang.org/x/net/webdav"
32 )
33
// handler is the keep-web request handler. Its ServeHTTP method uses
// Cache to obtain a per-token session and filesystem, Cluster for
// configuration, and metrics to track the requests it serves.
type handler struct {
	Cache   cache
	Cluster *arvados.Cluster
	metrics *metrics

	// NOTE(review): lock looks like a set of per-key RWMutexes,
	// with lockMtx guarding the map and lockTidied recording the
	// last cleanup time -- confirm against the code that uses
	// these fields.
	lockMtx    sync.Mutex
	lock       map[string]*sync.RWMutex
	lockTidied time.Time

	// NOTE(review): presumably a cache of S3 secrets, guarded by
	// s3SecretCacheMtx and cleaned up periodically (next cleanup
	// at s3SecretCacheNextTidy) -- confirm against the S3 code.
	s3SecretCache         map[string]*cachedS3Secret
	s3SecretCacheMtx      sync.Mutex
	s3SecretCacheNextTidy time.Time
}
47
var (
	// urlPDHDecoder rewrites every " " and "-" in a string to
	// "+", turning a URL-friendly spelling of a portable data
	// hash back into its canonical "hash+size" form.
	urlPDHDecoder = strings.NewReplacer(
		" ", "+",
		"-", "+",
	)

	// notFoundMessage is the response body sent with "not
	// found" style error responses.
	notFoundMessage = "Not Found"

	// unauthorizedMessage is the response body sent with 401
	// responses that ask the client for credentials.
	unauthorizedMessage = "401 Unauthorized\n\nA valid Arvados token must be provided to access this resource."
)
52
53 // parseCollectionIDFromURL returns a UUID or PDH if s is a UUID or a
54 // PDH (even if it is a PDH with "+" replaced by " " or "-");
55 // otherwise "".
56 func parseCollectionIDFromURL(s string) string {
57         if arvadosclient.UUIDMatch(s) {
58                 return s
59         }
60         if pdh := urlPDHDecoder.Replace(s); arvadosclient.PDHMatch(pdh) {
61                 return pdh
62         }
63         return ""
64 }
65
66 func (h *handler) serveStatus(w http.ResponseWriter, r *http.Request) {
67         json.NewEncoder(w).Encode(struct{ Version string }{cmd.Version.String()})
68 }
69
// errorWithHTTPStatus is implemented by values that can report the
// HTTP status code to use when they are returned to a client. It is
// used as an errors.As target in this file to choose a response
// status for an error.
type errorWithHTTPStatus interface {
	HTTPStatus() int
}
73
// updateOnSuccess wraps httpserver.ResponseWriter. If the handler
// sends an HTTP header indicating success, updateOnSuccess first
// calls the provided update func. If the update func fails, an error
// response is sent (using the error's HTTP status or 500 if none),
// and the status code and body sent by the handler are ignored (all
// response writes return the update error).
type updateOnSuccess struct {
	httpserver.ResponseWriter
	// logger receives a message when update fails.
	logger logrus.FieldLogger
	// update is called once, before the first success (2xx/3xx)
	// header is passed to the wrapped ResponseWriter.
	update func() error
	// sentHeader is true once WriteHeader has been called.
	sentHeader bool
	// err is the error returned by update, if any.
	err error
}
87
88 func (uos *updateOnSuccess) Write(p []byte) (int, error) {
89         if !uos.sentHeader {
90                 uos.WriteHeader(http.StatusOK)
91         }
92         if uos.err != nil {
93                 return 0, uos.err
94         }
95         return uos.ResponseWriter.Write(p)
96 }
97
98 func (uos *updateOnSuccess) WriteHeader(code int) {
99         if !uos.sentHeader {
100                 uos.sentHeader = true
101                 if code >= 200 && code < 400 {
102                         if uos.err = uos.update(); uos.err != nil {
103                                 code := http.StatusInternalServerError
104                                 if he := errorWithHTTPStatus(nil); errors.As(uos.err, &he) {
105                                         code = he.HTTPStatus()
106                                 }
107                                 uos.logger.WithError(uos.err).Errorf("update() returned %T error, changing response to HTTP %d", uos.err, code)
108                                 http.Error(uos.ResponseWriter, uos.err.Error(), code)
109                                 return
110                         }
111                 }
112         }
113         uos.ResponseWriter.WriteHeader(code)
114 }
115
// corsAllowHeadersHeader is a ", "-separated list of request header
// names: the general-purpose ones followed by the WebDAV ones.
var corsAllowHeadersHeader = strings.Join(append(
	[]string{"Authorization", "Content-Type", "Range"},
	// WebDAV request headers:
	"Depth", "Destination", "If", "Lock-Token", "Overwrite", "Timeout", "Cache-Control",
), ", ")

// writeMethod is the set of HTTP methods treated as write
// operations.
var writeMethod = map[string]bool{
	"COPY":      true,
	"DELETE":    true,
	"LOCK":      true,
	"MKCOL":     true,
	"MOVE":      true,
	"PROPPATCH": true,
	"PUT":       true,
	"RMCOL":     true,
	"UNLOCK":    true,
}

// webdavMethod is the set of HTTP methods accepted as WebDAV
// requests: every write method, plus OPTIONS and PROPFIND.
var webdavMethod = map[string]bool{
	"COPY":      true,
	"DELETE":    true,
	"LOCK":      true,
	"MKCOL":     true,
	"MOVE":      true,
	"OPTIONS":   true,
	"PROPFIND":  true,
	"PROPPATCH": true,
	"PUT":       true,
	"RMCOL":     true,
	"UNLOCK":    true,
}

// browserMethod is the set of HTTP methods accepted as ordinary
// browser requests.
var browserMethod = map[string]bool{
	"GET":  true,
	"HEAD": true,
	"POST": true,
}

// siteFSDir lists the top-level dirs to serve with siteFS.
var siteFSDir = map[string]bool{
	"":      true, // root directory
	"by_id": true,
	"users": true,
}
158
// stripDefaultPort returns host in lower case, with the port removed
// if the port is 80 or 443. Any other port (or no port at all) leaves
// host otherwise unchanged.
//
// Note this means port 80 and port 443 are considered to be the same
// vhost, which is deemed acceptable.
func stripDefaultPort(host string) string {
	parsed := url.URL{Host: host}
	switch parsed.Port() {
	case "80", "443":
		host = parsed.Hostname()
	}
	return strings.ToLower(host)
}
168
// CheckHealth implements service.Handler. It always returns nil (no
// health checks are performed here).
func (h *handler) CheckHealth() error {
	return nil
}
173
// Done implements service.Handler. It returns a nil channel, so a
// receive on the returned channel never completes.
func (h *handler) Done() <-chan struct{} {
	return nil
}
178
179 // ServeHTTP implements http.Handler.
180 func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
181         if xfp := r.Header.Get("X-Forwarded-Proto"); xfp != "" && xfp != "http" {
182                 r.URL.Scheme = xfp
183         }
184
185         wbuffer := newWriteBuffer(wOrig, int(h.Cluster.Collections.WebDAVOutputBuffer))
186         defer wbuffer.Close()
187         w := httpserver.WrapResponseWriter(responseWriter{
188                 Writer:         wbuffer,
189                 ResponseWriter: wOrig,
190         })
191
192         if r.Method == "OPTIONS" && ServeCORSPreflight(w, r.Header) {
193                 return
194         }
195
196         if !browserMethod[r.Method] && !webdavMethod[r.Method] {
197                 w.WriteHeader(http.StatusMethodNotAllowed)
198                 return
199         }
200
201         if r.Header.Get("Origin") != "" {
202                 // Allow simple cross-origin requests without user
203                 // credentials ("user credentials" as defined by CORS,
204                 // i.e., cookies, HTTP authentication, and client-side
205                 // SSL certificates. See
206                 // http://www.w3.org/TR/cors/#user-credentials).
207                 w.Header().Set("Access-Control-Allow-Origin", "*")
208                 w.Header().Set("Access-Control-Expose-Headers", "Content-Range")
209         }
210
211         if h.serveS3(w, r) {
212                 return
213         }
214
215         webdavPrefix := ""
216         arvPath := r.URL.Path
217         if prefix := r.Header.Get("X-Webdav-Prefix"); prefix != "" {
218                 // Enable a proxy (e.g., container log handler in
219                 // controller) to satisfy a request for path
220                 // "/foo/bar/baz.txt" using content from
221                 // "//abc123-4.internal/bar/baz.txt", by adding a
222                 // request header "X-Webdav-Prefix: /foo"
223                 if !strings.HasPrefix(arvPath, prefix) {
224                         http.Error(w, "X-Webdav-Prefix header is not a prefix of the requested path", http.StatusBadRequest)
225                         return
226                 }
227                 arvPath = r.URL.Path[len(prefix):]
228                 if arvPath == "" {
229                         arvPath = "/"
230                 }
231                 w.Header().Set("Vary", "X-Webdav-Prefix, "+w.Header().Get("Vary"))
232                 webdavPrefix = prefix
233         }
234         pathParts := strings.Split(arvPath[1:], "/")
235
236         var stripParts int
237         var collectionID string
238         var tokens []string
239         var reqTokens []string
240         var pathToken bool
241         var attachment bool
242         var useSiteFS bool
243         credentialsOK := h.Cluster.Collections.TrustAllContent
244         reasonNotAcceptingCredentials := ""
245
246         if r.Host != "" && stripDefaultPort(r.Host) == stripDefaultPort(h.Cluster.Services.WebDAVDownload.ExternalURL.Host) {
247                 credentialsOK = true
248                 attachment = true
249         } else if r.FormValue("disposition") == "attachment" {
250                 attachment = true
251         }
252
253         if !credentialsOK {
254                 reasonNotAcceptingCredentials = fmt.Sprintf("vhost %q does not specify a single collection ID or match Services.WebDAVDownload.ExternalURL %q, and Collections.TrustAllContent is false",
255                         r.Host, h.Cluster.Services.WebDAVDownload.ExternalURL)
256         }
257
258         if collectionID = arvados.CollectionIDFromDNSName(r.Host); collectionID != "" {
259                 // http://ID.collections.example/PATH...
260                 credentialsOK = true
261         } else if r.URL.Path == "/status.json" {
262                 h.serveStatus(w, r)
263                 return
264         } else if siteFSDir[pathParts[0]] {
265                 useSiteFS = true
266         } else if len(pathParts) >= 1 && strings.HasPrefix(pathParts[0], "c=") {
267                 // /c=ID[/PATH...]
268                 collectionID = parseCollectionIDFromURL(pathParts[0][2:])
269                 stripParts = 1
270         } else if len(pathParts) >= 2 && pathParts[0] == "collections" {
271                 if len(pathParts) >= 4 && pathParts[1] == "download" {
272                         // /collections/download/ID/TOKEN/PATH...
273                         collectionID = parseCollectionIDFromURL(pathParts[2])
274                         tokens = []string{pathParts[3]}
275                         stripParts = 4
276                         pathToken = true
277                 } else {
278                         // /collections/ID/PATH...
279                         collectionID = parseCollectionIDFromURL(pathParts[1])
280                         stripParts = 2
281                         // This path is only meant to work for public
282                         // data. Tokens provided with the request are
283                         // ignored.
284                         credentialsOK = false
285                         reasonNotAcceptingCredentials = "the '/collections/UUID/PATH' form only works for public data"
286                 }
287         }
288
289         forceReload := false
290         if cc := r.Header.Get("Cache-Control"); strings.Contains(cc, "no-cache") || strings.Contains(cc, "must-revalidate") {
291                 forceReload = true
292         }
293
294         if credentialsOK {
295                 reqTokens = auth.CredentialsFromRequest(r).Tokens
296         }
297
298         r.ParseForm()
299         origin := r.Header.Get("Origin")
300         cors := origin != "" && !strings.HasSuffix(origin, "://"+r.Host)
301         safeAjax := cors && (r.Method == http.MethodGet || r.Method == http.MethodHead)
302         // Important distinction: safeAttachment checks whether api_token exists
303         // as a query parameter. haveFormTokens checks whether api_token exists
304         // as request form data *or* a query parameter. Different checks are
305         // necessary because both the request disposition and the location of
306         // the API token affect whether or not the request needs to be
307         // redirected. The different branch comments below explain further.
308         safeAttachment := attachment && !r.URL.Query().Has("api_token")
309         if formTokens, haveFormTokens := r.Form["api_token"]; !haveFormTokens {
310                 // No token to use or redact.
311         } else if safeAjax || safeAttachment {
312                 // If this is a cross-origin request, the URL won't
313                 // appear in the browser's address bar, so
314                 // substituting a clipboard-safe URL is pointless.
315                 // Redirect-with-cookie wouldn't work anyway, because
316                 // it's not safe to allow third-party use of our
317                 // cookie.
318                 //
319                 // If we're supplying an attachment, we don't need to
320                 // convert POST to GET to avoid the "really resubmit
321                 // form?" problem, so provided the token isn't
322                 // embedded in the URL, there's no reason to do
323                 // redirect-with-cookie in this case either.
324                 for _, tok := range formTokens {
325                         reqTokens = append(reqTokens, tok)
326                 }
327         } else if browserMethod[r.Method] {
328                 // If this is a page view, and the client provided a
329                 // token via query string or POST body, we must put
330                 // the token in an HttpOnly cookie, and redirect to an
331                 // equivalent URL with the query param redacted and
332                 // method = GET.
333                 h.seeOtherWithCookie(w, r, "", credentialsOK)
334                 return
335         }
336
337         targetPath := pathParts[stripParts:]
338         if tokens == nil && len(targetPath) > 0 && strings.HasPrefix(targetPath[0], "t=") {
339                 // http://ID.example/t=TOKEN/PATH...
340                 // /c=ID/t=TOKEN/PATH...
341                 //
342                 // This form must only be used to pass scoped tokens
343                 // that give permission for a single collection. See
344                 // FormValue case above.
345                 tokens = []string{targetPath[0][2:]}
346                 pathToken = true
347                 targetPath = targetPath[1:]
348                 stripParts++
349         }
350
351         fsprefix := ""
352         if useSiteFS {
353                 if writeMethod[r.Method] {
354                         http.Error(w, webdavfs.ErrReadOnly.Error(), http.StatusMethodNotAllowed)
355                         return
356                 }
357                 if len(reqTokens) == 0 {
358                         w.Header().Add("WWW-Authenticate", "Basic realm=\"collections\"")
359                         http.Error(w, unauthorizedMessage, http.StatusUnauthorized)
360                         return
361                 }
362                 tokens = reqTokens
363         } else if collectionID == "" {
364                 http.Error(w, notFoundMessage, http.StatusNotFound)
365                 return
366         } else {
367                 fsprefix = "by_id/" + collectionID + "/"
368         }
369
370         if src := r.Header.Get("X-Webdav-Source"); strings.HasPrefix(src, "/") && !strings.Contains(src, "//") && !strings.Contains(src, "/../") {
371                 fsprefix += src[1:]
372         }
373
374         if tokens == nil {
375                 tokens = reqTokens
376                 if h.Cluster.Users.AnonymousUserToken != "" {
377                         tokens = append(tokens, h.Cluster.Users.AnonymousUserToken)
378                 }
379         }
380
381         if len(targetPath) > 0 && targetPath[0] == "_" {
382                 // If a collection has a directory called "t=foo" or
383                 // "_", it can be served at
384                 // //collections.example/_/t=foo/ or
385                 // //collections.example/_/_/ respectively:
386                 // //collections.example/t=foo/ won't work because
387                 // t=foo will be interpreted as a token "foo".
388                 targetPath = targetPath[1:]
389                 stripParts++
390         }
391
392         dirOpenMode := os.O_RDONLY
393         if writeMethod[r.Method] {
394                 dirOpenMode = os.O_RDWR
395         }
396
397         var tokenValid bool
398         var tokenScopeProblem bool
399         var token string
400         var tokenUser *arvados.User
401         var sessionFS arvados.CustomFileSystem
402         var session *cachedSession
403         var collectionDir arvados.File
404         for _, token = range tokens {
405                 var statusErr errorWithHTTPStatus
406                 fs, sess, user, err := h.Cache.GetSession(token)
407                 if errors.As(err, &statusErr) && statusErr.HTTPStatus() == http.StatusUnauthorized {
408                         // bad token
409                         continue
410                 } else if err != nil {
411                         http.Error(w, "cache error: "+err.Error(), http.StatusInternalServerError)
412                         return
413                 }
414                 if token != h.Cluster.Users.AnonymousUserToken {
415                         tokenValid = true
416                 }
417                 f, err := fs.OpenFile(fsprefix, dirOpenMode, 0)
418                 if errors.As(err, &statusErr) &&
419                         statusErr.HTTPStatus() == http.StatusForbidden &&
420                         token != h.Cluster.Users.AnonymousUserToken {
421                         // collection id is outside scope of supplied
422                         // token
423                         tokenScopeProblem = true
424                         sess.Release()
425                         continue
426                 } else if os.IsNotExist(err) {
427                         // collection does not exist or is not
428                         // readable using this token
429                         sess.Release()
430                         continue
431                 } else if err != nil {
432                         http.Error(w, err.Error(), http.StatusInternalServerError)
433                         sess.Release()
434                         return
435                 }
436                 defer f.Close()
437
438                 collectionDir, sessionFS, session, tokenUser = f, fs, sess, user
439                 break
440         }
441
442         // releaseSession() is equivalent to session.Release() except
443         // that it's a no-op if (1) session is nil, or (2) it has
444         // already been called.
445         //
446         // This way, we can do a defer call here to ensure it gets
447         // called in all code paths, and also call it inline (see
448         // below) in the cases where we want to release the lock
449         // before returning.
450         releaseSession := func() {}
451         if session != nil {
452                 var releaseSessionOnce sync.Once
453                 releaseSession = func() { releaseSessionOnce.Do(func() { session.Release() }) }
454         }
455         defer releaseSession()
456
457         if forceReload && collectionDir != nil {
458                 err := collectionDir.Sync()
459                 if err != nil {
460                         if he := errorWithHTTPStatus(nil); errors.As(err, &he) {
461                                 http.Error(w, err.Error(), he.HTTPStatus())
462                         } else {
463                                 http.Error(w, err.Error(), http.StatusInternalServerError)
464                         }
465                         return
466                 }
467         }
468         if session == nil {
469                 if pathToken {
470                         // The URL is a "secret sharing link" that
471                         // didn't work out.  Asking the client for
472                         // additional credentials would just be
473                         // confusing.
474                         http.Error(w, notFoundMessage, http.StatusNotFound)
475                         return
476                 }
477                 if tokenValid {
478                         // The client provided valid token(s), but the
479                         // collection was not found.
480                         http.Error(w, notFoundMessage, http.StatusNotFound)
481                         return
482                 }
483                 if tokenScopeProblem {
484                         // The client provided a valid token but
485                         // fetching a collection returned 401, which
486                         // means the token scope doesn't permit
487                         // fetching that collection.
488                         http.Error(w, notFoundMessage, http.StatusForbidden)
489                         return
490                 }
491                 // The client's token was invalid (e.g., expired), or
492                 // the client didn't even provide one.  Redirect to
493                 // workbench2's login-and-redirect-to-download url if
494                 // this is a browser navigation request. (The redirect
495                 // flow can't preserve the original method if it's not
496                 // GET, and doesn't make sense if the UA is a
497                 // command-line tool, is trying to load an inline
498                 // image, etc.; in these cases, there's nothing we can
499                 // do, so return 401 unauthorized.)
500                 //
501                 // Note Sec-Fetch-Mode is sent by all non-EOL
502                 // browsers, except Safari.
503                 // https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Sec-Fetch-Mode
504                 //
505                 // TODO(TC): This response would be confusing to
506                 // someone trying (anonymously) to download public
507                 // data that has been deleted.  Allow a referrer to
508                 // provide this context somehow?
509                 if r.Method == http.MethodGet && r.Header.Get("Sec-Fetch-Mode") == "navigate" {
510                         target := url.URL(h.Cluster.Services.Workbench2.ExternalURL)
511                         redirkey := "redirectToPreview"
512                         if attachment {
513                                 redirkey = "redirectToDownload"
514                         }
515                         callback := "/c=" + collectionID + "/" + strings.Join(targetPath, "/")
516                         query := url.Values{redirkey: {callback}}
517                         queryString := query.Encode()
518                         // Note: Encode (and QueryEscape function) turns space
519                         // into plus sign (+) rather than %20 (the plus sign
520                         // becomes %2B); that is the rule for web forms data
521                         // sent in URL query part via GET, but we're not
522                         // emulating forms here. Client JS APIs
523                         // (URLSearchParam#get, decodeURIComponent) will
524                         // decode %20, but while the former also expects the
525                         // form-specific encoding, the latter doesn't.
526                         // Encode() almost encodes everything; RFC3986 sec. 3.4
527                         // says "it is sometimes better for usability" to not
528                         // encode / and ? when passing URI reference in query.
529                         // This is also legal according to WHATWG URL spec and
530                         // can be desirable for debugging webapp.
531                         // We can let slash / appear in the encoded query, and
532                         // equality-sign = too, but exempting ? is not very
533                         // useful.
534                         // Plus-sign, hash, and ampersand are never exempt.
535                         r := strings.NewReplacer("+", "%20", "%2F", "/", "%3D", "=")
536                         target.RawQuery = r.Replace(queryString)
537                         w.Header().Add("Location", target.String())
538                         w.WriteHeader(http.StatusSeeOther)
539                         return
540                 }
541                 if !credentialsOK {
542                         http.Error(w, fmt.Sprintf("Authorization tokens are not accepted here: %v, and no anonymous user token is configured.", reasonNotAcceptingCredentials), http.StatusUnauthorized)
543                         return
544                 }
545                 // If none of the above cases apply, suggest the
546                 // user-agent (which is either a non-browser agent
547                 // like wget, or a browser that can't redirect through
548                 // a login flow) prompt the user for credentials.
549                 w.Header().Add("WWW-Authenticate", "Basic realm=\"collections\"")
550                 http.Error(w, unauthorizedMessage, http.StatusUnauthorized)
551                 return
552         }
553
554         if r.Method == http.MethodGet || r.Method == http.MethodHead {
555                 targetfnm := fsprefix + strings.Join(pathParts[stripParts:], "/")
556                 if fi, err := sessionFS.Stat(targetfnm); err == nil && fi.IsDir() {
557                         releaseSession() // because we won't be writing anything
558                         if !strings.HasSuffix(r.URL.Path, "/") {
559                                 h.seeOtherWithCookie(w, r, r.URL.Path+"/", credentialsOK)
560                         } else {
561                                 h.serveDirectory(w, r, fi.Name(), sessionFS, targetfnm, !useSiteFS)
562                         }
563                         return
564                 }
565         }
566
567         var basename string
568         if len(targetPath) > 0 {
569                 basename = targetPath[len(targetPath)-1]
570         }
571         if arvadosclient.PDHMatch(collectionID) && writeMethod[r.Method] {
572                 http.Error(w, webdavfs.ErrReadOnly.Error(), http.StatusMethodNotAllowed)
573                 return
574         }
575         if !h.userPermittedToUploadOrDownload(r.Method, tokenUser) {
576                 http.Error(w, "Not permitted", http.StatusForbidden)
577                 return
578         }
579         h.logUploadOrDownload(r, session.arvadosclient, sessionFS, fsprefix+strings.Join(targetPath, "/"), nil, tokenUser)
580
581         writing := writeMethod[r.Method]
582         locker := h.collectionLock(collectionID, writing)
583         defer locker.Unlock()
584
585         if writing {
586                 // Save the collection only if/when all
587                 // webdav->filesystem operations succeed --
588                 // and send a 500 error if the modified
589                 // collection can't be saved.
590                 //
591                 // Perform the write in a separate sitefs, so
592                 // concurrent read operations on the same
593                 // collection see the previous saved
594                 // state. After the write succeeds and the
595                 // collection record is updated, we reset the
596                 // session so the updates are visible in
597                 // subsequent read requests.
598                 client := session.client.WithRequestID(r.Header.Get("X-Request-Id"))
599                 sessionFS = client.SiteFileSystem(session.keepclient)
600                 writingDir, err := sessionFS.OpenFile(fsprefix, os.O_RDONLY, 0)
601                 if err != nil {
602                         http.Error(w, err.Error(), http.StatusInternalServerError)
603                         return
604                 }
605                 defer writingDir.Close()
606                 w = &updateOnSuccess{
607                         ResponseWriter: w,
608                         logger:         ctxlog.FromContext(r.Context()),
609                         update: func() error {
610                                 err := writingDir.Sync()
611                                 var te arvados.TransactionError
612                                 if errors.As(err, &te) {
613                                         err = te
614                                 }
615                                 if err != nil {
616                                         return err
617                                 }
618                                 // Sync the changes to the persistent
619                                 // sessionfs for this token.
620                                 snap, err := writingDir.Snapshot()
621                                 if err != nil {
622                                         return err
623                                 }
624                                 collectionDir.Splice(snap)
625                                 return nil
626                         }}
627         } else {
628                 // When writing, we need to block session renewal
629                 // until we're finished, in order to guarantee the
630                 // effect of the write is visible in future responses.
631                 // But if we're not writing, we can release the lock
632                 // early.  This enables us to keep renewing sessions
633                 // and processing more requests even if a slow client
634                 // takes a long time to download a large file.
635                 releaseSession()
636         }
637         if r.Method == http.MethodGet {
638                 applyContentDispositionHdr(w, r, basename, attachment)
639         }
640         if webdavPrefix == "" {
641                 webdavPrefix = "/" + strings.Join(pathParts[:stripParts], "/")
642         }
643         wh := &webdav.Handler{
644                 Prefix: webdavPrefix,
645                 FileSystem: &webdavfs.FS{
646                         FileSystem:    sessionFS,
647                         Prefix:        fsprefix,
648                         Writing:       writeMethod[r.Method],
649                         AlwaysReadEOF: r.Method == "PROPFIND",
650                 },
651                 LockSystem: webdavfs.NoLockSystem,
652                 Logger: func(r *http.Request, err error) {
653                         if err != nil && !os.IsNotExist(err) {
654                                 ctxlog.FromContext(r.Context()).WithError(err).Error("error reported by webdav handler")
655                         }
656                 },
657         }
658         h.metrics.track(wh, w, r)
659         if r.Method == http.MethodGet && w.WroteStatus() == http.StatusOK {
660                 wrote := int64(w.WroteBodyBytes())
661                 fnm := strings.Join(pathParts[stripParts:], "/")
662                 fi, err := wh.FileSystem.Stat(r.Context(), fnm)
663                 if err == nil && fi.Size() != wrote {
664                         var n int
665                         f, err := wh.FileSystem.OpenFile(r.Context(), fnm, os.O_RDONLY, 0)
666                         if err == nil {
667                                 n, err = f.Read(make([]byte, 1024))
668                                 f.Close()
669                         }
670                         ctxlog.FromContext(r.Context()).Errorf("stat.Size()==%d but only wrote %d bytes; read(1024) returns %d, %v", fi.Size(), wrote, n, err)
671                 }
672         }
673 }
674
// dirListingTemplate is the html/template source that serveDirectory
// parses and executes to render a directory listing page. It reads
// the CollectionName, StripParts, QuotedUrlForWget and Files values
// from the template data (each Files element supplying IsDir, Size,
// Href and Name), and requires a template function named "nbsp" to
// be registered before parsing.
var dirListingTemplate = `<!DOCTYPE HTML>
<HTML><HEAD>
  <META name="robots" content="NOINDEX">
  <TITLE>{{ .CollectionName }}</TITLE>
  <STYLE type="text/css">
    body {
      margin: 1.5em;
    }
    pre {
      background-color: #D9EDF7;
      border-radius: .25em;
      padding: .75em;
      overflow: auto;
    }
    .footer p {
      font-size: 82%;
    }
    hr {
      border: 1px solid #808080;
    }
    ul {
      padding: 0;
    }
    ul li {
      font-family: monospace;
      list-style: none;
    }
  </STYLE>
</HEAD>
<BODY>

<H1>{{ .CollectionName }}</H1>

<P>This collection of data files is being shared with you through
Arvados.  You can download individual files listed below.  To download
the entire directory tree with <CODE>wget</CODE>, try:</P>

<PRE id="wget-example">$ wget --mirror --no-parent --no-host --cut-dirs={{ .StripParts }} {{ .QuotedUrlForWget }}</PRE>

<H2>File Listing</H2>

{{if .Files}}
<UL>
{{range .Files}}
{{if .IsDir }}
  <LI>{{" " | printf "%15s  " | nbsp}}<A class="item" href="{{ .Href }}/">{{ .Name }}/</A></LI>
{{else}}
  <LI>{{.Size | printf "%15d  " | nbsp}}<A class="item" href="{{ .Href }}">{{ .Name }}</A></LI>
{{end}}
{{end}}
</UL>
{{else}}
<P>(No files; this collection is empty.)</P>
{{end}}

<HR>
<DIV class="footer">
  <P>
    About Arvados:
    Arvados is a free and open source software bioinformatics platform.
    To learn more, visit arvados.org.
    Arvados is not responsible for the files listed on this page.
  </P>
</DIV>

</BODY>
</HTML>
`
743
// fileListEnt describes one entry (file or directory) in the listing
// that serveDirectory passes to dirListingTemplate.
type fileListEnt struct {
	// Name is the entry's path relative to the directory being
	// listed, as shown to the user.
	Name  string
	// Href is the percent-encoded relative link to the entry (see
	// relativeHref).
	Href  string
	// Size is the size reported by the entry's os.FileInfo.
	Size  int64
	// IsDir is true if the entry is a directory.
	IsDir bool
}
750
// relativeHref converts a filesystem path like `foo/"bar baz"` to a
// percent-encoded relative link like `./foo/%22bar%20baz%22`.
//
// The leading "./" is always added, so an empty path yields "./".
//
// The result is only URL-escaped: it can still contain characters
// that are special in HTML, such as '&'. Escaping those is left to
// the HTML templating engine that renders the link.
func relativeHref(path string) string {
	escaped := (&url.URL{Path: path}).EscapedPath()
	return "./" + escaped
}
760
// makeQuotedUrlForWget returns a shell-quoted (single-quoted) URL
// suitable for pasting to a command line ("wget ...") to repeat the
// given HTTP request.
//
// The scheme is the X-Forwarded-Proto request header if it is exactly
// "http" or "https" (i.e., the protocol reported by a load balancer
// or proxy); otherwise "https" if the request arrived over TLS, and
// "http" if not. The host is r.Host and the path is the escaped form
// of r.URL's path. The query string is not included.
func makeQuotedUrlForWget(r *http.Request) string {
	scheme := r.Header.Get("X-Forwarded-Proto")
	switch {
	case scheme == "http" || scheme == "https":
		// use protocol reported by load balancer / proxy
	case r.TLS != nil:
		scheme = "https"
	default:
		scheme = "http"
	}
	// An escaped path may still contain single quote chars, which
	// would interfere with our shell quoting. Avoid this by
	// escaping them as %27.
	p := strings.ReplaceAll(r.URL.EscapedPath(), "'", "%27")
	// The host is supplied by the client and is not guaranteed to
	// be free of single quotes either. Percent-encoding would
	// change the host name, so use the shell idiom instead: close
	// the quoted string, emit an escaped quote, and reopen it.
	host := strings.ReplaceAll(r.Host, "'", `'\''`)
	return fmt.Sprintf("'%s://%s%s'", scheme, host, p)
}
778
779 func (h *handler) serveDirectory(w http.ResponseWriter, r *http.Request, collectionName string, fs http.FileSystem, base string, recurse bool) {
780         var files []fileListEnt
781         var walk func(string) error
782         if !strings.HasSuffix(base, "/") {
783                 base = base + "/"
784         }
785         walk = func(path string) error {
786                 dirname := base + path
787                 if dirname != "/" {
788                         dirname = strings.TrimSuffix(dirname, "/")
789                 }
790                 d, err := fs.Open(dirname)
791                 if err != nil {
792                         return err
793                 }
794                 ents, err := d.Readdir(-1)
795                 if err != nil {
796                         return err
797                 }
798                 for _, ent := range ents {
799                         if recurse && ent.IsDir() {
800                                 err = walk(path + ent.Name() + "/")
801                                 if err != nil {
802                                         return err
803                                 }
804                         } else {
805                                 listingName := path + ent.Name()
806                                 files = append(files, fileListEnt{
807                                         Name:  listingName,
808                                         Href:  relativeHref(listingName),
809                                         Size:  ent.Size(),
810                                         IsDir: ent.IsDir(),
811                                 })
812                         }
813                 }
814                 return nil
815         }
816         if err := walk(""); err != nil {
817                 http.Error(w, "error getting directory listing: "+err.Error(), http.StatusInternalServerError)
818                 return
819         }
820
821         funcs := template.FuncMap{
822                 "nbsp": func(s string) template.HTML {
823                         return template.HTML(strings.Replace(s, " ", "&nbsp;", -1))
824                 },
825         }
826         tmpl, err := template.New("dir").Funcs(funcs).Parse(dirListingTemplate)
827         if err != nil {
828                 http.Error(w, "error parsing template: "+err.Error(), http.StatusInternalServerError)
829                 return
830         }
831         sort.Slice(files, func(i, j int) bool {
832                 return files[i].Name < files[j].Name
833         })
834         w.WriteHeader(http.StatusOK)
835         tmpl.Execute(w, map[string]interface{}{
836                 "CollectionName":   collectionName,
837                 "Files":            files,
838                 "Request":          r,
839                 "StripParts":       strings.Count(strings.TrimRight(r.URL.Path, "/"), "/"),
840                 "QuotedUrlForWget": makeQuotedUrlForWget(r),
841         })
842 }
843
// applyContentDispositionHdr sets the Content-Disposition response
// header where one is needed.
//
// The disposition type is "attachment" if isAttachment is true,
// otherwise "inline". If the request URI contains a query string, a
// filename parameter is appended to help the UA realize that the
// filename is just "filename.txt", not
// "filename.txt?disposition=attachment".
//
// No header is set in the plain "inline" case (not an attachment and
// no query string).
func applyContentDispositionHdr(w http.ResponseWriter, r *http.Request, filename string, isAttachment bool) {
	hasQuery := strings.Contains(r.RequestURI, "?")
	if !isAttachment && !hasQuery {
		// Bare "inline": nothing to tell the UA.
		return
	}
	value := "inline"
	if isAttachment {
		value = "attachment"
	}
	if hasQuery {
		// TODO(TC): Follow advice at RFC 6266 appendix D
		value += "; filename=" + strconv.QuoteToASCII(filename)
	}
	w.Header().Set("Content-Disposition", value)
}
861
862 func (h *handler) seeOtherWithCookie(w http.ResponseWriter, r *http.Request, location string, credentialsOK bool) {
863         if formTokens, haveFormTokens := r.Form["api_token"]; haveFormTokens {
864                 if !credentialsOK {
865                         // It is not safe to copy the provided token
866                         // into a cookie unless the current vhost
867                         // (origin) serves only a single collection or
868                         // we are in TrustAllContent mode.
869                         http.Error(w, "cannot serve inline content at this URL (possible configuration error; see https://doc.arvados.org/install/install-keep-web.html#dns)", http.StatusBadRequest)
870                         return
871                 }
872
873                 // The HttpOnly flag is necessary to prevent
874                 // JavaScript code (included in, or loaded by, a page
875                 // in the collection being served) from employing the
876                 // user's token beyond reading other files in the same
877                 // domain, i.e., same collection.
878                 //
879                 // The 303 redirect is necessary in the case of a GET
880                 // request to avoid exposing the token in the Location
881                 // bar, and in the case of a POST request to avoid
882                 // raising warnings when the user refreshes the
883                 // resulting page.
884                 for _, tok := range formTokens {
885                         if tok == "" {
886                                 continue
887                         }
888                         http.SetCookie(w, &http.Cookie{
889                                 Name:     "arvados_api_token",
890                                 Value:    auth.EncodeTokenCookie([]byte(tok)),
891                                 Path:     "/",
892                                 HttpOnly: true,
893                                 SameSite: http.SameSiteLaxMode,
894                         })
895                         break
896                 }
897         }
898
899         // Propagate query parameters (except api_token) from
900         // the original request.
901         redirQuery := r.URL.Query()
902         redirQuery.Del("api_token")
903
904         u := r.URL
905         if location != "" {
906                 newu, err := u.Parse(location)
907                 if err != nil {
908                         http.Error(w, "error resolving redirect target: "+err.Error(), http.StatusInternalServerError)
909                         return
910                 }
911                 u = newu
912         }
913         redir := (&url.URL{
914                 Scheme:   r.URL.Scheme,
915                 Host:     r.Host,
916                 Path:     u.Path,
917                 RawQuery: redirQuery.Encode(),
918         }).String()
919
920         w.Header().Add("Location", redir)
921         w.WriteHeader(http.StatusSeeOther)
922         io.WriteString(w, `<A href="`)
923         io.WriteString(w, html.EscapeString(redir))
924         io.WriteString(w, `">Continue</A>`)
925 }
926
927 func (h *handler) userPermittedToUploadOrDownload(method string, tokenUser *arvados.User) bool {
928         var permitDownload bool
929         var permitUpload bool
930         if tokenUser != nil && tokenUser.IsAdmin {
931                 permitUpload = h.Cluster.Collections.WebDAVPermission.Admin.Upload
932                 permitDownload = h.Cluster.Collections.WebDAVPermission.Admin.Download
933         } else {
934                 permitUpload = h.Cluster.Collections.WebDAVPermission.User.Upload
935                 permitDownload = h.Cluster.Collections.WebDAVPermission.User.Download
936         }
937         if (method == "PUT" || method == "POST") && !permitUpload {
938                 // Disallow operations that upload new files.
939                 // Permit webdav operations that move existing files around.
940                 return false
941         } else if method == "GET" && !permitDownload {
942                 // Disallow downloading file contents.
943                 // Permit webdav operations like PROPFIND that retrieve metadata
944                 // but not file contents.
945                 return false
946         }
947         return true
948 }
949
950 func (h *handler) logUploadOrDownload(
951         r *http.Request,
952         client *arvadosclient.ArvadosClient,
953         fs arvados.CustomFileSystem,
954         filepath string,
955         collection *arvados.Collection,
956         user *arvados.User) {
957
958         log := ctxlog.FromContext(r.Context())
959         props := make(map[string]string)
960         props["reqPath"] = r.URL.Path
961         var useruuid string
962         if user != nil {
963                 log = log.WithField("user_uuid", user.UUID).
964                         WithField("user_full_name", user.FullName)
965                 useruuid = user.UUID
966         } else {
967                 useruuid = fmt.Sprintf("%s-tpzed-anonymouspublic", h.Cluster.ClusterID)
968         }
969         if collection == nil && fs != nil {
970                 collection, filepath = h.determineCollection(fs, filepath)
971         }
972         if collection != nil {
973                 log = log.WithField("collection_file_path", filepath)
974                 props["collection_file_path"] = filepath
975                 // h.determineCollection populates the collection_uuid
976                 // prop with the PDH, if this collection is being
977                 // accessed via PDH. For logging, we use a different
978                 // field depending on whether it's a UUID or PDH.
979                 if len(collection.UUID) > 32 {
980                         log = log.WithField("portable_data_hash", collection.UUID)
981                         props["portable_data_hash"] = collection.UUID
982                 } else {
983                         log = log.WithField("collection_uuid", collection.UUID)
984                         props["collection_uuid"] = collection.UUID
985                 }
986         }
987         if r.Method == "PUT" || r.Method == "POST" {
988                 log.Info("File upload")
989                 if h.Cluster.Collections.WebDAVLogEvents {
990                         go func() {
991                                 lr := arvadosclient.Dict{"log": arvadosclient.Dict{
992                                         "object_uuid": useruuid,
993                                         "event_type":  "file_upload",
994                                         "properties":  props}}
995                                 err := client.Create("logs", lr, nil)
996                                 if err != nil {
997                                         log.WithError(err).Error("Failed to create upload log event on API server")
998                                 }
999                         }()
1000                 }
1001         } else if r.Method == "GET" {
1002                 if collection != nil && collection.PortableDataHash != "" {
1003                         log = log.WithField("portable_data_hash", collection.PortableDataHash)
1004                         props["portable_data_hash"] = collection.PortableDataHash
1005                 }
1006                 log.Info("File download")
1007                 if h.Cluster.Collections.WebDAVLogEvents {
1008                         go func() {
1009                                 lr := arvadosclient.Dict{"log": arvadosclient.Dict{
1010                                         "object_uuid": useruuid,
1011                                         "event_type":  "file_download",
1012                                         "properties":  props}}
1013                                 err := client.Create("logs", lr, nil)
1014                                 if err != nil {
1015                                         log.WithError(err).Error("Failed to create download log event on API server")
1016                                 }
1017                         }()
1018                 }
1019         }
1020 }
1021
1022 func (h *handler) determineCollection(fs arvados.CustomFileSystem, path string) (*arvados.Collection, string) {
1023         target := strings.TrimSuffix(path, "/")
1024         for cut := len(target); cut >= 0; cut = strings.LastIndexByte(target, '/') {
1025                 target = target[:cut]
1026                 fi, err := fs.Stat(target)
1027                 if os.IsNotExist(err) {
1028                         // creating a new file/dir, or download
1029                         // destined to fail
1030                         continue
1031                 } else if err != nil {
1032                         return nil, ""
1033                 }
1034                 switch src := fi.Sys().(type) {
1035                 case *arvados.Collection:
1036                         return src, strings.TrimPrefix(path[len(target):], "/")
1037                 case *arvados.Group:
1038                         return nil, ""
1039                 default:
1040                         if _, ok := src.(error); ok {
1041                                 return nil, ""
1042                         }
1043                 }
1044         }
1045         return nil, ""
1046 }
1047
// lockTidyInterval is the minimum time between the passes in which
// collectionLock deletes per-collection locks that are not in use.
var lockTidyInterval = time.Minute * 10
1049
1050 // Lock the specified collection for reading or writing. Caller must
1051 // call Unlock() on the returned Locker when the operation is
1052 // finished.
1053 func (h *handler) collectionLock(collectionID string, writing bool) sync.Locker {
1054         h.lockMtx.Lock()
1055         defer h.lockMtx.Unlock()
1056         if time.Since(h.lockTidied) > lockTidyInterval {
1057                 // Periodically delete all locks that aren't in use.
1058                 h.lockTidied = time.Now()
1059                 for id, locker := range h.lock {
1060                         if locker.TryLock() {
1061                                 locker.Unlock()
1062                                 delete(h.lock, id)
1063                         }
1064                 }
1065         }
1066         locker := h.lock[collectionID]
1067         if locker == nil {
1068                 locker = new(sync.RWMutex)
1069                 if h.lock == nil {
1070                         h.lock = map[string]*sync.RWMutex{}
1071                 }
1072                 h.lock[collectionID] = locker
1073         }
1074         if writing {
1075                 locker.Lock()
1076                 return locker
1077         } else {
1078                 locker.RLock()
1079                 return locker.RLocker()
1080         }
1081 }
1082
1083 func ServeCORSPreflight(w http.ResponseWriter, header http.Header) bool {
1084         method := header.Get("Access-Control-Request-Method")
1085         if method == "" {
1086                 return false
1087         }
1088         if !browserMethod[method] && !webdavMethod[method] {
1089                 w.WriteHeader(http.StatusMethodNotAllowed)
1090                 return true
1091         }
1092         w.Header().Set("Access-Control-Allow-Headers", corsAllowHeadersHeader)
1093         w.Header().Set("Access-Control-Allow-Methods", "COPY, DELETE, GET, LOCK, MKCOL, MOVE, OPTIONS, POST, PROPFIND, PROPPATCH, PUT, RMCOL, UNLOCK")
1094         w.Header().Set("Access-Control-Allow-Origin", "*")
1095         w.Header().Set("Access-Control-Max-Age", "86400")
1096         return true
1097 }