21764: Consolidate tree node extraction to be reused for tests, add TreePicker unit...
[arvados.git] / services / keep-web / handler.go
1 // Copyright (C) The Arvados Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: AGPL-3.0
4
5 package keepweb
6
7 import (
8         "encoding/json"
9         "errors"
10         "fmt"
11         "html"
12         "html/template"
13         "io"
14         "net/http"
15         "net/url"
16         "os"
17         "sort"
18         "strconv"
19         "strings"
20         "sync"
21         "time"
22
23         "git.arvados.org/arvados.git/lib/cmd"
24         "git.arvados.org/arvados.git/lib/webdavfs"
25         "git.arvados.org/arvados.git/sdk/go/arvados"
26         "git.arvados.org/arvados.git/sdk/go/arvadosclient"
27         "git.arvados.org/arvados.git/sdk/go/auth"
28         "git.arvados.org/arvados.git/sdk/go/ctxlog"
29         "git.arvados.org/arvados.git/sdk/go/httpserver"
30         "github.com/sirupsen/logrus"
31         "golang.org/x/net/webdav"
32 )
33
// handler is the keep-web request handler. The methods defined on it
// in this file are ServeHTTP (http.Handler), CheckHealth, and Done.
34 type handler struct {
        // Cache supplies per-token sessions; ServeHTTP calls
        // Cache.GetSession(token) for each candidate token.
35         Cache   cache
        // Cluster is the cluster configuration consulted by ServeHTTP
        // (Collections, Services, and Users settings).
36         Cluster *arvados.Cluster
        // metrics is used by ServeHTTP to track the webdav handler
        // invocation (metrics.track).
37         metrics *metrics
38
        // NOTE(review): lockMtx, lock, and lockTidied are not
        // referenced in the visible part of this file; presumably
        // they back h.collectionLock() -- confirm there.
39         lockMtx    sync.Mutex
40         lock       map[string]*sync.RWMutex
41         lockTidied time.Time
42 }
43
// urlPDHDecoder rewrites " " and "-" to "+". It is used by
// parseCollectionIDFromURL to recognize a portable data hash whose
// "+" separator was replaced in order to appear in a URL.
44 var urlPDHDecoder = strings.NewReplacer(" ", "+", "-", "+")
45
// notFoundMessage is the response body ServeHTTP sends with its "not
// found" replies (and with the 403 reply for a token scope problem).
46 var notFoundMessage = "Not Found"
// unauthorizedMessage is the response body ServeHTTP sends with 401
// replies that ask the client to supply credentials.
47 var unauthorizedMessage = "401 Unauthorized\n\nA valid Arvados token must be provided to access this resource."
48
49 // parseCollectionIDFromURL returns a UUID or PDH if s is a UUID or a
50 // PDH (even if it is a PDH with "+" replaced by " " or "-");
51 // otherwise "".
52 func parseCollectionIDFromURL(s string) string {
53         if arvadosclient.UUIDMatch(s) {
54                 return s
55         }
56         if pdh := urlPDHDecoder.Replace(s); arvadosclient.PDHMatch(pdh) {
57                 return pdh
58         }
59         return ""
60 }
61
62 func (h *handler) serveStatus(w http.ResponseWriter, r *http.Request) {
63         json.NewEncoder(w).Encode(struct{ Version string }{cmd.Version.String()})
64 }
65
// errorWithHTTPStatus is satisfied by errors that carry their own
// HTTP status code. Code in this file uses it as an errors.As target
// to pick the response status for an error.
66 type errorWithHTTPStatus interface {
67         HTTPStatus() int
68 }
69
70 // updateOnSuccess wraps httpserver.ResponseWriter. If the handler
71 // sends an HTTP header indicating success, updateOnSuccess first
72 // calls the provided update func. If the update func fails, an error
73 // response is sent (using the error's HTTP status or 500 if none),
74 // and the status code and body sent by the handler are ignored (all
75 // response writes return the update error).
76 type updateOnSuccess struct {
77         httpserver.ResponseWriter
        // logger receives an error-level message when update fails.
78         logger     logrus.FieldLogger
        // update is called at most once, from the first WriteHeader
        // call, and only if that call's status code is 2xx or 3xx.
79         update     func() error
        // sentHeader records that WriteHeader has been called.
80         sentHeader bool
        // err is the result of update(); when non-nil, Write returns
        // it instead of writing.
81         err        error
82 }
83
84 func (uos *updateOnSuccess) Write(p []byte) (int, error) {
85         if !uos.sentHeader {
86                 uos.WriteHeader(http.StatusOK)
87         }
88         if uos.err != nil {
89                 return 0, uos.err
90         }
91         return uos.ResponseWriter.Write(p)
92 }
93
94 func (uos *updateOnSuccess) WriteHeader(code int) {
95         if !uos.sentHeader {
96                 uos.sentHeader = true
97                 if code >= 200 && code < 400 {
98                         if uos.err = uos.update(); uos.err != nil {
99                                 code := http.StatusInternalServerError
100                                 if he := errorWithHTTPStatus(nil); errors.As(uos.err, &he) {
101                                         code = he.HTTPStatus()
102                                 }
103                                 uos.logger.WithError(uos.err).Errorf("update() returned %T error, changing response to HTTP %d", uos.err, code)
104                                 http.Error(uos.ResponseWriter, uos.err.Error(), code)
105                                 return
106                         }
107                 }
108         }
109         uos.ResponseWriter.WriteHeader(code)
110 }
111
112 var (
        // corsAllowHeadersHeader is a comma-separated list of request
        // header names.
        // NOTE(review): presumably sent as
        // Access-Control-Allow-Headers by ServeCORSPreflight, which
        // is defined elsewhere -- confirm.
113         corsAllowHeadersHeader = strings.Join([]string{
114                 "Authorization", "Content-Type", "Range",
115                 // WebDAV request headers:
116                 "Depth", "Destination", "If", "Lock-Token", "Overwrite", "Timeout", "Cache-Control",
117         }, ", ")
        // writeMethod lists the request methods ServeHTTP treats as
        // modifying a collection: they are refused for siteFS paths
        // and PDH-addressed collections, and otherwise cause the
        // collection to be opened read-write and saved on success.
118         writeMethod = map[string]bool{
119                 "COPY":      true,
120                 "DELETE":    true,
121                 "LOCK":      true,
122                 "MKCOL":     true,
123                 "MOVE":      true,
124                 "PROPPATCH": true,
125                 "PUT":       true,
126                 "RMCOL":     true,
127                 "UNLOCK":    true,
128         }
        // webdavMethod lists the WebDAV request methods ServeHTTP
        // accepts. A method found in neither webdavMethod nor
        // browserMethod gets a 405 response.
129         webdavMethod = map[string]bool{
130                 "COPY":      true,
131                 "DELETE":    true,
132                 "LOCK":      true,
133                 "MKCOL":     true,
134                 "MOVE":      true,
135                 "OPTIONS":   true,
136                 "PROPFIND":  true,
137                 "PROPPATCH": true,
138                 "PUT":       true,
139                 "RMCOL":     true,
140                 "UNLOCK":    true,
141         }
        // browserMethod lists the methods ServeHTTP treats as page
        // views: if such a request carries an api_token form value
        // (and is not a safe ajax/attachment request), it is
        // redirected with the token moved into a cookie.
142         browserMethod = map[string]bool{
143                 "GET":  true,
144                 "HEAD": true,
145                 "POST": true,
146         }
147         // top-level dirs to serve with siteFS
148         siteFSDir = map[string]bool{
149                 "":      true, // root directory
150                 "by_id": true,
151                 "users": true,
152         }
153 )
154
// stripDefaultPort returns host in lower case, with a trailing ":80"
// or ":443" removed, so that hosts can be compared as virtual host
// names regardless of whether the default port was spelled out.
//
// Ports 80 and 443 are both stripped regardless of scheme, so
// "example.com:80" and "example.com:443" are considered the same
// vhost.
//
// Only the port suffix is removed; in particular the brackets around
// an IPv6 literal are preserved, so "[::1]:443" and "[::1]" yield
// the same result.
func stripDefaultPort(host string) string {
	u := &url.URL{Host: host}
	if p := u.Port(); p == "80" || p == "443" {
		// Trim the suffix instead of using u.Hostname(): Hostname()
		// also strips the brackets from an IPv6 literal, which
		// would make "[::1]:443" compare unequal to "[::1]".
		host = strings.TrimSuffix(host, ":"+p)
	}
	return strings.ToLower(host)
}
164
165 // CheckHealth implements service.Handler.
//
// It always returns nil: this handler performs no health check of
// its own.
166 func (h *handler) CheckHealth() error {
167         return nil
168 }
169
170 // Done implements service.Handler.
//
// It returns a nil channel, so a receive on the returned channel
// never proceeds.
171 func (h *handler) Done() <-chan struct{} {
172         return nil
173 }
174
175 // ServeHTTP implements http.Handler.
176 func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
177         if xfp := r.Header.Get("X-Forwarded-Proto"); xfp != "" && xfp != "http" {
178                 r.URL.Scheme = xfp
179         }
180
181         wbuffer := newWriteBuffer(wOrig, int(h.Cluster.Collections.WebDAVOutputBuffer))
182         defer wbuffer.Close()
183         w := httpserver.WrapResponseWriter(responseWriter{
184                 Writer:         wbuffer,
185                 ResponseWriter: wOrig,
186         })
187
188         if r.Method == "OPTIONS" && ServeCORSPreflight(w, r.Header) {
189                 return
190         }
191
192         if !browserMethod[r.Method] && !webdavMethod[r.Method] {
193                 w.WriteHeader(http.StatusMethodNotAllowed)
194                 return
195         }
196
197         if r.Header.Get("Origin") != "" {
198                 // Allow simple cross-origin requests without user
199                 // credentials ("user credentials" as defined by CORS,
200                 // i.e., cookies, HTTP authentication, and client-side
201                 // SSL certificates. See
202                 // http://www.w3.org/TR/cors/#user-credentials).
203                 w.Header().Set("Access-Control-Allow-Origin", "*")
204                 w.Header().Set("Access-Control-Expose-Headers", "Content-Range")
205         }
206
207         if h.serveS3(w, r) {
208                 return
209         }
210
211         webdavPrefix := ""
212         arvPath := r.URL.Path
213         if prefix := r.Header.Get("X-Webdav-Prefix"); prefix != "" {
214                 // Enable a proxy (e.g., container log handler in
215                 // controller) to satisfy a request for path
216                 // "/foo/bar/baz.txt" using content from
217                 // "//abc123-4.internal/bar/baz.txt", by adding a
218                 // request header "X-Webdav-Prefix: /foo"
219                 if !strings.HasPrefix(arvPath, prefix) {
220                         http.Error(w, "X-Webdav-Prefix header is not a prefix of the requested path", http.StatusBadRequest)
221                         return
222                 }
223                 arvPath = r.URL.Path[len(prefix):]
224                 if arvPath == "" {
225                         arvPath = "/"
226                 }
227                 w.Header().Set("Vary", "X-Webdav-Prefix, "+w.Header().Get("Vary"))
228                 webdavPrefix = prefix
229         }
230         pathParts := strings.Split(arvPath[1:], "/")
231
232         var stripParts int
233         var collectionID string
234         var tokens []string
235         var reqTokens []string
236         var pathToken bool
237         var attachment bool
238         var useSiteFS bool
239         credentialsOK := h.Cluster.Collections.TrustAllContent
240         reasonNotAcceptingCredentials := ""
241
242         if r.Host != "" && stripDefaultPort(r.Host) == stripDefaultPort(h.Cluster.Services.WebDAVDownload.ExternalURL.Host) {
243                 credentialsOK = true
244                 attachment = true
245         } else if r.FormValue("disposition") == "attachment" {
246                 attachment = true
247         }
248
249         if !credentialsOK {
250                 reasonNotAcceptingCredentials = fmt.Sprintf("vhost %q does not specify a single collection ID or match Services.WebDAVDownload.ExternalURL %q, and Collections.TrustAllContent is false",
251                         r.Host, h.Cluster.Services.WebDAVDownload.ExternalURL)
252         }
253
254         if collectionID = arvados.CollectionIDFromDNSName(r.Host); collectionID != "" {
255                 // http://ID.collections.example/PATH...
256                 credentialsOK = true
257         } else if r.URL.Path == "/status.json" {
258                 h.serveStatus(w, r)
259                 return
260         } else if siteFSDir[pathParts[0]] {
261                 useSiteFS = true
262         } else if len(pathParts) >= 1 && strings.HasPrefix(pathParts[0], "c=") {
263                 // /c=ID[/PATH...]
264                 collectionID = parseCollectionIDFromURL(pathParts[0][2:])
265                 stripParts = 1
266         } else if len(pathParts) >= 2 && pathParts[0] == "collections" {
267                 if len(pathParts) >= 4 && pathParts[1] == "download" {
268                         // /collections/download/ID/TOKEN/PATH...
269                         collectionID = parseCollectionIDFromURL(pathParts[2])
270                         tokens = []string{pathParts[3]}
271                         stripParts = 4
272                         pathToken = true
273                 } else {
274                         // /collections/ID/PATH...
275                         collectionID = parseCollectionIDFromURL(pathParts[1])
276                         stripParts = 2
277                         // This path is only meant to work for public
278                         // data. Tokens provided with the request are
279                         // ignored.
280                         credentialsOK = false
281                         reasonNotAcceptingCredentials = "the '/collections/UUID/PATH' form only works for public data"
282                 }
283         }
284
285         forceReload := false
286         if cc := r.Header.Get("Cache-Control"); strings.Contains(cc, "no-cache") || strings.Contains(cc, "must-revalidate") {
287                 forceReload = true
288         }
289
290         if credentialsOK {
291                 reqTokens = auth.CredentialsFromRequest(r).Tokens
292         }
293
294         r.ParseForm()
295         origin := r.Header.Get("Origin")
296         cors := origin != "" && !strings.HasSuffix(origin, "://"+r.Host)
297         safeAjax := cors && (r.Method == http.MethodGet || r.Method == http.MethodHead)
298         // Important distinction: safeAttachment checks whether api_token exists
299         // as a query parameter. haveFormTokens checks whether api_token exists
300         // as request form data *or* a query parameter. Different checks are
301         // necessary because both the request disposition and the location of
302         // the API token affect whether or not the request needs to be
303         // redirected. The different branch comments below explain further.
304         safeAttachment := attachment && !r.URL.Query().Has("api_token")
305         if formTokens, haveFormTokens := r.Form["api_token"]; !haveFormTokens {
306                 // No token to use or redact.
307         } else if safeAjax || safeAttachment {
308                 // If this is a cross-origin request, the URL won't
309                 // appear in the browser's address bar, so
310                 // substituting a clipboard-safe URL is pointless.
311                 // Redirect-with-cookie wouldn't work anyway, because
312                 // it's not safe to allow third-party use of our
313                 // cookie.
314                 //
315                 // If we're supplying an attachment, we don't need to
316                 // convert POST to GET to avoid the "really resubmit
317                 // form?" problem, so provided the token isn't
318                 // embedded in the URL, there's no reason to do
319                 // redirect-with-cookie in this case either.
320                 for _, tok := range formTokens {
321                         reqTokens = append(reqTokens, tok)
322                 }
323         } else if browserMethod[r.Method] {
324                 // If this is a page view, and the client provided a
325                 // token via query string or POST body, we must put
326                 // the token in an HttpOnly cookie, and redirect to an
327                 // equivalent URL with the query param redacted and
328                 // method = GET.
329                 h.seeOtherWithCookie(w, r, "", credentialsOK)
330                 return
331         }
332
333         targetPath := pathParts[stripParts:]
334         if tokens == nil && len(targetPath) > 0 && strings.HasPrefix(targetPath[0], "t=") {
335                 // http://ID.example/t=TOKEN/PATH...
336                 // /c=ID/t=TOKEN/PATH...
337                 //
338                 // This form must only be used to pass scoped tokens
339                 // that give permission for a single collection. See
340                 // FormValue case above.
341                 tokens = []string{targetPath[0][2:]}
342                 pathToken = true
343                 targetPath = targetPath[1:]
344                 stripParts++
345         }
346
347         fsprefix := ""
348         if useSiteFS {
349                 if writeMethod[r.Method] {
350                         http.Error(w, webdavfs.ErrReadOnly.Error(), http.StatusMethodNotAllowed)
351                         return
352                 }
353                 if len(reqTokens) == 0 {
354                         w.Header().Add("WWW-Authenticate", "Basic realm=\"collections\"")
355                         http.Error(w, unauthorizedMessage, http.StatusUnauthorized)
356                         return
357                 }
358                 tokens = reqTokens
359         } else if collectionID == "" {
360                 http.Error(w, notFoundMessage, http.StatusNotFound)
361                 return
362         } else {
363                 fsprefix = "by_id/" + collectionID + "/"
364         }
365
366         if src := r.Header.Get("X-Webdav-Source"); strings.HasPrefix(src, "/") && !strings.Contains(src, "//") && !strings.Contains(src, "/../") {
367                 fsprefix += src[1:]
368         }
369
370         if tokens == nil {
371                 tokens = reqTokens
372                 if h.Cluster.Users.AnonymousUserToken != "" {
373                         tokens = append(tokens, h.Cluster.Users.AnonymousUserToken)
374                 }
375         }
376
377         if len(targetPath) > 0 && targetPath[0] == "_" {
378                 // If a collection has a directory called "t=foo" or
379                 // "_", it can be served at
380                 // //collections.example/_/t=foo/ or
381                 // //collections.example/_/_/ respectively:
382                 // //collections.example/t=foo/ won't work because
383                 // t=foo will be interpreted as a token "foo".
384                 targetPath = targetPath[1:]
385                 stripParts++
386         }
387
388         dirOpenMode := os.O_RDONLY
389         if writeMethod[r.Method] {
390                 dirOpenMode = os.O_RDWR
391         }
392
393         var tokenValid bool
394         var tokenScopeProblem bool
395         var token string
396         var tokenUser *arvados.User
397         var sessionFS arvados.CustomFileSystem
398         var session *cachedSession
399         var collectionDir arvados.File
400         for _, token = range tokens {
401                 var statusErr errorWithHTTPStatus
402                 fs, sess, user, err := h.Cache.GetSession(token)
403                 if errors.As(err, &statusErr) && statusErr.HTTPStatus() == http.StatusUnauthorized {
404                         // bad token
405                         continue
406                 } else if err != nil {
407                         http.Error(w, "cache error: "+err.Error(), http.StatusInternalServerError)
408                         return
409                 }
410                 if token != h.Cluster.Users.AnonymousUserToken {
411                         tokenValid = true
412                 }
413                 f, err := fs.OpenFile(fsprefix, dirOpenMode, 0)
414                 if errors.As(err, &statusErr) &&
415                         statusErr.HTTPStatus() == http.StatusForbidden &&
416                         token != h.Cluster.Users.AnonymousUserToken {
417                         // collection id is outside scope of supplied
418                         // token
419                         tokenScopeProblem = true
420                         sess.Release()
421                         continue
422                 } else if os.IsNotExist(err) {
423                         // collection does not exist or is not
424                         // readable using this token
425                         sess.Release()
426                         continue
427                 } else if err != nil {
428                         http.Error(w, err.Error(), http.StatusInternalServerError)
429                         sess.Release()
430                         return
431                 }
432                 defer f.Close()
433
434                 collectionDir, sessionFS, session, tokenUser = f, fs, sess, user
435                 break
436         }
437
438         // releaseSession() is equivalent to session.Release() except
439         // that it's a no-op if (1) session is nil, or (2) it has
440         // already been called.
441         //
442         // This way, we can do a defer call here to ensure it gets
443         // called in all code paths, and also call it inline (see
444         // below) in the cases where we want to release the lock
445         // before returning.
446         releaseSession := func() {}
447         if session != nil {
448                 var releaseSessionOnce sync.Once
449                 releaseSession = func() { releaseSessionOnce.Do(func() { session.Release() }) }
450         }
451         defer releaseSession()
452
453         if forceReload && collectionDir != nil {
454                 err := collectionDir.Sync()
455                 if err != nil {
456                         if he := errorWithHTTPStatus(nil); errors.As(err, &he) {
457                                 http.Error(w, err.Error(), he.HTTPStatus())
458                         } else {
459                                 http.Error(w, err.Error(), http.StatusInternalServerError)
460                         }
461                         return
462                 }
463         }
464         if session == nil {
465                 if pathToken {
466                         // The URL is a "secret sharing link" that
467                         // didn't work out.  Asking the client for
468                         // additional credentials would just be
469                         // confusing.
470                         http.Error(w, notFoundMessage, http.StatusNotFound)
471                         return
472                 }
473                 if tokenValid {
474                         // The client provided valid token(s), but the
475                         // collection was not found.
476                         http.Error(w, notFoundMessage, http.StatusNotFound)
477                         return
478                 }
479                 if tokenScopeProblem {
480                         // The client provided a valid token but
481                         // fetching a collection returned 401, which
482                         // means the token scope doesn't permit
483                         // fetching that collection.
484                         http.Error(w, notFoundMessage, http.StatusForbidden)
485                         return
486                 }
487                 // The client's token was invalid (e.g., expired), or
488                 // the client didn't even provide one.  Redirect to
489                 // workbench2's login-and-redirect-to-download url if
490                 // this is a browser navigation request. (The redirect
491                 // flow can't preserve the original method if it's not
492                 // GET, and doesn't make sense if the UA is a
493                 // command-line tool, is trying to load an inline
494                 // image, etc.; in these cases, there's nothing we can
495                 // do, so return 401 unauthorized.)
496                 //
497                 // Note Sec-Fetch-Mode is sent by all non-EOL
498                 // browsers, except Safari.
499                 // https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Sec-Fetch-Mode
500                 //
501                 // TODO(TC): This response would be confusing to
502                 // someone trying (anonymously) to download public
503                 // data that has been deleted.  Allow a referrer to
504                 // provide this context somehow?
505                 if r.Method == http.MethodGet && r.Header.Get("Sec-Fetch-Mode") == "navigate" {
506                         target := url.URL(h.Cluster.Services.Workbench2.ExternalURL)
507                         redirkey := "redirectToPreview"
508                         if attachment {
509                                 redirkey = "redirectToDownload"
510                         }
511                         callback := "/c=" + collectionID + "/" + strings.Join(targetPath, "/")
512                         // target.RawQuery = url.Values{redirkey:
513                         // {target}}.Encode() would be the obvious
514                         // thing to do here, but wb2 doesn't decode
515                         // this as a query param -- it takes
516                         // everything after "${redirkey}=" as the
517                         // target URL. If we encode "/" as "%2F" etc.,
518                         // the redirect won't work.
519                         target.RawQuery = redirkey + "=" + callback
520                         w.Header().Add("Location", target.String())
521                         w.WriteHeader(http.StatusSeeOther)
522                         return
523                 }
524                 if !credentialsOK {
525                         http.Error(w, fmt.Sprintf("Authorization tokens are not accepted here: %v, and no anonymous user token is configured.", reasonNotAcceptingCredentials), http.StatusUnauthorized)
526                         return
527                 }
528                 // If none of the above cases apply, suggest the
529                 // user-agent (which is either a non-browser agent
530                 // like wget, or a browser that can't redirect through
531                 // a login flow) prompt the user for credentials.
532                 w.Header().Add("WWW-Authenticate", "Basic realm=\"collections\"")
533                 http.Error(w, unauthorizedMessage, http.StatusUnauthorized)
534                 return
535         }
536
537         if r.Method == http.MethodGet || r.Method == http.MethodHead {
538                 targetfnm := fsprefix + strings.Join(pathParts[stripParts:], "/")
539                 if fi, err := sessionFS.Stat(targetfnm); err == nil && fi.IsDir() {
540                         releaseSession() // because we won't be writing anything
541                         if !strings.HasSuffix(r.URL.Path, "/") {
542                                 h.seeOtherWithCookie(w, r, r.URL.Path+"/", credentialsOK)
543                         } else {
544                                 h.serveDirectory(w, r, fi.Name(), sessionFS, targetfnm, !useSiteFS)
545                         }
546                         return
547                 }
548         }
549
550         var basename string
551         if len(targetPath) > 0 {
552                 basename = targetPath[len(targetPath)-1]
553         }
554         if arvadosclient.PDHMatch(collectionID) && writeMethod[r.Method] {
555                 http.Error(w, webdavfs.ErrReadOnly.Error(), http.StatusMethodNotAllowed)
556                 return
557         }
558         if !h.userPermittedToUploadOrDownload(r.Method, tokenUser) {
559                 http.Error(w, "Not permitted", http.StatusForbidden)
560                 return
561         }
562         h.logUploadOrDownload(r, session.arvadosclient, sessionFS, fsprefix+strings.Join(targetPath, "/"), nil, tokenUser)
563
564         writing := writeMethod[r.Method]
565         locker := h.collectionLock(collectionID, writing)
566         defer locker.Unlock()
567
568         if writing {
569                 // Save the collection only if/when all
570                 // webdav->filesystem operations succeed --
571                 // and send a 500 error if the modified
572                 // collection can't be saved.
573                 //
574                 // Perform the write in a separate sitefs, so
575                 // concurrent read operations on the same
576                 // collection see the previous saved
577                 // state. After the write succeeds and the
578                 // collection record is updated, we reset the
579                 // session so the updates are visible in
580                 // subsequent read requests.
581                 client := session.client.WithRequestID(r.Header.Get("X-Request-Id"))
582                 sessionFS = client.SiteFileSystem(session.keepclient)
583                 writingDir, err := sessionFS.OpenFile(fsprefix, os.O_RDONLY, 0)
584                 if err != nil {
585                         http.Error(w, err.Error(), http.StatusInternalServerError)
586                         return
587                 }
588                 defer writingDir.Close()
589                 w = &updateOnSuccess{
590                         ResponseWriter: w,
591                         logger:         ctxlog.FromContext(r.Context()),
592                         update: func() error {
593                                 err := writingDir.Sync()
594                                 var te arvados.TransactionError
595                                 if errors.As(err, &te) {
596                                         err = te
597                                 }
598                                 if err != nil {
599                                         return err
600                                 }
601                                 // Sync the changes to the persistent
602                                 // sessionfs for this token.
603                                 snap, err := writingDir.Snapshot()
604                                 if err != nil {
605                                         return err
606                                 }
607                                 collectionDir.Splice(snap)
608                                 return nil
609                         }}
610         } else {
611                 // When writing, we need to block session renewal
612                 // until we're finished, in order to guarantee the
613                 // effect of the write is visible in future responses.
614                 // But if we're not writing, we can release the lock
615                 // early.  This enables us to keep renewing sessions
616                 // and processing more requests even if a slow client
617                 // takes a long time to download a large file.
618                 releaseSession()
619         }
620         if r.Method == http.MethodGet {
621                 applyContentDispositionHdr(w, r, basename, attachment)
622         }
623         if webdavPrefix == "" {
624                 webdavPrefix = "/" + strings.Join(pathParts[:stripParts], "/")
625         }
626         wh := &webdav.Handler{
627                 Prefix: webdavPrefix,
628                 FileSystem: &webdavfs.FS{
629                         FileSystem:    sessionFS,
630                         Prefix:        fsprefix,
631                         Writing:       writeMethod[r.Method],
632                         AlwaysReadEOF: r.Method == "PROPFIND",
633                 },
634                 LockSystem: webdavfs.NoLockSystem,
635                 Logger: func(r *http.Request, err error) {
636                         if err != nil && !os.IsNotExist(err) {
637                                 ctxlog.FromContext(r.Context()).WithError(err).Error("error reported by webdav handler")
638                         }
639                 },
640         }
641         h.metrics.track(wh, w, r)
642         if r.Method == http.MethodGet && w.WroteStatus() == http.StatusOK {
643                 wrote := int64(w.WroteBodyBytes())
644                 fnm := strings.Join(pathParts[stripParts:], "/")
645                 fi, err := wh.FileSystem.Stat(r.Context(), fnm)
646                 if err == nil && fi.Size() != wrote {
647                         var n int
648                         f, err := wh.FileSystem.OpenFile(r.Context(), fnm, os.O_RDONLY, 0)
649                         if err == nil {
650                                 n, err = f.Read(make([]byte, 1024))
651                                 f.Close()
652                         }
653                         ctxlog.FromContext(r.Context()).Errorf("stat.Size()==%d but only wrote %d bytes; read(1024) returns %d, %v", fi.Size(), wrote, n, err)
654                 }
655         }
656 }
657
// dirListingTemplate is the HTML template text for a directory
// listing page. As written, it expects its data to provide
// .CollectionName, .StripParts, .Request (with .Host and .URL.Path),
// and .Files (a list of entries with .Name, .Size, and .IsDir), and
// it requires a template function named "nbsp" to be defined when it
// is parsed.
658 var dirListingTemplate = `<!DOCTYPE HTML>
659 <HTML><HEAD>
660   <META name="robots" content="NOINDEX">
661   <TITLE>{{ .CollectionName }}</TITLE>
662   <STYLE type="text/css">
663     body {
664       margin: 1.5em;
665     }
666     pre {
667       background-color: #D9EDF7;
668       border-radius: .25em;
669       padding: .75em;
670       overflow: auto;
671     }
672     .footer p {
673       font-size: 82%;
674     }
675     ul {
676       padding: 0;
677     }
678     ul li {
679       font-family: monospace;
680       list-style: none;
681     }
682   </STYLE>
683 </HEAD>
684 <BODY>
685
686 <H1>{{ .CollectionName }}</H1>
687
688 <P>This collection of data files is being shared with you through
689 Arvados.  You can download individual files listed below.  To download
690 the entire directory tree with wget, try:</P>
691
692 <PRE>$ wget --mirror --no-parent --no-host --cut-dirs={{ .StripParts }} https://{{ .Request.Host }}{{ .Request.URL.Path }}</PRE>
693
694 <H2>File Listing</H2>
695
696 {{if .Files}}
697 <UL>
698 {{range .Files}}
699 {{if .IsDir }}
700   <LI>{{" " | printf "%15s  " | nbsp}}<A href="{{print "./" .Name}}/">{{.Name}}/</A></LI>
701 {{else}}
702   <LI>{{.Size | printf "%15d  " | nbsp}}<A href="{{print "./" .Name}}">{{.Name}}</A></LI>
703 {{end}}
704 {{end}}
705 </UL>
706 {{else}}
707 <P>(No files; this collection is empty.)</P>
708 {{end}}
709
710 <HR noshade>
711 <DIV class="footer">
712   <P>
713     About Arvados:
714     Arvados is a free and open source software bioinformatics platform.
715     To learn more, visit arvados.org.
716     Arvados is not responsible for the files listed on this page.
717   </P>
718 </DIV>
719
720 </BODY>
721 `
722
// fileListEnt is one entry (file or directory) in the listing
// rendered by dirListingTemplate.
type fileListEnt struct {
	// Name is the entry's path as shown and linked in the listing.
	Name string
	// Size is the size reported by the entry's FileInfo; the
	// template prints it only for non-directory entries.
	Size int64
	// IsDir is true if the entry is a directory, in which case
	// the template renders it with a trailing slash.
	IsDir bool
}
728
729 func (h *handler) serveDirectory(w http.ResponseWriter, r *http.Request, collectionName string, fs http.FileSystem, base string, recurse bool) {
730         var files []fileListEnt
731         var walk func(string) error
732         if !strings.HasSuffix(base, "/") {
733                 base = base + "/"
734         }
735         walk = func(path string) error {
736                 dirname := base + path
737                 if dirname != "/" {
738                         dirname = strings.TrimSuffix(dirname, "/")
739                 }
740                 d, err := fs.Open(dirname)
741                 if err != nil {
742                         return err
743                 }
744                 ents, err := d.Readdir(-1)
745                 if err != nil {
746                         return err
747                 }
748                 for _, ent := range ents {
749                         if recurse && ent.IsDir() {
750                                 err = walk(path + ent.Name() + "/")
751                                 if err != nil {
752                                         return err
753                                 }
754                         } else {
755                                 files = append(files, fileListEnt{
756                                         Name:  path + ent.Name(),
757                                         Size:  ent.Size(),
758                                         IsDir: ent.IsDir(),
759                                 })
760                         }
761                 }
762                 return nil
763         }
764         if err := walk(""); err != nil {
765                 http.Error(w, "error getting directory listing: "+err.Error(), http.StatusInternalServerError)
766                 return
767         }
768
769         funcs := template.FuncMap{
770                 "nbsp": func(s string) template.HTML {
771                         return template.HTML(strings.Replace(s, " ", "&nbsp;", -1))
772                 },
773         }
774         tmpl, err := template.New("dir").Funcs(funcs).Parse(dirListingTemplate)
775         if err != nil {
776                 http.Error(w, "error parsing template: "+err.Error(), http.StatusInternalServerError)
777                 return
778         }
779         sort.Slice(files, func(i, j int) bool {
780                 return files[i].Name < files[j].Name
781         })
782         w.WriteHeader(http.StatusOK)
783         tmpl.Execute(w, map[string]interface{}{
784                 "CollectionName": collectionName,
785                 "Files":          files,
786                 "Request":        r,
787                 "StripParts":     strings.Count(strings.TrimRight(r.URL.Path, "/"), "/"),
788         })
789 }
790
// applyContentDispositionHdr sets the Content-Disposition response
// header, if one is needed.
//
// The disposition type is "attachment" if isAttachment is true,
// otherwise "inline". When the request URI contains a query string, a
// filename parameter is added so the user agent does not mistake the
// query string for part of the filename. No header is set when the
// result would be a bare "inline".
func applyContentDispositionHdr(w http.ResponseWriter, r *http.Request, filename string, isAttachment bool) {
	hasQuery := strings.Contains(r.RequestURI, "?")
	if !isAttachment && !hasQuery {
		// Bare "inline" is the default behavior; send nothing.
		return
	}
	value := "inline"
	if isAttachment {
		value = "attachment"
	}
	if hasQuery {
		// Help the UA realize that the filename is just
		// "filename.txt", not
		// "filename.txt?disposition=attachment".
		//
		// TODO(TC): Follow advice at RFC 6266 appendix D
		value += "; filename=" + strconv.QuoteToASCII(filename)
	}
	w.Header().Set("Content-Disposition", value)
}
808
809 func (h *handler) seeOtherWithCookie(w http.ResponseWriter, r *http.Request, location string, credentialsOK bool) {
810         if formTokens, haveFormTokens := r.Form["api_token"]; haveFormTokens {
811                 if !credentialsOK {
812                         // It is not safe to copy the provided token
813                         // into a cookie unless the current vhost
814                         // (origin) serves only a single collection or
815                         // we are in TrustAllContent mode.
816                         http.Error(w, "cannot serve inline content at this URL (possible configuration error; see https://doc.arvados.org/install/install-keep-web.html#dns)", http.StatusBadRequest)
817                         return
818                 }
819
820                 // The HttpOnly flag is necessary to prevent
821                 // JavaScript code (included in, or loaded by, a page
822                 // in the collection being served) from employing the
823                 // user's token beyond reading other files in the same
824                 // domain, i.e., same collection.
825                 //
826                 // The 303 redirect is necessary in the case of a GET
827                 // request to avoid exposing the token in the Location
828                 // bar, and in the case of a POST request to avoid
829                 // raising warnings when the user refreshes the
830                 // resulting page.
831                 for _, tok := range formTokens {
832                         if tok == "" {
833                                 continue
834                         }
835                         http.SetCookie(w, &http.Cookie{
836                                 Name:     "arvados_api_token",
837                                 Value:    auth.EncodeTokenCookie([]byte(tok)),
838                                 Path:     "/",
839                                 HttpOnly: true,
840                                 SameSite: http.SameSiteLaxMode,
841                         })
842                         break
843                 }
844         }
845
846         // Propagate query parameters (except api_token) from
847         // the original request.
848         redirQuery := r.URL.Query()
849         redirQuery.Del("api_token")
850
851         u := r.URL
852         if location != "" {
853                 newu, err := u.Parse(location)
854                 if err != nil {
855                         http.Error(w, "error resolving redirect target: "+err.Error(), http.StatusInternalServerError)
856                         return
857                 }
858                 u = newu
859         }
860         redir := (&url.URL{
861                 Scheme:   r.URL.Scheme,
862                 Host:     r.Host,
863                 Path:     u.Path,
864                 RawQuery: redirQuery.Encode(),
865         }).String()
866
867         w.Header().Add("Location", redir)
868         w.WriteHeader(http.StatusSeeOther)
869         io.WriteString(w, `<A href="`)
870         io.WriteString(w, html.EscapeString(redir))
871         io.WriteString(w, `">Continue</A>`)
872 }
873
874 func (h *handler) userPermittedToUploadOrDownload(method string, tokenUser *arvados.User) bool {
875         var permitDownload bool
876         var permitUpload bool
877         if tokenUser != nil && tokenUser.IsAdmin {
878                 permitUpload = h.Cluster.Collections.WebDAVPermission.Admin.Upload
879                 permitDownload = h.Cluster.Collections.WebDAVPermission.Admin.Download
880         } else {
881                 permitUpload = h.Cluster.Collections.WebDAVPermission.User.Upload
882                 permitDownload = h.Cluster.Collections.WebDAVPermission.User.Download
883         }
884         if (method == "PUT" || method == "POST") && !permitUpload {
885                 // Disallow operations that upload new files.
886                 // Permit webdav operations that move existing files around.
887                 return false
888         } else if method == "GET" && !permitDownload {
889                 // Disallow downloading file contents.
890                 // Permit webdav operations like PROPFIND that retrieve metadata
891                 // but not file contents.
892                 return false
893         }
894         return true
895 }
896
897 func (h *handler) logUploadOrDownload(
898         r *http.Request,
899         client *arvadosclient.ArvadosClient,
900         fs arvados.CustomFileSystem,
901         filepath string,
902         collection *arvados.Collection,
903         user *arvados.User) {
904
905         log := ctxlog.FromContext(r.Context())
906         props := make(map[string]string)
907         props["reqPath"] = r.URL.Path
908         var useruuid string
909         if user != nil {
910                 log = log.WithField("user_uuid", user.UUID).
911                         WithField("user_full_name", user.FullName)
912                 useruuid = user.UUID
913         } else {
914                 useruuid = fmt.Sprintf("%s-tpzed-anonymouspublic", h.Cluster.ClusterID)
915         }
916         if collection == nil && fs != nil {
917                 collection, filepath = h.determineCollection(fs, filepath)
918         }
919         if collection != nil {
920                 log = log.WithField("collection_file_path", filepath)
921                 props["collection_file_path"] = filepath
922                 // h.determineCollection populates the collection_uuid
923                 // prop with the PDH, if this collection is being
924                 // accessed via PDH. For logging, we use a different
925                 // field depending on whether it's a UUID or PDH.
926                 if len(collection.UUID) > 32 {
927                         log = log.WithField("portable_data_hash", collection.UUID)
928                         props["portable_data_hash"] = collection.UUID
929                 } else {
930                         log = log.WithField("collection_uuid", collection.UUID)
931                         props["collection_uuid"] = collection.UUID
932                 }
933         }
934         if r.Method == "PUT" || r.Method == "POST" {
935                 log.Info("File upload")
936                 if h.Cluster.Collections.WebDAVLogEvents {
937                         go func() {
938                                 lr := arvadosclient.Dict{"log": arvadosclient.Dict{
939                                         "object_uuid": useruuid,
940                                         "event_type":  "file_upload",
941                                         "properties":  props}}
942                                 err := client.Create("logs", lr, nil)
943                                 if err != nil {
944                                         log.WithError(err).Error("Failed to create upload log event on API server")
945                                 }
946                         }()
947                 }
948         } else if r.Method == "GET" {
949                 if collection != nil && collection.PortableDataHash != "" {
950                         log = log.WithField("portable_data_hash", collection.PortableDataHash)
951                         props["portable_data_hash"] = collection.PortableDataHash
952                 }
953                 log.Info("File download")
954                 if h.Cluster.Collections.WebDAVLogEvents {
955                         go func() {
956                                 lr := arvadosclient.Dict{"log": arvadosclient.Dict{
957                                         "object_uuid": useruuid,
958                                         "event_type":  "file_download",
959                                         "properties":  props}}
960                                 err := client.Create("logs", lr, nil)
961                                 if err != nil {
962                                         log.WithError(err).Error("Failed to create download log event on API server")
963                                 }
964                         }()
965                 }
966         }
967 }
968
969 func (h *handler) determineCollection(fs arvados.CustomFileSystem, path string) (*arvados.Collection, string) {
970         target := strings.TrimSuffix(path, "/")
971         for cut := len(target); cut >= 0; cut = strings.LastIndexByte(target, '/') {
972                 target = target[:cut]
973                 fi, err := fs.Stat(target)
974                 if os.IsNotExist(err) {
975                         // creating a new file/dir, or download
976                         // destined to fail
977                         continue
978                 } else if err != nil {
979                         return nil, ""
980                 }
981                 switch src := fi.Sys().(type) {
982                 case *arvados.Collection:
983                         return src, strings.TrimPrefix(path[len(target):], "/")
984                 case *arvados.Group:
985                         return nil, ""
986                 default:
987                         if _, ok := src.(error); ok {
988                                 return nil, ""
989                         }
990                 }
991         }
992         return nil, ""
993 }
994
// lockTidyInterval is the minimum time between the sweeps in
// collectionLock that discard per-collection locks not currently in
// use.
var lockTidyInterval = time.Minute * 10
996
997 // Lock the specified collection for reading or writing. Caller must
998 // call Unlock() on the returned Locker when the operation is
999 // finished.
1000 func (h *handler) collectionLock(collectionID string, writing bool) sync.Locker {
1001         h.lockMtx.Lock()
1002         defer h.lockMtx.Unlock()
1003         if time.Since(h.lockTidied) > lockTidyInterval {
1004                 // Periodically delete all locks that aren't in use.
1005                 h.lockTidied = time.Now()
1006                 for id, locker := range h.lock {
1007                         if locker.TryLock() {
1008                                 locker.Unlock()
1009                                 delete(h.lock, id)
1010                         }
1011                 }
1012         }
1013         locker := h.lock[collectionID]
1014         if locker == nil {
1015                 locker = new(sync.RWMutex)
1016                 if h.lock == nil {
1017                         h.lock = map[string]*sync.RWMutex{}
1018                 }
1019                 h.lock[collectionID] = locker
1020         }
1021         if writing {
1022                 locker.Lock()
1023                 return locker
1024         } else {
1025                 locker.RLock()
1026                 return locker.RLocker()
1027         }
1028 }
1029
1030 func ServeCORSPreflight(w http.ResponseWriter, header http.Header) bool {
1031         method := header.Get("Access-Control-Request-Method")
1032         if method == "" {
1033                 return false
1034         }
1035         if !browserMethod[method] && !webdavMethod[method] {
1036                 w.WriteHeader(http.StatusMethodNotAllowed)
1037                 return true
1038         }
1039         w.Header().Set("Access-Control-Allow-Headers", corsAllowHeadersHeader)
1040         w.Header().Set("Access-Control-Allow-Methods", "COPY, DELETE, GET, LOCK, MKCOL, MOVE, OPTIONS, POST, PROPFIND, PROPPATCH, PUT, RMCOL, UNLOCK")
1041         w.Header().Set("Access-Control-Allow-Origin", "*")
1042         w.Header().Set("Access-Control-Max-Age", "86400")
1043         return true
1044 }