7ac8bc02dbe12a64b1dab4134141c3474bfd5ccb
[arvados.git] / services / keep-web / handler.go
1 // Copyright (C) The Arvados Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: AGPL-3.0
4
5 package keepweb
6
7 import (
8         "encoding/json"
9         "errors"
10         "fmt"
11         "html"
12         "html/template"
13         "io"
14         "net/http"
15         "net/url"
16         "os"
17         "path/filepath"
18         "sort"
19         "strconv"
20         "strings"
21         "sync"
22
23         "git.arvados.org/arvados.git/sdk/go/arvados"
24         "git.arvados.org/arvados.git/sdk/go/arvadosclient"
25         "git.arvados.org/arvados.git/sdk/go/auth"
26         "git.arvados.org/arvados.git/sdk/go/ctxlog"
27         "git.arvados.org/arvados.git/sdk/go/httpserver"
28         "git.arvados.org/arvados.git/sdk/go/keepclient"
29         "github.com/sirupsen/logrus"
30         "golang.org/x/net/webdav"
31 )
32
33 type handler struct {
34         Cache      cache
35         Cluster    *arvados.Cluster
36         clientPool *arvadosclient.ClientPool
37         setupOnce  sync.Once
38         webdavLS   webdav.LockSystem
39 }
40
var (
	// urlPDHDecoder turns the " " and "-" characters that may
	// stand in for "+" in a URL-embedded portable data hash
	// back into "+".
	urlPDHDecoder = strings.NewReplacer(" ", "+", "-", "+")

	// notFoundMessage is the response body sent with 404
	// responses.
	notFoundMessage = "Not Found"

	// unauthorizedMessage is the response body sent with 401
	// responses.
	unauthorizedMessage = "401 Unauthorized\r\n\r\nA valid Arvados token must be provided to access this resource.\r\n"
)
45
46 // parseCollectionIDFromURL returns a UUID or PDH if s is a UUID or a
47 // PDH (even if it is a PDH with "+" replaced by " " or "-");
48 // otherwise "".
49 func parseCollectionIDFromURL(s string) string {
50         if arvadosclient.UUIDMatch(s) {
51                 return s
52         }
53         if pdh := urlPDHDecoder.Replace(s); arvadosclient.PDHMatch(pdh) {
54                 return pdh
55         }
56         return ""
57 }
58
59 func (h *handler) setup() {
60         // Errors will be handled at the client pool.
61         arv, _ := arvados.NewClientFromConfig(h.Cluster)
62         h.clientPool = arvadosclient.MakeClientPoolWith(arv)
63
64         keepclient.DefaultBlockCache.MaxBlocks = h.Cluster.Collections.WebDAVCache.MaxBlockEntries
65
66         // Even though we don't accept LOCK requests, every webdav
67         // handler must have a non-nil LockSystem.
68         h.webdavLS = &noLockSystem{}
69 }
70
71 func (h *handler) serveStatus(w http.ResponseWriter, r *http.Request) {
72         json.NewEncoder(w).Encode(struct{ Version string }{version})
73 }
74
75 // updateOnSuccess wraps httpserver.ResponseWriter. If the handler
76 // sends an HTTP header indicating success, updateOnSuccess first
77 // calls the provided update func. If the update func fails, an error
78 // response is sent (using the error's HTTP status or 500 if none),
79 // and the status code and body sent by the handler are ignored (all
80 // response writes return the update error).
81 type updateOnSuccess struct {
82         httpserver.ResponseWriter
83         logger     logrus.FieldLogger
84         update     func() error
85         sentHeader bool
86         err        error
87 }
88
89 func (uos *updateOnSuccess) Write(p []byte) (int, error) {
90         if !uos.sentHeader {
91                 uos.WriteHeader(http.StatusOK)
92         }
93         if uos.err != nil {
94                 return 0, uos.err
95         }
96         return uos.ResponseWriter.Write(p)
97 }
98
99 func (uos *updateOnSuccess) WriteHeader(code int) {
100         if !uos.sentHeader {
101                 uos.sentHeader = true
102                 if code >= 200 && code < 400 {
103                         if uos.err = uos.update(); uos.err != nil {
104                                 code := http.StatusInternalServerError
105                                 var he interface{ HTTPStatus() int }
106                                 if errors.As(uos.err, &he) {
107                                         code = he.HTTPStatus()
108                                 }
109                                 uos.logger.WithError(uos.err).Errorf("update() returned %T error, changing response to HTTP %d", uos.err, code)
110                                 http.Error(uos.ResponseWriter, uos.err.Error(), code)
111                                 return
112                         }
113                 }
114         }
115         uos.ResponseWriter.WriteHeader(code)
116 }
117
var (
	// corsAllowHeadersHeader is the Access-Control-Allow-Headers
	// value sent in response to CORS preflight requests.
	corsAllowHeadersHeader = strings.Join([]string{
		"Authorization", "Content-Type", "Range",
		// WebDAV request headers:
		"Depth", "Destination", "If", "Lock-Token", "Overwrite", "Timeout",
	}, ", ")

	// writeMethod is the set of request methods treated as
	// write operations.
	writeMethod = map[string]bool{
		"COPY": true, "DELETE": true, "LOCK": true,
		"MKCOL": true, "MOVE": true, "PROPPATCH": true,
		"PUT": true, "RMCOL": true, "UNLOCK": true,
	}

	// webdavMethod is the set of accepted WebDAV request
	// methods.
	webdavMethod = map[string]bool{
		"COPY": true, "DELETE": true, "LOCK": true,
		"MKCOL": true, "MOVE": true, "OPTIONS": true,
		"PROPFIND": true, "PROPPATCH": true, "PUT": true,
		"RMCOL": true, "UNLOCK": true,
	}

	// browserMethod is the set of accepted request methods
	// used for ordinary browser page views and form posts.
	browserMethod = map[string]bool{
		"GET": true, "HEAD": true, "POST": true,
	}

	// siteFSDir is the set of top-level dirs to serve with
	// siteFS.
	siteFSDir = map[string]bool{
		"":      true, // root directory
		"by_id": true,
		"users": true,
	}
)
160
// stripDefaultPort returns host in lower case, with a trailing ":80"
// or ":443" port removed. Any other port is kept.
//
// Port 80 and port 443 are deliberately treated as the same vhost.
//
// Only the ":port" suffix is removed, so a bracketed IPv6 literal
// keeps its brackets: "[::1]:443" and "[::1]" both normalize to
// "[::1]" and therefore compare equal.
func stripDefaultPort(host string) string {
	u := &url.URL{Host: host}
	if p := u.Port(); p == "80" || p == "443" {
		return strings.ToLower(strings.TrimSuffix(host, ":"+p))
	}
	return strings.ToLower(host)
}
170
171 // CheckHealth implements service.Handler.
172 func (h *handler) CheckHealth() error {
173         return nil
174 }
175
176 // Done implements service.Handler.
177 func (h *handler) Done() <-chan struct{} {
178         return nil
179 }
180
181 // ServeHTTP implements http.Handler.
182 func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
183         h.setupOnce.Do(h.setup)
184
185         if xfp := r.Header.Get("X-Forwarded-Proto"); xfp != "" && xfp != "http" {
186                 r.URL.Scheme = xfp
187         }
188
189         w := httpserver.WrapResponseWriter(wOrig)
190
191         if method := r.Header.Get("Access-Control-Request-Method"); method != "" && r.Method == "OPTIONS" {
192                 if !browserMethod[method] && !webdavMethod[method] {
193                         w.WriteHeader(http.StatusMethodNotAllowed)
194                         return
195                 }
196                 w.Header().Set("Access-Control-Allow-Headers", corsAllowHeadersHeader)
197                 w.Header().Set("Access-Control-Allow-Methods", "COPY, DELETE, GET, LOCK, MKCOL, MOVE, OPTIONS, POST, PROPFIND, PROPPATCH, PUT, RMCOL, UNLOCK")
198                 w.Header().Set("Access-Control-Allow-Origin", "*")
199                 w.Header().Set("Access-Control-Max-Age", "86400")
200                 return
201         }
202
203         if !browserMethod[r.Method] && !webdavMethod[r.Method] {
204                 w.WriteHeader(http.StatusMethodNotAllowed)
205                 return
206         }
207
208         if r.Header.Get("Origin") != "" {
209                 // Allow simple cross-origin requests without user
210                 // credentials ("user credentials" as defined by CORS,
211                 // i.e., cookies, HTTP authentication, and client-side
212                 // SSL certificates. See
213                 // http://www.w3.org/TR/cors/#user-credentials).
214                 w.Header().Set("Access-Control-Allow-Origin", "*")
215                 w.Header().Set("Access-Control-Expose-Headers", "Content-Range")
216         }
217
218         if h.serveS3(w, r) {
219                 return
220         }
221
222         pathParts := strings.Split(r.URL.Path[1:], "/")
223
224         var stripParts int
225         var collectionID string
226         var tokens []string
227         var reqTokens []string
228         var pathToken bool
229         var attachment bool
230         var useSiteFS bool
231         credentialsOK := h.Cluster.Collections.TrustAllContent
232         reasonNotAcceptingCredentials := ""
233
234         if r.Host != "" && stripDefaultPort(r.Host) == stripDefaultPort(h.Cluster.Services.WebDAVDownload.ExternalURL.Host) {
235                 credentialsOK = true
236                 attachment = true
237         } else if r.FormValue("disposition") == "attachment" {
238                 attachment = true
239         }
240
241         if !credentialsOK {
242                 reasonNotAcceptingCredentials = fmt.Sprintf("vhost %q does not specify a single collection ID or match Services.WebDAVDownload.ExternalURL %q, and Collections.TrustAllContent is false",
243                         r.Host, h.Cluster.Services.WebDAVDownload.ExternalURL)
244         }
245
246         if collectionID = arvados.CollectionIDFromDNSName(r.Host); collectionID != "" {
247                 // http://ID.collections.example/PATH...
248                 credentialsOK = true
249         } else if r.URL.Path == "/status.json" {
250                 h.serveStatus(w, r)
251                 return
252         } else if siteFSDir[pathParts[0]] {
253                 useSiteFS = true
254         } else if len(pathParts) >= 1 && strings.HasPrefix(pathParts[0], "c=") {
255                 // /c=ID[/PATH...]
256                 collectionID = parseCollectionIDFromURL(pathParts[0][2:])
257                 stripParts = 1
258         } else if len(pathParts) >= 2 && pathParts[0] == "collections" {
259                 if len(pathParts) >= 4 && pathParts[1] == "download" {
260                         // /collections/download/ID/TOKEN/PATH...
261                         collectionID = parseCollectionIDFromURL(pathParts[2])
262                         tokens = []string{pathParts[3]}
263                         stripParts = 4
264                         pathToken = true
265                 } else {
266                         // /collections/ID/PATH...
267                         collectionID = parseCollectionIDFromURL(pathParts[1])
268                         stripParts = 2
269                         // This path is only meant to work for public
270                         // data. Tokens provided with the request are
271                         // ignored.
272                         credentialsOK = false
273                         reasonNotAcceptingCredentials = "the '/collections/UUID/PATH' form only works for public data"
274                 }
275         }
276
277         if collectionID == "" && !useSiteFS {
278                 http.Error(w, notFoundMessage, http.StatusNotFound)
279                 return
280         }
281
282         forceReload := false
283         if cc := r.Header.Get("Cache-Control"); strings.Contains(cc, "no-cache") || strings.Contains(cc, "must-revalidate") {
284                 forceReload = true
285         }
286
287         if credentialsOK {
288                 reqTokens = auth.CredentialsFromRequest(r).Tokens
289         }
290
291         formToken := r.FormValue("api_token")
292         origin := r.Header.Get("Origin")
293         cors := origin != "" && !strings.HasSuffix(origin, "://"+r.Host)
294         safeAjax := cors && (r.Method == http.MethodGet || r.Method == http.MethodHead)
295         safeAttachment := attachment && r.URL.Query().Get("api_token") == ""
296         if formToken == "" {
297                 // No token to use or redact.
298         } else if safeAjax || safeAttachment {
299                 // If this is a cross-origin request, the URL won't
300                 // appear in the browser's address bar, so
301                 // substituting a clipboard-safe URL is pointless.
302                 // Redirect-with-cookie wouldn't work anyway, because
303                 // it's not safe to allow third-party use of our
304                 // cookie.
305                 //
306                 // If we're supplying an attachment, we don't need to
307                 // convert POST to GET to avoid the "really resubmit
308                 // form?" problem, so provided the token isn't
309                 // embedded in the URL, there's no reason to do
310                 // redirect-with-cookie in this case either.
311                 reqTokens = append(reqTokens, formToken)
312         } else if browserMethod[r.Method] {
313                 // If this is a page view, and the client provided a
314                 // token via query string or POST body, we must put
315                 // the token in an HttpOnly cookie, and redirect to an
316                 // equivalent URL with the query param redacted and
317                 // method = GET.
318                 h.seeOtherWithCookie(w, r, "", credentialsOK)
319                 return
320         }
321
322         if useSiteFS {
323                 h.serveSiteFS(w, r, reqTokens, credentialsOK, attachment)
324                 return
325         }
326
327         targetPath := pathParts[stripParts:]
328         if tokens == nil && len(targetPath) > 0 && strings.HasPrefix(targetPath[0], "t=") {
329                 // http://ID.example/t=TOKEN/PATH...
330                 // /c=ID/t=TOKEN/PATH...
331                 //
332                 // This form must only be used to pass scoped tokens
333                 // that give permission for a single collection. See
334                 // FormValue case above.
335                 tokens = []string{targetPath[0][2:]}
336                 pathToken = true
337                 targetPath = targetPath[1:]
338                 stripParts++
339         }
340
341         if tokens == nil {
342                 tokens = reqTokens
343                 if h.Cluster.Users.AnonymousUserToken != "" {
344                         tokens = append(tokens, h.Cluster.Users.AnonymousUserToken)
345                 }
346         }
347
348         if tokens == nil {
349                 if !credentialsOK {
350                         http.Error(w, fmt.Sprintf("Authorization tokens are not accepted here: %v, and no anonymous user token is configured.", reasonNotAcceptingCredentials), http.StatusUnauthorized)
351                 } else {
352                         http.Error(w, fmt.Sprintf("No authorization token in request, and no anonymous user token is configured."), http.StatusUnauthorized)
353                 }
354                 return
355         }
356
357         if len(targetPath) > 0 && targetPath[0] == "_" {
358                 // If a collection has a directory called "t=foo" or
359                 // "_", it can be served at
360                 // //collections.example/_/t=foo/ or
361                 // //collections.example/_/_/ respectively:
362                 // //collections.example/t=foo/ won't work because
363                 // t=foo will be interpreted as a token "foo".
364                 targetPath = targetPath[1:]
365                 stripParts++
366         }
367
368         arv := h.clientPool.Get()
369         if arv == nil {
370                 http.Error(w, "client pool error: "+h.clientPool.Err().Error(), http.StatusInternalServerError)
371                 return
372         }
373         defer h.clientPool.Put(arv)
374
375         dirOpenMode := os.O_RDONLY
376         if writeMethod[r.Method] {
377                 dirOpenMode = os.O_RDWR
378         }
379
380         validToken := make(map[string]bool)
381         var token string
382         var tokenUser *arvados.User
383         var sessionFS arvados.CustomFileSystem
384         var session *cachedSession
385         var collectionDir arvados.File
386         for _, token = range tokens {
387                 var statusErr interface{ HTTPStatus() int }
388                 fs, sess, user, err := h.Cache.GetSession(token)
389                 if errors.As(err, &statusErr) && statusErr.HTTPStatus() == http.StatusUnauthorized {
390                         // bad token
391                         continue
392                 } else if err != nil {
393                         http.Error(w, "cache error: "+err.Error(), http.StatusInternalServerError)
394                         return
395                 }
396                 f, err := fs.OpenFile("by_id/"+collectionID, dirOpenMode, 0)
397                 if errors.As(err, &statusErr) && statusErr.HTTPStatus() == http.StatusForbidden {
398                         // collection id is outside token scope
399                         validToken[token] = true
400                         continue
401                 }
402                 validToken[token] = true
403                 if os.IsNotExist(err) {
404                         // collection does not exist or is not
405                         // readable using this token
406                         continue
407                 } else if err != nil {
408                         http.Error(w, err.Error(), http.StatusInternalServerError)
409                         return
410                 }
411                 defer f.Close()
412
413                 collectionDir, sessionFS, session, tokenUser = f, fs, sess, user
414                 break
415         }
416         if forceReload {
417                 err := collectionDir.Sync()
418                 if err != nil {
419                         var statusErr interface{ HTTPStatus() int }
420                         if errors.As(err, &statusErr) {
421                                 http.Error(w, err.Error(), statusErr.HTTPStatus())
422                         } else {
423                                 http.Error(w, err.Error(), http.StatusInternalServerError)
424                         }
425                         return
426                 }
427         }
428         if session == nil {
429                 if pathToken || !credentialsOK {
430                         // Either the URL is a "secret sharing link"
431                         // that didn't work out (and asking the client
432                         // for additional credentials would just be
433                         // confusing), or we don't even accept
434                         // credentials at this path.
435                         http.Error(w, notFoundMessage, http.StatusNotFound)
436                         return
437                 }
438                 for _, t := range reqTokens {
439                         if validToken[t] {
440                                 // The client provided valid token(s),
441                                 // but the collection was not found.
442                                 http.Error(w, notFoundMessage, http.StatusNotFound)
443                                 return
444                         }
445                 }
446                 // The client's token was invalid (e.g., expired), or
447                 // the client didn't even provide one.  Redirect to
448                 // workbench2's login-and-redirect-to-download url if
449                 // this is a browser navigation request. (The redirect
450                 // flow can't preserve the original method if it's not
451                 // GET, and doesn't make sense if the UA is a
452                 // command-line tool, is trying to load an inline
453                 // image, etc.; in these cases, there's nothing we can
454                 // do, so return 401 unauthorized.)
455                 //
456                 // Note Sec-Fetch-Mode is sent by all non-EOL
457                 // browsers, except Safari.
458                 // https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Sec-Fetch-Mode
459                 //
460                 // TODO(TC): This response would be confusing to
461                 // someone trying (anonymously) to download public
462                 // data that has been deleted.  Allow a referrer to
463                 // provide this context somehow?
464                 if r.Method == http.MethodGet && r.Header.Get("Sec-Fetch-Mode") == "navigate" {
465                         target := url.URL(h.Cluster.Services.Workbench2.ExternalURL)
466                         redirkey := "redirectToPreview"
467                         if attachment {
468                                 redirkey = "redirectToDownload"
469                         }
470                         callback := "/c=" + collectionID + "/" + strings.Join(targetPath, "/")
471                         // target.RawQuery = url.Values{redirkey:
472                         // {target}}.Encode() would be the obvious
473                         // thing to do here, but wb2 doesn't decode
474                         // this as a query param -- it takes
475                         // everything after "${redirkey}=" as the
476                         // target URL. If we encode "/" as "%2F" etc.,
477                         // the redirect won't work.
478                         target.RawQuery = redirkey + "=" + callback
479                         w.Header().Add("Location", target.String())
480                         w.WriteHeader(http.StatusSeeOther)
481                 } else {
482                         w.Header().Add("WWW-Authenticate", "Basic realm=\"collections\"")
483                         http.Error(w, unauthorizedMessage, http.StatusUnauthorized)
484                 }
485                 return
486         }
487
488         var basename string
489         if len(targetPath) > 0 {
490                 basename = targetPath[len(targetPath)-1]
491         }
492         applyContentDispositionHdr(w, r, basename, attachment)
493
494         if arvadosclient.PDHMatch(collectionID) && writeMethod[r.Method] {
495                 http.Error(w, errReadOnly.Error(), http.StatusMethodNotAllowed)
496                 return
497         }
498         if !h.userPermittedToUploadOrDownload(r.Method, tokenUser) {
499                 http.Error(w, "Not permitted", http.StatusForbidden)
500                 return
501         }
502         h.logUploadOrDownload(r, session.arvadosclient, sessionFS, "by_id/"+collectionID+"/"+strings.Join(targetPath, "/"), nil, tokenUser)
503
504         if writeMethod[r.Method] {
505                 // Save the collection only if/when all
506                 // webdav->filesystem operations succeed --
507                 // and send a 500 error if the modified
508                 // collection can't be saved.
509                 //
510                 // Perform the write in a separate sitefs, so
511                 // concurrent read operations on the same
512                 // collection see the previous saved
513                 // state. After the write succeeds and the
514                 // collection record is updated, we reset the
515                 // session so the updates are visible in
516                 // subsequent read requests.
517                 client := session.client.WithRequestID(r.Header.Get("X-Request-Id"))
518                 sessionFS = client.SiteFileSystem(session.keepclient)
519                 writingDir, err := sessionFS.OpenFile("by_id/"+collectionID, os.O_RDONLY, 0)
520                 if err != nil {
521                         http.Error(w, err.Error(), http.StatusInternalServerError)
522                         return
523                 }
524                 defer writingDir.Close()
525                 w = &updateOnSuccess{
526                         ResponseWriter: w,
527                         logger:         ctxlog.FromContext(r.Context()),
528                         update: func() error {
529                                 err := writingDir.Sync()
530                                 var te arvados.TransactionError
531                                 if errors.As(err, &te) {
532                                         err = te
533                                 }
534                                 if err != nil {
535                                         return err
536                                 }
537                                 // Sync the changes to the persistent
538                                 // sessionfs for this token.
539                                 snap, err := writingDir.Snapshot()
540                                 if err != nil {
541                                         return err
542                                 }
543                                 collectionDir.Splice(snap)
544                                 return nil
545                         }}
546         }
547         wh := webdav.Handler{
548                 Prefix: "/" + strings.Join(pathParts[:stripParts], "/"),
549                 FileSystem: &webdavFS{
550                         collfs:        sessionFS,
551                         prefix:        "by_id/" + collectionID + "/",
552                         writing:       writeMethod[r.Method],
553                         alwaysReadEOF: r.Method == "PROPFIND",
554                 },
555                 LockSystem: h.webdavLS,
556                 Logger: func(r *http.Request, err error) {
557                         if err != nil {
558                                 ctxlog.FromContext(r.Context()).WithError(err).Error("error reported by webdav handler")
559                         }
560                 },
561         }
562         if r.Method == http.MethodGet || r.Method == http.MethodHead {
563                 targetfnm := "by_id/" + collectionID + "/" + strings.Join(pathParts[stripParts:], "/")
564                 if fi, err := sessionFS.Stat(targetfnm); err == nil && fi.IsDir() {
565                         if !strings.HasSuffix(r.URL.Path, "/") {
566                                 h.seeOtherWithCookie(w, r, r.URL.Path+"/", credentialsOK)
567                         } else {
568                                 h.serveDirectory(w, r, fi.Name(), sessionFS, targetfnm, true)
569                         }
570                         return
571                 }
572         }
573         wh.ServeHTTP(w, r)
574         if r.Method == http.MethodGet && w.WroteStatus() == http.StatusOK {
575                 wrote := int64(w.WroteBodyBytes())
576                 fnm := strings.Join(pathParts[stripParts:], "/")
577                 fi, err := wh.FileSystem.Stat(r.Context(), fnm)
578                 if err == nil && fi.Size() != wrote {
579                         var n int
580                         f, err := wh.FileSystem.OpenFile(r.Context(), fnm, os.O_RDONLY, 0)
581                         if err == nil {
582                                 n, err = f.Read(make([]byte, 1024))
583                                 f.Close()
584                         }
585                         ctxlog.FromContext(r.Context()).Errorf("stat.Size()==%d but only wrote %d bytes; read(1024) returns %d, %v", fi.Size(), wrote, n, err)
586                 }
587         }
588 }
589
590 func (h *handler) getClients(reqID, token string) (arv *arvadosclient.ArvadosClient, kc *keepclient.KeepClient, client *arvados.Client, release func(), err error) {
591         arv = h.clientPool.Get()
592         if arv == nil {
593                 err = h.clientPool.Err()
594                 return
595         }
596         release = func() { h.clientPool.Put(arv) }
597         arv.ApiToken = token
598         kc, err = keepclient.MakeKeepClient(arv)
599         if err != nil {
600                 release()
601                 return
602         }
603         kc.RequestID = reqID
604         client = (&arvados.Client{
605                 APIHost:   arv.ApiServer,
606                 AuthToken: arv.ApiToken,
607                 Insecure:  arv.ApiInsecure,
608         }).WithRequestID(reqID)
609         return
610 }
611
612 func (h *handler) serveSiteFS(w http.ResponseWriter, r *http.Request, tokens []string, credentialsOK, attachment bool) {
613         if len(tokens) == 0 {
614                 w.Header().Add("WWW-Authenticate", "Basic realm=\"collections\"")
615                 http.Error(w, unauthorizedMessage, http.StatusUnauthorized)
616                 return
617         }
618         if writeMethod[r.Method] {
619                 http.Error(w, errReadOnly.Error(), http.StatusMethodNotAllowed)
620                 return
621         }
622
623         fs, sess, user, err := h.Cache.GetSession(tokens[0])
624         if err != nil {
625                 http.Error(w, err.Error(), http.StatusInternalServerError)
626                 return
627         }
628         f, err := fs.Open(r.URL.Path)
629         if os.IsNotExist(err) {
630                 http.Error(w, err.Error(), http.StatusNotFound)
631                 return
632         } else if err != nil {
633                 http.Error(w, err.Error(), http.StatusInternalServerError)
634                 return
635         }
636         defer f.Close()
637         if fi, err := f.Stat(); err == nil && fi.IsDir() && r.Method == "GET" {
638                 if !strings.HasSuffix(r.URL.Path, "/") {
639                         h.seeOtherWithCookie(w, r, r.URL.Path+"/", credentialsOK)
640                 } else {
641                         h.serveDirectory(w, r, fi.Name(), fs, r.URL.Path, false)
642                 }
643                 return
644         }
645
646         if !h.userPermittedToUploadOrDownload(r.Method, user) {
647                 http.Error(w, "Not permitted", http.StatusForbidden)
648                 return
649         }
650         h.logUploadOrDownload(r, sess.arvadosclient, fs, r.URL.Path, nil, user)
651
652         if r.Method == "GET" {
653                 _, basename := filepath.Split(r.URL.Path)
654                 applyContentDispositionHdr(w, r, basename, attachment)
655         }
656         wh := webdav.Handler{
657                 FileSystem: &webdavFS{
658                         collfs:        fs,
659                         writing:       writeMethod[r.Method],
660                         alwaysReadEOF: r.Method == "PROPFIND",
661                 },
662                 LockSystem: h.webdavLS,
663                 Logger: func(_ *http.Request, err error) {
664                         if err != nil {
665                                 ctxlog.FromContext(r.Context()).WithError(err).Error("error reported by webdav handler")
666                         }
667                 },
668         }
669         wh.ServeHTTP(w, r)
670 }
671
// dirListingTemplate is the HTML template source for directory
// listing pages. It references the fields .CollectionName,
// .StripParts, .Request and .Files (each file entry providing
// .Name, .Size and .IsDir), and a template function named "nbsp".
var dirListingTemplate = `<!DOCTYPE HTML>
<HTML><HEAD>
  <META name="robots" content="NOINDEX">
  <TITLE>{{ .CollectionName }}</TITLE>
  <STYLE type="text/css">
    body {
      margin: 1.5em;
    }
    pre {
      background-color: #D9EDF7;
      border-radius: .25em;
      padding: .75em;
      overflow: auto;
    }
    .footer p {
      font-size: 82%;
    }
    ul {
      padding: 0;
    }
    ul li {
      font-family: monospace;
      list-style: none;
    }
  </STYLE>
</HEAD>
<BODY>

<H1>{{ .CollectionName }}</H1>

<P>This collection of data files is being shared with you through
Arvados.  You can download individual files listed below.  To download
the entire directory tree with wget, try:</P>

<PRE>$ wget --mirror --no-parent --no-host --cut-dirs={{ .StripParts }} https://{{ .Request.Host }}{{ .Request.URL.Path }}</PRE>

<H2>File Listing</H2>

{{if .Files}}
<UL>
{{range .Files}}
{{if .IsDir }}
  <LI>{{" " | printf "%15s  " | nbsp}}<A href="{{print "./" .Name}}/">{{.Name}}/</A></LI>
{{else}}
  <LI>{{.Size | printf "%15d  " | nbsp}}<A href="{{print "./" .Name}}">{{.Name}}</A></LI>
{{end}}
{{end}}
</UL>
{{else}}
<P>(No files; this collection is empty.)</P>
{{end}}

<HR noshade>
<DIV class="footer">
  <P>
    About Arvados:
    Arvados is a free and open source software bioinformatics platform.
    To learn more, visit arvados.org.
    Arvados is not responsible for the files listed on this page.
  </P>
</DIV>

</BODY>
`
736
// fileListEnt is a single row of the HTML directory listing. A slice
// of these is handed to dirListingTemplate as .Files.
type fileListEnt struct {
	// Name is the entry's path as assembled by serveDirectory: the
	// walk prefix followed by the entry's own name.
	Name string
	// Size is the value reported by the entry's FileInfo.
	Size int64
	// IsDir reports whether the entry is a directory; the template
	// renders directories with a trailing "/" and no size.
	IsDir bool
}
742
743 func (h *handler) serveDirectory(w http.ResponseWriter, r *http.Request, collectionName string, fs http.FileSystem, base string, recurse bool) {
744         var files []fileListEnt
745         var walk func(string) error
746         if !strings.HasSuffix(base, "/") {
747                 base = base + "/"
748         }
749         walk = func(path string) error {
750                 dirname := base + path
751                 if dirname != "/" {
752                         dirname = strings.TrimSuffix(dirname, "/")
753                 }
754                 d, err := fs.Open(dirname)
755                 if err != nil {
756                         return err
757                 }
758                 ents, err := d.Readdir(-1)
759                 if err != nil {
760                         return err
761                 }
762                 for _, ent := range ents {
763                         if recurse && ent.IsDir() {
764                                 err = walk(path + ent.Name() + "/")
765                                 if err != nil {
766                                         return err
767                                 }
768                         } else {
769                                 files = append(files, fileListEnt{
770                                         Name:  path + ent.Name(),
771                                         Size:  ent.Size(),
772                                         IsDir: ent.IsDir(),
773                                 })
774                         }
775                 }
776                 return nil
777         }
778         if err := walk(""); err != nil {
779                 http.Error(w, "error getting directory listing: "+err.Error(), http.StatusInternalServerError)
780                 return
781         }
782
783         funcs := template.FuncMap{
784                 "nbsp": func(s string) template.HTML {
785                         return template.HTML(strings.Replace(s, " ", "&nbsp;", -1))
786                 },
787         }
788         tmpl, err := template.New("dir").Funcs(funcs).Parse(dirListingTemplate)
789         if err != nil {
790                 http.Error(w, "error parsing template: "+err.Error(), http.StatusInternalServerError)
791                 return
792         }
793         sort.Slice(files, func(i, j int) bool {
794                 return files[i].Name < files[j].Name
795         })
796         w.WriteHeader(http.StatusOK)
797         tmpl.Execute(w, map[string]interface{}{
798                 "CollectionName": collectionName,
799                 "Files":          files,
800                 "Request":        r,
801                 "StripParts":     strings.Count(strings.TrimRight(r.URL.Path, "/"), "/"),
802         })
803 }
804
// applyContentDispositionHdr sets a Content-Disposition response
// header when one is needed.
//
// The disposition type is "attachment" if isAttachment is true and
// "inline" otherwise. If the request URI contains a query string, a
// filename parameter is appended so the user agent saves the file as
// "filename.txt" rather than "filename.txt?disposition=attachment".
// A bare "inline" disposition is never sent, since it carries no
// information.
//
// TODO(TC): Follow advice at RFC 6266 appendix D
func applyContentDispositionHdr(w http.ResponseWriter, r *http.Request, filename string, isAttachment bool) {
	hasQuery := strings.IndexByte(r.RequestURI, '?') >= 0
	if !isAttachment && !hasQuery {
		// Plain "inline" is the default; nothing to send.
		return
	}
	value := "inline"
	if isAttachment {
		value = "attachment"
	}
	if hasQuery {
		value += "; filename=" + strconv.QuoteToASCII(filename)
	}
	w.Header().Set("Content-Disposition", value)
}
822
823 func (h *handler) seeOtherWithCookie(w http.ResponseWriter, r *http.Request, location string, credentialsOK bool) {
824         if formToken := r.FormValue("api_token"); formToken != "" {
825                 if !credentialsOK {
826                         // It is not safe to copy the provided token
827                         // into a cookie unless the current vhost
828                         // (origin) serves only a single collection or
829                         // we are in TrustAllContent mode.
830                         http.Error(w, "cannot serve inline content at this URL (possible configuration error; see https://doc.arvados.org/install/install-keep-web.html#dns)", http.StatusBadRequest)
831                         return
832                 }
833
834                 // The HttpOnly flag is necessary to prevent
835                 // JavaScript code (included in, or loaded by, a page
836                 // in the collection being served) from employing the
837                 // user's token beyond reading other files in the same
838                 // domain, i.e., same collection.
839                 //
840                 // The 303 redirect is necessary in the case of a GET
841                 // request to avoid exposing the token in the Location
842                 // bar, and in the case of a POST request to avoid
843                 // raising warnings when the user refreshes the
844                 // resulting page.
845                 http.SetCookie(w, &http.Cookie{
846                         Name:     "arvados_api_token",
847                         Value:    auth.EncodeTokenCookie([]byte(formToken)),
848                         Path:     "/",
849                         HttpOnly: true,
850                         SameSite: http.SameSiteLaxMode,
851                 })
852         }
853
854         // Propagate query parameters (except api_token) from
855         // the original request.
856         redirQuery := r.URL.Query()
857         redirQuery.Del("api_token")
858
859         u := r.URL
860         if location != "" {
861                 newu, err := u.Parse(location)
862                 if err != nil {
863                         http.Error(w, "error resolving redirect target: "+err.Error(), http.StatusInternalServerError)
864                         return
865                 }
866                 u = newu
867         }
868         redir := (&url.URL{
869                 Scheme:   r.URL.Scheme,
870                 Host:     r.Host,
871                 Path:     u.Path,
872                 RawQuery: redirQuery.Encode(),
873         }).String()
874
875         w.Header().Add("Location", redir)
876         w.WriteHeader(http.StatusSeeOther)
877         io.WriteString(w, `<A href="`)
878         io.WriteString(w, html.EscapeString(redir))
879         io.WriteString(w, `">Continue</A>`)
880 }
881
882 func (h *handler) userPermittedToUploadOrDownload(method string, tokenUser *arvados.User) bool {
883         var permitDownload bool
884         var permitUpload bool
885         if tokenUser != nil && tokenUser.IsAdmin {
886                 permitUpload = h.Cluster.Collections.WebDAVPermission.Admin.Upload
887                 permitDownload = h.Cluster.Collections.WebDAVPermission.Admin.Download
888         } else {
889                 permitUpload = h.Cluster.Collections.WebDAVPermission.User.Upload
890                 permitDownload = h.Cluster.Collections.WebDAVPermission.User.Download
891         }
892         if (method == "PUT" || method == "POST") && !permitUpload {
893                 // Disallow operations that upload new files.
894                 // Permit webdav operations that move existing files around.
895                 return false
896         } else if method == "GET" && !permitDownload {
897                 // Disallow downloading file contents.
898                 // Permit webdav operations like PROPFIND that retrieve metadata
899                 // but not file contents.
900                 return false
901         }
902         return true
903 }
904
905 func (h *handler) logUploadOrDownload(
906         r *http.Request,
907         client *arvadosclient.ArvadosClient,
908         fs arvados.CustomFileSystem,
909         filepath string,
910         collection *arvados.Collection,
911         user *arvados.User) {
912
913         log := ctxlog.FromContext(r.Context())
914         props := make(map[string]string)
915         props["reqPath"] = r.URL.Path
916         var useruuid string
917         if user != nil {
918                 log = log.WithField("user_uuid", user.UUID).
919                         WithField("user_full_name", user.FullName)
920                 useruuid = user.UUID
921         } else {
922                 useruuid = fmt.Sprintf("%s-tpzed-anonymouspublic", h.Cluster.ClusterID)
923         }
924         if collection == nil && fs != nil {
925                 collection, filepath = h.determineCollection(fs, filepath)
926         }
927         if collection != nil {
928                 log = log.WithField("collection_file_path", filepath)
929                 props["collection_file_path"] = filepath
930                 // h.determineCollection populates the collection_uuid
931                 // prop with the PDH, if this collection is being
932                 // accessed via PDH. For logging, we use a different
933                 // field depending on whether it's a UUID or PDH.
934                 if len(collection.UUID) > 32 {
935                         log = log.WithField("portable_data_hash", collection.UUID)
936                         props["portable_data_hash"] = collection.UUID
937                 } else {
938                         log = log.WithField("collection_uuid", collection.UUID)
939                         props["collection_uuid"] = collection.UUID
940                 }
941         }
942         if r.Method == "PUT" || r.Method == "POST" {
943                 log.Info("File upload")
944                 if h.Cluster.Collections.WebDAVLogEvents {
945                         go func() {
946                                 lr := arvadosclient.Dict{"log": arvadosclient.Dict{
947                                         "object_uuid": useruuid,
948                                         "event_type":  "file_upload",
949                                         "properties":  props}}
950                                 err := client.Create("logs", lr, nil)
951                                 if err != nil {
952                                         log.WithError(err).Error("Failed to create upload log event on API server")
953                                 }
954                         }()
955                 }
956         } else if r.Method == "GET" {
957                 if collection != nil && collection.PortableDataHash != "" {
958                         log = log.WithField("portable_data_hash", collection.PortableDataHash)
959                         props["portable_data_hash"] = collection.PortableDataHash
960                 }
961                 log.Info("File download")
962                 if h.Cluster.Collections.WebDAVLogEvents {
963                         go func() {
964                                 lr := arvadosclient.Dict{"log": arvadosclient.Dict{
965                                         "object_uuid": useruuid,
966                                         "event_type":  "file_download",
967                                         "properties":  props}}
968                                 err := client.Create("logs", lr, nil)
969                                 if err != nil {
970                                         log.WithError(err).Error("Failed to create download log event on API server")
971                                 }
972                         }()
973                 }
974         }
975 }
976
977 func (h *handler) determineCollection(fs arvados.CustomFileSystem, path string) (*arvados.Collection, string) {
978         target := strings.TrimSuffix(path, "/")
979         for cut := len(target); cut >= 0; cut = strings.LastIndexByte(target, '/') {
980                 target = target[:cut]
981                 fi, err := fs.Stat(target)
982                 if os.IsNotExist(err) {
983                         // creating a new file/dir, or download
984                         // destined to fail
985                         continue
986                 } else if err != nil {
987                         return nil, ""
988                 }
989                 switch src := fi.Sys().(type) {
990                 case *arvados.Collection:
991                         return src, strings.TrimPrefix(path[len(target):], "/")
992                 case *arvados.Group:
993                         return nil, ""
994                 default:
995                         if _, ok := src.(error); ok {
996                                 return nil, ""
997                         }
998                 }
999         }
1000         return nil, ""
1001 }