Revert "Merge branch '21583-railsapi-base64-gem'"
[arvados.git] / services / keep-web / handler.go
1 // Copyright (C) The Arvados Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: AGPL-3.0
4
5 package keepweb
6
7 import (
8         "encoding/json"
9         "errors"
10         "fmt"
11         "html"
12         "html/template"
13         "io"
14         "net/http"
15         "net/url"
16         "os"
17         "sort"
18         "strconv"
19         "strings"
20         "sync"
21         "time"
22
23         "git.arvados.org/arvados.git/lib/cmd"
24         "git.arvados.org/arvados.git/lib/webdavfs"
25         "git.arvados.org/arvados.git/sdk/go/arvados"
26         "git.arvados.org/arvados.git/sdk/go/arvadosclient"
27         "git.arvados.org/arvados.git/sdk/go/auth"
28         "git.arvados.org/arvados.git/sdk/go/ctxlog"
29         "git.arvados.org/arvados.git/sdk/go/httpserver"
30         "github.com/sirupsen/logrus"
31         "golang.org/x/net/webdav"
32 )
33
// handler is the keep-web request handler. Its ServeHTTP method
// serves collection content via WebDAV, browser-oriented methods
// (GET/HEAD/POST), and (through serveS3) S3-style requests.
type handler struct {
        // Cache supplies a per-token session (filesystem, session
        // record, user) via GetSession.
        Cache cache
        // Cluster is the cluster configuration consulted while
        // handling requests (e.g., Collections.TrustAllContent,
        // Services.WebDAVDownload, Users.AnonymousUserToken).
        Cluster *arvados.Cluster
        // metrics wraps the webdav handler invocation (see the
        // h.metrics.track call in ServeHTTP).
        metrics *metrics

        // NOTE(review): the three fields below are not referenced
        // in this section of the file; presumably they back
        // collectionLock() (per-collection RWMutexes, guarded by
        // lockMtx, with lockTidied recording the last cleanup) --
        // confirm against collectionLock's definition.
        lockMtx    sync.Mutex
        lock       map[string]*sync.RWMutex
        lockTidied time.Time
}
43
var (
        // urlPDHDecoder turns every " " and "-" into "+", undoing
        // the substitutions a portable data hash may have picked up
        // on its way through a URL.
        urlPDHDecoder = strings.NewReplacer(" ", "+", "-", "+")

        // notFoundMessage is the response body used when the
        // requested collection or path cannot be served.
        notFoundMessage = "Not Found"

        // unauthorizedMessage is the response body sent along with
        // a 401 status when usable credentials are missing.
        unauthorizedMessage = "401 Unauthorized\n\nA valid Arvados token must be provided to access this resource."
)
48
49 // parseCollectionIDFromURL returns a UUID or PDH if s is a UUID or a
50 // PDH (even if it is a PDH with "+" replaced by " " or "-");
51 // otherwise "".
52 func parseCollectionIDFromURL(s string) string {
53         if arvadosclient.UUIDMatch(s) {
54                 return s
55         }
56         if pdh := urlPDHDecoder.Replace(s); arvadosclient.PDHMatch(pdh) {
57                 return pdh
58         }
59         return ""
60 }
61
62 func (h *handler) serveStatus(w http.ResponseWriter, r *http.Request) {
63         json.NewEncoder(w).Encode(struct{ Version string }{cmd.Version.String()})
64 }
65
// errorWithHTTPStatus is satisfied by values (in this file, errors
// matched with errors.As) that can report the HTTP status code that
// should be used when relaying them to a client.
type errorWithHTTPStatus interface {
        // HTTPStatus returns the HTTP status code associated with
        // the value.
        HTTPStatus() int
}
69
// updateOnSuccess wraps httpserver.ResponseWriter. If the handler
// sends an HTTP header indicating success, updateOnSuccess first
// calls the provided update func. If the update func fails, an error
// response is sent (using the error's HTTP status or 500 if none),
// and the status code and body sent by the handler are ignored (all
// response writes return the update error).
type updateOnSuccess struct {
        httpserver.ResponseWriter
        // logger receives an error-level entry when update fails.
        logger logrus.FieldLogger
        // update is invoked by the first WriteHeader call, and only
        // if that call's status code is in the range 200-399.
        update func() error
        // sentHeader is set by the first WriteHeader call (made
        // explicitly, or implicitly by Write).
        sentHeader bool
        // err holds the result of update; when non-nil, Write
        // returns it instead of writing.
        err error
}
83
84 func (uos *updateOnSuccess) Write(p []byte) (int, error) {
85         if !uos.sentHeader {
86                 uos.WriteHeader(http.StatusOK)
87         }
88         if uos.err != nil {
89                 return 0, uos.err
90         }
91         return uos.ResponseWriter.Write(p)
92 }
93
94 func (uos *updateOnSuccess) WriteHeader(code int) {
95         if !uos.sentHeader {
96                 uos.sentHeader = true
97                 if code >= 200 && code < 400 {
98                         if uos.err = uos.update(); uos.err != nil {
99                                 code := http.StatusInternalServerError
100                                 if he := errorWithHTTPStatus(nil); errors.As(uos.err, &he) {
101                                         code = he.HTTPStatus()
102                                 }
103                                 uos.logger.WithError(uos.err).Errorf("update() returned %T error, changing response to HTTP %d", uos.err, code)
104                                 http.Error(uos.ResponseWriter, uos.err.Error(), code)
105                                 return
106                         }
107                 }
108         }
109         uos.ResponseWriter.WriteHeader(code)
110 }
111
// corsAllowHeadersHeader is a comma-separated list of request header
// names: general-purpose headers first, then WebDAV request headers.
// NOTE(review): presumably sent as Access-Control-Allow-Headers by
// the CORS preflight code, which is outside this section -- confirm.
var corsAllowHeadersHeader = strings.Join(
        []string{
                "Authorization",
                "Content-Type",
                "Range",
                // WebDAV request headers:
                "Depth",
                "Destination",
                "If",
                "Lock-Token",
                "Overwrite",
                "Timeout",
                "Cache-Control",
        },
        ", ",
)

// writeMethod is the set of request methods treated as writes.
var writeMethod = map[string]bool{
        "COPY":      true,
        "DELETE":    true,
        "LOCK":      true,
        "MKCOL":     true,
        "MOVE":      true,
        "PROPPATCH": true,
        "PUT":       true,
        "RMCOL":     true,
        "UNLOCK":    true,
}

// webdavMethod is the set of WebDAV request methods accepted by the
// handler.
var webdavMethod = map[string]bool{
        "COPY":      true,
        "DELETE":    true,
        "LOCK":      true,
        "MKCOL":     true,
        "MOVE":      true,
        "OPTIONS":   true,
        "PROPFIND":  true,
        "PROPPATCH": true,
        "PUT":       true,
        "RMCOL":     true,
        "UNLOCK":    true,
}

// browserMethod is the set of browser-style request methods accepted
// by the handler.
var browserMethod = map[string]bool{
        "GET":  true,
        "HEAD": true,
        "POST": true,
}

// siteFSDir lists the top-level dirs to serve with siteFS.
var siteFSDir = map[string]bool{
        "":      true, // root directory
        "by_id": true,
        "users": true,
}
154
// stripDefaultPort normalizes a "host" or "host:port" string so two
// spellings of the same vhost compare equal: a trailing ":80" or
// ":443" is removed and the result is lowercased. Any other port is
// kept.
//
// Only the ":port" suffix is removed, so a bracketed IPv6 literal
// keeps its brackets ("[::1]:443" becomes "[::1]") and therefore
// matches the same address written without a port.
func stripDefaultPort(host string) string {
        // Will consider port 80 and port 443 to be the same vhost.  I think that's fine.
        u := &url.URL{Host: host}
        if p := u.Port(); p == "80" || p == "443" {
                // Port() returns the text after the last ":", so
                // host is known to end with ":"+p here.
                host = strings.TrimSuffix(host, ":"+p)
        }
        return strings.ToLower(host)
}
164
// CheckHealth implements service.Handler.
//
// It performs no checks and always returns nil.
func (h *handler) CheckHealth() error {
        return nil
}
169
// Done implements service.Handler.
//
// It always returns a nil channel; a receive from a nil channel
// never proceeds.
func (h *handler) Done() <-chan struct{} {
        return nil
}
174
175 // ServeHTTP implements http.Handler.
176 func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
177         if xfp := r.Header.Get("X-Forwarded-Proto"); xfp != "" && xfp != "http" {
178                 r.URL.Scheme = xfp
179         }
180
181         w := httpserver.WrapResponseWriter(wOrig)
182
183         if r.Method == "OPTIONS" && ServeCORSPreflight(w, r.Header) {
184                 return
185         }
186
187         if !browserMethod[r.Method] && !webdavMethod[r.Method] {
188                 w.WriteHeader(http.StatusMethodNotAllowed)
189                 return
190         }
191
192         if r.Header.Get("Origin") != "" {
193                 // Allow simple cross-origin requests without user
194                 // credentials ("user credentials" as defined by CORS,
195                 // i.e., cookies, HTTP authentication, and client-side
196                 // SSL certificates. See
197                 // http://www.w3.org/TR/cors/#user-credentials).
198                 w.Header().Set("Access-Control-Allow-Origin", "*")
199                 w.Header().Set("Access-Control-Expose-Headers", "Content-Range")
200         }
201
202         if h.serveS3(w, r) {
203                 return
204         }
205
206         webdavPrefix := ""
207         arvPath := r.URL.Path
208         if prefix := r.Header.Get("X-Webdav-Prefix"); prefix != "" {
209                 // Enable a proxy (e.g., container log handler in
210                 // controller) to satisfy a request for path
211                 // "/foo/bar/baz.txt" using content from
212                 // "//abc123-4.internal/bar/baz.txt", by adding a
213                 // request header "X-Webdav-Prefix: /foo"
214                 if !strings.HasPrefix(arvPath, prefix) {
215                         http.Error(w, "X-Webdav-Prefix header is not a prefix of the requested path", http.StatusBadRequest)
216                         return
217                 }
218                 arvPath = r.URL.Path[len(prefix):]
219                 if arvPath == "" {
220                         arvPath = "/"
221                 }
222                 w.Header().Set("Vary", "X-Webdav-Prefix, "+w.Header().Get("Vary"))
223                 webdavPrefix = prefix
224         }
225         pathParts := strings.Split(arvPath[1:], "/")
226
227         var stripParts int
228         var collectionID string
229         var tokens []string
230         var reqTokens []string
231         var pathToken bool
232         var attachment bool
233         var useSiteFS bool
234         credentialsOK := h.Cluster.Collections.TrustAllContent
235         reasonNotAcceptingCredentials := ""
236
237         if r.Host != "" && stripDefaultPort(r.Host) == stripDefaultPort(h.Cluster.Services.WebDAVDownload.ExternalURL.Host) {
238                 credentialsOK = true
239                 attachment = true
240         } else if r.FormValue("disposition") == "attachment" {
241                 attachment = true
242         }
243
244         if !credentialsOK {
245                 reasonNotAcceptingCredentials = fmt.Sprintf("vhost %q does not specify a single collection ID or match Services.WebDAVDownload.ExternalURL %q, and Collections.TrustAllContent is false",
246                         r.Host, h.Cluster.Services.WebDAVDownload.ExternalURL)
247         }
248
249         if collectionID = arvados.CollectionIDFromDNSName(r.Host); collectionID != "" {
250                 // http://ID.collections.example/PATH...
251                 credentialsOK = true
252         } else if r.URL.Path == "/status.json" {
253                 h.serveStatus(w, r)
254                 return
255         } else if siteFSDir[pathParts[0]] {
256                 useSiteFS = true
257         } else if len(pathParts) >= 1 && strings.HasPrefix(pathParts[0], "c=") {
258                 // /c=ID[/PATH...]
259                 collectionID = parseCollectionIDFromURL(pathParts[0][2:])
260                 stripParts = 1
261         } else if len(pathParts) >= 2 && pathParts[0] == "collections" {
262                 if len(pathParts) >= 4 && pathParts[1] == "download" {
263                         // /collections/download/ID/TOKEN/PATH...
264                         collectionID = parseCollectionIDFromURL(pathParts[2])
265                         tokens = []string{pathParts[3]}
266                         stripParts = 4
267                         pathToken = true
268                 } else {
269                         // /collections/ID/PATH...
270                         collectionID = parseCollectionIDFromURL(pathParts[1])
271                         stripParts = 2
272                         // This path is only meant to work for public
273                         // data. Tokens provided with the request are
274                         // ignored.
275                         credentialsOK = false
276                         reasonNotAcceptingCredentials = "the '/collections/UUID/PATH' form only works for public data"
277                 }
278         }
279
280         forceReload := false
281         if cc := r.Header.Get("Cache-Control"); strings.Contains(cc, "no-cache") || strings.Contains(cc, "must-revalidate") {
282                 forceReload = true
283         }
284
285         if credentialsOK {
286                 reqTokens = auth.CredentialsFromRequest(r).Tokens
287         }
288
289         r.ParseForm()
290         origin := r.Header.Get("Origin")
291         cors := origin != "" && !strings.HasSuffix(origin, "://"+r.Host)
292         safeAjax := cors && (r.Method == http.MethodGet || r.Method == http.MethodHead)
293         // Important distinction: safeAttachment checks whether api_token exists
294         // as a query parameter. haveFormTokens checks whether api_token exists
295         // as request form data *or* a query parameter. Different checks are
296         // necessary because both the request disposition and the location of
297         // the API token affect whether or not the request needs to be
298         // redirected. The different branch comments below explain further.
299         safeAttachment := attachment && !r.URL.Query().Has("api_token")
300         if formTokens, haveFormTokens := r.Form["api_token"]; !haveFormTokens {
301                 // No token to use or redact.
302         } else if safeAjax || safeAttachment {
303                 // If this is a cross-origin request, the URL won't
304                 // appear in the browser's address bar, so
305                 // substituting a clipboard-safe URL is pointless.
306                 // Redirect-with-cookie wouldn't work anyway, because
307                 // it's not safe to allow third-party use of our
308                 // cookie.
309                 //
310                 // If we're supplying an attachment, we don't need to
311                 // convert POST to GET to avoid the "really resubmit
312                 // form?" problem, so provided the token isn't
313                 // embedded in the URL, there's no reason to do
314                 // redirect-with-cookie in this case either.
315                 for _, tok := range formTokens {
316                         reqTokens = append(reqTokens, tok)
317                 }
318         } else if browserMethod[r.Method] {
319                 // If this is a page view, and the client provided a
320                 // token via query string or POST body, we must put
321                 // the token in an HttpOnly cookie, and redirect to an
322                 // equivalent URL with the query param redacted and
323                 // method = GET.
324                 h.seeOtherWithCookie(w, r, "", credentialsOK)
325                 return
326         }
327
328         targetPath := pathParts[stripParts:]
329         if tokens == nil && len(targetPath) > 0 && strings.HasPrefix(targetPath[0], "t=") {
330                 // http://ID.example/t=TOKEN/PATH...
331                 // /c=ID/t=TOKEN/PATH...
332                 //
333                 // This form must only be used to pass scoped tokens
334                 // that give permission for a single collection. See
335                 // FormValue case above.
336                 tokens = []string{targetPath[0][2:]}
337                 pathToken = true
338                 targetPath = targetPath[1:]
339                 stripParts++
340         }
341
342         fsprefix := ""
343         if useSiteFS {
344                 if writeMethod[r.Method] {
345                         http.Error(w, webdavfs.ErrReadOnly.Error(), http.StatusMethodNotAllowed)
346                         return
347                 }
348                 if len(reqTokens) == 0 {
349                         w.Header().Add("WWW-Authenticate", "Basic realm=\"collections\"")
350                         http.Error(w, unauthorizedMessage, http.StatusUnauthorized)
351                         return
352                 }
353                 tokens = reqTokens
354         } else if collectionID == "" {
355                 http.Error(w, notFoundMessage, http.StatusNotFound)
356                 return
357         } else {
358                 fsprefix = "by_id/" + collectionID + "/"
359         }
360
361         if src := r.Header.Get("X-Webdav-Source"); strings.HasPrefix(src, "/") && !strings.Contains(src, "//") && !strings.Contains(src, "/../") {
362                 fsprefix += src[1:]
363         }
364
365         if tokens == nil {
366                 tokens = reqTokens
367                 if h.Cluster.Users.AnonymousUserToken != "" {
368                         tokens = append(tokens, h.Cluster.Users.AnonymousUserToken)
369                 }
370         }
371
372         if len(targetPath) > 0 && targetPath[0] == "_" {
373                 // If a collection has a directory called "t=foo" or
374                 // "_", it can be served at
375                 // //collections.example/_/t=foo/ or
376                 // //collections.example/_/_/ respectively:
377                 // //collections.example/t=foo/ won't work because
378                 // t=foo will be interpreted as a token "foo".
379                 targetPath = targetPath[1:]
380                 stripParts++
381         }
382
383         dirOpenMode := os.O_RDONLY
384         if writeMethod[r.Method] {
385                 dirOpenMode = os.O_RDWR
386         }
387
388         var tokenValid bool
389         var tokenScopeProblem bool
390         var token string
391         var tokenUser *arvados.User
392         var sessionFS arvados.CustomFileSystem
393         var session *cachedSession
394         var collectionDir arvados.File
395         for _, token = range tokens {
396                 var statusErr errorWithHTTPStatus
397                 fs, sess, user, err := h.Cache.GetSession(token)
398                 if errors.As(err, &statusErr) && statusErr.HTTPStatus() == http.StatusUnauthorized {
399                         // bad token
400                         continue
401                 } else if err != nil {
402                         http.Error(w, "cache error: "+err.Error(), http.StatusInternalServerError)
403                         return
404                 }
405                 if token != h.Cluster.Users.AnonymousUserToken {
406                         tokenValid = true
407                 }
408                 f, err := fs.OpenFile(fsprefix, dirOpenMode, 0)
409                 if errors.As(err, &statusErr) &&
410                         statusErr.HTTPStatus() == http.StatusForbidden &&
411                         token != h.Cluster.Users.AnonymousUserToken {
412                         // collection id is outside scope of supplied
413                         // token
414                         tokenScopeProblem = true
415                         sess.Release()
416                         continue
417                 } else if os.IsNotExist(err) {
418                         // collection does not exist or is not
419                         // readable using this token
420                         sess.Release()
421                         continue
422                 } else if err != nil {
423                         http.Error(w, err.Error(), http.StatusInternalServerError)
424                         sess.Release()
425                         return
426                 }
427                 defer f.Close()
428                 defer sess.Release()
429
430                 collectionDir, sessionFS, session, tokenUser = f, fs, sess, user
431                 break
432         }
433         if forceReload && collectionDir != nil {
434                 err := collectionDir.Sync()
435                 if err != nil {
436                         if he := errorWithHTTPStatus(nil); errors.As(err, &he) {
437                                 http.Error(w, err.Error(), he.HTTPStatus())
438                         } else {
439                                 http.Error(w, err.Error(), http.StatusInternalServerError)
440                         }
441                         return
442                 }
443         }
444         if session == nil {
445                 if pathToken {
446                         // The URL is a "secret sharing link" that
447                         // didn't work out.  Asking the client for
448                         // additional credentials would just be
449                         // confusing.
450                         http.Error(w, notFoundMessage, http.StatusNotFound)
451                         return
452                 }
453                 if tokenValid {
454                         // The client provided valid token(s), but the
455                         // collection was not found.
456                         http.Error(w, notFoundMessage, http.StatusNotFound)
457                         return
458                 }
459                 if tokenScopeProblem {
460                         // The client provided a valid token but
461                         // fetching a collection returned 401, which
462                         // means the token scope doesn't permit
463                         // fetching that collection.
464                         http.Error(w, notFoundMessage, http.StatusForbidden)
465                         return
466                 }
467                 // The client's token was invalid (e.g., expired), or
468                 // the client didn't even provide one.  Redirect to
469                 // workbench2's login-and-redirect-to-download url if
470                 // this is a browser navigation request. (The redirect
471                 // flow can't preserve the original method if it's not
472                 // GET, and doesn't make sense if the UA is a
473                 // command-line tool, is trying to load an inline
474                 // image, etc.; in these cases, there's nothing we can
475                 // do, so return 401 unauthorized.)
476                 //
477                 // Note Sec-Fetch-Mode is sent by all non-EOL
478                 // browsers, except Safari.
479                 // https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Sec-Fetch-Mode
480                 //
481                 // TODO(TC): This response would be confusing to
482                 // someone trying (anonymously) to download public
483                 // data that has been deleted.  Allow a referrer to
484                 // provide this context somehow?
485                 if r.Method == http.MethodGet && r.Header.Get("Sec-Fetch-Mode") == "navigate" {
486                         target := url.URL(h.Cluster.Services.Workbench2.ExternalURL)
487                         redirkey := "redirectToPreview"
488                         if attachment {
489                                 redirkey = "redirectToDownload"
490                         }
491                         callback := "/c=" + collectionID + "/" + strings.Join(targetPath, "/")
492                         // target.RawQuery = url.Values{redirkey:
493                         // {target}}.Encode() would be the obvious
494                         // thing to do here, but wb2 doesn't decode
495                         // this as a query param -- it takes
496                         // everything after "${redirkey}=" as the
497                         // target URL. If we encode "/" as "%2F" etc.,
498                         // the redirect won't work.
499                         target.RawQuery = redirkey + "=" + callback
500                         w.Header().Add("Location", target.String())
501                         w.WriteHeader(http.StatusSeeOther)
502                         return
503                 }
504                 if !credentialsOK {
505                         http.Error(w, fmt.Sprintf("Authorization tokens are not accepted here: %v, and no anonymous user token is configured.", reasonNotAcceptingCredentials), http.StatusUnauthorized)
506                         return
507                 }
508                 // If none of the above cases apply, suggest the
509                 // user-agent (which is either a non-browser agent
510                 // like wget, or a browser that can't redirect through
511                 // a login flow) prompt the user for credentials.
512                 w.Header().Add("WWW-Authenticate", "Basic realm=\"collections\"")
513                 http.Error(w, unauthorizedMessage, http.StatusUnauthorized)
514                 return
515         }
516
517         if r.Method == http.MethodGet || r.Method == http.MethodHead {
518                 targetfnm := fsprefix + strings.Join(pathParts[stripParts:], "/")
519                 if fi, err := sessionFS.Stat(targetfnm); err == nil && fi.IsDir() {
520                         if !strings.HasSuffix(r.URL.Path, "/") {
521                                 h.seeOtherWithCookie(w, r, r.URL.Path+"/", credentialsOK)
522                         } else {
523                                 h.serveDirectory(w, r, fi.Name(), sessionFS, targetfnm, !useSiteFS)
524                         }
525                         return
526                 }
527         }
528
529         var basename string
530         if len(targetPath) > 0 {
531                 basename = targetPath[len(targetPath)-1]
532         }
533         if arvadosclient.PDHMatch(collectionID) && writeMethod[r.Method] {
534                 http.Error(w, webdavfs.ErrReadOnly.Error(), http.StatusMethodNotAllowed)
535                 return
536         }
537         if !h.userPermittedToUploadOrDownload(r.Method, tokenUser) {
538                 http.Error(w, "Not permitted", http.StatusForbidden)
539                 return
540         }
541         h.logUploadOrDownload(r, session.arvadosclient, sessionFS, fsprefix+strings.Join(targetPath, "/"), nil, tokenUser)
542
543         writing := writeMethod[r.Method]
544         locker := h.collectionLock(collectionID, writing)
545         defer locker.Unlock()
546
547         if writing {
548                 // Save the collection only if/when all
549                 // webdav->filesystem operations succeed --
550                 // and send a 500 error if the modified
551                 // collection can't be saved.
552                 //
553                 // Perform the write in a separate sitefs, so
554                 // concurrent read operations on the same
555                 // collection see the previous saved
556                 // state. After the write succeeds and the
557                 // collection record is updated, we reset the
558                 // session so the updates are visible in
559                 // subsequent read requests.
560                 client := session.client.WithRequestID(r.Header.Get("X-Request-Id"))
561                 sessionFS = client.SiteFileSystem(session.keepclient)
562                 writingDir, err := sessionFS.OpenFile(fsprefix, os.O_RDONLY, 0)
563                 if err != nil {
564                         http.Error(w, err.Error(), http.StatusInternalServerError)
565                         return
566                 }
567                 defer writingDir.Close()
568                 w = &updateOnSuccess{
569                         ResponseWriter: w,
570                         logger:         ctxlog.FromContext(r.Context()),
571                         update: func() error {
572                                 err := writingDir.Sync()
573                                 var te arvados.TransactionError
574                                 if errors.As(err, &te) {
575                                         err = te
576                                 }
577                                 if err != nil {
578                                         return err
579                                 }
580                                 // Sync the changes to the persistent
581                                 // sessionfs for this token.
582                                 snap, err := writingDir.Snapshot()
583                                 if err != nil {
584                                         return err
585                                 }
586                                 collectionDir.Splice(snap)
587                                 return nil
588                         }}
589         }
590         if r.Method == http.MethodGet {
591                 applyContentDispositionHdr(w, r, basename, attachment)
592         }
593         if webdavPrefix == "" {
594                 webdavPrefix = "/" + strings.Join(pathParts[:stripParts], "/")
595         }
596         wh := &webdav.Handler{
597                 Prefix: webdavPrefix,
598                 FileSystem: &webdavfs.FS{
599                         FileSystem:    sessionFS,
600                         Prefix:        fsprefix,
601                         Writing:       writeMethod[r.Method],
602                         AlwaysReadEOF: r.Method == "PROPFIND",
603                 },
604                 LockSystem: webdavfs.NoLockSystem,
605                 Logger: func(r *http.Request, err error) {
606                         if err != nil && !os.IsNotExist(err) {
607                                 ctxlog.FromContext(r.Context()).WithError(err).Error("error reported by webdav handler")
608                         }
609                 },
610         }
611         h.metrics.track(wh, w, r)
612         if r.Method == http.MethodGet && w.WroteStatus() == http.StatusOK {
613                 wrote := int64(w.WroteBodyBytes())
614                 fnm := strings.Join(pathParts[stripParts:], "/")
615                 fi, err := wh.FileSystem.Stat(r.Context(), fnm)
616                 if err == nil && fi.Size() != wrote {
617                         var n int
618                         f, err := wh.FileSystem.OpenFile(r.Context(), fnm, os.O_RDONLY, 0)
619                         if err == nil {
620                                 n, err = f.Read(make([]byte, 1024))
621                                 f.Close()
622                         }
623                         ctxlog.FromContext(r.Context()).Errorf("stat.Size()==%d but only wrote %d bytes; read(1024) returns %d, %v", fi.Size(), wrote, n, err)
624                 }
625         }
626 }
627
// dirListingTemplate is the html/template source for the directory
// listing page rendered by serveDirectory. It expects these values:
//
//   - .CollectionName: used as the page title and heading
//   - .StripParts: value for wget's --cut-dirs option in the
//     suggested download command
//   - .Request: the request, used for its Host and URL.Path
//   - .Files: entries with .Name, .Size and .IsDir; when empty, a
//     "collection is empty" notice is shown instead of the list
//
// The template also needs a function named "nbsp" to be registered
// before it is parsed.
var dirListingTemplate = `<!DOCTYPE HTML>
<HTML><HEAD>
  <META name="robots" content="NOINDEX">
  <TITLE>{{ .CollectionName }}</TITLE>
  <STYLE type="text/css">
    body {
      margin: 1.5em;
    }
    pre {
      background-color: #D9EDF7;
      border-radius: .25em;
      padding: .75em;
      overflow: auto;
    }
    .footer p {
      font-size: 82%;
    }
    ul {
      padding: 0;
    }
    ul li {
      font-family: monospace;
      list-style: none;
    }
  </STYLE>
</HEAD>
<BODY>

<H1>{{ .CollectionName }}</H1>

<P>This collection of data files is being shared with you through
Arvados.  You can download individual files listed below.  To download
the entire directory tree with wget, try:</P>

<PRE>$ wget --mirror --no-parent --no-host --cut-dirs={{ .StripParts }} https://{{ .Request.Host }}{{ .Request.URL.Path }}</PRE>

<H2>File Listing</H2>

{{if .Files}}
<UL>
{{range .Files}}
{{if .IsDir }}
  <LI>{{" " | printf "%15s  " | nbsp}}<A href="{{print "./" .Name}}/">{{.Name}}/</A></LI>
{{else}}
  <LI>{{.Size | printf "%15d  " | nbsp}}<A href="{{print "./" .Name}}">{{.Name}}</A></LI>
{{end}}
{{end}}
</UL>
{{else}}
<P>(No files; this collection is empty.)</P>
{{end}}

<HR noshade>
<DIV class="footer">
  <P>
    About Arvados:
    Arvados is a free and open source software bioinformatics platform.
    To learn more, visit arvados.org.
    Arvados is not responsible for the files listed on this page.
  </P>
</DIV>

</BODY>
`
692
// fileListEnt is one row of the directory listing built by
// serveDirectory and rendered by dirListingTemplate.
type fileListEnt struct {
        // Name is the entry's path relative to the directory being
        // listed (it includes parent directory names when the
        // listing is recursive).
        Name string
        // Size is the size reported by the entry's FileInfo.
        Size int64
        // IsDir is true if the entry is a directory.
        IsDir bool
}
698
699 func (h *handler) serveDirectory(w http.ResponseWriter, r *http.Request, collectionName string, fs http.FileSystem, base string, recurse bool) {
700         var files []fileListEnt
701         var walk func(string) error
702         if !strings.HasSuffix(base, "/") {
703                 base = base + "/"
704         }
705         walk = func(path string) error {
706                 dirname := base + path
707                 if dirname != "/" {
708                         dirname = strings.TrimSuffix(dirname, "/")
709                 }
710                 d, err := fs.Open(dirname)
711                 if err != nil {
712                         return err
713                 }
714                 ents, err := d.Readdir(-1)
715                 if err != nil {
716                         return err
717                 }
718                 for _, ent := range ents {
719                         if recurse && ent.IsDir() {
720                                 err = walk(path + ent.Name() + "/")
721                                 if err != nil {
722                                         return err
723                                 }
724                         } else {
725                                 files = append(files, fileListEnt{
726                                         Name:  path + ent.Name(),
727                                         Size:  ent.Size(),
728                                         IsDir: ent.IsDir(),
729                                 })
730                         }
731                 }
732                 return nil
733         }
734         if err := walk(""); err != nil {
735                 http.Error(w, "error getting directory listing: "+err.Error(), http.StatusInternalServerError)
736                 return
737         }
738
739         funcs := template.FuncMap{
740                 "nbsp": func(s string) template.HTML {
741                         return template.HTML(strings.Replace(s, " ", "&nbsp;", -1))
742                 },
743         }
744         tmpl, err := template.New("dir").Funcs(funcs).Parse(dirListingTemplate)
745         if err != nil {
746                 http.Error(w, "error parsing template: "+err.Error(), http.StatusInternalServerError)
747                 return
748         }
749         sort.Slice(files, func(i, j int) bool {
750                 return files[i].Name < files[j].Name
751         })
752         w.WriteHeader(http.StatusOK)
753         tmpl.Execute(w, map[string]interface{}{
754                 "CollectionName": collectionName,
755                 "Files":          files,
756                 "Request":        r,
757                 "StripParts":     strings.Count(strings.TrimRight(r.URL.Path, "/"), "/"),
758         })
759 }
760
// applyContentDispositionHdr sets a Content-Disposition response
// header when one is needed: either the response is to be treated as
// an attachment, or the request URI contains a query string (in which
// case the filename is spelled out so the client does not mistake the
// query for part of the name). A plain "inline" disposition is the
// default and is never sent.
func applyContentDispositionHdr(w http.ResponseWriter, r *http.Request, filename string, isAttachment bool) {
	hasQuery := strings.Contains(r.RequestURI, "?")
	if !isAttachment && !hasQuery {
		// Would be a bare "inline", which is implied anyway.
		return
	}
	value := "inline"
	if isAttachment {
		value = "attachment"
	}
	if hasQuery {
		// Help the UA realize that the filename is just
		// "filename.txt", not
		// "filename.txt?disposition=attachment".
		//
		// TODO(TC): Follow advice at RFC 6266 appendix D
		value += "; filename=" + strconv.QuoteToASCII(filename)
	}
	w.Header().Set("Content-Disposition", value)
}
778
779 func (h *handler) seeOtherWithCookie(w http.ResponseWriter, r *http.Request, location string, credentialsOK bool) {
780         if formTokens, haveFormTokens := r.Form["api_token"]; haveFormTokens {
781                 if !credentialsOK {
782                         // It is not safe to copy the provided token
783                         // into a cookie unless the current vhost
784                         // (origin) serves only a single collection or
785                         // we are in TrustAllContent mode.
786                         http.Error(w, "cannot serve inline content at this URL (possible configuration error; see https://doc.arvados.org/install/install-keep-web.html#dns)", http.StatusBadRequest)
787                         return
788                 }
789
790                 // The HttpOnly flag is necessary to prevent
791                 // JavaScript code (included in, or loaded by, a page
792                 // in the collection being served) from employing the
793                 // user's token beyond reading other files in the same
794                 // domain, i.e., same collection.
795                 //
796                 // The 303 redirect is necessary in the case of a GET
797                 // request to avoid exposing the token in the Location
798                 // bar, and in the case of a POST request to avoid
799                 // raising warnings when the user refreshes the
800                 // resulting page.
801                 for _, tok := range formTokens {
802                         if tok == "" {
803                                 continue
804                         }
805                         http.SetCookie(w, &http.Cookie{
806                                 Name:     "arvados_api_token",
807                                 Value:    auth.EncodeTokenCookie([]byte(tok)),
808                                 Path:     "/",
809                                 HttpOnly: true,
810                                 SameSite: http.SameSiteLaxMode,
811                         })
812                         break
813                 }
814         }
815
816         // Propagate query parameters (except api_token) from
817         // the original request.
818         redirQuery := r.URL.Query()
819         redirQuery.Del("api_token")
820
821         u := r.URL
822         if location != "" {
823                 newu, err := u.Parse(location)
824                 if err != nil {
825                         http.Error(w, "error resolving redirect target: "+err.Error(), http.StatusInternalServerError)
826                         return
827                 }
828                 u = newu
829         }
830         redir := (&url.URL{
831                 Scheme:   r.URL.Scheme,
832                 Host:     r.Host,
833                 Path:     u.Path,
834                 RawQuery: redirQuery.Encode(),
835         }).String()
836
837         w.Header().Add("Location", redir)
838         w.WriteHeader(http.StatusSeeOther)
839         io.WriteString(w, `<A href="`)
840         io.WriteString(w, html.EscapeString(redir))
841         io.WriteString(w, `">Continue</A>`)
842 }
843
844 func (h *handler) userPermittedToUploadOrDownload(method string, tokenUser *arvados.User) bool {
845         var permitDownload bool
846         var permitUpload bool
847         if tokenUser != nil && tokenUser.IsAdmin {
848                 permitUpload = h.Cluster.Collections.WebDAVPermission.Admin.Upload
849                 permitDownload = h.Cluster.Collections.WebDAVPermission.Admin.Download
850         } else {
851                 permitUpload = h.Cluster.Collections.WebDAVPermission.User.Upload
852                 permitDownload = h.Cluster.Collections.WebDAVPermission.User.Download
853         }
854         if (method == "PUT" || method == "POST") && !permitUpload {
855                 // Disallow operations that upload new files.
856                 // Permit webdav operations that move existing files around.
857                 return false
858         } else if method == "GET" && !permitDownload {
859                 // Disallow downloading file contents.
860                 // Permit webdav operations like PROPFIND that retrieve metadata
861                 // but not file contents.
862                 return false
863         }
864         return true
865 }
866
867 func (h *handler) logUploadOrDownload(
868         r *http.Request,
869         client *arvadosclient.ArvadosClient,
870         fs arvados.CustomFileSystem,
871         filepath string,
872         collection *arvados.Collection,
873         user *arvados.User) {
874
875         log := ctxlog.FromContext(r.Context())
876         props := make(map[string]string)
877         props["reqPath"] = r.URL.Path
878         var useruuid string
879         if user != nil {
880                 log = log.WithField("user_uuid", user.UUID).
881                         WithField("user_full_name", user.FullName)
882                 useruuid = user.UUID
883         } else {
884                 useruuid = fmt.Sprintf("%s-tpzed-anonymouspublic", h.Cluster.ClusterID)
885         }
886         if collection == nil && fs != nil {
887                 collection, filepath = h.determineCollection(fs, filepath)
888         }
889         if collection != nil {
890                 log = log.WithField("collection_file_path", filepath)
891                 props["collection_file_path"] = filepath
892                 // h.determineCollection populates the collection_uuid
893                 // prop with the PDH, if this collection is being
894                 // accessed via PDH. For logging, we use a different
895                 // field depending on whether it's a UUID or PDH.
896                 if len(collection.UUID) > 32 {
897                         log = log.WithField("portable_data_hash", collection.UUID)
898                         props["portable_data_hash"] = collection.UUID
899                 } else {
900                         log = log.WithField("collection_uuid", collection.UUID)
901                         props["collection_uuid"] = collection.UUID
902                 }
903         }
904         if r.Method == "PUT" || r.Method == "POST" {
905                 log.Info("File upload")
906                 if h.Cluster.Collections.WebDAVLogEvents {
907                         go func() {
908                                 lr := arvadosclient.Dict{"log": arvadosclient.Dict{
909                                         "object_uuid": useruuid,
910                                         "event_type":  "file_upload",
911                                         "properties":  props}}
912                                 err := client.Create("logs", lr, nil)
913                                 if err != nil {
914                                         log.WithError(err).Error("Failed to create upload log event on API server")
915                                 }
916                         }()
917                 }
918         } else if r.Method == "GET" {
919                 if collection != nil && collection.PortableDataHash != "" {
920                         log = log.WithField("portable_data_hash", collection.PortableDataHash)
921                         props["portable_data_hash"] = collection.PortableDataHash
922                 }
923                 log.Info("File download")
924                 if h.Cluster.Collections.WebDAVLogEvents {
925                         go func() {
926                                 lr := arvadosclient.Dict{"log": arvadosclient.Dict{
927                                         "object_uuid": useruuid,
928                                         "event_type":  "file_download",
929                                         "properties":  props}}
930                                 err := client.Create("logs", lr, nil)
931                                 if err != nil {
932                                         log.WithError(err).Error("Failed to create download log event on API server")
933                                 }
934                         }()
935                 }
936         }
937 }
938
939 func (h *handler) determineCollection(fs arvados.CustomFileSystem, path string) (*arvados.Collection, string) {
940         target := strings.TrimSuffix(path, "/")
941         for cut := len(target); cut >= 0; cut = strings.LastIndexByte(target, '/') {
942                 target = target[:cut]
943                 fi, err := fs.Stat(target)
944                 if os.IsNotExist(err) {
945                         // creating a new file/dir, or download
946                         // destined to fail
947                         continue
948                 } else if err != nil {
949                         return nil, ""
950                 }
951                 switch src := fi.Sys().(type) {
952                 case *arvados.Collection:
953                         return src, strings.TrimPrefix(path[len(target):], "/")
954                 case *arvados.Group:
955                         return nil, ""
956                 default:
957                         if _, ok := src.(error); ok {
958                                 return nil, ""
959                         }
960                 }
961         }
962         return nil, ""
963 }
964
// lockTidyInterval is the minimum time between sweeps in which
// collectionLock deletes per-collection locks that are not currently
// held.
var lockTidyInterval = time.Minute * 10
966
967 // Lock the specified collection for reading or writing. Caller must
968 // call Unlock() on the returned Locker when the operation is
969 // finished.
970 func (h *handler) collectionLock(collectionID string, writing bool) sync.Locker {
971         h.lockMtx.Lock()
972         defer h.lockMtx.Unlock()
973         if time.Since(h.lockTidied) > lockTidyInterval {
974                 // Periodically delete all locks that aren't in use.
975                 h.lockTidied = time.Now()
976                 for id, locker := range h.lock {
977                         if locker.TryLock() {
978                                 locker.Unlock()
979                                 delete(h.lock, id)
980                         }
981                 }
982         }
983         locker := h.lock[collectionID]
984         if locker == nil {
985                 locker = new(sync.RWMutex)
986                 if h.lock == nil {
987                         h.lock = map[string]*sync.RWMutex{}
988                 }
989                 h.lock[collectionID] = locker
990         }
991         if writing {
992                 locker.Lock()
993                 return locker
994         } else {
995                 locker.RLock()
996                 return locker.RLocker()
997         }
998 }
999
1000 func ServeCORSPreflight(w http.ResponseWriter, header http.Header) bool {
1001         method := header.Get("Access-Control-Request-Method")
1002         if method == "" {
1003                 return false
1004         }
1005         if !browserMethod[method] && !webdavMethod[method] {
1006                 w.WriteHeader(http.StatusMethodNotAllowed)
1007                 return true
1008         }
1009         w.Header().Set("Access-Control-Allow-Headers", corsAllowHeadersHeader)
1010         w.Header().Set("Access-Control-Allow-Methods", "COPY, DELETE, GET, LOCK, MKCOL, MOVE, OPTIONS, POST, PROPFIND, PROPPATCH, PUT, RMCOL, UNLOCK")
1011         w.Header().Set("Access-Control-Allow-Origin", "*")
1012         w.Header().Set("Access-Control-Max-Age", "86400")
1013         return true
1014 }