21606: Add output buffer for webdav downloads.
[arvados.git] / services / keep-web / handler.go
1 // Copyright (C) The Arvados Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: AGPL-3.0
4
5 package keepweb
6
7 import (
8         "encoding/json"
9         "errors"
10         "fmt"
11         "html"
12         "html/template"
13         "io"
14         "net/http"
15         "net/url"
16         "os"
17         "sort"
18         "strconv"
19         "strings"
20         "sync"
21         "time"
22
23         "git.arvados.org/arvados.git/lib/cmd"
24         "git.arvados.org/arvados.git/lib/webdavfs"
25         "git.arvados.org/arvados.git/sdk/go/arvados"
26         "git.arvados.org/arvados.git/sdk/go/arvadosclient"
27         "git.arvados.org/arvados.git/sdk/go/auth"
28         "git.arvados.org/arvados.git/sdk/go/ctxlog"
29         "git.arvados.org/arvados.git/sdk/go/httpserver"
30         "github.com/sirupsen/logrus"
31         "golang.org/x/net/webdav"
32 )
33
34 type handler struct {
35         Cache   cache
36         Cluster *arvados.Cluster
37         metrics *metrics
38
39         lockMtx    sync.Mutex
40         lock       map[string]*sync.RWMutex
41         lockTidied time.Time
42 }
43
var (
	// urlPDHDecoder rewrites " " and "-" to "+", recovering a
	// portable data hash whose "+" was altered in a URL.
	urlPDHDecoder = strings.NewReplacer(" ", "+", "-", "+")

	// Response bodies used for 404 and 401 replies.
	notFoundMessage     = "Not Found"
	unauthorizedMessage = "401 Unauthorized\n\nA valid Arvados token must be provided to access this resource."
)
48
49 // parseCollectionIDFromURL returns a UUID or PDH if s is a UUID or a
50 // PDH (even if it is a PDH with "+" replaced by " " or "-");
51 // otherwise "".
52 func parseCollectionIDFromURL(s string) string {
53         if arvadosclient.UUIDMatch(s) {
54                 return s
55         }
56         if pdh := urlPDHDecoder.Replace(s); arvadosclient.PDHMatch(pdh) {
57                 return pdh
58         }
59         return ""
60 }
61
62 func (h *handler) serveStatus(w http.ResponseWriter, r *http.Request) {
63         json.NewEncoder(w).Encode(struct{ Version string }{cmd.Version.String()})
64 }
65
// errorWithHTTPStatus is satisfied by values (errors, in this file's
// usage with errors.As) that can report the HTTP status code they
// should be surfaced with.
type errorWithHTTPStatus interface {
	// HTTPStatus returns the HTTP status code associated with
	// the value.
	HTTPStatus() int
}
69
70 // updateOnSuccess wraps httpserver.ResponseWriter. If the handler
71 // sends an HTTP header indicating success, updateOnSuccess first
72 // calls the provided update func. If the update func fails, an error
73 // response is sent (using the error's HTTP status or 500 if none),
74 // and the status code and body sent by the handler are ignored (all
75 // response writes return the update error).
76 type updateOnSuccess struct {
77         httpserver.ResponseWriter
78         logger     logrus.FieldLogger
79         update     func() error
80         sentHeader bool
81         err        error
82 }
83
84 func (uos *updateOnSuccess) Write(p []byte) (int, error) {
85         if !uos.sentHeader {
86                 uos.WriteHeader(http.StatusOK)
87         }
88         if uos.err != nil {
89                 return 0, uos.err
90         }
91         return uos.ResponseWriter.Write(p)
92 }
93
94 func (uos *updateOnSuccess) WriteHeader(code int) {
95         if !uos.sentHeader {
96                 uos.sentHeader = true
97                 if code >= 200 && code < 400 {
98                         if uos.err = uos.update(); uos.err != nil {
99                                 code := http.StatusInternalServerError
100                                 if he := errorWithHTTPStatus(nil); errors.As(uos.err, &he) {
101                                         code = he.HTTPStatus()
102                                 }
103                                 uos.logger.WithError(uos.err).Errorf("update() returned %T error, changing response to HTTP %d", uos.err, code)
104                                 http.Error(uos.ResponseWriter, uos.err.Error(), code)
105                                 return
106                         }
107                 }
108         }
109         uos.ResponseWriter.WriteHeader(code)
110 }
111
// corsAllowHeadersHeader is the comma-separated list of request
// header names, including the WebDAV-specific ones.
var corsAllowHeadersHeader = strings.Join([]string{
	"Authorization", "Content-Type", "Range",
	// WebDAV request headers:
	"Depth", "Destination", "If", "Lock-Token", "Overwrite", "Timeout", "Cache-Control",
}, ", ")

// writeMethod is the set of request methods treated as modifying a
// collection.
var writeMethod = map[string]bool{
	"COPY":      true,
	"DELETE":    true,
	"LOCK":      true,
	"MKCOL":     true,
	"MOVE":      true,
	"PROPPATCH": true,
	"PUT":       true,
	"RMCOL":     true,
	"UNLOCK":    true,
}

// webdavMethod is the set of accepted WebDAV request methods.
var webdavMethod = map[string]bool{
	"COPY":      true,
	"DELETE":    true,
	"LOCK":      true,
	"MKCOL":     true,
	"MOVE":      true,
	"OPTIONS":   true,
	"PROPFIND":  true,
	"PROPPATCH": true,
	"PUT":       true,
	"RMCOL":     true,
	"UNLOCK":    true,
}

// browserMethod is the set of accepted methods that a web browser
// issues for page views and form posts.
var browserMethod = map[string]bool{
	"GET":  true,
	"HEAD": true,
	"POST": true,
}

// siteFSDir lists the top-level dirs to serve with siteFS.
var siteFSDir = map[string]bool{
	"":      true, // root directory
	"by_id": true,
	"users": true,
}
154
// stripDefaultPort returns host in lower case, with the port removed
// if it is 80 or 443. Both default ports are stripped regardless of
// scheme, so port 80 and port 443 count as the same vhost.
func stripDefaultPort(host string) string {
	parsed := url.URL{Host: host}
	switch parsed.Port() {
	case "80", "443":
		return strings.ToLower(parsed.Hostname())
	}
	return strings.ToLower(host)
}
164
165 // CheckHealth implements service.Handler.
166 func (h *handler) CheckHealth() error {
167         return nil
168 }
169
170 // Done implements service.Handler.
171 func (h *handler) Done() <-chan struct{} {
172         return nil
173 }
174
175 // ServeHTTP implements http.Handler.
176 func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
177         if xfp := r.Header.Get("X-Forwarded-Proto"); xfp != "" && xfp != "http" {
178                 r.URL.Scheme = xfp
179         }
180
181         wbuffer := newWriteBuffer(wOrig, int(h.Cluster.Collections.WebDAVOutputBuffer))
182         defer wbuffer.Close()
183         w := httpserver.WrapResponseWriter(responseWriter{
184                 Writer:         wbuffer,
185                 ResponseWriter: wOrig,
186         })
187
188         if r.Method == "OPTIONS" && ServeCORSPreflight(w, r.Header) {
189                 return
190         }
191
192         if !browserMethod[r.Method] && !webdavMethod[r.Method] {
193                 w.WriteHeader(http.StatusMethodNotAllowed)
194                 return
195         }
196
197         if r.Header.Get("Origin") != "" {
198                 // Allow simple cross-origin requests without user
199                 // credentials ("user credentials" as defined by CORS,
200                 // i.e., cookies, HTTP authentication, and client-side
201                 // SSL certificates. See
202                 // http://www.w3.org/TR/cors/#user-credentials).
203                 w.Header().Set("Access-Control-Allow-Origin", "*")
204                 w.Header().Set("Access-Control-Expose-Headers", "Content-Range")
205         }
206
207         if h.serveS3(w, r) {
208                 return
209         }
210
211         webdavPrefix := ""
212         arvPath := r.URL.Path
213         if prefix := r.Header.Get("X-Webdav-Prefix"); prefix != "" {
214                 // Enable a proxy (e.g., container log handler in
215                 // controller) to satisfy a request for path
216                 // "/foo/bar/baz.txt" using content from
217                 // "//abc123-4.internal/bar/baz.txt", by adding a
218                 // request header "X-Webdav-Prefix: /foo"
219                 if !strings.HasPrefix(arvPath, prefix) {
220                         http.Error(w, "X-Webdav-Prefix header is not a prefix of the requested path", http.StatusBadRequest)
221                         return
222                 }
223                 arvPath = r.URL.Path[len(prefix):]
224                 if arvPath == "" {
225                         arvPath = "/"
226                 }
227                 w.Header().Set("Vary", "X-Webdav-Prefix, "+w.Header().Get("Vary"))
228                 webdavPrefix = prefix
229         }
230         pathParts := strings.Split(arvPath[1:], "/")
231
232         var stripParts int
233         var collectionID string
234         var tokens []string
235         var reqTokens []string
236         var pathToken bool
237         var attachment bool
238         var useSiteFS bool
239         credentialsOK := h.Cluster.Collections.TrustAllContent
240         reasonNotAcceptingCredentials := ""
241
242         if r.Host != "" && stripDefaultPort(r.Host) == stripDefaultPort(h.Cluster.Services.WebDAVDownload.ExternalURL.Host) {
243                 credentialsOK = true
244                 attachment = true
245         } else if r.FormValue("disposition") == "attachment" {
246                 attachment = true
247         }
248
249         if !credentialsOK {
250                 reasonNotAcceptingCredentials = fmt.Sprintf("vhost %q does not specify a single collection ID or match Services.WebDAVDownload.ExternalURL %q, and Collections.TrustAllContent is false",
251                         r.Host, h.Cluster.Services.WebDAVDownload.ExternalURL)
252         }
253
254         if collectionID = arvados.CollectionIDFromDNSName(r.Host); collectionID != "" {
255                 // http://ID.collections.example/PATH...
256                 credentialsOK = true
257         } else if r.URL.Path == "/status.json" {
258                 h.serveStatus(w, r)
259                 return
260         } else if siteFSDir[pathParts[0]] {
261                 useSiteFS = true
262         } else if len(pathParts) >= 1 && strings.HasPrefix(pathParts[0], "c=") {
263                 // /c=ID[/PATH...]
264                 collectionID = parseCollectionIDFromURL(pathParts[0][2:])
265                 stripParts = 1
266         } else if len(pathParts) >= 2 && pathParts[0] == "collections" {
267                 if len(pathParts) >= 4 && pathParts[1] == "download" {
268                         // /collections/download/ID/TOKEN/PATH...
269                         collectionID = parseCollectionIDFromURL(pathParts[2])
270                         tokens = []string{pathParts[3]}
271                         stripParts = 4
272                         pathToken = true
273                 } else {
274                         // /collections/ID/PATH...
275                         collectionID = parseCollectionIDFromURL(pathParts[1])
276                         stripParts = 2
277                         // This path is only meant to work for public
278                         // data. Tokens provided with the request are
279                         // ignored.
280                         credentialsOK = false
281                         reasonNotAcceptingCredentials = "the '/collections/UUID/PATH' form only works for public data"
282                 }
283         }
284
285         forceReload := false
286         if cc := r.Header.Get("Cache-Control"); strings.Contains(cc, "no-cache") || strings.Contains(cc, "must-revalidate") {
287                 forceReload = true
288         }
289
290         if credentialsOK {
291                 reqTokens = auth.CredentialsFromRequest(r).Tokens
292         }
293
294         r.ParseForm()
295         origin := r.Header.Get("Origin")
296         cors := origin != "" && !strings.HasSuffix(origin, "://"+r.Host)
297         safeAjax := cors && (r.Method == http.MethodGet || r.Method == http.MethodHead)
298         // Important distinction: safeAttachment checks whether api_token exists
299         // as a query parameter. haveFormTokens checks whether api_token exists
300         // as request form data *or* a query parameter. Different checks are
301         // necessary because both the request disposition and the location of
302         // the API token affect whether or not the request needs to be
303         // redirected. The different branch comments below explain further.
304         safeAttachment := attachment && !r.URL.Query().Has("api_token")
305         if formTokens, haveFormTokens := r.Form["api_token"]; !haveFormTokens {
306                 // No token to use or redact.
307         } else if safeAjax || safeAttachment {
308                 // If this is a cross-origin request, the URL won't
309                 // appear in the browser's address bar, so
310                 // substituting a clipboard-safe URL is pointless.
311                 // Redirect-with-cookie wouldn't work anyway, because
312                 // it's not safe to allow third-party use of our
313                 // cookie.
314                 //
315                 // If we're supplying an attachment, we don't need to
316                 // convert POST to GET to avoid the "really resubmit
317                 // form?" problem, so provided the token isn't
318                 // embedded in the URL, there's no reason to do
319                 // redirect-with-cookie in this case either.
320                 for _, tok := range formTokens {
321                         reqTokens = append(reqTokens, tok)
322                 }
323         } else if browserMethod[r.Method] {
324                 // If this is a page view, and the client provided a
325                 // token via query string or POST body, we must put
326                 // the token in an HttpOnly cookie, and redirect to an
327                 // equivalent URL with the query param redacted and
328                 // method = GET.
329                 h.seeOtherWithCookie(w, r, "", credentialsOK)
330                 return
331         }
332
333         targetPath := pathParts[stripParts:]
334         if tokens == nil && len(targetPath) > 0 && strings.HasPrefix(targetPath[0], "t=") {
335                 // http://ID.example/t=TOKEN/PATH...
336                 // /c=ID/t=TOKEN/PATH...
337                 //
338                 // This form must only be used to pass scoped tokens
339                 // that give permission for a single collection. See
340                 // FormValue case above.
341                 tokens = []string{targetPath[0][2:]}
342                 pathToken = true
343                 targetPath = targetPath[1:]
344                 stripParts++
345         }
346
347         fsprefix := ""
348         if useSiteFS {
349                 if writeMethod[r.Method] {
350                         http.Error(w, webdavfs.ErrReadOnly.Error(), http.StatusMethodNotAllowed)
351                         return
352                 }
353                 if len(reqTokens) == 0 {
354                         w.Header().Add("WWW-Authenticate", "Basic realm=\"collections\"")
355                         http.Error(w, unauthorizedMessage, http.StatusUnauthorized)
356                         return
357                 }
358                 tokens = reqTokens
359         } else if collectionID == "" {
360                 http.Error(w, notFoundMessage, http.StatusNotFound)
361                 return
362         } else {
363                 fsprefix = "by_id/" + collectionID + "/"
364         }
365
366         if src := r.Header.Get("X-Webdav-Source"); strings.HasPrefix(src, "/") && !strings.Contains(src, "//") && !strings.Contains(src, "/../") {
367                 fsprefix += src[1:]
368         }
369
370         if tokens == nil {
371                 tokens = reqTokens
372                 if h.Cluster.Users.AnonymousUserToken != "" {
373                         tokens = append(tokens, h.Cluster.Users.AnonymousUserToken)
374                 }
375         }
376
377         if len(targetPath) > 0 && targetPath[0] == "_" {
378                 // If a collection has a directory called "t=foo" or
379                 // "_", it can be served at
380                 // //collections.example/_/t=foo/ or
381                 // //collections.example/_/_/ respectively:
382                 // //collections.example/t=foo/ won't work because
383                 // t=foo will be interpreted as a token "foo".
384                 targetPath = targetPath[1:]
385                 stripParts++
386         }
387
388         dirOpenMode := os.O_RDONLY
389         if writeMethod[r.Method] {
390                 dirOpenMode = os.O_RDWR
391         }
392
393         var tokenValid bool
394         var tokenScopeProblem bool
395         var token string
396         var tokenUser *arvados.User
397         var sessionFS arvados.CustomFileSystem
398         var session *cachedSession
399         var collectionDir arvados.File
400         for _, token = range tokens {
401                 var statusErr errorWithHTTPStatus
402                 fs, sess, user, err := h.Cache.GetSession(token)
403                 if errors.As(err, &statusErr) && statusErr.HTTPStatus() == http.StatusUnauthorized {
404                         // bad token
405                         continue
406                 } else if err != nil {
407                         http.Error(w, "cache error: "+err.Error(), http.StatusInternalServerError)
408                         return
409                 }
410                 if token != h.Cluster.Users.AnonymousUserToken {
411                         tokenValid = true
412                 }
413                 f, err := fs.OpenFile(fsprefix, dirOpenMode, 0)
414                 if errors.As(err, &statusErr) &&
415                         statusErr.HTTPStatus() == http.StatusForbidden &&
416                         token != h.Cluster.Users.AnonymousUserToken {
417                         // collection id is outside scope of supplied
418                         // token
419                         tokenScopeProblem = true
420                         sess.Release()
421                         continue
422                 } else if os.IsNotExist(err) {
423                         // collection does not exist or is not
424                         // readable using this token
425                         sess.Release()
426                         continue
427                 } else if err != nil {
428                         http.Error(w, err.Error(), http.StatusInternalServerError)
429                         sess.Release()
430                         return
431                 }
432                 defer f.Close()
433                 defer sess.Release()
434
435                 collectionDir, sessionFS, session, tokenUser = f, fs, sess, user
436                 break
437         }
438         if forceReload && collectionDir != nil {
439                 err := collectionDir.Sync()
440                 if err != nil {
441                         if he := errorWithHTTPStatus(nil); errors.As(err, &he) {
442                                 http.Error(w, err.Error(), he.HTTPStatus())
443                         } else {
444                                 http.Error(w, err.Error(), http.StatusInternalServerError)
445                         }
446                         return
447                 }
448         }
449         if session == nil {
450                 if pathToken {
451                         // The URL is a "secret sharing link" that
452                         // didn't work out.  Asking the client for
453                         // additional credentials would just be
454                         // confusing.
455                         http.Error(w, notFoundMessage, http.StatusNotFound)
456                         return
457                 }
458                 if tokenValid {
459                         // The client provided valid token(s), but the
460                         // collection was not found.
461                         http.Error(w, notFoundMessage, http.StatusNotFound)
462                         return
463                 }
464                 if tokenScopeProblem {
465                         // The client provided a valid token but
466                         // fetching a collection returned 401, which
467                         // means the token scope doesn't permit
468                         // fetching that collection.
469                         http.Error(w, notFoundMessage, http.StatusForbidden)
470                         return
471                 }
472                 // The client's token was invalid (e.g., expired), or
473                 // the client didn't even provide one.  Redirect to
474                 // workbench2's login-and-redirect-to-download url if
475                 // this is a browser navigation request. (The redirect
476                 // flow can't preserve the original method if it's not
477                 // GET, and doesn't make sense if the UA is a
478                 // command-line tool, is trying to load an inline
479                 // image, etc.; in these cases, there's nothing we can
480                 // do, so return 401 unauthorized.)
481                 //
482                 // Note Sec-Fetch-Mode is sent by all non-EOL
483                 // browsers, except Safari.
484                 // https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Sec-Fetch-Mode
485                 //
486                 // TODO(TC): This response would be confusing to
487                 // someone trying (anonymously) to download public
488                 // data that has been deleted.  Allow a referrer to
489                 // provide this context somehow?
490                 if r.Method == http.MethodGet && r.Header.Get("Sec-Fetch-Mode") == "navigate" {
491                         target := url.URL(h.Cluster.Services.Workbench2.ExternalURL)
492                         redirkey := "redirectToPreview"
493                         if attachment {
494                                 redirkey = "redirectToDownload"
495                         }
496                         callback := "/c=" + collectionID + "/" + strings.Join(targetPath, "/")
497                         // target.RawQuery = url.Values{redirkey:
498                         // {target}}.Encode() would be the obvious
499                         // thing to do here, but wb2 doesn't decode
500                         // this as a query param -- it takes
501                         // everything after "${redirkey}=" as the
502                         // target URL. If we encode "/" as "%2F" etc.,
503                         // the redirect won't work.
504                         target.RawQuery = redirkey + "=" + callback
505                         w.Header().Add("Location", target.String())
506                         w.WriteHeader(http.StatusSeeOther)
507                         return
508                 }
509                 if !credentialsOK {
510                         http.Error(w, fmt.Sprintf("Authorization tokens are not accepted here: %v, and no anonymous user token is configured.", reasonNotAcceptingCredentials), http.StatusUnauthorized)
511                         return
512                 }
513                 // If none of the above cases apply, suggest the
514                 // user-agent (which is either a non-browser agent
515                 // like wget, or a browser that can't redirect through
516                 // a login flow) prompt the user for credentials.
517                 w.Header().Add("WWW-Authenticate", "Basic realm=\"collections\"")
518                 http.Error(w, unauthorizedMessage, http.StatusUnauthorized)
519                 return
520         }
521
522         if r.Method == http.MethodGet || r.Method == http.MethodHead {
523                 targetfnm := fsprefix + strings.Join(pathParts[stripParts:], "/")
524                 if fi, err := sessionFS.Stat(targetfnm); err == nil && fi.IsDir() {
525                         if !strings.HasSuffix(r.URL.Path, "/") {
526                                 h.seeOtherWithCookie(w, r, r.URL.Path+"/", credentialsOK)
527                         } else {
528                                 h.serveDirectory(w, r, fi.Name(), sessionFS, targetfnm, !useSiteFS)
529                         }
530                         return
531                 }
532         }
533
534         var basename string
535         if len(targetPath) > 0 {
536                 basename = targetPath[len(targetPath)-1]
537         }
538         if arvadosclient.PDHMatch(collectionID) && writeMethod[r.Method] {
539                 http.Error(w, webdavfs.ErrReadOnly.Error(), http.StatusMethodNotAllowed)
540                 return
541         }
542         if !h.userPermittedToUploadOrDownload(r.Method, tokenUser) {
543                 http.Error(w, "Not permitted", http.StatusForbidden)
544                 return
545         }
546         h.logUploadOrDownload(r, session.arvadosclient, sessionFS, fsprefix+strings.Join(targetPath, "/"), nil, tokenUser)
547
548         writing := writeMethod[r.Method]
549         locker := h.collectionLock(collectionID, writing)
550         defer locker.Unlock()
551
552         if writing {
553                 // Save the collection only if/when all
554                 // webdav->filesystem operations succeed --
555                 // and send a 500 error if the modified
556                 // collection can't be saved.
557                 //
558                 // Perform the write in a separate sitefs, so
559                 // concurrent read operations on the same
560                 // collection see the previous saved
561                 // state. After the write succeeds and the
562                 // collection record is updated, we reset the
563                 // session so the updates are visible in
564                 // subsequent read requests.
565                 client := session.client.WithRequestID(r.Header.Get("X-Request-Id"))
566                 sessionFS = client.SiteFileSystem(session.keepclient)
567                 writingDir, err := sessionFS.OpenFile(fsprefix, os.O_RDONLY, 0)
568                 if err != nil {
569                         http.Error(w, err.Error(), http.StatusInternalServerError)
570                         return
571                 }
572                 defer writingDir.Close()
573                 w = &updateOnSuccess{
574                         ResponseWriter: w,
575                         logger:         ctxlog.FromContext(r.Context()),
576                         update: func() error {
577                                 err := writingDir.Sync()
578                                 var te arvados.TransactionError
579                                 if errors.As(err, &te) {
580                                         err = te
581                                 }
582                                 if err != nil {
583                                         return err
584                                 }
585                                 // Sync the changes to the persistent
586                                 // sessionfs for this token.
587                                 snap, err := writingDir.Snapshot()
588                                 if err != nil {
589                                         return err
590                                 }
591                                 collectionDir.Splice(snap)
592                                 return nil
593                         }}
594         }
595         if r.Method == http.MethodGet {
596                 applyContentDispositionHdr(w, r, basename, attachment)
597         }
598         if webdavPrefix == "" {
599                 webdavPrefix = "/" + strings.Join(pathParts[:stripParts], "/")
600         }
601         wh := &webdav.Handler{
602                 Prefix: webdavPrefix,
603                 FileSystem: &webdavfs.FS{
604                         FileSystem:    sessionFS,
605                         Prefix:        fsprefix,
606                         Writing:       writeMethod[r.Method],
607                         AlwaysReadEOF: r.Method == "PROPFIND",
608                 },
609                 LockSystem: webdavfs.NoLockSystem,
610                 Logger: func(r *http.Request, err error) {
611                         if err != nil && !os.IsNotExist(err) {
612                                 ctxlog.FromContext(r.Context()).WithError(err).Error("error reported by webdav handler")
613                         }
614                 },
615         }
616         h.metrics.track(wh, w, r)
617         if r.Method == http.MethodGet && w.WroteStatus() == http.StatusOK {
618                 wrote := int64(w.WroteBodyBytes())
619                 fnm := strings.Join(pathParts[stripParts:], "/")
620                 fi, err := wh.FileSystem.Stat(r.Context(), fnm)
621                 if err == nil && fi.Size() != wrote {
622                         var n int
623                         f, err := wh.FileSystem.OpenFile(r.Context(), fnm, os.O_RDONLY, 0)
624                         if err == nil {
625                                 n, err = f.Read(make([]byte, 1024))
626                                 f.Close()
627                         }
628                         ctxlog.FromContext(r.Context()).Errorf("stat.Size()==%d but only wrote %d bytes; read(1024) returns %d, %v", fi.Size(), wrote, n, err)
629                 }
630         }
631 }
632
// dirListingTemplate is the html/template source for the directory
// listing page. It reads .CollectionName, .StripParts, .Request and
// .Files (each entry with .Name, .Size and .IsDir), and needs an
// "nbsp" template func to be registered before parsing.
var dirListingTemplate = `<!DOCTYPE HTML>
<HTML><HEAD>
  <META name="robots" content="NOINDEX">
  <TITLE>{{ .CollectionName }}</TITLE>
  <STYLE type="text/css">
    body {
      margin: 1.5em;
    }
    pre {
      background-color: #D9EDF7;
      border-radius: .25em;
      padding: .75em;
      overflow: auto;
    }
    .footer p {
      font-size: 82%;
    }
    ul {
      padding: 0;
    }
    ul li {
      font-family: monospace;
      list-style: none;
    }
  </STYLE>
</HEAD>
<BODY>

<H1>{{ .CollectionName }}</H1>

<P>This collection of data files is being shared with you through
Arvados.  You can download individual files listed below.  To download
the entire directory tree with wget, try:</P>

<PRE>$ wget --mirror --no-parent --no-host --cut-dirs={{ .StripParts }} https://{{ .Request.Host }}{{ .Request.URL.Path }}</PRE>

<H2>File Listing</H2>

{{if .Files}}
<UL>
{{range .Files}}
{{if .IsDir }}
  <LI>{{" " | printf "%15s  " | nbsp}}<A href="{{print "./" .Name}}/">{{.Name}}/</A></LI>
{{else}}
  <LI>{{.Size | printf "%15d  " | nbsp}}<A href="{{print "./" .Name}}">{{.Name}}</A></LI>
{{end}}
{{end}}
</UL>
{{else}}
<P>(No files; this collection is empty.)</P>
{{end}}

<HR noshade>
<DIV class="footer">
  <P>
    About Arvados:
    Arvados is a free and open source software bioinformatics platform.
    To learn more, visit arvados.org.
    Arvados is not responsible for the files listed on this page.
  </P>
</DIV>

</BODY>
`
697
// fileListEnt is one row of a directory listing.
type fileListEnt struct {
	// Name is the entry's path as shown and linked in the listing.
	Name string
	// Size is the entry's size as reported by its file info.
	Size int64
	// IsDir is true when the entry is a directory.
	IsDir bool
}
703
704 func (h *handler) serveDirectory(w http.ResponseWriter, r *http.Request, collectionName string, fs http.FileSystem, base string, recurse bool) {
705         var files []fileListEnt
706         var walk func(string) error
707         if !strings.HasSuffix(base, "/") {
708                 base = base + "/"
709         }
710         walk = func(path string) error {
711                 dirname := base + path
712                 if dirname != "/" {
713                         dirname = strings.TrimSuffix(dirname, "/")
714                 }
715                 d, err := fs.Open(dirname)
716                 if err != nil {
717                         return err
718                 }
719                 ents, err := d.Readdir(-1)
720                 if err != nil {
721                         return err
722                 }
723                 for _, ent := range ents {
724                         if recurse && ent.IsDir() {
725                                 err = walk(path + ent.Name() + "/")
726                                 if err != nil {
727                                         return err
728                                 }
729                         } else {
730                                 files = append(files, fileListEnt{
731                                         Name:  path + ent.Name(),
732                                         Size:  ent.Size(),
733                                         IsDir: ent.IsDir(),
734                                 })
735                         }
736                 }
737                 return nil
738         }
739         if err := walk(""); err != nil {
740                 http.Error(w, "error getting directory listing: "+err.Error(), http.StatusInternalServerError)
741                 return
742         }
743
744         funcs := template.FuncMap{
745                 "nbsp": func(s string) template.HTML {
746                         return template.HTML(strings.Replace(s, " ", "&nbsp;", -1))
747                 },
748         }
749         tmpl, err := template.New("dir").Funcs(funcs).Parse(dirListingTemplate)
750         if err != nil {
751                 http.Error(w, "error parsing template: "+err.Error(), http.StatusInternalServerError)
752                 return
753         }
754         sort.Slice(files, func(i, j int) bool {
755                 return files[i].Name < files[j].Name
756         })
757         w.WriteHeader(http.StatusOK)
758         tmpl.Execute(w, map[string]interface{}{
759                 "CollectionName": collectionName,
760                 "Files":          files,
761                 "Request":        r,
762                 "StripParts":     strings.Count(strings.TrimRight(r.URL.Path, "/"), "/"),
763         })
764 }
765
// applyContentDispositionHdr sets the Content-Disposition response
// header on w when one is needed.
//
// The disposition type is "attachment" if isAttachment is true,
// otherwise "inline". A filename parameter is appended only when the
// request URI contains a query string. No header is set when the
// result would be a bare "inline".
func applyContentDispositionHdr(w http.ResponseWriter, r *http.Request, filename string, isAttachment bool) {
	hasQuery := strings.ContainsRune(r.RequestURI, '?')
	if !isAttachment && !hasQuery {
		// A bare "inline" is never sent.
		return
	}
	value := "inline"
	if isAttachment {
		value = "attachment"
	}
	if hasQuery {
		// Help the UA realize that the filename is just
		// "filename.txt", not
		// "filename.txt?disposition=attachment".
		//
		// TODO(TC): Follow advice at RFC 6266 appendix D
		value += "; filename=" + strconv.QuoteToASCII(filename)
	}
	w.Header().Set("Content-Disposition", value)
}
783
784 func (h *handler) seeOtherWithCookie(w http.ResponseWriter, r *http.Request, location string, credentialsOK bool) {
785         if formTokens, haveFormTokens := r.Form["api_token"]; haveFormTokens {
786                 if !credentialsOK {
787                         // It is not safe to copy the provided token
788                         // into a cookie unless the current vhost
789                         // (origin) serves only a single collection or
790                         // we are in TrustAllContent mode.
791                         http.Error(w, "cannot serve inline content at this URL (possible configuration error; see https://doc.arvados.org/install/install-keep-web.html#dns)", http.StatusBadRequest)
792                         return
793                 }
794
795                 // The HttpOnly flag is necessary to prevent
796                 // JavaScript code (included in, or loaded by, a page
797                 // in the collection being served) from employing the
798                 // user's token beyond reading other files in the same
799                 // domain, i.e., same collection.
800                 //
801                 // The 303 redirect is necessary in the case of a GET
802                 // request to avoid exposing the token in the Location
803                 // bar, and in the case of a POST request to avoid
804                 // raising warnings when the user refreshes the
805                 // resulting page.
806                 for _, tok := range formTokens {
807                         if tok == "" {
808                                 continue
809                         }
810                         http.SetCookie(w, &http.Cookie{
811                                 Name:     "arvados_api_token",
812                                 Value:    auth.EncodeTokenCookie([]byte(tok)),
813                                 Path:     "/",
814                                 HttpOnly: true,
815                                 SameSite: http.SameSiteLaxMode,
816                         })
817                         break
818                 }
819         }
820
821         // Propagate query parameters (except api_token) from
822         // the original request.
823         redirQuery := r.URL.Query()
824         redirQuery.Del("api_token")
825
826         u := r.URL
827         if location != "" {
828                 newu, err := u.Parse(location)
829                 if err != nil {
830                         http.Error(w, "error resolving redirect target: "+err.Error(), http.StatusInternalServerError)
831                         return
832                 }
833                 u = newu
834         }
835         redir := (&url.URL{
836                 Scheme:   r.URL.Scheme,
837                 Host:     r.Host,
838                 Path:     u.Path,
839                 RawQuery: redirQuery.Encode(),
840         }).String()
841
842         w.Header().Add("Location", redir)
843         w.WriteHeader(http.StatusSeeOther)
844         io.WriteString(w, `<A href="`)
845         io.WriteString(w, html.EscapeString(redir))
846         io.WriteString(w, `">Continue</A>`)
847 }
848
849 func (h *handler) userPermittedToUploadOrDownload(method string, tokenUser *arvados.User) bool {
850         var permitDownload bool
851         var permitUpload bool
852         if tokenUser != nil && tokenUser.IsAdmin {
853                 permitUpload = h.Cluster.Collections.WebDAVPermission.Admin.Upload
854                 permitDownload = h.Cluster.Collections.WebDAVPermission.Admin.Download
855         } else {
856                 permitUpload = h.Cluster.Collections.WebDAVPermission.User.Upload
857                 permitDownload = h.Cluster.Collections.WebDAVPermission.User.Download
858         }
859         if (method == "PUT" || method == "POST") && !permitUpload {
860                 // Disallow operations that upload new files.
861                 // Permit webdav operations that move existing files around.
862                 return false
863         } else if method == "GET" && !permitDownload {
864                 // Disallow downloading file contents.
865                 // Permit webdav operations like PROPFIND that retrieve metadata
866                 // but not file contents.
867                 return false
868         }
869         return true
870 }
871
872 func (h *handler) logUploadOrDownload(
873         r *http.Request,
874         client *arvadosclient.ArvadosClient,
875         fs arvados.CustomFileSystem,
876         filepath string,
877         collection *arvados.Collection,
878         user *arvados.User) {
879
880         log := ctxlog.FromContext(r.Context())
881         props := make(map[string]string)
882         props["reqPath"] = r.URL.Path
883         var useruuid string
884         if user != nil {
885                 log = log.WithField("user_uuid", user.UUID).
886                         WithField("user_full_name", user.FullName)
887                 useruuid = user.UUID
888         } else {
889                 useruuid = fmt.Sprintf("%s-tpzed-anonymouspublic", h.Cluster.ClusterID)
890         }
891         if collection == nil && fs != nil {
892                 collection, filepath = h.determineCollection(fs, filepath)
893         }
894         if collection != nil {
895                 log = log.WithField("collection_file_path", filepath)
896                 props["collection_file_path"] = filepath
897                 // h.determineCollection populates the collection_uuid
898                 // prop with the PDH, if this collection is being
899                 // accessed via PDH. For logging, we use a different
900                 // field depending on whether it's a UUID or PDH.
901                 if len(collection.UUID) > 32 {
902                         log = log.WithField("portable_data_hash", collection.UUID)
903                         props["portable_data_hash"] = collection.UUID
904                 } else {
905                         log = log.WithField("collection_uuid", collection.UUID)
906                         props["collection_uuid"] = collection.UUID
907                 }
908         }
909         if r.Method == "PUT" || r.Method == "POST" {
910                 log.Info("File upload")
911                 if h.Cluster.Collections.WebDAVLogEvents {
912                         go func() {
913                                 lr := arvadosclient.Dict{"log": arvadosclient.Dict{
914                                         "object_uuid": useruuid,
915                                         "event_type":  "file_upload",
916                                         "properties":  props}}
917                                 err := client.Create("logs", lr, nil)
918                                 if err != nil {
919                                         log.WithError(err).Error("Failed to create upload log event on API server")
920                                 }
921                         }()
922                 }
923         } else if r.Method == "GET" {
924                 if collection != nil && collection.PortableDataHash != "" {
925                         log = log.WithField("portable_data_hash", collection.PortableDataHash)
926                         props["portable_data_hash"] = collection.PortableDataHash
927                 }
928                 log.Info("File download")
929                 if h.Cluster.Collections.WebDAVLogEvents {
930                         go func() {
931                                 lr := arvadosclient.Dict{"log": arvadosclient.Dict{
932                                         "object_uuid": useruuid,
933                                         "event_type":  "file_download",
934                                         "properties":  props}}
935                                 err := client.Create("logs", lr, nil)
936                                 if err != nil {
937                                         log.WithError(err).Error("Failed to create download log event on API server")
938                                 }
939                         }()
940                 }
941         }
942 }
943
944 func (h *handler) determineCollection(fs arvados.CustomFileSystem, path string) (*arvados.Collection, string) {
945         target := strings.TrimSuffix(path, "/")
946         for cut := len(target); cut >= 0; cut = strings.LastIndexByte(target, '/') {
947                 target = target[:cut]
948                 fi, err := fs.Stat(target)
949                 if os.IsNotExist(err) {
950                         // creating a new file/dir, or download
951                         // destined to fail
952                         continue
953                 } else if err != nil {
954                         return nil, ""
955                 }
956                 switch src := fi.Sys().(type) {
957                 case *arvados.Collection:
958                         return src, strings.TrimPrefix(path[len(target):], "/")
959                 case *arvados.Group:
960                         return nil, ""
961                 default:
962                         if _, ok := src.(error); ok {
963                                 return nil, ""
964                         }
965                 }
966         }
967         return nil, ""
968 }
969
970 var lockTidyInterval = time.Minute * 10
971
972 // Lock the specified collection for reading or writing. Caller must
973 // call Unlock() on the returned Locker when the operation is
974 // finished.
975 func (h *handler) collectionLock(collectionID string, writing bool) sync.Locker {
976         h.lockMtx.Lock()
977         defer h.lockMtx.Unlock()
978         if time.Since(h.lockTidied) > lockTidyInterval {
979                 // Periodically delete all locks that aren't in use.
980                 h.lockTidied = time.Now()
981                 for id, locker := range h.lock {
982                         if locker.TryLock() {
983                                 locker.Unlock()
984                                 delete(h.lock, id)
985                         }
986                 }
987         }
988         locker := h.lock[collectionID]
989         if locker == nil {
990                 locker = new(sync.RWMutex)
991                 if h.lock == nil {
992                         h.lock = map[string]*sync.RWMutex{}
993                 }
994                 h.lock[collectionID] = locker
995         }
996         if writing {
997                 locker.Lock()
998                 return locker
999         } else {
1000                 locker.RLock()
1001                 return locker.RLocker()
1002         }
1003 }
1004
1005 func ServeCORSPreflight(w http.ResponseWriter, header http.Header) bool {
1006         method := header.Get("Access-Control-Request-Method")
1007         if method == "" {
1008                 return false
1009         }
1010         if !browserMethod[method] && !webdavMethod[method] {
1011                 w.WriteHeader(http.StatusMethodNotAllowed)
1012                 return true
1013         }
1014         w.Header().Set("Access-Control-Allow-Headers", corsAllowHeadersHeader)
1015         w.Header().Set("Access-Control-Allow-Methods", "COPY, DELETE, GET, LOCK, MKCOL, MOVE, OPTIONS, POST, PROPFIND, PROPPATCH, PUT, RMCOL, UNLOCK")
1016         w.Header().Set("Access-Control-Allow-Origin", "*")
1017         w.Header().Set("Access-Control-Max-Age", "86400")
1018         return true
1019 }