a30d40d217132127a3b62b0eaf0bfbbd6216b9fc
[arvados.git] / services / keep-web / handler.go
1 // Copyright (C) The Arvados Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: AGPL-3.0
4
5 package main
6
7 import (
8         "encoding/json"
9         "errors"
10         "fmt"
11         "html"
12         "html/template"
13         "io"
14         "net/http"
15         "net/url"
16         "os"
17         "sort"
18         "strconv"
19         "strings"
20         "sync"
21
22         "git.curoverse.com/arvados.git/sdk/go/arvados"
23         "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
24         "git.curoverse.com/arvados.git/sdk/go/auth"
25         "git.curoverse.com/arvados.git/sdk/go/health"
26         "git.curoverse.com/arvados.git/sdk/go/httpserver"
27         "git.curoverse.com/arvados.git/sdk/go/keepclient"
28         "golang.org/x/net/webdav"
29 )
30
// handler is the http.Handler for this service (see ServeHTTP). Its
// unexported fields are populated lazily by setup(), which ServeHTTP
// runs exactly once via setupOnce.
type handler struct {
        // Config supplies the settings read while serving requests
        // (cache, anonymous tokens, management token, host policy).
        Config        *Config
        // clientPool provides the API clients used per request.
        clientPool    *arvadosclient.ClientPool
        // setupOnce guards the one-time call to setup().
        setupOnce     sync.Once
        // healthHandler serves GET requests under "/_health/".
        healthHandler http.Handler
        // webdavLS is the lock system handed to every webdav.Handler.
        webdavLS      webdav.LockSystem
}
38
39 // parseCollectionIDFromDNSName returns a UUID or PDH if s begins with
40 // a UUID or URL-encoded PDH; otherwise "".
41 func parseCollectionIDFromDNSName(s string) string {
42         // Strip domain.
43         if i := strings.IndexRune(s, '.'); i >= 0 {
44                 s = s[:i]
45         }
46         // Names like {uuid}--collections.example.com serve the same
47         // purpose as {uuid}.collections.example.com but can reduce
48         // cost/effort of using [additional] wildcard certificates.
49         if i := strings.Index(s, "--"); i >= 0 {
50                 s = s[:i]
51         }
52         if arvadosclient.UUIDMatch(s) {
53                 return s
54         }
55         if pdh := strings.Replace(s, "-", "+", 1); arvadosclient.PDHMatch(pdh) {
56                 return pdh
57         }
58         return ""
59 }
60
61 var urlPDHDecoder = strings.NewReplacer(" ", "+", "-", "+")
62
63 // parseCollectionIDFromURL returns a UUID or PDH if s is a UUID or a
64 // PDH (even if it is a PDH with "+" replaced by " " or "-");
65 // otherwise "".
66 func parseCollectionIDFromURL(s string) string {
67         if arvadosclient.UUIDMatch(s) {
68                 return s
69         }
70         if pdh := urlPDHDecoder.Replace(s); arvadosclient.PDHMatch(pdh) {
71                 return pdh
72         }
73         return ""
74 }
75
76 func (h *handler) setup() {
77         h.clientPool = arvadosclient.MakeClientPool()
78
79         keepclient.RefreshServiceDiscoveryOnSIGHUP()
80
81         h.healthHandler = &health.Handler{
82                 Token:  h.Config.ManagementToken,
83                 Prefix: "/_health/",
84         }
85
86         // Even though we don't accept LOCK requests, every webdav
87         // handler must have a non-nil LockSystem.
88         h.webdavLS = &noLockSystem{}
89 }
90
91 func (h *handler) serveStatus(w http.ResponseWriter, r *http.Request) {
92         status := struct {
93                 cacheStats
94         }{
95                 cacheStats: h.Config.Cache.Stats(),
96         }
97         json.NewEncoder(w).Encode(status)
98 }
99
// updateOnSuccess wraps httpserver.ResponseWriter. If the handler
// sends an HTTP header indicating success, updateOnSuccess first
// calls the provided update func. If the update func fails, a 500
// response is sent, and the status code and body sent by the handler
// are ignored (all response writes return errors).
type updateOnSuccess struct {
        httpserver.ResponseWriter
        // update is called before a status code in the range
        // [200, 400) is passed to the wrapped ResponseWriter.
        update     func() error
        // sentHeader records that a status code has been passed
        // through to the wrapped ResponseWriter.
        sentHeader bool
        // dropBody is set once update has failed; Write then refuses
        // all further data.
        dropBody   bool
}

// errUpdateFailed is the error returned by Write after the update
// func has failed.
var errUpdateFailed = errors.New("update failed")
113
114 func (uos *updateOnSuccess) Write(p []byte) (int, error) {
115         if uos.dropBody {
116                 return 0, errUpdateFailed
117         }
118         if !uos.sentHeader {
119                 uos.WriteHeader(http.StatusOK)
120         }
121         return uos.ResponseWriter.Write(p)
122 }
123
124 func (uos *updateOnSuccess) WriteHeader(code int) {
125         if !uos.sentHeader {
126                 if code >= 200 && code < 400 {
127                         if err := uos.update(); err != nil {
128                                 http.Error(uos.ResponseWriter, err.Error(), http.StatusInternalServerError)
129                                 uos.dropBody = true
130                                 return
131                         }
132                 }
133                 uos.sentHeader = true
134         }
135         uos.ResponseWriter.WriteHeader(code)
136 }
137
// webdavMethod is the set of request methods that ServeHTTP hands to
// the webdav handler.
var webdavMethod = map[string]bool{
	"DELETE":   true,
	"MKCOL":    true,
	"MOVE":     true,
	"OPTIONS":  true,
	"PROPFIND": true,
	"PUT":      true,
	"RMCOL":    true,
}

// browserMethod is the set of request methods that ServeHTTP serves
// itself (file content, directory listings, redirects).
var browserMethod = map[string]bool{
	"GET":  true,
	"HEAD": true,
	"POST": true,
}
154
155 // ServeHTTP implements http.Handler.
156 func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
157         h.setupOnce.Do(h.setup)
158
159         var statusCode = 0
160         var statusText string
161
162         remoteAddr := r.RemoteAddr
163         if xff := r.Header.Get("X-Forwarded-For"); xff != "" {
164                 remoteAddr = xff + "," + remoteAddr
165         }
166
167         w := httpserver.WrapResponseWriter(wOrig)
168         defer func() {
169                 if statusCode == 0 {
170                         statusCode = w.WroteStatus()
171                 } else if w.WroteStatus() == 0 {
172                         w.WriteHeader(statusCode)
173                 } else if w.WroteStatus() != statusCode {
174                         httpserver.Log(r.RemoteAddr, "WARNING",
175                                 fmt.Sprintf("Our status changed from %d to %d after we sent headers", w.WroteStatus(), statusCode))
176                 }
177                 if statusText == "" {
178                         statusText = http.StatusText(statusCode)
179                 }
180                 httpserver.Log(remoteAddr, statusCode, statusText, w.WroteBodyBytes(), r.Method, r.Host, r.URL.Path, r.URL.RawQuery)
181         }()
182
183         if strings.HasPrefix(r.URL.Path, "/_health/") && r.Method == "GET" {
184                 h.healthHandler.ServeHTTP(w, r)
185                 return
186         }
187
188         if method := r.Header.Get("Access-Control-Request-Method"); method != "" && r.Method == "OPTIONS" {
189                 if !browserMethod[method] && !webdavMethod[method] {
190                         statusCode = http.StatusMethodNotAllowed
191                         return
192                 }
193                 w.Header().Set("Access-Control-Allow-Headers", "Authorization, Content-Type, Range")
194                 w.Header().Set("Access-Control-Allow-Methods", "DELETE, GET, MKCOL, MOVE, OPTIONS, POST, PROPFIND, PUT, RMCOL")
195                 w.Header().Set("Access-Control-Allow-Origin", "*")
196                 w.Header().Set("Access-Control-Max-Age", "86400")
197                 statusCode = http.StatusOK
198                 return
199         }
200
201         if !browserMethod[r.Method] && !webdavMethod[r.Method] {
202                 statusCode, statusText = http.StatusMethodNotAllowed, r.Method
203                 return
204         }
205
206         if r.Header.Get("Origin") != "" {
207                 // Allow simple cross-origin requests without user
208                 // credentials ("user credentials" as defined by CORS,
209                 // i.e., cookies, HTTP authentication, and client-side
210                 // SSL certificates. See
211                 // http://www.w3.org/TR/cors/#user-credentials).
212                 w.Header().Set("Access-Control-Allow-Origin", "*")
213                 w.Header().Set("Access-Control-Expose-Headers", "Content-Range")
214         }
215
216         arv := h.clientPool.Get()
217         if arv == nil {
218                 statusCode, statusText = http.StatusInternalServerError, "Pool failed: "+h.clientPool.Err().Error()
219                 return
220         }
221         defer h.clientPool.Put(arv)
222
223         pathParts := strings.Split(r.URL.Path[1:], "/")
224
225         var stripParts int
226         var targetID string
227         var tokens []string
228         var reqTokens []string
229         var pathToken bool
230         var attachment bool
231         credentialsOK := h.Config.TrustAllContent
232
233         if r.Host != "" && r.Host == h.Config.AttachmentOnlyHost {
234                 credentialsOK = true
235                 attachment = true
236         } else if r.FormValue("disposition") == "attachment" {
237                 attachment = true
238         }
239
240         if targetID = parseCollectionIDFromDNSName(r.Host); targetID != "" {
241                 // http://ID.collections.example/PATH...
242                 credentialsOK = true
243         } else if r.URL.Path == "/status.json" {
244                 h.serveStatus(w, r)
245                 return
246         } else if len(pathParts) >= 1 && strings.HasPrefix(pathParts[0], "c=") {
247                 // /c=ID[/PATH...]
248                 targetID = parseCollectionIDFromURL(pathParts[0][2:])
249                 stripParts = 1
250         } else if len(pathParts) >= 2 && pathParts[0] == "collections" {
251                 if len(pathParts) >= 4 && pathParts[1] == "download" {
252                         // /collections/download/ID/TOKEN/PATH...
253                         targetID = parseCollectionIDFromURL(pathParts[2])
254                         tokens = []string{pathParts[3]}
255                         stripParts = 4
256                         pathToken = true
257                 } else {
258                         // /collections/ID/PATH...
259                         targetID = parseCollectionIDFromURL(pathParts[1])
260                         tokens = h.Config.AnonymousTokens
261                         stripParts = 2
262                 }
263         }
264
265         if targetID == "" {
266                 statusCode = http.StatusNotFound
267                 return
268         }
269
270         formToken := r.FormValue("api_token")
271         if formToken != "" && r.Header.Get("Origin") != "" && attachment && r.URL.Query().Get("api_token") == "" {
272                 // The client provided an explicit token in the POST
273                 // body. The Origin header indicates this *might* be
274                 // an AJAX request, in which case redirect-with-cookie
275                 // won't work: we should just serve the content in the
276                 // POST response. This is safe because:
277                 //
278                 // * We're supplying an attachment, not inline
279                 //   content, so we don't need to convert the POST to
280                 //   a GET and avoid the "really resubmit form?"
281                 //   problem.
282                 //
283                 // * The token isn't embedded in the URL, so we don't
284                 //   need to worry about bookmarks and copy/paste.
285                 tokens = append(tokens, formToken)
286         } else if formToken != "" && browserMethod[r.Method] {
287                 // The client provided an explicit token in the query
288                 // string, or a form in POST body. We must put the
289                 // token in an HttpOnly cookie, and redirect to the
290                 // same URL with the query param redacted and method =
291                 // GET.
292                 h.seeOtherWithCookie(w, r, "", credentialsOK)
293                 return
294         }
295
296         targetPath := pathParts[stripParts:]
297         if tokens == nil && len(targetPath) > 0 && strings.HasPrefix(targetPath[0], "t=") {
298                 // http://ID.example/t=TOKEN/PATH...
299                 // /c=ID/t=TOKEN/PATH...
300                 //
301                 // This form must only be used to pass scoped tokens
302                 // that give permission for a single collection. See
303                 // FormValue case above.
304                 tokens = []string{targetPath[0][2:]}
305                 pathToken = true
306                 targetPath = targetPath[1:]
307                 stripParts++
308         }
309
310         if tokens == nil {
311                 if credentialsOK {
312                         reqTokens = auth.NewCredentialsFromHTTPRequest(r).Tokens
313                 }
314                 tokens = append(reqTokens, h.Config.AnonymousTokens...)
315         }
316
317         if len(targetPath) > 0 && targetPath[0] == "_" {
318                 // If a collection has a directory called "t=foo" or
319                 // "_", it can be served at
320                 // //collections.example/_/t=foo/ or
321                 // //collections.example/_/_/ respectively:
322                 // //collections.example/t=foo/ won't work because
323                 // t=foo will be interpreted as a token "foo".
324                 targetPath = targetPath[1:]
325                 stripParts++
326         }
327
328         forceReload := false
329         if cc := r.Header.Get("Cache-Control"); strings.Contains(cc, "no-cache") || strings.Contains(cc, "must-revalidate") {
330                 forceReload = true
331         }
332
333         var collection *arvados.Collection
334         tokenResult := make(map[string]int)
335         for _, arv.ApiToken = range tokens {
336                 var err error
337                 collection, err = h.Config.Cache.Get(arv, targetID, forceReload)
338                 if err == nil {
339                         // Success
340                         break
341                 }
342                 if srvErr, ok := err.(arvadosclient.APIServerError); ok {
343                         switch srvErr.HttpStatusCode {
344                         case 404, 401:
345                                 // Token broken or insufficient to
346                                 // retrieve collection
347                                 tokenResult[arv.ApiToken] = srvErr.HttpStatusCode
348                                 continue
349                         }
350                 }
351                 // Something more serious is wrong
352                 statusCode, statusText = http.StatusInternalServerError, err.Error()
353                 return
354         }
355         if collection == nil {
356                 if pathToken || !credentialsOK {
357                         // Either the URL is a "secret sharing link"
358                         // that didn't work out (and asking the client
359                         // for additional credentials would just be
360                         // confusing), or we don't even accept
361                         // credentials at this path.
362                         statusCode = http.StatusNotFound
363                         return
364                 }
365                 for _, t := range reqTokens {
366                         if tokenResult[t] == 404 {
367                                 // The client provided valid token(s), but the
368                                 // collection was not found.
369                                 statusCode = http.StatusNotFound
370                                 return
371                         }
372                 }
373                 // The client's token was invalid (e.g., expired), or
374                 // the client didn't even provide one.  Propagate the
375                 // 401 to encourage the client to use a [different]
376                 // token.
377                 //
378                 // TODO(TC): This response would be confusing to
379                 // someone trying (anonymously) to download public
380                 // data that has been deleted.  Allow a referrer to
381                 // provide this context somehow?
382                 w.Header().Add("WWW-Authenticate", "Basic realm=\"collections\"")
383                 statusCode = http.StatusUnauthorized
384                 return
385         }
386
387         kc, err := keepclient.MakeKeepClient(arv)
388         if err != nil {
389                 statusCode, statusText = http.StatusInternalServerError, err.Error()
390                 return
391         }
392
393         var basename string
394         if len(targetPath) > 0 {
395                 basename = targetPath[len(targetPath)-1]
396         }
397         applyContentDispositionHdr(w, r, basename, attachment)
398
399         client := &arvados.Client{
400                 APIHost:   arv.ApiServer,
401                 AuthToken: arv.ApiToken,
402                 Insecure:  arv.ApiInsecure,
403         }
404         fs, err := collection.FileSystem(client, kc)
405         if err != nil {
406                 statusCode, statusText = http.StatusInternalServerError, err.Error()
407                 return
408         }
409         if webdavMethod[r.Method] {
410                 writing := !arvadosclient.PDHMatch(targetID)
411                 if writing {
412                         // Save the collection only if/when all
413                         // webdav->filesystem operations succeed --
414                         // and send a 500 error the modified
415                         // collection can't be saved.
416                         w = &updateOnSuccess{
417                                 ResponseWriter: w,
418                                 update: func() error {
419                                         return h.Config.Cache.Update(client, *collection, fs)
420                                 }}
421                 }
422                 h := webdav.Handler{
423                         Prefix: "/" + strings.Join(pathParts[:stripParts], "/"),
424                         FileSystem: &webdavFS{
425                                 collfs:  fs,
426                                 writing: writing,
427                         },
428                         LockSystem: h.webdavLS,
429                         Logger: func(_ *http.Request, err error) {
430                                 if os.IsNotExist(err) {
431                                         statusCode, statusText = http.StatusNotFound, err.Error()
432                                 } else if err != nil {
433                                         statusCode, statusText = http.StatusInternalServerError, err.Error()
434                                 }
435                         },
436                 }
437                 h.ServeHTTP(w, r)
438                 return
439         }
440
441         openPath := "/" + strings.Join(targetPath, "/")
442         if f, err := fs.Open(openPath); os.IsNotExist(err) {
443                 // Requested non-existent path
444                 statusCode = http.StatusNotFound
445         } else if err != nil {
446                 // Some other (unexpected) error
447                 statusCode, statusText = http.StatusInternalServerError, err.Error()
448         } else if stat, err := f.Stat(); err != nil {
449                 // Can't get Size/IsDir (shouldn't happen with a collectionFS!)
450                 statusCode, statusText = http.StatusInternalServerError, err.Error()
451         } else if stat.IsDir() && !strings.HasSuffix(r.URL.Path, "/") {
452                 // If client requests ".../dirname", redirect to
453                 // ".../dirname/". This way, relative links in the
454                 // listing for "dirname" can always be "fnm", never
455                 // "dirname/fnm".
456                 h.seeOtherWithCookie(w, r, r.URL.Path+"/", credentialsOK)
457         } else if stat.IsDir() {
458                 h.serveDirectory(w, r, collection.Name, fs, openPath, stripParts)
459         } else {
460                 http.ServeContent(w, r, basename, stat.ModTime(), f)
461                 if r.Header.Get("Range") == "" && int64(w.WroteBodyBytes()) != stat.Size() {
462                         // If we wrote fewer bytes than expected, it's
463                         // too late to change the real response code
464                         // or send an error message to the client, but
465                         // at least we can try to put some useful
466                         // debugging info in the logs.
467                         n, err := f.Read(make([]byte, 1024))
468                         statusCode, statusText = http.StatusInternalServerError, fmt.Sprintf("f.Size()==%d but only wrote %d bytes; read(1024) returns %d, %s", stat.Size(), w.WroteBodyBytes(), n, err)
469
470                 }
471         }
472 }
473
// dirListingTemplate is the html/template source for the directory
// listing page rendered by serveDirectory. It expects these keys in
// its data: CollectionName (page title and heading), Files (entries
// with Name and Size), Request (Host and URL are used to build the
// example wget command) and StripParts (the wget --cut-dirs value).
// It also requires an "nbsp" template function.
var dirListingTemplate = `<!DOCTYPE HTML>
<HTML><HEAD>
  <META name="robots" content="NOINDEX">
  <TITLE>{{ .CollectionName }}</TITLE>
  <STYLE type="text/css">
    body {
      margin: 1.5em;
    }
    pre {
      background-color: #D9EDF7;
      border-radius: .25em;
      padding: .75em;
      overflow: auto;
    }
    .footer p {
      font-size: 82%;
    }
    ul {
      padding: 0;
    }
    ul li {
      font-family: monospace;
      list-style: none;
    }
  </STYLE>
</HEAD>
<BODY>
<H1>{{ .CollectionName }}</H1>

<P>This collection of data files is being shared with you through
Arvados.  You can download individual files listed below.  To download
the entire collection with wget, try:</P>

<PRE>$ wget --mirror --no-parent --no-host --cut-dirs={{ .StripParts }} https://{{ .Request.Host }}{{ .Request.URL }}</PRE>

<H2>File Listing</H2>

{{if .Files}}
<UL>
{{range .Files}}  <LI>{{.Size | printf "%15d  " | nbsp}}<A href="{{.Name}}">{{.Name}}</A></LI>{{end}}
</UL>
{{else}}
<P>(No files; this collection is empty.)</P>
{{end}}

<HR noshade>
<DIV class="footer">
  <P>
    About Arvados:
    Arvados is a free and open source software bioinformatics platform.
    To learn more, visit arvados.org.
    Arvados is not responsible for the files listed on this page.
  </P>
</DIV>

</BODY>
`
531
// fileListEnt is one row of the directory listing rendered by
// serveDirectory.
type fileListEnt struct {
        // Name is the file's path relative to the directory being listed.
        Name string
        // Size is the file size as reported by the directory entry.
        Size int64
}
536
537 func (h *handler) serveDirectory(w http.ResponseWriter, r *http.Request, collectionName string, fs http.FileSystem, base string, stripParts int) {
538         var files []fileListEnt
539         var walk func(string) error
540         if !strings.HasSuffix(base, "/") {
541                 base = base + "/"
542         }
543         walk = func(path string) error {
544                 dirname := base + path
545                 if dirname != "/" {
546                         dirname = strings.TrimSuffix(dirname, "/")
547                 }
548                 d, err := fs.Open(dirname)
549                 if err != nil {
550                         return err
551                 }
552                 ents, err := d.Readdir(-1)
553                 if err != nil {
554                         return err
555                 }
556                 for _, ent := range ents {
557                         if ent.IsDir() {
558                                 err = walk(path + ent.Name() + "/")
559                                 if err != nil {
560                                         return err
561                                 }
562                         } else {
563                                 files = append(files, fileListEnt{
564                                         Name: path + ent.Name(),
565                                         Size: ent.Size(),
566                                 })
567                         }
568                 }
569                 return nil
570         }
571         if err := walk(""); err != nil {
572                 http.Error(w, err.Error(), http.StatusInternalServerError)
573                 return
574         }
575
576         funcs := template.FuncMap{
577                 "nbsp": func(s string) template.HTML {
578                         return template.HTML(strings.Replace(s, " ", "&nbsp;", -1))
579                 },
580         }
581         tmpl, err := template.New("dir").Funcs(funcs).Parse(dirListingTemplate)
582         if err != nil {
583                 http.Error(w, err.Error(), http.StatusInternalServerError)
584                 return
585         }
586         sort.Slice(files, func(i, j int) bool {
587                 return files[i].Name < files[j].Name
588         })
589         w.WriteHeader(http.StatusOK)
590         tmpl.Execute(w, map[string]interface{}{
591                 "CollectionName": collectionName,
592                 "Files":          files,
593                 "Request":        r,
594                 "StripParts":     stripParts,
595         })
596 }
597
// applyContentDispositionHdr sets the Content-Disposition response
// header when one is needed.
//
// The disposition type is "attachment" if isAttachment, otherwise
// "inline". If the request URI contains a query string, a filename
// parameter is added so the user agent names the file
// "filename.txt" rather than "filename.txt?disposition=attachment".
// A bare "inline" is the default and is not sent.
func applyContentDispositionHdr(w http.ResponseWriter, r *http.Request, filename string, isAttachment bool) {
	hasQuery := strings.ContainsRune(r.RequestURI, '?')

	var value string
	switch {
	case !isAttachment && !hasQuery:
		// Plain inline: no header needed.
		return
	case isAttachment:
		value = "attachment"
	default:
		value = "inline"
	}
	if hasQuery {
		// TODO(TC): Follow advice at RFC 6266 appendix D
		value += "; filename=" + strconv.QuoteToASCII(filename)
	}
	w.Header().Set("Content-Disposition", value)
}
615
616 func (h *handler) seeOtherWithCookie(w http.ResponseWriter, r *http.Request, location string, credentialsOK bool) {
617         if formToken := r.FormValue("api_token"); formToken != "" {
618                 if !credentialsOK {
619                         // It is not safe to copy the provided token
620                         // into a cookie unless the current vhost
621                         // (origin) serves only a single collection or
622                         // we are in TrustAllContent mode.
623                         w.WriteHeader(http.StatusBadRequest)
624                         return
625                 }
626
627                 // The HttpOnly flag is necessary to prevent
628                 // JavaScript code (included in, or loaded by, a page
629                 // in the collection being served) from employing the
630                 // user's token beyond reading other files in the same
631                 // domain, i.e., same collection.
632                 //
633                 // The 303 redirect is necessary in the case of a GET
634                 // request to avoid exposing the token in the Location
635                 // bar, and in the case of a POST request to avoid
636                 // raising warnings when the user refreshes the
637                 // resulting page.
638                 http.SetCookie(w, &http.Cookie{
639                         Name:     "arvados_api_token",
640                         Value:    auth.EncodeTokenCookie([]byte(formToken)),
641                         Path:     "/",
642                         HttpOnly: true,
643                 })
644         }
645
646         // Propagate query parameters (except api_token) from
647         // the original request.
648         redirQuery := r.URL.Query()
649         redirQuery.Del("api_token")
650
651         u := r.URL
652         if location != "" {
653                 newu, err := u.Parse(location)
654                 if err != nil {
655                         w.WriteHeader(http.StatusInternalServerError)
656                         return
657                 }
658                 u = newu
659         }
660         redir := (&url.URL{
661                 Host:     r.Host,
662                 Path:     u.Path,
663                 RawQuery: redirQuery.Encode(),
664         }).String()
665
666         w.Header().Add("Location", redir)
667         w.WriteHeader(http.StatusSeeOther)
668         io.WriteString(w, `<A href="`)
669         io.WriteString(w, html.EscapeString(redir))
670         io.WriteString(w, `">Continue</A>`)
671 }