10666: Merge branch 'master' into 10666-report-version
[arvados.git] / services / keep-web / handler.go
1 // Copyright (C) The Arvados Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: AGPL-3.0
4
5 package main
6
7 import (
8         "encoding/json"
9         "fmt"
10         "html"
11         "html/template"
12         "io"
13         "log"
14         "net/http"
15         "net/url"
16         "os"
17         "sort"
18         "strconv"
19         "strings"
20         "sync"
21
22         "git.curoverse.com/arvados.git/sdk/go/arvados"
23         "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
24         "git.curoverse.com/arvados.git/sdk/go/auth"
25         "git.curoverse.com/arvados.git/sdk/go/health"
26         "git.curoverse.com/arvados.git/sdk/go/httpserver"
27         "git.curoverse.com/arvados.git/sdk/go/keepclient"
28         "golang.org/x/net/webdav"
29 )
30
// handler is the keep-web http.Handler. Apart from Config, its fields
// are populated lazily by setup(), which ServeHTTP runs exactly once
// through setupOnce.
type handler struct {
	// Config supplies the settings read while serving requests
	// (management token, cache, anonymous tokens, host options).
	Config *Config
	// clientPool is created by setup(); ServeHTTP takes one client
	// from it per request and returns it when the request is done.
	clientPool *arvadosclient.ClientPool
	// setupOnce guards the one-time call to setup().
	setupOnce sync.Once
	// healthHandler serves GET requests under "/_health/".
	healthHandler http.Handler
	// webdavLS is the LockSystem handed to every webdav.Handler.
	webdavLS webdav.LockSystem
}
38
39 // parseCollectionIDFromDNSName returns a UUID or PDH if s begins with
40 // a UUID or URL-encoded PDH; otherwise "".
41 func parseCollectionIDFromDNSName(s string) string {
42         // Strip domain.
43         if i := strings.IndexRune(s, '.'); i >= 0 {
44                 s = s[:i]
45         }
46         // Names like {uuid}--collections.example.com serve the same
47         // purpose as {uuid}.collections.example.com but can reduce
48         // cost/effort of using [additional] wildcard certificates.
49         if i := strings.Index(s, "--"); i >= 0 {
50                 s = s[:i]
51         }
52         if arvadosclient.UUIDMatch(s) {
53                 return s
54         }
55         if pdh := strings.Replace(s, "-", "+", 1); arvadosclient.PDHMatch(pdh) {
56                 return pdh
57         }
58         return ""
59 }
60
61 var urlPDHDecoder = strings.NewReplacer(" ", "+", "-", "+")
62
63 // parseCollectionIDFromURL returns a UUID or PDH if s is a UUID or a
64 // PDH (even if it is a PDH with "+" replaced by " " or "-");
65 // otherwise "".
66 func parseCollectionIDFromURL(s string) string {
67         if arvadosclient.UUIDMatch(s) {
68                 return s
69         }
70         if pdh := urlPDHDecoder.Replace(s); arvadosclient.PDHMatch(pdh) {
71                 return pdh
72         }
73         return ""
74 }
75
76 func (h *handler) setup() {
77         h.clientPool = arvadosclient.MakeClientPool()
78
79         keepclient.RefreshServiceDiscoveryOnSIGHUP()
80
81         h.healthHandler = &health.Handler{
82                 Token:  h.Config.ManagementToken,
83                 Prefix: "/_health/",
84         }
85
86         // Even though we don't accept LOCK requests, every webdav
87         // handler must have a non-nil LockSystem.
88         h.webdavLS = &noLockSystem{}
89 }
90
91 func (h *handler) serveStatus(w http.ResponseWriter, r *http.Request) {
92         status := struct {
93                 cacheStats
94                 Version string
95         }{
96                 cacheStats: h.Config.Cache.Stats(),
97                 Version:    version,
98         }
99         json.NewEncoder(w).Encode(status)
100 }
101
// updateOnSuccess wraps httpserver.ResponseWriter. If the handler
// sends an HTTP header indicating success, updateOnSuccess first
// calls the provided update func. If the update func fails, a 500
// response is sent, and the status code and body sent by the handler
// are ignored (all response writes return the update error).
type updateOnSuccess struct {
	httpserver.ResponseWriter
	// update is called at most once, just before the first
	// success (2xx/3xx) header would be sent.
	update func() error
	// sentHeader records that WriteHeader has already been
	// processed once, so update is not called again.
	sentHeader bool
	// err holds the error returned by update, if any. Once set,
	// Write returns it instead of writing.
	err error
}
113
114 func (uos *updateOnSuccess) Write(p []byte) (int, error) {
115         if uos.err != nil {
116                 return 0, uos.err
117         }
118         if !uos.sentHeader {
119                 uos.WriteHeader(http.StatusOK)
120         }
121         return uos.ResponseWriter.Write(p)
122 }
123
124 func (uos *updateOnSuccess) WriteHeader(code int) {
125         if !uos.sentHeader {
126                 uos.sentHeader = true
127                 if code >= 200 && code < 400 {
128                         if uos.err = uos.update(); uos.err != nil {
129                                 code := http.StatusInternalServerError
130                                 if err, ok := uos.err.(*arvados.TransactionError); ok {
131                                         code = err.StatusCode
132                                 }
133                                 log.Printf("update() changes response to HTTP %d: %T %q", code, uos.err, uos.err)
134                                 http.Error(uos.ResponseWriter, uos.err.Error(), code)
135                                 return
136                         }
137                 }
138         }
139         uos.ResponseWriter.WriteHeader(code)
140 }
141
// writeMethod is the set of request methods that modify a collection.
var writeMethod = map[string]bool{
	"COPY":   true,
	"DELETE": true,
	"MKCOL":  true,
	"MOVE":   true,
	"PUT":    true,
	"RMCOL":  true,
}

// webdavMethod is the set of request methods that are handed to the
// webdav handler: every write method plus OPTIONS and PROPFIND.
var webdavMethod = map[string]bool{
	"COPY":     true,
	"DELETE":   true,
	"MKCOL":    true,
	"MOVE":     true,
	"OPTIONS":  true,
	"PROPFIND": true,
	"PUT":      true,
	"RMCOL":    true,
}

// browserMethod is the set of request methods served by the regular
// (non-webdav) code path.
var browserMethod = map[string]bool{"GET": true, "HEAD": true, "POST": true}
167
168 // ServeHTTP implements http.Handler.
169 func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
170         h.setupOnce.Do(h.setup)
171
172         var statusCode = 0
173         var statusText string
174
175         remoteAddr := r.RemoteAddr
176         if xff := r.Header.Get("X-Forwarded-For"); xff != "" {
177                 remoteAddr = xff + "," + remoteAddr
178         }
179
180         w := httpserver.WrapResponseWriter(wOrig)
181         defer func() {
182                 if statusCode == 0 {
183                         statusCode = w.WroteStatus()
184                 } else if w.WroteStatus() == 0 {
185                         w.WriteHeader(statusCode)
186                 } else if w.WroteStatus() != statusCode {
187                         httpserver.Log(r.RemoteAddr, "WARNING",
188                                 fmt.Sprintf("Our status changed from %d to %d after we sent headers", w.WroteStatus(), statusCode))
189                 }
190                 if statusText == "" {
191                         statusText = http.StatusText(statusCode)
192                 }
193                 httpserver.Log(remoteAddr, statusCode, statusText, w.WroteBodyBytes(), r.Method, r.Host, r.URL.Path, r.URL.RawQuery)
194         }()
195
196         if strings.HasPrefix(r.URL.Path, "/_health/") && r.Method == "GET" {
197                 h.healthHandler.ServeHTTP(w, r)
198                 return
199         }
200
201         if method := r.Header.Get("Access-Control-Request-Method"); method != "" && r.Method == "OPTIONS" {
202                 if !browserMethod[method] && !webdavMethod[method] {
203                         statusCode = http.StatusMethodNotAllowed
204                         return
205                 }
206                 w.Header().Set("Access-Control-Allow-Headers", "Authorization, Content-Type, Range")
207                 w.Header().Set("Access-Control-Allow-Methods", "COPY, DELETE, GET, MKCOL, MOVE, OPTIONS, POST, PROPFIND, PUT, RMCOL")
208                 w.Header().Set("Access-Control-Allow-Origin", "*")
209                 w.Header().Set("Access-Control-Max-Age", "86400")
210                 statusCode = http.StatusOK
211                 return
212         }
213
214         if !browserMethod[r.Method] && !webdavMethod[r.Method] {
215                 statusCode, statusText = http.StatusMethodNotAllowed, r.Method
216                 return
217         }
218
219         if r.Header.Get("Origin") != "" {
220                 // Allow simple cross-origin requests without user
221                 // credentials ("user credentials" as defined by CORS,
222                 // i.e., cookies, HTTP authentication, and client-side
223                 // SSL certificates. See
224                 // http://www.w3.org/TR/cors/#user-credentials).
225                 w.Header().Set("Access-Control-Allow-Origin", "*")
226                 w.Header().Set("Access-Control-Expose-Headers", "Content-Range")
227         }
228
229         arv := h.clientPool.Get()
230         if arv == nil {
231                 statusCode, statusText = http.StatusInternalServerError, "Pool failed: "+h.clientPool.Err().Error()
232                 return
233         }
234         defer h.clientPool.Put(arv)
235
236         pathParts := strings.Split(r.URL.Path[1:], "/")
237
238         var stripParts int
239         var targetID string
240         var tokens []string
241         var reqTokens []string
242         var pathToken bool
243         var attachment bool
244         credentialsOK := h.Config.TrustAllContent
245
246         if r.Host != "" && r.Host == h.Config.AttachmentOnlyHost {
247                 credentialsOK = true
248                 attachment = true
249         } else if r.FormValue("disposition") == "attachment" {
250                 attachment = true
251         }
252
253         if targetID = parseCollectionIDFromDNSName(r.Host); targetID != "" {
254                 // http://ID.collections.example/PATH...
255                 credentialsOK = true
256         } else if r.URL.Path == "/status.json" {
257                 h.serveStatus(w, r)
258                 return
259         } else if len(pathParts) >= 1 && strings.HasPrefix(pathParts[0], "c=") {
260                 // /c=ID[/PATH...]
261                 targetID = parseCollectionIDFromURL(pathParts[0][2:])
262                 stripParts = 1
263         } else if len(pathParts) >= 2 && pathParts[0] == "collections" {
264                 if len(pathParts) >= 4 && pathParts[1] == "download" {
265                         // /collections/download/ID/TOKEN/PATH...
266                         targetID = parseCollectionIDFromURL(pathParts[2])
267                         tokens = []string{pathParts[3]}
268                         stripParts = 4
269                         pathToken = true
270                 } else {
271                         // /collections/ID/PATH...
272                         targetID = parseCollectionIDFromURL(pathParts[1])
273                         tokens = h.Config.AnonymousTokens
274                         stripParts = 2
275                 }
276         }
277
278         if targetID == "" {
279                 statusCode = http.StatusNotFound
280                 return
281         }
282
283         formToken := r.FormValue("api_token")
284         if formToken != "" && r.Header.Get("Origin") != "" && attachment && r.URL.Query().Get("api_token") == "" {
285                 // The client provided an explicit token in the POST
286                 // body. The Origin header indicates this *might* be
287                 // an AJAX request, in which case redirect-with-cookie
288                 // won't work: we should just serve the content in the
289                 // POST response. This is safe because:
290                 //
291                 // * We're supplying an attachment, not inline
292                 //   content, so we don't need to convert the POST to
293                 //   a GET and avoid the "really resubmit form?"
294                 //   problem.
295                 //
296                 // * The token isn't embedded in the URL, so we don't
297                 //   need to worry about bookmarks and copy/paste.
298                 tokens = append(tokens, formToken)
299         } else if formToken != "" && browserMethod[r.Method] {
300                 // The client provided an explicit token in the query
301                 // string, or a form in POST body. We must put the
302                 // token in an HttpOnly cookie, and redirect to the
303                 // same URL with the query param redacted and method =
304                 // GET.
305                 h.seeOtherWithCookie(w, r, "", credentialsOK)
306                 return
307         }
308
309         targetPath := pathParts[stripParts:]
310         if tokens == nil && len(targetPath) > 0 && strings.HasPrefix(targetPath[0], "t=") {
311                 // http://ID.example/t=TOKEN/PATH...
312                 // /c=ID/t=TOKEN/PATH...
313                 //
314                 // This form must only be used to pass scoped tokens
315                 // that give permission for a single collection. See
316                 // FormValue case above.
317                 tokens = []string{targetPath[0][2:]}
318                 pathToken = true
319                 targetPath = targetPath[1:]
320                 stripParts++
321         }
322
323         if tokens == nil {
324                 if credentialsOK {
325                         reqTokens = auth.NewCredentialsFromHTTPRequest(r).Tokens
326                 }
327                 tokens = append(reqTokens, h.Config.AnonymousTokens...)
328         }
329
330         if len(targetPath) > 0 && targetPath[0] == "_" {
331                 // If a collection has a directory called "t=foo" or
332                 // "_", it can be served at
333                 // //collections.example/_/t=foo/ or
334                 // //collections.example/_/_/ respectively:
335                 // //collections.example/t=foo/ won't work because
336                 // t=foo will be interpreted as a token "foo".
337                 targetPath = targetPath[1:]
338                 stripParts++
339         }
340
341         forceReload := false
342         if cc := r.Header.Get("Cache-Control"); strings.Contains(cc, "no-cache") || strings.Contains(cc, "must-revalidate") {
343                 forceReload = true
344         }
345
346         var collection *arvados.Collection
347         tokenResult := make(map[string]int)
348         for _, arv.ApiToken = range tokens {
349                 var err error
350                 collection, err = h.Config.Cache.Get(arv, targetID, forceReload)
351                 if err == nil {
352                         // Success
353                         break
354                 }
355                 if srvErr, ok := err.(arvadosclient.APIServerError); ok {
356                         switch srvErr.HttpStatusCode {
357                         case 404, 401:
358                                 // Token broken or insufficient to
359                                 // retrieve collection
360                                 tokenResult[arv.ApiToken] = srvErr.HttpStatusCode
361                                 continue
362                         }
363                 }
364                 // Something more serious is wrong
365                 statusCode, statusText = http.StatusInternalServerError, err.Error()
366                 return
367         }
368         if collection == nil {
369                 if pathToken || !credentialsOK {
370                         // Either the URL is a "secret sharing link"
371                         // that didn't work out (and asking the client
372                         // for additional credentials would just be
373                         // confusing), or we don't even accept
374                         // credentials at this path.
375                         statusCode = http.StatusNotFound
376                         return
377                 }
378                 for _, t := range reqTokens {
379                         if tokenResult[t] == 404 {
380                                 // The client provided valid token(s), but the
381                                 // collection was not found.
382                                 statusCode = http.StatusNotFound
383                                 return
384                         }
385                 }
386                 // The client's token was invalid (e.g., expired), or
387                 // the client didn't even provide one.  Propagate the
388                 // 401 to encourage the client to use a [different]
389                 // token.
390                 //
391                 // TODO(TC): This response would be confusing to
392                 // someone trying (anonymously) to download public
393                 // data that has been deleted.  Allow a referrer to
394                 // provide this context somehow?
395                 w.Header().Add("WWW-Authenticate", "Basic realm=\"collections\"")
396                 statusCode = http.StatusUnauthorized
397                 return
398         }
399
400         kc, err := keepclient.MakeKeepClient(arv)
401         if err != nil {
402                 statusCode, statusText = http.StatusInternalServerError, err.Error()
403                 return
404         }
405
406         var basename string
407         if len(targetPath) > 0 {
408                 basename = targetPath[len(targetPath)-1]
409         }
410         applyContentDispositionHdr(w, r, basename, attachment)
411
412         client := &arvados.Client{
413                 APIHost:   arv.ApiServer,
414                 AuthToken: arv.ApiToken,
415                 Insecure:  arv.ApiInsecure,
416         }
417         fs, err := collection.FileSystem(client, kc)
418         if err != nil {
419                 statusCode, statusText = http.StatusInternalServerError, err.Error()
420                 return
421         }
422
423         targetIsPDH := arvadosclient.PDHMatch(targetID)
424         if targetIsPDH && writeMethod[r.Method] {
425                 statusCode, statusText = http.StatusMethodNotAllowed, errReadOnly.Error()
426                 return
427         }
428
429         if webdavMethod[r.Method] {
430                 if writeMethod[r.Method] {
431                         // Save the collection only if/when all
432                         // webdav->filesystem operations succeed --
433                         // and send a 500 error if the modified
434                         // collection can't be saved.
435                         w = &updateOnSuccess{
436                                 ResponseWriter: w,
437                                 update: func() error {
438                                         return h.Config.Cache.Update(client, *collection, fs)
439                                 }}
440                 }
441                 h := webdav.Handler{
442                         Prefix: "/" + strings.Join(pathParts[:stripParts], "/"),
443                         FileSystem: &webdavFS{
444                                 collfs:  fs,
445                                 writing: writeMethod[r.Method],
446                         },
447                         LockSystem: h.webdavLS,
448                         Logger: func(_ *http.Request, err error) {
449                                 if err != nil {
450                                         log.Printf("error from webdav handler: %q", err)
451                                 }
452                         },
453                 }
454                 h.ServeHTTP(w, r)
455                 return
456         }
457
458         openPath := "/" + strings.Join(targetPath, "/")
459         if f, err := fs.Open(openPath); os.IsNotExist(err) {
460                 // Requested non-existent path
461                 statusCode = http.StatusNotFound
462         } else if err != nil {
463                 // Some other (unexpected) error
464                 statusCode, statusText = http.StatusInternalServerError, err.Error()
465         } else if stat, err := f.Stat(); err != nil {
466                 // Can't get Size/IsDir (shouldn't happen with a collectionFS!)
467                 statusCode, statusText = http.StatusInternalServerError, err.Error()
468         } else if stat.IsDir() && !strings.HasSuffix(r.URL.Path, "/") {
469                 // If client requests ".../dirname", redirect to
470                 // ".../dirname/". This way, relative links in the
471                 // listing for "dirname" can always be "fnm", never
472                 // "dirname/fnm".
473                 h.seeOtherWithCookie(w, r, r.URL.Path+"/", credentialsOK)
474         } else if stat.IsDir() {
475                 h.serveDirectory(w, r, collection.Name, fs, openPath, stripParts)
476         } else {
477                 http.ServeContent(w, r, basename, stat.ModTime(), f)
478                 if r.Header.Get("Range") == "" && int64(w.WroteBodyBytes()) != stat.Size() {
479                         // If we wrote fewer bytes than expected, it's
480                         // too late to change the real response code
481                         // or send an error message to the client, but
482                         // at least we can try to put some useful
483                         // debugging info in the logs.
484                         n, err := f.Read(make([]byte, 1024))
485                         statusCode, statusText = http.StatusInternalServerError, fmt.Sprintf("f.Size()==%d but only wrote %d bytes; read(1024) returns %d, %s", stat.Size(), w.WroteBodyBytes(), n, err)
486
487                 }
488         }
489 }
490
// dirListingTemplate is the html/template source for directory
// listing pages. It is executed with a map providing:
//
//   - CollectionName: page title and heading
//   - Files: entries with Name and Size fields
//   - Request: the *http.Request (Host and URL are shown in the
//     wget example)
//   - StripParts: value for wget's --cut-dirs option
//
// It also requires a template func "nbsp" that accepts a string.
var dirListingTemplate = `<!DOCTYPE HTML>
<HTML><HEAD>
  <META name="robots" content="NOINDEX">
  <TITLE>{{ .CollectionName }}</TITLE>
  <STYLE type="text/css">
    body {
      margin: 1.5em;
    }
    pre {
      background-color: #D9EDF7;
      border-radius: .25em;
      padding: .75em;
      overflow: auto;
    }
    .footer p {
      font-size: 82%;
    }
    ul {
      padding: 0;
    }
    ul li {
      font-family: monospace;
      list-style: none;
    }
  </STYLE>
</HEAD>
<BODY>
<H1>{{ .CollectionName }}</H1>

<P>This collection of data files is being shared with you through
Arvados.  You can download individual files listed below.  To download
the entire collection with wget, try:</P>

<PRE>$ wget --mirror --no-parent --no-host --cut-dirs={{ .StripParts }} https://{{ .Request.Host }}{{ .Request.URL }}</PRE>

<H2>File Listing</H2>

{{if .Files}}
<UL>
{{range .Files}}  <LI>{{.Size | printf "%15d  " | nbsp}}<A href="{{.Name}}">{{.Name}}</A></LI>{{end}}
</UL>
{{else}}
<P>(No files; this collection is empty.)</P>
{{end}}

<HR noshade>
<DIV class="footer">
  <P>
    About Arvados:
    Arvados is a free and open source software bioinformatics platform.
    To learn more, visit arvados.org.
    Arvados is not responsible for the files listed on this page.
  </P>
</DIV>

</BODY>
`
548
// fileListEnt is one row of a directory listing page.
type fileListEnt struct {
	// Name is the file's path relative to the listed directory;
	// it is used as both link target and link text.
	Name string
	// Size is the file size as reported by os.FileInfo.Size().
	Size int64
}
553
554 func (h *handler) serveDirectory(w http.ResponseWriter, r *http.Request, collectionName string, fs http.FileSystem, base string, stripParts int) {
555         var files []fileListEnt
556         var walk func(string) error
557         if !strings.HasSuffix(base, "/") {
558                 base = base + "/"
559         }
560         walk = func(path string) error {
561                 dirname := base + path
562                 if dirname != "/" {
563                         dirname = strings.TrimSuffix(dirname, "/")
564                 }
565                 d, err := fs.Open(dirname)
566                 if err != nil {
567                         return err
568                 }
569                 ents, err := d.Readdir(-1)
570                 if err != nil {
571                         return err
572                 }
573                 for _, ent := range ents {
574                         if ent.IsDir() {
575                                 err = walk(path + ent.Name() + "/")
576                                 if err != nil {
577                                         return err
578                                 }
579                         } else {
580                                 files = append(files, fileListEnt{
581                                         Name: path + ent.Name(),
582                                         Size: ent.Size(),
583                                 })
584                         }
585                 }
586                 return nil
587         }
588         if err := walk(""); err != nil {
589                 http.Error(w, err.Error(), http.StatusInternalServerError)
590                 return
591         }
592
593         funcs := template.FuncMap{
594                 "nbsp": func(s string) template.HTML {
595                         return template.HTML(strings.Replace(s, " ", "&nbsp;", -1))
596                 },
597         }
598         tmpl, err := template.New("dir").Funcs(funcs).Parse(dirListingTemplate)
599         if err != nil {
600                 http.Error(w, err.Error(), http.StatusInternalServerError)
601                 return
602         }
603         sort.Slice(files, func(i, j int) bool {
604                 return files[i].Name < files[j].Name
605         })
606         w.WriteHeader(http.StatusOK)
607         tmpl.Execute(w, map[string]interface{}{
608                 "CollectionName": collectionName,
609                 "Files":          files,
610                 "Request":        r,
611                 "StripParts":     stripParts,
612         })
613 }
614
// applyContentDispositionHdr sets the response's Content-Disposition
// header when one is needed:
//
//   - isAttachment selects "attachment" rather than "inline";
//   - if the request URI contains a query string, a filename
//     parameter (filename, quoted with strconv.QuoteToASCII) is added.
//
// For a plain inline response without a query string, no header is
// set.
func applyContentDispositionHdr(w http.ResponseWriter, r *http.Request, filename string, isAttachment bool) {
	hasQuery := strings.Contains(r.RequestURI, "?")
	if !isAttachment && !hasQuery {
		// Bare "inline" is what the UA does anyway.
		return
	}

	value := "inline"
	if isAttachment {
		value = "attachment"
	}
	if hasQuery {
		// Help the UA realize that the filename is just
		// "filename.txt", not
		// "filename.txt?disposition=attachment".
		//
		// TODO(TC): Follow advice at RFC 6266 appendix D
		value += "; filename=" + strconv.QuoteToASCII(filename)
	}
	w.Header().Set("Content-Disposition", value)
}
632
633 func (h *handler) seeOtherWithCookie(w http.ResponseWriter, r *http.Request, location string, credentialsOK bool) {
634         if formToken := r.FormValue("api_token"); formToken != "" {
635                 if !credentialsOK {
636                         // It is not safe to copy the provided token
637                         // into a cookie unless the current vhost
638                         // (origin) serves only a single collection or
639                         // we are in TrustAllContent mode.
640                         w.WriteHeader(http.StatusBadRequest)
641                         return
642                 }
643
644                 // The HttpOnly flag is necessary to prevent
645                 // JavaScript code (included in, or loaded by, a page
646                 // in the collection being served) from employing the
647                 // user's token beyond reading other files in the same
648                 // domain, i.e., same collection.
649                 //
650                 // The 303 redirect is necessary in the case of a GET
651                 // request to avoid exposing the token in the Location
652                 // bar, and in the case of a POST request to avoid
653                 // raising warnings when the user refreshes the
654                 // resulting page.
655                 http.SetCookie(w, &http.Cookie{
656                         Name:     "arvados_api_token",
657                         Value:    auth.EncodeTokenCookie([]byte(formToken)),
658                         Path:     "/",
659                         HttpOnly: true,
660                 })
661         }
662
663         // Propagate query parameters (except api_token) from
664         // the original request.
665         redirQuery := r.URL.Query()
666         redirQuery.Del("api_token")
667
668         u := r.URL
669         if location != "" {
670                 newu, err := u.Parse(location)
671                 if err != nil {
672                         w.WriteHeader(http.StatusInternalServerError)
673                         return
674                 }
675                 u = newu
676         }
677         redir := (&url.URL{
678                 Host:     r.Host,
679                 Path:     u.Path,
680                 RawQuery: redirQuery.Encode(),
681         }).String()
682
683         w.Header().Add("Location", redir)
684         w.WriteHeader(http.StatusSeeOther)
685         io.WriteString(w, `<A href="`)
686         io.WriteString(w, html.EscapeString(redir))
687         io.WriteString(w, `">Continue</A>`)
688 }