10666: Replaced 'version' package with 'version' var
[arvados.git] / services / keep-web / handler.go
1 // Copyright (C) The Arvados Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: AGPL-3.0
4
5 package main
6
7 import (
8         "encoding/json"
9         "fmt"
10         "html"
11         "html/template"
12         "io"
13         "net/http"
14         "net/url"
15         "os"
16         "sort"
17         "strconv"
18         "strings"
19         "sync"
20
21         "git.curoverse.com/arvados.git/sdk/go/arvados"
22         "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
23         "git.curoverse.com/arvados.git/sdk/go/auth"
24         "git.curoverse.com/arvados.git/sdk/go/health"
25         "git.curoverse.com/arvados.git/sdk/go/httpserver"
26         "git.curoverse.com/arvados.git/sdk/go/keepclient"
27         "golang.org/x/net/webdav"
28 )
29
30 type handler struct {
31         Config        *Config
32         clientPool    *arvadosclient.ClientPool
33         setupOnce     sync.Once
34         healthHandler http.Handler
35         webdavLS      webdav.LockSystem
36 }
37
38 // parseCollectionIDFromDNSName returns a UUID or PDH if s begins with
39 // a UUID or URL-encoded PDH; otherwise "".
40 func parseCollectionIDFromDNSName(s string) string {
41         // Strip domain.
42         if i := strings.IndexRune(s, '.'); i >= 0 {
43                 s = s[:i]
44         }
45         // Names like {uuid}--collections.example.com serve the same
46         // purpose as {uuid}.collections.example.com but can reduce
47         // cost/effort of using [additional] wildcard certificates.
48         if i := strings.Index(s, "--"); i >= 0 {
49                 s = s[:i]
50         }
51         if arvadosclient.UUIDMatch(s) {
52                 return s
53         }
54         if pdh := strings.Replace(s, "-", "+", 1); arvadosclient.PDHMatch(pdh) {
55                 return pdh
56         }
57         return ""
58 }
59
60 var urlPDHDecoder = strings.NewReplacer(" ", "+", "-", "+")
61
62 // parseCollectionIDFromURL returns a UUID or PDH if s is a UUID or a
63 // PDH (even if it is a PDH with "+" replaced by " " or "-");
64 // otherwise "".
65 func parseCollectionIDFromURL(s string) string {
66         if arvadosclient.UUIDMatch(s) {
67                 return s
68         }
69         if pdh := urlPDHDecoder.Replace(s); arvadosclient.PDHMatch(pdh) {
70                 return pdh
71         }
72         return ""
73 }
74
75 func (h *handler) setup() {
76         h.clientPool = arvadosclient.MakeClientPool()
77
78         keepclient.RefreshServiceDiscoveryOnSIGHUP()
79
80         h.healthHandler = &health.Handler{
81                 Token:  h.Config.ManagementToken,
82                 Prefix: "/_health/",
83         }
84
85         // Even though we don't accept LOCK requests, every webdav
86         // handler must have a non-nil LockSystem.
87         h.webdavLS = &noLockSystem{}
88 }
89
90 func (h *handler) serveStatus(w http.ResponseWriter, r *http.Request) {
91         status := struct {
92                 cacheStats
93                 Version string
94         }{
95                 cacheStats: h.Config.Cache.Stats(),
96                 Version:    version,
97         }
98         json.NewEncoder(w).Encode(status)
99 }
100
// webdavMethod is the set of request methods that ServeHTTP hands to
// a webdav handler.
var webdavMethod = map[string]bool{
	"PROPFIND": true,
	"OPTIONS":  true,
}

// browserMethod is the set of request methods that ServeHTTP serves
// as ordinary browser requests.
var browserMethod = map[string]bool{
	"POST": true,
	"HEAD": true,
	"GET":  true,
}
112
113 // ServeHTTP implements http.Handler.
114 func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
115         h.setupOnce.Do(h.setup)
116
117         var statusCode = 0
118         var statusText string
119
120         remoteAddr := r.RemoteAddr
121         if xff := r.Header.Get("X-Forwarded-For"); xff != "" {
122                 remoteAddr = xff + "," + remoteAddr
123         }
124
125         w := httpserver.WrapResponseWriter(wOrig)
126         defer func() {
127                 if statusCode == 0 {
128                         statusCode = w.WroteStatus()
129                 } else if w.WroteStatus() == 0 {
130                         w.WriteHeader(statusCode)
131                 } else if w.WroteStatus() != statusCode {
132                         httpserver.Log(r.RemoteAddr, "WARNING",
133                                 fmt.Sprintf("Our status changed from %d to %d after we sent headers", w.WroteStatus(), statusCode))
134                 }
135                 if statusText == "" {
136                         statusText = http.StatusText(statusCode)
137                 }
138                 httpserver.Log(remoteAddr, statusCode, statusText, w.WroteBodyBytes(), r.Method, r.Host, r.URL.Path, r.URL.RawQuery)
139         }()
140
141         if strings.HasPrefix(r.URL.Path, "/_health/") && r.Method == "GET" {
142                 h.healthHandler.ServeHTTP(w, r)
143                 return
144         }
145
146         if method := r.Header.Get("Access-Control-Request-Method"); method != "" && r.Method == "OPTIONS" {
147                 if !browserMethod[method] && !webdavMethod[method] {
148                         statusCode = http.StatusMethodNotAllowed
149                         return
150                 }
151                 w.Header().Set("Access-Control-Allow-Headers", "Authorization, Content-Type, Range")
152                 w.Header().Set("Access-Control-Allow-Methods", "GET, POST, OPTIONS, PROPFIND")
153                 w.Header().Set("Access-Control-Allow-Origin", "*")
154                 w.Header().Set("Access-Control-Max-Age", "86400")
155                 statusCode = http.StatusOK
156                 return
157         }
158
159         if !browserMethod[r.Method] && !webdavMethod[r.Method] {
160                 statusCode, statusText = http.StatusMethodNotAllowed, r.Method
161                 return
162         }
163
164         if r.Header.Get("Origin") != "" {
165                 // Allow simple cross-origin requests without user
166                 // credentials ("user credentials" as defined by CORS,
167                 // i.e., cookies, HTTP authentication, and client-side
168                 // SSL certificates. See
169                 // http://www.w3.org/TR/cors/#user-credentials).
170                 w.Header().Set("Access-Control-Allow-Origin", "*")
171                 w.Header().Set("Access-Control-Expose-Headers", "Content-Range")
172         }
173
174         arv := h.clientPool.Get()
175         if arv == nil {
176                 statusCode, statusText = http.StatusInternalServerError, "Pool failed: "+h.clientPool.Err().Error()
177                 return
178         }
179         defer h.clientPool.Put(arv)
180
181         pathParts := strings.Split(r.URL.Path[1:], "/")
182
183         var stripParts int
184         var targetID string
185         var tokens []string
186         var reqTokens []string
187         var pathToken bool
188         var attachment bool
189         credentialsOK := h.Config.TrustAllContent
190
191         if r.Host != "" && r.Host == h.Config.AttachmentOnlyHost {
192                 credentialsOK = true
193                 attachment = true
194         } else if r.FormValue("disposition") == "attachment" {
195                 attachment = true
196         }
197
198         if targetID = parseCollectionIDFromDNSName(r.Host); targetID != "" {
199                 // http://ID.collections.example/PATH...
200                 credentialsOK = true
201         } else if r.URL.Path == "/status.json" {
202                 h.serveStatus(w, r)
203                 return
204         } else if len(pathParts) >= 1 && strings.HasPrefix(pathParts[0], "c=") {
205                 // /c=ID[/PATH...]
206                 targetID = parseCollectionIDFromURL(pathParts[0][2:])
207                 stripParts = 1
208         } else if len(pathParts) >= 2 && pathParts[0] == "collections" {
209                 if len(pathParts) >= 4 && pathParts[1] == "download" {
210                         // /collections/download/ID/TOKEN/PATH...
211                         targetID = parseCollectionIDFromURL(pathParts[2])
212                         tokens = []string{pathParts[3]}
213                         stripParts = 4
214                         pathToken = true
215                 } else {
216                         // /collections/ID/PATH...
217                         targetID = parseCollectionIDFromURL(pathParts[1])
218                         tokens = h.Config.AnonymousTokens
219                         stripParts = 2
220                 }
221         }
222
223         if targetID == "" {
224                 statusCode = http.StatusNotFound
225                 return
226         }
227
228         formToken := r.FormValue("api_token")
229         if formToken != "" && r.Header.Get("Origin") != "" && attachment && r.URL.Query().Get("api_token") == "" {
230                 // The client provided an explicit token in the POST
231                 // body. The Origin header indicates this *might* be
232                 // an AJAX request, in which case redirect-with-cookie
233                 // won't work: we should just serve the content in the
234                 // POST response. This is safe because:
235                 //
236                 // * We're supplying an attachment, not inline
237                 //   content, so we don't need to convert the POST to
238                 //   a GET and avoid the "really resubmit form?"
239                 //   problem.
240                 //
241                 // * The token isn't embedded in the URL, so we don't
242                 //   need to worry about bookmarks and copy/paste.
243                 tokens = append(tokens, formToken)
244         } else if formToken != "" && browserMethod[r.Method] {
245                 // The client provided an explicit token in the query
246                 // string, or a form in POST body. We must put the
247                 // token in an HttpOnly cookie, and redirect to the
248                 // same URL with the query param redacted and method =
249                 // GET.
250                 h.seeOtherWithCookie(w, r, "", credentialsOK)
251                 return
252         }
253
254         targetPath := pathParts[stripParts:]
255         if tokens == nil && len(targetPath) > 0 && strings.HasPrefix(targetPath[0], "t=") {
256                 // http://ID.example/t=TOKEN/PATH...
257                 // /c=ID/t=TOKEN/PATH...
258                 //
259                 // This form must only be used to pass scoped tokens
260                 // that give permission for a single collection. See
261                 // FormValue case above.
262                 tokens = []string{targetPath[0][2:]}
263                 pathToken = true
264                 targetPath = targetPath[1:]
265                 stripParts++
266         }
267
268         if tokens == nil {
269                 if credentialsOK {
270                         reqTokens = auth.NewCredentialsFromHTTPRequest(r).Tokens
271                 }
272                 tokens = append(reqTokens, h.Config.AnonymousTokens...)
273         }
274
275         if len(targetPath) > 0 && targetPath[0] == "_" {
276                 // If a collection has a directory called "t=foo" or
277                 // "_", it can be served at
278                 // //collections.example/_/t=foo/ or
279                 // //collections.example/_/_/ respectively:
280                 // //collections.example/t=foo/ won't work because
281                 // t=foo will be interpreted as a token "foo".
282                 targetPath = targetPath[1:]
283                 stripParts++
284         }
285
286         forceReload := false
287         if cc := r.Header.Get("Cache-Control"); strings.Contains(cc, "no-cache") || strings.Contains(cc, "must-revalidate") {
288                 forceReload = true
289         }
290
291         var collection *arvados.Collection
292         tokenResult := make(map[string]int)
293         for _, arv.ApiToken = range tokens {
294                 var err error
295                 collection, err = h.Config.Cache.Get(arv, targetID, forceReload)
296                 if err == nil {
297                         // Success
298                         break
299                 }
300                 if srvErr, ok := err.(arvadosclient.APIServerError); ok {
301                         switch srvErr.HttpStatusCode {
302                         case 404, 401:
303                                 // Token broken or insufficient to
304                                 // retrieve collection
305                                 tokenResult[arv.ApiToken] = srvErr.HttpStatusCode
306                                 continue
307                         }
308                 }
309                 // Something more serious is wrong
310                 statusCode, statusText = http.StatusInternalServerError, err.Error()
311                 return
312         }
313         if collection == nil {
314                 if pathToken || !credentialsOK {
315                         // Either the URL is a "secret sharing link"
316                         // that didn't work out (and asking the client
317                         // for additional credentials would just be
318                         // confusing), or we don't even accept
319                         // credentials at this path.
320                         statusCode = http.StatusNotFound
321                         return
322                 }
323                 for _, t := range reqTokens {
324                         if tokenResult[t] == 404 {
325                                 // The client provided valid token(s), but the
326                                 // collection was not found.
327                                 statusCode = http.StatusNotFound
328                                 return
329                         }
330                 }
331                 // The client's token was invalid (e.g., expired), or
332                 // the client didn't even provide one.  Propagate the
333                 // 401 to encourage the client to use a [different]
334                 // token.
335                 //
336                 // TODO(TC): This response would be confusing to
337                 // someone trying (anonymously) to download public
338                 // data that has been deleted.  Allow a referrer to
339                 // provide this context somehow?
340                 w.Header().Add("WWW-Authenticate", "Basic realm=\"collections\"")
341                 statusCode = http.StatusUnauthorized
342                 return
343         }
344
345         kc, err := keepclient.MakeKeepClient(arv)
346         if err != nil {
347                 statusCode, statusText = http.StatusInternalServerError, err.Error()
348                 return
349         }
350
351         var basename string
352         if len(targetPath) > 0 {
353                 basename = targetPath[len(targetPath)-1]
354         }
355         applyContentDispositionHdr(w, r, basename, attachment)
356
357         fs := collection.FileSystem(&arvados.Client{
358                 APIHost:   arv.ApiServer,
359                 AuthToken: arv.ApiToken,
360                 Insecure:  arv.ApiInsecure,
361         }, kc)
362         if webdavMethod[r.Method] {
363                 h := webdav.Handler{
364                         Prefix:     "/" + strings.Join(pathParts[:stripParts], "/"),
365                         FileSystem: &webdavFS{collfs: fs},
366                         LockSystem: h.webdavLS,
367                         Logger: func(_ *http.Request, err error) {
368                                 if os.IsNotExist(err) {
369                                         statusCode, statusText = http.StatusNotFound, err.Error()
370                                 } else if err != nil {
371                                         statusCode, statusText = http.StatusInternalServerError, err.Error()
372                                 }
373                         },
374                 }
375                 h.ServeHTTP(w, r)
376                 return
377         }
378
379         openPath := "/" + strings.Join(targetPath, "/")
380         if f, err := fs.Open(openPath); os.IsNotExist(err) {
381                 // Requested non-existent path
382                 statusCode = http.StatusNotFound
383         } else if err != nil {
384                 // Some other (unexpected) error
385                 statusCode, statusText = http.StatusInternalServerError, err.Error()
386         } else if stat, err := f.Stat(); err != nil {
387                 // Can't get Size/IsDir (shouldn't happen with a collectionFS!)
388                 statusCode, statusText = http.StatusInternalServerError, err.Error()
389         } else if stat.IsDir() && !strings.HasSuffix(r.URL.Path, "/") {
390                 // If client requests ".../dirname", redirect to
391                 // ".../dirname/". This way, relative links in the
392                 // listing for "dirname" can always be "fnm", never
393                 // "dirname/fnm".
394                 h.seeOtherWithCookie(w, r, r.URL.Path+"/", credentialsOK)
395         } else if stat.IsDir() {
396                 h.serveDirectory(w, r, collection.Name, fs, openPath, stripParts)
397         } else {
398                 http.ServeContent(w, r, basename, stat.ModTime(), f)
399                 if r.Header.Get("Range") == "" && int64(w.WroteBodyBytes()) != stat.Size() {
400                         // If we wrote fewer bytes than expected, it's
401                         // too late to change the real response code
402                         // or send an error message to the client, but
403                         // at least we can try to put some useful
404                         // debugging info in the logs.
405                         n, err := f.Read(make([]byte, 1024))
406                         statusCode, statusText = http.StatusInternalServerError, fmt.Sprintf("f.Size()==%d but only wrote %d bytes; read(1024) returns %d, %s", stat.Size(), w.WroteBodyBytes(), n, err)
407
408                 }
409         }
410 }
411
// dirListingTemplate is the html/template source for the directory
// listing page rendered by serveDirectory. It expects a data map
// with these keys:
//
//   - CollectionName: shown in the page title and heading
//   - Files: entries with Name and Size fields, one list item each
//   - Request: the *http.Request (Host and URL build the wget example)
//   - StripParts: value for wget's --cut-dirs option
//
// It also requires a template function "nbsp".
var dirListingTemplate = `<!DOCTYPE HTML>
<HTML><HEAD>
  <META name="robots" content="NOINDEX">
  <TITLE>{{ .CollectionName }}</TITLE>
  <STYLE type="text/css">
    body {
      margin: 1.5em;
    }
    pre {
      background-color: #D9EDF7;
      border-radius: .25em;
      padding: .75em;
      overflow: auto;
    }
    .footer p {
      font-size: 82%;
    }
    ul {
      padding: 0;
    }
    ul li {
      font-family: monospace;
      list-style: none;
    }
  </STYLE>
</HEAD>
<BODY>
<H1>{{ .CollectionName }}</H1>

<P>This collection of data files is being shared with you through
Arvados.  You can download individual files listed below.  To download
the entire collection with wget, try:</P>

<PRE>$ wget --mirror --no-parent --no-host --cut-dirs={{ .StripParts }} https://{{ .Request.Host }}{{ .Request.URL }}</PRE>

<H2>File Listing</H2>

{{if .Files}}
<UL>
{{range .Files}}  <LI>{{.Size | printf "%15d  " | nbsp}}<A href="{{.Name}}">{{.Name}}</A></LI>{{end}}
</UL>
{{else}}
<P>(No files; this collection is empty.)</P>
{{end}}

<HR noshade>
<DIV class="footer">
  <P>
    About Arvados:
    Arvados is a free and open source software bioinformatics platform.
    To learn more, visit arvados.org.
    Arvados is not responsible for the files listed on this page.
  </P>
</DIV>

</BODY>
`
469
// fileListEnt is one row of the directory listing rendered from
// dirListingTemplate.
type fileListEnt struct {
	// Name is the file's path relative to the listed directory;
	// it is used as both link target and link text.
	Name string
	// Size is the file size as reported by the filesystem entry.
	Size int64
}
474
475 func (h *handler) serveDirectory(w http.ResponseWriter, r *http.Request, collectionName string, fs http.FileSystem, base string, stripParts int) {
476         var files []fileListEnt
477         var walk func(string) error
478         if !strings.HasSuffix(base, "/") {
479                 base = base + "/"
480         }
481         walk = func(path string) error {
482                 dirname := base + path
483                 if dirname != "/" {
484                         dirname = strings.TrimSuffix(dirname, "/")
485                 }
486                 d, err := fs.Open(dirname)
487                 if err != nil {
488                         return err
489                 }
490                 ents, err := d.Readdir(-1)
491                 if err != nil {
492                         return err
493                 }
494                 for _, ent := range ents {
495                         if ent.IsDir() {
496                                 err = walk(path + ent.Name() + "/")
497                                 if err != nil {
498                                         return err
499                                 }
500                         } else {
501                                 files = append(files, fileListEnt{
502                                         Name: path + ent.Name(),
503                                         Size: ent.Size(),
504                                 })
505                         }
506                 }
507                 return nil
508         }
509         if err := walk(""); err != nil {
510                 http.Error(w, err.Error(), http.StatusInternalServerError)
511                 return
512         }
513
514         funcs := template.FuncMap{
515                 "nbsp": func(s string) template.HTML {
516                         return template.HTML(strings.Replace(s, " ", "&nbsp;", -1))
517                 },
518         }
519         tmpl, err := template.New("dir").Funcs(funcs).Parse(dirListingTemplate)
520         if err != nil {
521                 http.Error(w, err.Error(), http.StatusInternalServerError)
522                 return
523         }
524         sort.Slice(files, func(i, j int) bool {
525                 return files[i].Name < files[j].Name
526         })
527         w.WriteHeader(http.StatusOK)
528         tmpl.Execute(w, map[string]interface{}{
529                 "CollectionName": collectionName,
530                 "Files":          files,
531                 "Request":        r,
532                 "StripParts":     stripParts,
533         })
534 }
535
// applyContentDispositionHdr sets the Content-Disposition response
// header when one is needed.
//
// The disposition type is "attachment" if isAttachment is true and
// "inline" otherwise. When the request URI contains a query string,
// a filename parameter (filename, quoted as ASCII) is appended. A
// plain "inline" disposition with no filename is the default
// behavior anyway, so in that case no header is set.
func applyContentDispositionHdr(w http.ResponseWriter, r *http.Request, filename string, isAttachment bool) {
	hasQuery := strings.IndexByte(r.RequestURI, '?') >= 0
	if !isAttachment && !hasQuery {
		return
	}

	value := "inline"
	if isAttachment {
		value = "attachment"
	}
	if hasQuery {
		// Help the UA realize that the filename is just
		// "filename.txt", not
		// "filename.txt?disposition=attachment".
		//
		// TODO(TC): Follow advice at RFC 6266 appendix D
		value += "; filename=" + strconv.QuoteToASCII(filename)
	}
	w.Header().Set("Content-Disposition", value)
}
553
554 func (h *handler) seeOtherWithCookie(w http.ResponseWriter, r *http.Request, location string, credentialsOK bool) {
555         if formToken := r.FormValue("api_token"); formToken != "" {
556                 if !credentialsOK {
557                         // It is not safe to copy the provided token
558                         // into a cookie unless the current vhost
559                         // (origin) serves only a single collection or
560                         // we are in TrustAllContent mode.
561                         w.WriteHeader(http.StatusBadRequest)
562                         return
563                 }
564
565                 // The HttpOnly flag is necessary to prevent
566                 // JavaScript code (included in, or loaded by, a page
567                 // in the collection being served) from employing the
568                 // user's token beyond reading other files in the same
569                 // domain, i.e., same collection.
570                 //
571                 // The 303 redirect is necessary in the case of a GET
572                 // request to avoid exposing the token in the Location
573                 // bar, and in the case of a POST request to avoid
574                 // raising warnings when the user refreshes the
575                 // resulting page.
576                 http.SetCookie(w, &http.Cookie{
577                         Name:     "arvados_api_token",
578                         Value:    auth.EncodeTokenCookie([]byte(formToken)),
579                         Path:     "/",
580                         HttpOnly: true,
581                 })
582         }
583
584         // Propagate query parameters (except api_token) from
585         // the original request.
586         redirQuery := r.URL.Query()
587         redirQuery.Del("api_token")
588
589         u := r.URL
590         if location != "" {
591                 newu, err := u.Parse(location)
592                 if err != nil {
593                         w.WriteHeader(http.StatusInternalServerError)
594                         return
595                 }
596                 u = newu
597         }
598         redir := (&url.URL{
599                 Host:     r.Host,
600                 Path:     u.Path,
601                 RawQuery: redirQuery.Encode(),
602         }).String()
603
604         w.Header().Add("Location", redir)
605         w.WriteHeader(http.StatusSeeOther)
606         io.WriteString(w, `<A href="`)
607         io.WriteString(w, html.EscapeString(redir))
608         io.WriteString(w, `">Continue</A>`)
609 }