5824: Propagate non-token parts of query string (notably ?disposition=attachment...
[arvados.git] / services / keep-web / handler.go
1 package main
2
3 import (
4         "flag"
5         "fmt"
6         "html"
7         "io"
8         "mime"
9         "net/http"
10         "net/url"
11         "os"
12         "regexp"
13         "strconv"
14         "strings"
15
16         "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
17         "git.curoverse.com/arvados.git/sdk/go/auth"
18         "git.curoverse.com/arvados.git/sdk/go/httpserver"
19         "git.curoverse.com/arvados.git/sdk/go/keepclient"
20 )
21
22 type handler struct{}
23
24 var (
25         clientPool         = arvadosclient.MakeClientPool()
26         trustAllContent    = false
27         attachmentOnlyHost = ""
28 )
29
30 func init() {
31         flag.StringVar(&attachmentOnlyHost, "attachment-only-host", "",
32                 "Accept credentials, and add \"Content-Disposition: attachment\" response headers, for requests at this hostname:port. Prohibiting inline display makes it possible to serve untrusted and non-public content from a single origin, i.e., without wildcard DNS or SSL.")
33         flag.BoolVar(&trustAllContent, "trust-all-content", false,
34                 "Serve non-public content from a single origin. Dangerous: read docs before using!")
35 }
36
37 // return a UUID or PDH if s begins with a UUID or URL-encoded PDH;
38 // otherwise return "".
39 func parseCollectionIDFromDNSName(s string) string {
40         // Strip domain.
41         if i := strings.IndexRune(s, '.'); i >= 0 {
42                 s = s[:i]
43         }
44         // Names like {uuid}--collections.example.com serve the same
45         // purpose as {uuid}.collections.example.com but can reduce
46         // cost/effort of using [additional] wildcard certificates.
47         if i := strings.Index(s, "--"); i >= 0 {
48                 s = s[:i]
49         }
50         if arvadosclient.UUIDMatch(s) {
51                 return s
52         }
53         if pdh := strings.Replace(s, "-", "+", 1); arvadosclient.PDHMatch(pdh) {
54                 return pdh
55         }
56         return ""
57 }
58
59 var urlPDHDecoder = strings.NewReplacer(" ", "+", "-", "+")
60
61 // return a UUID or PDH if s is a UUID or a PDH (even if it is a PDH
62 // with "+" replaced by " " or "-"); otherwise return "".
63 func parseCollectionIDFromURL(s string) string {
64         if arvadosclient.UUIDMatch(s) {
65                 return s
66         }
67         if pdh := urlPDHDecoder.Replace(s); arvadosclient.PDHMatch(pdh) {
68                 return pdh
69         }
70         return ""
71 }
72
73 func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
74         var statusCode = 0
75         var statusText string
76
77         remoteAddr := r.RemoteAddr
78         if xff := r.Header.Get("X-Forwarded-For"); xff != "" {
79                 remoteAddr = xff + "," + remoteAddr
80         }
81
82         w := httpserver.WrapResponseWriter(wOrig)
83         defer func() {
84                 if statusCode == 0 {
85                         statusCode = w.WroteStatus()
86                 } else if w.WroteStatus() == 0 {
87                         w.WriteHeader(statusCode)
88                 } else if w.WroteStatus() != statusCode {
89                         httpserver.Log(r.RemoteAddr, "WARNING",
90                                 fmt.Sprintf("Our status changed from %d to %d after we sent headers", w.WroteStatus(), statusCode))
91                 }
92                 if statusText == "" {
93                         statusText = http.StatusText(statusCode)
94                 }
95                 httpserver.Log(remoteAddr, statusCode, statusText, w.WroteBodyBytes(), r.Method, r.Host, r.URL.Path, r.URL.RawQuery)
96         }()
97
98         if r.Method != "GET" && r.Method != "POST" {
99                 statusCode, statusText = http.StatusMethodNotAllowed, r.Method
100                 return
101         }
102
103         arv := clientPool.Get()
104         if arv == nil {
105                 statusCode, statusText = http.StatusInternalServerError, "Pool failed: "+clientPool.Err().Error()
106                 return
107         }
108         defer clientPool.Put(arv)
109
110         pathParts := strings.Split(r.URL.Path[1:], "/")
111
112         var targetID string
113         var targetPath []string
114         var tokens []string
115         var reqTokens []string
116         var pathToken bool
117         var attachment bool
118         credentialsOK := trustAllContent
119
120         if r.Host != "" && r.Host == attachmentOnlyHost {
121                 credentialsOK = true
122                 attachment = true
123         } else if r.FormValue("disposition") == "attachment" {
124                 attachment = true
125         }
126
127         if targetID = parseCollectionIDFromDNSName(r.Host); targetID != "" {
128                 // http://ID.collections.example/PATH...
129                 credentialsOK = true
130                 targetPath = pathParts
131         } else if len(pathParts) >= 2 && strings.HasPrefix(pathParts[0], "c=") {
132                 // /c=ID/PATH...
133                 targetID = parseCollectionIDFromURL(pathParts[0][2:])
134                 targetPath = pathParts[1:]
135         } else if len(pathParts) >= 3 && pathParts[0] == "collections" {
136                 if len(pathParts) >= 5 && pathParts[1] == "download" {
137                         // /collections/download/ID/TOKEN/PATH...
138                         targetID = pathParts[2]
139                         tokens = []string{pathParts[3]}
140                         targetPath = pathParts[4:]
141                         pathToken = true
142                 } else {
143                         // /collections/ID/PATH...
144                         targetID = pathParts[1]
145                         tokens = anonymousTokens
146                         targetPath = pathParts[2:]
147                 }
148         } else {
149                 statusCode = http.StatusNotFound
150                 return
151         }
152         if t := r.FormValue("api_token"); t != "" {
153                 // The client provided an explicit token in the query
154                 // string, or a form in POST body. We must put the
155                 // token in an HttpOnly cookie, and redirect to the
156                 // same URL with the query param redacted and method =
157                 // GET.
158
159                 if !credentialsOK {
160                         // It is not safe to copy the provided token
161                         // into a cookie unless the current vhost
162                         // (origin) serves only a single collection or
163                         // we are in trustAllContent mode.
164                         statusCode = http.StatusBadRequest
165                         return
166                 }
167
168                 // The HttpOnly flag is necessary to prevent
169                 // JavaScript code (included in, or loaded by, a page
170                 // in the collection being served) from employing the
171                 // user's token beyond reading other files in the same
172                 // domain, i.e., same collection.
173                 //
174                 // The 303 redirect is necessary in the case of a GET
175                 // request to avoid exposing the token in the Location
176                 // bar, and in the case of a POST request to avoid
177                 // raising warnings when the user refreshes the
178                 // resulting page.
179
180                 http.SetCookie(w, &http.Cookie{
181                         Name:     "arvados_api_token",
182                         Value:    auth.EncodeTokenCookie([]byte(t)),
183                         Path:     "/",
184                         HttpOnly: true,
185                 })
186
187                 // Propagate query parameters (except api_token) from
188                 // the original request.
189                 redirQuery := r.URL.Query()
190                 redirQuery.Del("api_token")
191
192                 redir := (&url.URL{
193                         Host:     r.Host,
194                         Path:     r.URL.Path,
195                         RawQuery: redirQuery.Encode(),
196                 }).String()
197
198                 w.Header().Add("Location", redir)
199                 statusCode, statusText = http.StatusSeeOther, redir
200                 w.WriteHeader(statusCode)
201                 io.WriteString(w, `<A href="`)
202                 io.WriteString(w, html.EscapeString(redir))
203                 io.WriteString(w, `">Continue</A>`)
204                 return
205         }
206
207         if tokens == nil && strings.HasPrefix(targetPath[0], "t=") {
208                 // http://ID.example/t=TOKEN/PATH...
209                 // /c=ID/t=TOKEN/PATH...
210                 //
211                 // This form must only be used to pass scoped tokens
212                 // that give permission for a single collection. See
213                 // FormValue case above.
214                 tokens = []string{targetPath[0][2:]}
215                 pathToken = true
216                 targetPath = targetPath[1:]
217         }
218
219         if tokens == nil {
220                 if credentialsOK {
221                         reqTokens = auth.NewCredentialsFromHTTPRequest(r).Tokens
222                 }
223                 tokens = append(reqTokens, anonymousTokens...)
224         }
225
226         if len(targetPath) > 0 && targetPath[0] == "_" {
227                 // If a collection has a directory called "t=foo" or
228                 // "_", it can be served at
229                 // //collections.example/_/t=foo/ or
230                 // //collections.example/_/_/ respectively:
231                 // //collections.example/t=foo/ won't work because
232                 // t=foo will be interpreted as a token "foo".
233                 targetPath = targetPath[1:]
234         }
235
236         tokenResult := make(map[string]int)
237         collection := make(map[string]interface{})
238         found := false
239         for _, arv.ApiToken = range tokens {
240                 err := arv.Get("collections", targetID, nil, &collection)
241                 if err == nil {
242                         // Success
243                         found = true
244                         break
245                 }
246                 if srvErr, ok := err.(arvadosclient.APIServerError); ok {
247                         switch srvErr.HttpStatusCode {
248                         case 404, 401:
249                                 // Token broken or insufficient to
250                                 // retrieve collection
251                                 tokenResult[arv.ApiToken] = srvErr.HttpStatusCode
252                                 continue
253                         }
254                 }
255                 // Something more serious is wrong
256                 statusCode, statusText = http.StatusInternalServerError, err.Error()
257                 return
258         }
259         if !found {
260                 if pathToken || !credentialsOK {
261                         // Either the URL is a "secret sharing link"
262                         // that didn't work out (and asking the client
263                         // for additional credentials would just be
264                         // confusing), or we don't even accept
265                         // credentials at this path.
266                         statusCode = http.StatusNotFound
267                         return
268                 }
269                 for _, t := range reqTokens {
270                         if tokenResult[t] == 404 {
271                                 // The client provided valid token(s), but the
272                                 // collection was not found.
273                                 statusCode = http.StatusNotFound
274                                 return
275                         }
276                 }
277                 // The client's token was invalid (e.g., expired), or
278                 // the client didn't even provide one.  Propagate the
279                 // 401 to encourage the client to use a [different]
280                 // token.
281                 //
282                 // TODO(TC): This response would be confusing to
283                 // someone trying (anonymously) to download public
284                 // data that has been deleted.  Allow a referrer to
285                 // provide this context somehow?
286                 w.Header().Add("WWW-Authenticate", "Basic realm=\"collections\"")
287                 statusCode = http.StatusUnauthorized
288                 return
289         }
290
291         filename := strings.Join(targetPath, "/")
292         kc, err := keepclient.MakeKeepClient(arv)
293         if err != nil {
294                 statusCode, statusText = http.StatusInternalServerError, err.Error()
295                 return
296         }
297         rdr, err := kc.CollectionFileReader(collection, filename)
298         if os.IsNotExist(err) {
299                 statusCode = http.StatusNotFound
300                 return
301         } else if err != nil {
302                 statusCode, statusText = http.StatusBadGateway, err.Error()
303                 return
304         }
305         defer rdr.Close()
306
307         basenamePos := strings.LastIndex(filename, "/")
308         if basenamePos < 0 {
309                 basenamePos = 0
310         }
311         extPos := strings.LastIndex(filename, ".")
312         if extPos > basenamePos {
313                 // Now extPos is safely >= 0.
314                 if t := mime.TypeByExtension(filename[extPos:]); t != "" {
315                         w.Header().Set("Content-Type", t)
316                 }
317         }
318         if rdr, ok := rdr.(keepclient.ReadCloserWithLen); ok {
319                 w.Header().Set("Content-Length", fmt.Sprintf("%d", rdr.Len()))
320         }
321
322         applyContentDispositionHdr(w, r, filename[basenamePos:], attachment)
323         rangeRdr, statusCode := applyRangeHdr(w, r, rdr)
324
325         w.WriteHeader(statusCode)
326         _, err = io.Copy(w, rangeRdr)
327         if err != nil {
328                 statusCode, statusText = http.StatusBadGateway, err.Error()
329         }
330 }
331
332 var rangeRe = regexp.MustCompile(`^bytes=0-([0-9]*)$`)
333
334 func applyRangeHdr(w http.ResponseWriter, r *http.Request, rdr keepclient.ReadCloserWithLen) (io.Reader, int) {
335         w.Header().Set("Accept-Ranges", "bytes")
336         hdr := r.Header.Get("Range")
337         fields := rangeRe.FindStringSubmatch(hdr)
338         if fields == nil {
339                 return rdr, http.StatusOK
340         }
341         rangeEnd, err := strconv.ParseInt(fields[1], 10, 64)
342         if err != nil {
343                 // Empty or too big for int64 == send entire content
344                 return rdr, http.StatusOK
345         }
346         if uint64(rangeEnd) >= rdr.Len() {
347                 return rdr, http.StatusOK
348         }
349         w.Header().Set("Content-Length", fmt.Sprintf("%d", rangeEnd+1))
350         w.Header().Set("Content-Range", fmt.Sprintf("bytes %d-%d/%d", 0, rangeEnd, rdr.Len()))
351         return &io.LimitedReader{R: rdr, N: rangeEnd + 1}, http.StatusPartialContent
352 }
353
// applyContentDispositionHdr sets the Content-Disposition response
// header on w when there is something to say.
//
// The disposition type is "attachment" if isAttachment is true and
// "inline" otherwise. If the request URI contains a "?", a quoted
// filename parameter is appended to help the UA realize that the
// filename is just "filename.txt", not
// "filename.txt?disposition=attachment". A bare "inline" is the
// default and is therefore never sent.
//
// TODO(TC): Follow advice at RFC 6266 appendix D
func applyContentDispositionHdr(w http.ResponseWriter, r *http.Request, filename string, isAttachment bool) {
	hasQuery := strings.ContainsRune(r.RequestURI, '?')
	if !isAttachment && !hasQuery {
		// Plain inline display with no query string: nothing
		// to add.
		return
	}

	value := "inline"
	if isAttachment {
		value = "attachment"
	}
	if hasQuery {
		value += "; filename=" + strconv.QuoteToASCII(filename)
	}
	w.Header().Set("Content-Disposition", value)
}