5824: Support partial content with Range header (only if start==0).
[arvados.git] / services / keep-web / handler.go
1 package main
2
3 import (
4         "flag"
5         "fmt"
6         "html"
7         "io"
8         "mime"
9         "net/http"
10         "net/url"
11         "os"
12         "regexp"
13         "strconv"
14         "strings"
15
16         "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
17         "git.curoverse.com/arvados.git/sdk/go/auth"
18         "git.curoverse.com/arvados.git/sdk/go/httpserver"
19         "git.curoverse.com/arvados.git/sdk/go/keepclient"
20 )
21
22 type handler struct{}
23
24 var (
25         clientPool         = arvadosclient.MakeClientPool()
26         trustAllContent    = false
27         attachmentOnlyHost = ""
28 )
29
30 func init() {
31         flag.StringVar(&attachmentOnlyHost, "attachment-only-host", "",
32                 "Accept credentials, and add \"Content-Disposition: attachment\" response headers, for requests at this hostname:port. Prohibiting inline display makes it possible to serve untrusted and non-public content from a single origin, i.e., without wildcard DNS or SSL.")
33         flag.BoolVar(&trustAllContent, "trust-all-content", false,
34                 "Serve non-public content from a single origin. Dangerous: read docs before using!")
35 }
36
37 // return a UUID or PDH if s begins with a UUID or URL-encoded PDH;
38 // otherwise return "".
39 func parseCollectionIDFromDNSName(s string) string {
40         // Strip domain.
41         if i := strings.IndexRune(s, '.'); i >= 0 {
42                 s = s[:i]
43         }
44         // Names like {uuid}--collections.example.com serve the same
45         // purpose as {uuid}.collections.example.com but can reduce
46         // cost/effort of using [additional] wildcard certificates.
47         if i := strings.Index(s, "--"); i >= 0 {
48                 s = s[:i]
49         }
50         if arvadosclient.UUIDMatch(s) {
51                 return s
52         }
53         if pdh := strings.Replace(s, "-", "+", 1); arvadosclient.PDHMatch(pdh) {
54                 return pdh
55         }
56         return ""
57 }
58
59 var urlPDHDecoder = strings.NewReplacer(" ", "+", "-", "+")
60
61 // return a UUID or PDH if s is a UUID or a PDH (even if it is a PDH
62 // with "+" replaced by " " or "-"); otherwise return "".
63 func parseCollectionIDFromURL(s string) string {
64         if arvadosclient.UUIDMatch(s) {
65                 return s
66         }
67         if pdh := urlPDHDecoder.Replace(s); arvadosclient.PDHMatch(pdh) {
68                 return pdh
69         }
70         return ""
71 }
72
73 func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
74         var statusCode = 0
75         var statusText string
76
77         remoteAddr := r.RemoteAddr
78         if xff := r.Header.Get("X-Forwarded-For"); xff != "" {
79                 remoteAddr = xff + "," + remoteAddr
80         }
81
82         w := httpserver.WrapResponseWriter(wOrig)
83         defer func() {
84                 if statusCode == 0 {
85                         statusCode = w.WroteStatus()
86                 } else if w.WroteStatus() == 0 {
87                         w.WriteHeader(statusCode)
88                 } else if w.WroteStatus() != statusCode {
89                         httpserver.Log(r.RemoteAddr, "WARNING",
90                                 fmt.Sprintf("Our status changed from %d to %d after we sent headers", w.WroteStatus(), statusCode))
91                 }
92                 if statusText == "" {
93                         statusText = http.StatusText(statusCode)
94                 }
95                 httpserver.Log(remoteAddr, statusCode, statusText, w.WroteBodyBytes(), r.Method, r.Host, r.URL.Path, r.URL.RawQuery)
96         }()
97
98         if r.Method != "GET" && r.Method != "POST" {
99                 statusCode, statusText = http.StatusMethodNotAllowed, r.Method
100                 return
101         }
102
103         arv := clientPool.Get()
104         if arv == nil {
105                 statusCode, statusText = http.StatusInternalServerError, "Pool failed: "+clientPool.Err().Error()
106                 return
107         }
108         defer clientPool.Put(arv)
109
110         pathParts := strings.Split(r.URL.Path[1:], "/")
111
112         var targetID string
113         var targetPath []string
114         var tokens []string
115         var reqTokens []string
116         var pathToken bool
117         var attachment bool
118         credentialsOK := trustAllContent
119
120         if r.Host != "" && r.Host == attachmentOnlyHost {
121                 credentialsOK = true
122                 attachment = true
123         } else if r.FormValue("disposition") == "attachment" {
124                 attachment = true
125         }
126
127         if targetID = parseCollectionIDFromDNSName(r.Host); targetID != "" {
128                 // http://ID.collections.example/PATH...
129                 credentialsOK = true
130                 targetPath = pathParts
131         } else if len(pathParts) >= 2 && strings.HasPrefix(pathParts[0], "c=") {
132                 // /c=ID/PATH...
133                 targetID = parseCollectionIDFromURL(pathParts[0][2:])
134                 targetPath = pathParts[1:]
135         } else if len(pathParts) >= 3 && pathParts[0] == "collections" {
136                 if len(pathParts) >= 5 && pathParts[1] == "download" {
137                         // /collections/download/ID/TOKEN/PATH...
138                         targetID = pathParts[2]
139                         tokens = []string{pathParts[3]}
140                         targetPath = pathParts[4:]
141                         pathToken = true
142                 } else {
143                         // /collections/ID/PATH...
144                         targetID = pathParts[1]
145                         tokens = anonymousTokens
146                         targetPath = pathParts[2:]
147                 }
148         } else {
149                 statusCode = http.StatusNotFound
150                 return
151         }
152         if t := r.FormValue("api_token"); t != "" {
153                 // The client provided an explicit token in the query
154                 // string, or a form in POST body. We must put the
155                 // token in an HttpOnly cookie, and redirect to the
156                 // same URL with the query param redacted and method =
157                 // GET.
158
159                 if !credentialsOK {
160                         // It is not safe to copy the provided token
161                         // into a cookie unless the current vhost
162                         // (origin) serves only a single collection or
163                         // we are in trustAllContent mode.
164                         statusCode = http.StatusBadRequest
165                         return
166                 }
167
168                 // The HttpOnly flag is necessary to prevent
169                 // JavaScript code (included in, or loaded by, a page
170                 // in the collection being served) from employing the
171                 // user's token beyond reading other files in the same
172                 // domain, i.e., same collection.
173                 //
174                 // The 303 redirect is necessary in the case of a GET
175                 // request to avoid exposing the token in the Location
176                 // bar, and in the case of a POST request to avoid
177                 // raising warnings when the user refreshes the
178                 // resulting page.
179
180                 http.SetCookie(w, &http.Cookie{
181                         Name:     "arvados_api_token",
182                         Value:    auth.EncodeTokenCookie([]byte(t)),
183                         Path:     "/",
184                         HttpOnly: true,
185                 })
186                 redir := (&url.URL{Host: r.Host, Path: r.URL.Path}).String()
187
188                 w.Header().Add("Location", redir)
189                 statusCode, statusText = http.StatusSeeOther, redir
190                 w.WriteHeader(statusCode)
191                 io.WriteString(w, `<A href="`)
192                 io.WriteString(w, html.EscapeString(redir))
193                 io.WriteString(w, `">Continue</A>`)
194                 return
195         }
196
197         if tokens == nil && strings.HasPrefix(targetPath[0], "t=") {
198                 // http://ID.example/t=TOKEN/PATH...
199                 // /c=ID/t=TOKEN/PATH...
200                 //
201                 // This form must only be used to pass scoped tokens
202                 // that give permission for a single collection. See
203                 // FormValue case above.
204                 tokens = []string{targetPath[0][2:]}
205                 pathToken = true
206                 targetPath = targetPath[1:]
207         }
208
209         if tokens == nil {
210                 if credentialsOK {
211                         reqTokens = auth.NewCredentialsFromHTTPRequest(r).Tokens
212                 }
213                 tokens = append(reqTokens, anonymousTokens...)
214         }
215
216         if len(targetPath) > 0 && targetPath[0] == "_" {
217                 // If a collection has a directory called "t=foo" or
218                 // "_", it can be served at
219                 // //collections.example/_/t=foo/ or
220                 // //collections.example/_/_/ respectively:
221                 // //collections.example/t=foo/ won't work because
222                 // t=foo will be interpreted as a token "foo".
223                 targetPath = targetPath[1:]
224         }
225
226         tokenResult := make(map[string]int)
227         collection := make(map[string]interface{})
228         found := false
229         for _, arv.ApiToken = range tokens {
230                 err := arv.Get("collections", targetID, nil, &collection)
231                 if err == nil {
232                         // Success
233                         found = true
234                         break
235                 }
236                 if srvErr, ok := err.(arvadosclient.APIServerError); ok {
237                         switch srvErr.HttpStatusCode {
238                         case 404, 401:
239                                 // Token broken or insufficient to
240                                 // retrieve collection
241                                 tokenResult[arv.ApiToken] = srvErr.HttpStatusCode
242                                 continue
243                         }
244                 }
245                 // Something more serious is wrong
246                 statusCode, statusText = http.StatusInternalServerError, err.Error()
247                 return
248         }
249         if !found {
250                 if pathToken || !credentialsOK {
251                         // Either the URL is a "secret sharing link"
252                         // that didn't work out (and asking the client
253                         // for additional credentials would just be
254                         // confusing), or we don't even accept
255                         // credentials at this path.
256                         statusCode = http.StatusNotFound
257                         return
258                 }
259                 for _, t := range reqTokens {
260                         if tokenResult[t] == 404 {
261                                 // The client provided valid token(s), but the
262                                 // collection was not found.
263                                 statusCode = http.StatusNotFound
264                                 return
265                         }
266                 }
267                 // The client's token was invalid (e.g., expired), or
268                 // the client didn't even provide one.  Propagate the
269                 // 401 to encourage the client to use a [different]
270                 // token.
271                 //
272                 // TODO(TC): This response would be confusing to
273                 // someone trying (anonymously) to download public
274                 // data that has been deleted.  Allow a referrer to
275                 // provide this context somehow?
276                 w.Header().Add("WWW-Authenticate", "Basic realm=\"collections\"")
277                 statusCode = http.StatusUnauthorized
278                 return
279         }
280
281         filename := strings.Join(targetPath, "/")
282         kc, err := keepclient.MakeKeepClient(arv)
283         if err != nil {
284                 statusCode, statusText = http.StatusInternalServerError, err.Error()
285                 return
286         }
287         rdr, err := kc.CollectionFileReader(collection, filename)
288         if os.IsNotExist(err) {
289                 statusCode = http.StatusNotFound
290                 return
291         } else if err != nil {
292                 statusCode, statusText = http.StatusBadGateway, err.Error()
293                 return
294         }
295         defer rdr.Close()
296
297         basenamePos := strings.LastIndex(filename, "/")
298         if basenamePos < 0 {
299                 basenamePos = 0
300         }
301         extPos := strings.LastIndex(filename, ".")
302         if extPos > basenamePos {
303                 // Now extPos is safely >= 0.
304                 if t := mime.TypeByExtension(filename[extPos:]); t != "" {
305                         w.Header().Set("Content-Type", t)
306                 }
307         }
308         if rdr, ok := rdr.(keepclient.ReadCloserWithLen); ok {
309                 w.Header().Set("Content-Length", fmt.Sprintf("%d", rdr.Len()))
310         }
311
312         applyContentDispositionHdr(w, r, filename[basenamePos:], attachment)
313         rangeRdr, statusCode := applyRangeHdr(w, r, rdr)
314
315         w.WriteHeader(statusCode)
316         _, err = io.Copy(w, rangeRdr)
317         if err != nil {
318                 statusCode, statusText = http.StatusBadGateway, err.Error()
319         }
320 }
321
322 var rangeRe = regexp.MustCompile(`^bytes=0-([0-9]*)$`)
323
324 func applyRangeHdr(w http.ResponseWriter, r *http.Request, rdr keepclient.ReadCloserWithLen) (io.Reader, int) {
325         w.Header().Set("Accept-Ranges", "bytes")
326         hdr := r.Header.Get("Range")
327         fields := rangeRe.FindStringSubmatch(hdr)
328         if fields == nil {
329                 return rdr, http.StatusOK
330         }
331         rangeEnd, err := strconv.ParseInt(fields[1], 10, 64)
332         if err != nil {
333                 // Empty or too big for int64 == send entire content
334                 return rdr, http.StatusOK
335         }
336         if uint64(rangeEnd) >= rdr.Len() {
337                 return rdr, http.StatusOK
338         }
339         w.Header().Set("Content-Length", fmt.Sprintf("%d", rangeEnd+1))
340         w.Header().Set("Content-Range", fmt.Sprintf("bytes %d-%d/%d", 0, rangeEnd, rdr.Len()))
341         return &io.LimitedReader{R: rdr, N: rangeEnd + 1}, http.StatusPartialContent
342 }
343
// applyContentDispositionHdr sets the Content-Disposition response
// header when there is something to say:
//
//   - isAttachment selects "attachment" instead of "inline";
//   - if the request URI contains a "?", a filename parameter is
//     appended so the UA realizes the name is just "filename.txt",
//     not "filename.txt?disposition=attachment".
//
// A bare "inline" disposition is never sent.
//
// TODO(TC): Follow advice at RFC 6266 appendix D
func applyContentDispositionHdr(w http.ResponseWriter, r *http.Request, filename string, isAttachment bool) {
	hasQuery := strings.Contains(r.RequestURI, "?")
	if !isAttachment && !hasQuery {
		return
	}

	value := "inline"
	if isAttachment {
		value = "attachment"
	}
	if hasQuery {
		value += "; filename=" + strconv.QuoteToASCII(filename)
	}
	w.Header().Set("Content-Disposition", value)
}