5824: Add -attachment-only-host feature.
[arvados.git] / services / keep-web / handler.go
1 package main
2
3 import (
4         "flag"
5         "fmt"
6         "html"
7         "io"
8         "mime"
9         "net/http"
10         "net/url"
11         "os"
12         "strings"
13         "time"
14
15         "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
16         "git.curoverse.com/arvados.git/sdk/go/auth"
17         "git.curoverse.com/arvados.git/sdk/go/httpserver"
18         "git.curoverse.com/arvados.git/sdk/go/keepclient"
19 )
20
21 type handler struct{}
22
23 var (
24         clientPool      = arvadosclient.MakeClientPool()
25         trustAllContent = false
26         anonymousTokens []string
27         attachmentOnlyHost = ""
28 )
29
30 func init() {
31         flag.BoolVar(&trustAllContent, "trust-all-content", false,
32                 "Serve non-public content from a single origin. Dangerous: read docs before using!")
33         flag.StringVar(&attachmentOnlyHost, "attachment-only-host", "",
34                 "Accept credentials, and add \"Content-Disposition: attachment\" response headers, for requests at this hostname:port. Prohibiting inline display makes it possible to serve untrusted and non-public content from a single origin, i.e., without wildcard DNS or SSL.")
35 }
36
37 // return a UUID or PDH if s begins with a UUID or URL-encoded PDH;
38 // otherwise return "".
39 func parseCollectionIdFromDNSName(s string) string {
40         // Strip domain.
41         if i := strings.IndexRune(s, '.'); i >= 0 {
42                 s = s[:i]
43         }
44         // Names like {uuid}--dl.example.com serve the same purpose as
45         // {uuid}.dl.example.com but can reduce cost/effort of using
46         // [additional] wildcard certificates.
47         if i := strings.Index(s, "--"); i >= 0 {
48                 s = s[:i]
49         }
50         if arvadosclient.UUIDMatch(s) {
51                 return s
52         }
53         if pdh := strings.Replace(s, "-", "+", 1); arvadosclient.PDHMatch(pdh) {
54                 return pdh
55         }
56         return ""
57 }
58
59 var urlPDHDecoder = strings.NewReplacer(" ", "+", "-", "+")
60
61 // return a UUID or PDH if s is a UUID or a PDH (even if it is a PDH
62 // with "+" replaced by " " or "-"); otherwise return "".
63 func parseCollectionIdFromURL(s string) string {
64         if arvadosclient.UUIDMatch(s) {
65                 return s
66         }
67         if pdh := urlPDHDecoder.Replace(s); arvadosclient.PDHMatch(pdh) {
68                 return pdh
69         }
70         return ""
71 }
72
73 func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
74         var statusCode = 0
75         var statusText string
76
77         remoteAddr := r.RemoteAddr
78         if xff := r.Header.Get("X-Forwarded-For"); xff != "" {
79                 remoteAddr = xff + "," + remoteAddr
80         }
81
82         w := httpserver.WrapResponseWriter(wOrig)
83         defer func() {
84                 if statusCode == 0 {
85                         statusCode = w.WroteStatus()
86                 } else if w.WroteStatus() == 0 {
87                         w.WriteHeader(statusCode)
88                 } else if w.WroteStatus() != statusCode {
89                         httpserver.Log(r.RemoteAddr, "WARNING",
90                                 fmt.Sprintf("Our status changed from %d to %d after we sent headers", w.WroteStatus(), statusCode))
91                 }
92                 if statusText == "" {
93                         statusText = http.StatusText(statusCode)
94                 }
95                 httpserver.Log(remoteAddr, statusCode, statusText, w.WroteBodyBytes(), r.Method, r.Host, r.URL.Path, r.URL.RawQuery)
96         }()
97
98         if r.Method != "GET" && r.Method != "POST" {
99                 statusCode, statusText = http.StatusMethodNotAllowed, r.Method
100                 return
101         }
102
103         arv := clientPool.Get()
104         if arv == nil {
105                 statusCode, statusText = http.StatusInternalServerError, "Pool failed: "+clientPool.Err().Error()
106                 return
107         }
108         defer clientPool.Put(arv)
109
110         pathParts := strings.Split(r.URL.Path[1:], "/")
111
112         var targetId string
113         var targetPath []string
114         var tokens []string
115         var reqTokens []string
116         var pathToken bool
117         var attachment bool
118         credentialsOK := trustAllContent
119
120         if r.Host != "" && r.Host == attachmentOnlyHost {
121                 credentialsOK = true
122                 attachment = true
123         } else if r.FormValue("disposition") == "attachment" {
124                 attachment = true
125         }
126
127         if targetId = parseCollectionIdFromDNSName(r.Host); targetId != "" {
128                 // http://ID.dl.example/PATH...
129                 credentialsOK = true
130                 targetPath = pathParts
131         } else if len(pathParts) >= 2 && strings.HasPrefix(pathParts[0], "c=") {
132                 // /c=ID/PATH...
133                 targetId = parseCollectionIdFromURL(pathParts[0][2:])
134                 targetPath = pathParts[1:]
135         } else if len(pathParts) >= 3 && pathParts[0] == "collections" {
136                 if len(pathParts) >= 5 && pathParts[1] == "download" {
137                         // /collections/download/ID/TOKEN/PATH...
138                         targetId = pathParts[2]
139                         tokens = []string{pathParts[3]}
140                         targetPath = pathParts[4:]
141                         pathToken = true
142                 } else {
143                         // /collections/ID/PATH...
144                         targetId = pathParts[1]
145                         tokens = anonymousTokens
146                         targetPath = pathParts[2:]
147                 }
148         } else {
149                 statusCode = http.StatusNotFound
150                 return
151         }
152         if t := r.FormValue("api_token"); t != "" {
153                 // The client provided an explicit token in the query
154                 // string, or a form in POST body. We must put the
155                 // token in an HttpOnly cookie, and redirect to the
156                 // same URL with the query param redacted and method =
157                 // GET.
158
159                 if !credentialsOK {
160                         // It is not safe to copy the provided token
161                         // into a cookie unless the current vhost
162                         // (origin) serves only a single collection or
163                         // we are in trustAllContent mode.
164                         statusCode = http.StatusBadRequest
165                         return
166                 }
167
168                 // The HttpOnly flag is necessary to prevent
169                 // JavaScript code (included in, or loaded by, a page
170                 // in the collection being served) from employing the
171                 // user's token beyond reading other files in the same
172                 // domain, i.e., same collection.
173                 //
174                 // The 303 redirect is necessary in the case of a GET
175                 // request to avoid exposing the token in the Location
176                 // bar, and in the case of a POST request to avoid
177                 // raising warnings when the user refreshes the
178                 // resulting page.
179
180                 http.SetCookie(w, &http.Cookie{
181                         Name:     "api_token",
182                         Value:    auth.EncodeTokenCookie([]byte(t)),
183                         Path:     "/",
184                         Expires:  time.Now().AddDate(10, 0, 0),
185                         HttpOnly: true,
186                 })
187                 redir := (&url.URL{Host: r.Host, Path: r.URL.Path}).String()
188
189                 w.Header().Add("Location", redir)
190                 statusCode, statusText = http.StatusSeeOther, redir
191                 w.WriteHeader(statusCode)
192                 io.WriteString(w, `<A href="`)
193                 io.WriteString(w, html.EscapeString(redir))
194                 io.WriteString(w, `">Continue</A>`)
195                 return
196         }
197
198         if tokens == nil && strings.HasPrefix(targetPath[0], "t=") {
199                 // http://ID.example/t=TOKEN/PATH...
200                 // /c=ID/t=TOKEN/PATH...
201                 //
202                 // This form must only be used to pass scoped tokens
203                 // that give permission for a single collection. See
204                 // FormValue case above.
205                 tokens = []string{targetPath[0][2:]}
206                 pathToken = true
207                 targetPath = targetPath[1:]
208         }
209
210         if tokens == nil {
211                 if credentialsOK {
212                         reqTokens = auth.NewCredentialsFromHTTPRequest(r).Tokens
213                 }
214                 tokens = append(reqTokens, anonymousTokens...)
215         }
216
217         if len(targetPath) > 0 && targetPath[0] == "_" {
218                 // If a collection has a directory called "t=foo" or
219                 // "_", it can be served at //dl.example/_/t=foo/ or
220                 // //dl.example/_/_/ respectively: //dl.example/t=foo/
221                 // won't work because t=foo will be interpreted as a
222                 // token "foo".
223                 targetPath = targetPath[1:]
224         }
225
226         tokenResult := make(map[string]int)
227         collection := make(map[string]interface{})
228         found := false
229         for _, arv.ApiToken = range tokens {
230                 err := arv.Get("collections", targetId, nil, &collection)
231                 if err == nil {
232                         // Success
233                         found = true
234                         break
235                 }
236                 if srvErr, ok := err.(arvadosclient.APIServerError); ok {
237                         switch srvErr.HttpStatusCode {
238                         case 404, 401:
239                                 // Token broken or insufficient to
240                                 // retrieve collection
241                                 tokenResult[arv.ApiToken] = srvErr.HttpStatusCode
242                                 continue
243                         }
244                 }
245                 // Something more serious is wrong
246                 statusCode, statusText = http.StatusInternalServerError, err.Error()
247                 return
248         }
249         if !found {
250                 if pathToken || !credentialsOK {
251                         // Either the URL is a "secret sharing link"
252                         // that didn't work out (and asking the client
253                         // for additional credentials would just be
254                         // confusing), or we don't even accept
255                         // credentials at this path.
256                         statusCode = http.StatusNotFound
257                         return
258                 }
259                 for _, t := range reqTokens {
260                         if tokenResult[t] == 404 {
261                                 // The client provided valid token(s), but the
262                                 // collection was not found.
263                                 statusCode = http.StatusNotFound
264                                 return
265                         }
266                 }
267                 // The client's token was invalid (e.g., expired), or
268                 // the client didn't even provide one.  Propagate the
269                 // 401 to encourage the client to use a [different]
270                 // token.
271                 //
272                 // TODO(TC): This response would be confusing to
273                 // someone trying (anonymously) to download public
274                 // data that has been deleted.  Allow a referrer to
275                 // provide this context somehow?
276                 w.Header().Add("WWW-Authenticate", "Basic realm=\"dl\"")
277                 statusCode = http.StatusUnauthorized
278                 return
279         }
280
281         filename := strings.Join(targetPath, "/")
282         kc, err := keepclient.MakeKeepClient(arv)
283         if err != nil {
284                 statusCode, statusText = http.StatusInternalServerError, err.Error()
285                 return
286         }
287         rdr, err := kc.CollectionFileReader(collection, filename)
288         if os.IsNotExist(err) {
289                 statusCode = http.StatusNotFound
290                 return
291         } else if err != nil {
292                 statusCode, statusText = http.StatusBadGateway, err.Error()
293                 return
294         }
295         defer rdr.Close()
296
297         // One or both of these can be -1 if not found:
298         basenamePos := strings.LastIndex(filename, "/")
299         extPos := strings.LastIndex(filename, ".")
300         if extPos > basenamePos {
301                 // Now extPos is safely >= 0.
302                 if t := mime.TypeByExtension(filename[extPos:]); t != "" {
303                         w.Header().Set("Content-Type", t)
304                 }
305         }
306         w.Header().Set("Content-Length", fmt.Sprintf("%d", rdr.Len()))
307         if attachment {
308                 w.Header().Set("Content-Disposition", "attachment")
309         }
310
311         w.WriteHeader(http.StatusOK)
312         _, err = io.Copy(w, rdr)
313         if err != nil {
314                 statusCode, statusText = http.StatusBadGateway, err.Error()
315         }
316 }