5824: Handle various combinations of c= and t= more consistently. Use vhosts in integ...
[arvados.git] / services / keep-web / handler.go
1 package main
2
3 import (
4         "fmt"
5         "html"
6         "io"
7         "mime"
8         "net/http"
9         "net/url"
10         "os"
11         "strings"
12         "time"
13
14         "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
15         "git.curoverse.com/arvados.git/sdk/go/auth"
16         "git.curoverse.com/arvados.git/sdk/go/httpserver"
17         "git.curoverse.com/arvados.git/sdk/go/keepclient"
18 )
19
20 var clientPool = arvadosclient.MakeClientPool()
21
22 var anonymousTokens []string
23
24 type handler struct{}
25
26 func init() {
27         // TODO(TC): Get anonymousTokens from flags
28         anonymousTokens = []string{}
29 }
30
31 // return a UUID or PDH if s begins with a UUID or URL-encoded PDH;
32 // otherwise return "".
33 func parseCollectionIdFromDNSName(s string) string {
34         // Strip domain.
35         if i := strings.IndexRune(s, '.'); i >= 0 {
36                 s = s[:i]
37         }
38         // Names like {uuid}--dl.example.com serve the same purpose as
39         // {uuid}.dl.example.com but can reduce cost/effort of using
40         // [additional] wildcard certificates.
41         if i := strings.Index(s, "--"); i >= 0 {
42                 s = s[:i]
43         }
44         if arvadosclient.UUIDMatch(s) {
45                 return s
46         }
47         if pdh := strings.Replace(s, "-", "+", 1); arvadosclient.PDHMatch(pdh) {
48                 return pdh
49         }
50         return ""
51 }
52
53 var urlPDHDecoder = strings.NewReplacer(" ", "+", "-", "+")
54
55 // return a UUID or PDH if s is a UUID or a PDH (even if it is a PDH
56 // with "+" replaced by " " or "-"); otherwise return "".
57 func parseCollectionIdFromURL(s string) string {
58         if arvadosclient.UUIDMatch(s) {
59                 return s
60         }
61         if pdh := urlPDHDecoder.Replace(s); arvadosclient.PDHMatch(pdh) {
62                 return pdh
63         }
64         return ""
65 }
66
67 func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
68         var statusCode = 0
69         var statusText string
70
71         w := httpserver.WrapResponseWriter(wOrig)
72         defer func() {
73                 if statusCode == 0 {
74                         statusCode = w.WroteStatus()
75                 } else if w.WroteStatus() == 0 {
76                         w.WriteHeader(statusCode)
77                 } else if w.WroteStatus() != statusCode {
78                         httpserver.Log(r.RemoteAddr, "WARNING",
79                                 fmt.Sprintf("Our status changed from %d to %d after we sent headers", w.WroteStatus(), statusCode))
80                 }
81                 if statusText == "" {
82                         statusText = http.StatusText(statusCode)
83                 }
84                 httpserver.Log(r.RemoteAddr, statusCode, statusText, w.WroteBodyBytes(), r.Method, r.Host, r.URL.Path, r.URL.RawQuery)
85         }()
86
87         if r.Method != "GET" && r.Method != "POST" {
88                 statusCode, statusText = http.StatusMethodNotAllowed, r.Method
89                 return
90         }
91
92         arv := clientPool.Get()
93         if arv == nil {
94                 statusCode, statusText = http.StatusInternalServerError, "Pool failed: "+clientPool.Err().Error()
95                 return
96         }
97         defer clientPool.Put(arv)
98
99         pathParts := strings.Split(r.URL.Path[1:], "/")
100
101         var targetId string
102         var targetPath []string
103         var tokens []string
104         var reqTokens []string
105         var pathToken bool
106         var credentialsOK bool
107
108         if targetId = parseCollectionIdFromDNSName(r.Host); targetId != "" {
109                 // http://ID.dl.example/PATH...
110                 credentialsOK = true
111                 targetPath = pathParts
112         } else if len(pathParts) >= 2 && strings.HasPrefix(pathParts[0], "c=") {
113                 // /c=ID/PATH...
114                 targetId = parseCollectionIdFromURL(pathParts[0][2:])
115                 targetPath = pathParts[1:]
116         } else if len(pathParts) >= 3 && pathParts[0] == "collections" {
117                 if len(pathParts) >= 5 && pathParts[1] == "download" {
118                         // /collections/download/ID/TOKEN/PATH...
119                         targetId = pathParts[2]
120                         tokens = []string{pathParts[3]}
121                         targetPath = pathParts[4:]
122                         pathToken = true
123                 } else {
124                         // /collections/ID/PATH...
125                         targetId = pathParts[1]
126                         tokens = anonymousTokens
127                         targetPath = pathParts[2:]
128                 }
129         } else {
130                 statusCode = http.StatusNotFound
131                 return
132         }
133         if t := r.FormValue("api_token"); t != "" {
134                 // The client provided an explicit token in the query
135                 // string, or a form in POST body. We must put the
136                 // token in an HttpOnly cookie, and redirect to the
137                 // same URL with the query param redacted and method =
138                 // GET.
139
140                 if !credentialsOK {
141                         // It is not safe to copy the provided token
142                         // into a cookie unless the current vhost
143                         // (origin) serves only a single collection.
144                         statusCode = http.StatusBadRequest
145                         return
146                 }
147
148                 // The HttpOnly flag is necessary to prevent
149                 // JavaScript code (included in, or loaded by, a page
150                 // in the collection being served) from employing the
151                 // user's token beyond reading other files in the same
152                 // domain, i.e., same collection.
153                 //
154                 // The 303 redirect is necessary in the case of a GET
155                 // request to avoid exposing the token in the Location
156                 // bar, and in the case of a POST request to avoid
157                 // raising warnings when the user refreshes the
158                 // resulting page.
159
160                 http.SetCookie(w, &http.Cookie{
161                         Name:     "api_token",
162                         Value:    auth.EncodeTokenCookie([]byte(t)),
163                         Path:     "/",
164                         Expires:  time.Now().AddDate(10,0,0),
165                         HttpOnly: true,
166                 })
167                 redir := (&url.URL{Host: r.Host, Path: r.URL.Path}).String()
168
169                 w.Header().Add("Location", redir)
170                 statusCode, statusText = http.StatusSeeOther, redir
171                 w.WriteHeader(statusCode)
172                 io.WriteString(w, `<A href="`)
173                 io.WriteString(w, html.EscapeString(redir))
174                 io.WriteString(w, `">Continue</A>`)
175                 return
176         }
177
178         if tokens == nil && strings.HasPrefix(targetPath[0], "t=") {
179                 // http://ID.example/t=TOKEN/PATH...
180                 // /c=ID/t=TOKEN/PATH...
181                 //
182                 // This form must only be used to pass scoped tokens
183                 // that give permission for a single collection. See
184                 // FormValue case above.
185                 tokens = []string{targetPath[0][2:]}
186                 pathToken = true
187                 targetPath = targetPath[1:]
188         }
189
190         if tokens == nil {
191                 if credentialsOK {
192                         reqTokens = auth.NewCredentialsFromHTTPRequest(r).Tokens
193                 }
194                 tokens = append(reqTokens, anonymousTokens...)
195         }
196
197         if len(targetPath) > 0 && targetPath[0] == "_" {
198                 // If a collection has a directory called "t=foo" or
199                 // "_", it can be served at //dl.example/_/t=foo/ or
200                 // //dl.example/_/_/ respectively: //dl.example/t=foo/
201                 // won't work because t=foo will be interpreted as a
202                 // token "foo".
203                 targetPath = targetPath[1:]
204         }
205
206         tokenResult := make(map[string]int)
207         collection := make(map[string]interface{})
208         found := false
209         for _, arv.ApiToken = range tokens {
210                 err := arv.Get("collections", targetId, nil, &collection)
211                 if err == nil {
212                         // Success
213                         found = true
214                         break
215                 }
216                 if srvErr, ok := err.(arvadosclient.APIServerError); ok {
217                         switch srvErr.HttpStatusCode {
218                         case 404, 401:
219                                 // Token broken or insufficient to
220                                 // retrieve collection
221                                 tokenResult[arv.ApiToken] = srvErr.HttpStatusCode
222                                 continue
223                         }
224                 }
225                 // Something more serious is wrong
226                 statusCode, statusText = http.StatusInternalServerError, err.Error()
227                 return
228         }
229         if !found {
230                 if pathToken || !credentialsOK {
231                         // Either the URL is a "secret sharing link"
232                         // that didn't work out (and asking the client
233                         // for additional credentials would just be
234                         // confusing), or we don't even accept
235                         // credentials at this path.
236                         statusCode = http.StatusNotFound
237                         return
238                 }
239                 for _, t := range reqTokens {
240                         if tokenResult[t] == 404 {
241                                 // The client provided valid token(s), but the
242                                 // collection was not found.
243                                 statusCode = http.StatusNotFound
244                                 return
245                         }
246                 }
247                 // The client's token was invalid (e.g., expired), or
248                 // the client didn't even provide one.  Propagate the
249                 // 401 to encourage the client to use a [different]
250                 // token.
251                 //
252                 // TODO(TC): This response would be confusing to
253                 // someone trying (anonymously) to download public
254                 // data that has been deleted.  Allow a referrer to
255                 // provide this context somehow?
256                 w.Header().Add("WWW-Authenticate", "Basic realm=\"dl\"")
257                 statusCode = http.StatusUnauthorized
258                 return
259         }
260
261         filename := strings.Join(targetPath, "/")
262         kc, err := keepclient.MakeKeepClient(arv)
263         if err != nil {
264                 statusCode, statusText = http.StatusInternalServerError, err.Error()
265                 return
266         }
267         rdr, err := kc.CollectionFileReader(collection, filename)
268         if os.IsNotExist(err) {
269                 statusCode = http.StatusNotFound
270                 return
271         } else if err != nil {
272                 statusCode, statusText = http.StatusBadGateway, err.Error()
273                 return
274         }
275         defer rdr.Close()
276
277         // One or both of these can be -1 if not found:
278         basenamePos := strings.LastIndex(filename, "/")
279         extPos := strings.LastIndex(filename, ".")
280         if extPos > basenamePos {
281                 // Now extPos is safely >= 0.
282                 if t := mime.TypeByExtension(filename[extPos:]); t != "" {
283                         w.Header().Set("Content-Type", t)
284                 }
285         }
286         w.Header().Set("Content-Length", fmt.Sprintf("%d", rdr.Len()))
287
288         w.WriteHeader(http.StatusOK)
289         _, err = io.Copy(w, rdr)
290         if err != nil {
291                 statusCode, statusText = http.StatusBadGateway, err.Error()
292         }
293 }