5824: Log X-Forwarded-For header value if provided.
[arvados.git] / services / keep-web / handler.go
1 package main
2
3 import (
4         "fmt"
5         "html"
6         "io"
7         "mime"
8         "net/http"
9         "net/url"
10         "os"
11         "strings"
12         "time"
13
14         "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
15         "git.curoverse.com/arvados.git/sdk/go/auth"
16         "git.curoverse.com/arvados.git/sdk/go/httpserver"
17         "git.curoverse.com/arvados.git/sdk/go/keepclient"
18 )
19
20 type handler struct{}
21
22 var clientPool = arvadosclient.MakeClientPool()
23 var anonymousTokens []string
24
25 // return a UUID or PDH if s begins with a UUID or URL-encoded PDH;
26 // otherwise return "".
27 func parseCollectionIdFromDNSName(s string) string {
28         // Strip domain.
29         if i := strings.IndexRune(s, '.'); i >= 0 {
30                 s = s[:i]
31         }
32         // Names like {uuid}--dl.example.com serve the same purpose as
33         // {uuid}.dl.example.com but can reduce cost/effort of using
34         // [additional] wildcard certificates.
35         if i := strings.Index(s, "--"); i >= 0 {
36                 s = s[:i]
37         }
38         if arvadosclient.UUIDMatch(s) {
39                 return s
40         }
41         if pdh := strings.Replace(s, "-", "+", 1); arvadosclient.PDHMatch(pdh) {
42                 return pdh
43         }
44         return ""
45 }
46
47 var urlPDHDecoder = strings.NewReplacer(" ", "+", "-", "+")
48
49 // return a UUID or PDH if s is a UUID or a PDH (even if it is a PDH
50 // with "+" replaced by " " or "-"); otherwise return "".
51 func parseCollectionIdFromURL(s string) string {
52         if arvadosclient.UUIDMatch(s) {
53                 return s
54         }
55         if pdh := urlPDHDecoder.Replace(s); arvadosclient.PDHMatch(pdh) {
56                 return pdh
57         }
58         return ""
59 }
60
61 func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
62         var statusCode = 0
63         var statusText string
64
65         remoteAddr := r.RemoteAddr
66         if xff := r.Header.Get("X-Forwarded-For"); xff != "" {
67                 remoteAddr = xff + "," + remoteAddr
68         }
69
70         w := httpserver.WrapResponseWriter(wOrig)
71         defer func() {
72                 if statusCode == 0 {
73                         statusCode = w.WroteStatus()
74                 } else if w.WroteStatus() == 0 {
75                         w.WriteHeader(statusCode)
76                 } else if w.WroteStatus() != statusCode {
77                         httpserver.Log(r.RemoteAddr, "WARNING",
78                                 fmt.Sprintf("Our status changed from %d to %d after we sent headers", w.WroteStatus(), statusCode))
79                 }
80                 if statusText == "" {
81                         statusText = http.StatusText(statusCode)
82                 }
83                 httpserver.Log(remoteAddr, statusCode, statusText, w.WroteBodyBytes(), r.Method, r.Host, r.URL.Path, r.URL.RawQuery)
84         }()
85
86         if r.Method != "GET" && r.Method != "POST" {
87                 statusCode, statusText = http.StatusMethodNotAllowed, r.Method
88                 return
89         }
90
91         arv := clientPool.Get()
92         if arv == nil {
93                 statusCode, statusText = http.StatusInternalServerError, "Pool failed: "+clientPool.Err().Error()
94                 return
95         }
96         defer clientPool.Put(arv)
97
98         pathParts := strings.Split(r.URL.Path[1:], "/")
99
100         var targetId string
101         var targetPath []string
102         var tokens []string
103         var reqTokens []string
104         var pathToken bool
105         var credentialsOK bool
106
107         if targetId = parseCollectionIdFromDNSName(r.Host); targetId != "" {
108                 // http://ID.dl.example/PATH...
109                 credentialsOK = true
110                 targetPath = pathParts
111         } else if len(pathParts) >= 2 && strings.HasPrefix(pathParts[0], "c=") {
112                 // /c=ID/PATH...
113                 targetId = parseCollectionIdFromURL(pathParts[0][2:])
114                 targetPath = pathParts[1:]
115         } else if len(pathParts) >= 3 && pathParts[0] == "collections" {
116                 if len(pathParts) >= 5 && pathParts[1] == "download" {
117                         // /collections/download/ID/TOKEN/PATH...
118                         targetId = pathParts[2]
119                         tokens = []string{pathParts[3]}
120                         targetPath = pathParts[4:]
121                         pathToken = true
122                 } else {
123                         // /collections/ID/PATH...
124                         targetId = pathParts[1]
125                         tokens = anonymousTokens
126                         targetPath = pathParts[2:]
127                 }
128         } else {
129                 statusCode = http.StatusNotFound
130                 return
131         }
132         if t := r.FormValue("api_token"); t != "" {
133                 // The client provided an explicit token in the query
134                 // string, or a form in POST body. We must put the
135                 // token in an HttpOnly cookie, and redirect to the
136                 // same URL with the query param redacted and method =
137                 // GET.
138
139                 if !credentialsOK {
140                         // It is not safe to copy the provided token
141                         // into a cookie unless the current vhost
142                         // (origin) serves only a single collection.
143                         statusCode = http.StatusBadRequest
144                         return
145                 }
146
147                 // The HttpOnly flag is necessary to prevent
148                 // JavaScript code (included in, or loaded by, a page
149                 // in the collection being served) from employing the
150                 // user's token beyond reading other files in the same
151                 // domain, i.e., same collection.
152                 //
153                 // The 303 redirect is necessary in the case of a GET
154                 // request to avoid exposing the token in the Location
155                 // bar, and in the case of a POST request to avoid
156                 // raising warnings when the user refreshes the
157                 // resulting page.
158
159                 http.SetCookie(w, &http.Cookie{
160                         Name:     "api_token",
161                         Value:    auth.EncodeTokenCookie([]byte(t)),
162                         Path:     "/",
163                         Expires:  time.Now().AddDate(10,0,0),
164                         HttpOnly: true,
165                 })
166                 redir := (&url.URL{Host: r.Host, Path: r.URL.Path}).String()
167
168                 w.Header().Add("Location", redir)
169                 statusCode, statusText = http.StatusSeeOther, redir
170                 w.WriteHeader(statusCode)
171                 io.WriteString(w, `<A href="`)
172                 io.WriteString(w, html.EscapeString(redir))
173                 io.WriteString(w, `">Continue</A>`)
174                 return
175         }
176
177         if tokens == nil && strings.HasPrefix(targetPath[0], "t=") {
178                 // http://ID.example/t=TOKEN/PATH...
179                 // /c=ID/t=TOKEN/PATH...
180                 //
181                 // This form must only be used to pass scoped tokens
182                 // that give permission for a single collection. See
183                 // FormValue case above.
184                 tokens = []string{targetPath[0][2:]}
185                 pathToken = true
186                 targetPath = targetPath[1:]
187         }
188
189         if tokens == nil {
190                 if credentialsOK {
191                         reqTokens = auth.NewCredentialsFromHTTPRequest(r).Tokens
192                 }
193                 tokens = append(reqTokens, anonymousTokens...)
194         }
195
196         if len(targetPath) > 0 && targetPath[0] == "_" {
197                 // If a collection has a directory called "t=foo" or
198                 // "_", it can be served at //dl.example/_/t=foo/ or
199                 // //dl.example/_/_/ respectively: //dl.example/t=foo/
200                 // won't work because t=foo will be interpreted as a
201                 // token "foo".
202                 targetPath = targetPath[1:]
203         }
204
205         tokenResult := make(map[string]int)
206         collection := make(map[string]interface{})
207         found := false
208         for _, arv.ApiToken = range tokens {
209                 err := arv.Get("collections", targetId, nil, &collection)
210                 if err == nil {
211                         // Success
212                         found = true
213                         break
214                 }
215                 if srvErr, ok := err.(arvadosclient.APIServerError); ok {
216                         switch srvErr.HttpStatusCode {
217                         case 404, 401:
218                                 // Token broken or insufficient to
219                                 // retrieve collection
220                                 tokenResult[arv.ApiToken] = srvErr.HttpStatusCode
221                                 continue
222                         }
223                 }
224                 // Something more serious is wrong
225                 statusCode, statusText = http.StatusInternalServerError, err.Error()
226                 return
227         }
228         if !found {
229                 if pathToken || !credentialsOK {
230                         // Either the URL is a "secret sharing link"
231                         // that didn't work out (and asking the client
232                         // for additional credentials would just be
233                         // confusing), or we don't even accept
234                         // credentials at this path.
235                         statusCode = http.StatusNotFound
236                         return
237                 }
238                 for _, t := range reqTokens {
239                         if tokenResult[t] == 404 {
240                                 // The client provided valid token(s), but the
241                                 // collection was not found.
242                                 statusCode = http.StatusNotFound
243                                 return
244                         }
245                 }
246                 // The client's token was invalid (e.g., expired), or
247                 // the client didn't even provide one.  Propagate the
248                 // 401 to encourage the client to use a [different]
249                 // token.
250                 //
251                 // TODO(TC): This response would be confusing to
252                 // someone trying (anonymously) to download public
253                 // data that has been deleted.  Allow a referrer to
254                 // provide this context somehow?
255                 w.Header().Add("WWW-Authenticate", "Basic realm=\"dl\"")
256                 statusCode = http.StatusUnauthorized
257                 return
258         }
259
260         filename := strings.Join(targetPath, "/")
261         kc, err := keepclient.MakeKeepClient(arv)
262         if err != nil {
263                 statusCode, statusText = http.StatusInternalServerError, err.Error()
264                 return
265         }
266         rdr, err := kc.CollectionFileReader(collection, filename)
267         if os.IsNotExist(err) {
268                 statusCode = http.StatusNotFound
269                 return
270         } else if err != nil {
271                 statusCode, statusText = http.StatusBadGateway, err.Error()
272                 return
273         }
274         defer rdr.Close()
275
276         // One or both of these can be -1 if not found:
277         basenamePos := strings.LastIndex(filename, "/")
278         extPos := strings.LastIndex(filename, ".")
279         if extPos > basenamePos {
280                 // Now extPos is safely >= 0.
281                 if t := mime.TypeByExtension(filename[extPos:]); t != "" {
282                         w.Header().Set("Content-Type", t)
283                 }
284         }
285         w.Header().Set("Content-Length", fmt.Sprintf("%d", rdr.Len()))
286
287         w.WriteHeader(http.StatusOK)
288         _, err = io.Copy(w, rdr)
289         if err != nil {
290                 statusCode, statusText = http.StatusBadGateway, err.Error()
291         }
292 }