5824: Add Content-Length header.
[arvados.git] / services / keep-web / handler.go
1 package main
2
3 import (
4         "fmt"
5         "html"
6         "io"
7         "mime"
8         "net/http"
9         "net/url"
10         "os"
11         "strings"
12         "time"
13
14         "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
15         "git.curoverse.com/arvados.git/sdk/go/auth"
16         "git.curoverse.com/arvados.git/sdk/go/httpserver"
17         "git.curoverse.com/arvados.git/sdk/go/keepclient"
18 )
19
20 var clientPool = arvadosclient.MakeClientPool()
21
22 var anonymousTokens []string
23
24 type handler struct{}
25
26 func init() {
27         // TODO(TC): Get anonymousTokens from flags
28         anonymousTokens = []string{}
29 }
30
31 // return s if s is a UUID or a PDH, otherwise ""
32 func parseCollectionIdFromDNSName(s string) string {
33         // Strip domain.
34         if i := strings.IndexRune(s, '.'); i >= 0 {
35                 s = s[:i]
36         }
37         // Names like {uuid}--dl.example.com serve the same purpose as
38         // {uuid}.dl.example.com but can reduce cost/effort of using
39         // [additional] wildcard certificates.
40         if i := strings.Index(s, "--"); i >= 0 {
41                 s = s[:i]
42         }
43         if !arvadosclient.UUIDMatch(s) && !arvadosclient.PDHMatch(s) {
44                 return ""
45         }
46         return s
47 }
48
49 func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
50         var statusCode = 0
51         var statusText string
52
53         w := httpserver.WrapResponseWriter(wOrig)
54         defer func() {
55                 if statusCode == 0 {
56                         statusCode = w.WroteStatus()
57                 } else if w.WroteStatus() == 0 {
58                         w.WriteHeader(statusCode)
59                 } else if w.WroteStatus() != statusCode {
60                         httpserver.Log(r.RemoteAddr, "WARNING",
61                                 fmt.Sprintf("Our status changed from %d to %d after we sent headers", w.WroteStatus(), statusCode))
62                 }
63                 if statusText == "" {
64                         statusText = http.StatusText(statusCode)
65                 }
66                 httpserver.Log(r.RemoteAddr, statusCode, statusText, w.WroteBodyBytes(), r.Method, r.Host, r.URL.Path, r.URL.RawQuery)
67         }()
68
69         if r.Method != "GET" && r.Method != "POST" {
70                 statusCode, statusText = http.StatusMethodNotAllowed, r.Method
71                 return
72         }
73
74         arv := clientPool.Get()
75         if arv == nil {
76                 statusCode, statusText = http.StatusInternalServerError, "Pool failed: "+clientPool.Err().Error()
77                 return
78         }
79         defer clientPool.Put(arv)
80
81         pathParts := strings.Split(r.URL.Path[1:], "/")
82
83         var targetId string
84         var targetPath []string
85         var tokens []string
86         var reqTokens []string
87         var pathToken bool
88
89         if targetId = parseCollectionIdFromDNSName(r.Host); targetId != "" {
90                 // "http://{id}.domain.example.com/{path}" form
91                 if t := r.FormValue("api_token"); t != "" {
92                         // ...with explicit token in query string or
93                         // form in POST body. We must encrypt the
94                         // token such that it can only be used for
95                         // this collection; put it in an HttpOnly
96                         // cookie; and redirect to the same URL with
97                         // the query param redacted, and method =
98                         // GET.
99                         //
100                         // The HttpOnly flag is necessary to prevent
101                         // JavaScript code (included in, or loaded by,
102                         // a page in the collection being served) from
103                         // employing the user's token beyond reading
104                         // other files in the same domain, i.e., same
105                         // the collection.
106                         //
107                         // The 303 redirect is necessary in the case
108                         // of a GET request to avoid exposing the
109                         // token in the Location bar, and in the case
110                         // of a POST request to avoid raising warnings
111                         // when the user refreshes the resulting page.
112                         http.SetCookie(w, &http.Cookie{
113                                 Name:    "api_token",
114                                 Value:   auth.EncodeTokenCookie([]byte(t)),
115                                 Path:    "/",
116                                 Expires: time.Now().AddDate(10,0,0),
117                         })
118                         redir := (&url.URL{Host: r.Host, Path: r.URL.Path}).String()
119
120                         w.Header().Add("Location", redir)
121                         statusCode, statusText = http.StatusSeeOther, redir
122                         w.WriteHeader(statusCode)
123                         io.WriteString(w, `<A href="`)
124                         io.WriteString(w, html.EscapeString(redir))
125                         io.WriteString(w, `">Continue</A>`)
126                         return
127                 } else if strings.HasPrefix(pathParts[0], "t=") {
128                         // ...with explicit token in path,
129                         // "{...}.com/t={token}/{path}".  This form
130                         // must only be used to pass scoped tokens
131                         // that give permission for a single
132                         // collection. See FormValue case above.
133                         tokens = []string{pathParts[0][2:]}
134                         targetPath = pathParts[1:]
135                         pathToken = true
136                 } else {
137                         // ...with cookie, Authorization header, or
138                         // no token at all
139                         reqTokens = auth.NewCredentialsFromHTTPRequest(r).Tokens
140                         tokens = append(reqTokens, anonymousTokens...)
141                         targetPath = pathParts
142                 }
143         } else if len(pathParts) < 3 || pathParts[0] != "collections" || pathParts[1] == "" || pathParts[2] == "" {
144                 statusCode = http.StatusNotFound
145                 return
146         } else if len(pathParts) >= 5 && pathParts[1] == "download" {
147                 // "/collections/download/{id}/{token}/path..." form:
148                 // Don't use our configured anonymous tokens,
149                 // Authorization headers, etc.  Just use the token in
150                 // the path.
151                 targetId = pathParts[2]
152                 tokens = []string{pathParts[3]}
153                 targetPath = pathParts[4:]
154                 pathToken = true
155         } else {
156                 // "/collections/{id}/path..." form
157                 targetId = pathParts[1]
158                 reqTokens = auth.NewCredentialsFromHTTPRequest(r).Tokens
159                 tokens = append(reqTokens, anonymousTokens...)
160                 targetPath = pathParts[2:]
161         }
162
163         tokenResult := make(map[string]int)
164         collection := make(map[string]interface{})
165         found := false
166         for _, arv.ApiToken = range tokens {
167                 err := arv.Get("collections", targetId, nil, &collection)
168                 if err == nil {
169                         // Success
170                         found = true
171                         break
172                 }
173                 if srvErr, ok := err.(arvadosclient.APIServerError); ok {
174                         switch srvErr.HttpStatusCode {
175                         case 404, 401:
176                                 // Token broken or insufficient to
177                                 // retrieve collection
178                                 tokenResult[arv.ApiToken] = srvErr.HttpStatusCode
179                                 continue
180                         }
181                 }
182                 // Something more serious is wrong
183                 statusCode, statusText = http.StatusInternalServerError, err.Error()
184                 return
185         }
186         if !found {
187                 if pathToken {
188                         // The URL is a "secret sharing link", but it
189                         // didn't work out. Asking the client for
190                         // additional credentials would just be
191                         // confusing.
192                         statusCode = http.StatusNotFound
193                         return
194                 }
195                 for _, t := range reqTokens {
196                         if tokenResult[t] == 404 {
197                                 // The client provided valid token(s), but the
198                                 // collection was not found.
199                                 statusCode = http.StatusNotFound
200                                 return
201                         }
202                 }
203                 // The client's token was invalid (e.g., expired), or
204                 // the client didn't even provide one.  Propagate the
205                 // 401 to encourage the client to use a [different]
206                 // token.
207                 //
208                 // TODO(TC): This response would be confusing to
209                 // someone trying (anonymously) to download public
210                 // data that has been deleted.  Allow a referrer to
211                 // provide this context somehow?
212                 w.Header().Add("WWW-Authenticate", "Basic realm=\"dl\"")
213                 statusCode = http.StatusUnauthorized
214                 return
215         }
216
217         filename := strings.Join(targetPath, "/")
218         kc, err := keepclient.MakeKeepClient(arv)
219         if err != nil {
220                 statusCode, statusText = http.StatusInternalServerError, err.Error()
221                 return
222         }
223         rdr, err := kc.CollectionFileReader(collection, filename)
224         if os.IsNotExist(err) {
225                 statusCode = http.StatusNotFound
226                 return
227         } else if err != nil {
228                 statusCode, statusText = http.StatusBadGateway, err.Error()
229                 return
230         }
231         defer rdr.Close()
232
233         // One or both of these can be -1 if not found:
234         basenamePos := strings.LastIndex(filename, "/")
235         extPos := strings.LastIndex(filename, ".")
236         if extPos > basenamePos {
237                 // Now extPos is safely >= 0.
238                 if t := mime.TypeByExtension(filename[extPos:]); t != "" {
239                         w.Header().Set("Content-Type", t)
240                 }
241         }
242         w.Header().Set("Content-Length", fmt.Sprintf("%d", rdr.Len()))
243
244         w.WriteHeader(http.StatusOK)
245         _, err = io.Copy(w, rdr)
246         if err != nil {
247                 statusCode, statusText = http.StatusBadGateway, err.Error()
248         }
249 }