Merge branch '16118-arvboot-fixes'
[arvados.git] / services / keepproxy / keepproxy.go
1 // Copyright (C) The Arvados Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: AGPL-3.0
4
5 package main
6
7 import (
8         "errors"
9         "flag"
10         "fmt"
11         "io"
12         "io/ioutil"
13         "net"
14         "net/http"
15         "os"
16         "os/signal"
17         "regexp"
18         "strings"
19         "sync"
20         "syscall"
21         "time"
22
23         "git.arvados.org/arvados.git/lib/config"
24         "git.arvados.org/arvados.git/sdk/go/arvados"
25         "git.arvados.org/arvados.git/sdk/go/arvadosclient"
26         "git.arvados.org/arvados.git/sdk/go/health"
27         "git.arvados.org/arvados.git/sdk/go/httpserver"
28         "git.arvados.org/arvados.git/sdk/go/keepclient"
29         "github.com/coreos/go-systemd/daemon"
30         "github.com/ghodss/yaml"
31         "github.com/gorilla/mux"
32         log "github.com/sirupsen/logrus"
33 )
34
// version is the keepproxy version string. It is printed by the
// -version flag and included in the startup log message.
var version = "dev"

// listener is the network listener the HTTP server accepts
// connections on; run() assigns it before serving.
var listener net.Listener

// router is the top-level request handler; run() assigns it just
// before it starts serving.
var router http.Handler

// rfc3339NanoFixed is an RFC 3339 timestamp layout that always emits
// nine fractional-second digits, so log timestamps have a fixed width.
const rfc3339NanoFixed = "2006-01-02T15:04:05.000000000Z07:00"
43
44 func configure(logger log.FieldLogger, args []string) (*arvados.Cluster, error) {
45         flags := flag.NewFlagSet(args[0], flag.ExitOnError)
46
47         dumpConfig := flags.Bool("dump-config", false, "write current configuration to stdout and exit")
48         getVersion := flags.Bool("version", false, "Print version information and exit.")
49
50         loader := config.NewLoader(os.Stdin, logger)
51         loader.SetupFlags(flags)
52
53         args = loader.MungeLegacyConfigArgs(logger, args[1:], "-legacy-keepproxy-config")
54         flags.Parse(args)
55
56         // Print version information if requested
57         if *getVersion {
58                 fmt.Printf("keepproxy %s\n", version)
59                 return nil, nil
60         }
61
62         cfg, err := loader.Load()
63         if err != nil {
64                 return nil, err
65         }
66         cluster, err := cfg.GetCluster("")
67         if err != nil {
68                 return nil, err
69         }
70
71         if *dumpConfig {
72                 out, err := yaml.Marshal(cfg)
73                 if err != nil {
74                         return nil, err
75                 }
76                 if _, err := os.Stdout.Write(out); err != nil {
77                         return nil, err
78                 }
79                 return nil, nil
80         }
81         return cluster, nil
82 }
83
84 func main() {
85         logger := log.New()
86         logger.Formatter = &log.JSONFormatter{
87                 TimestampFormat: rfc3339NanoFixed,
88         }
89
90         cluster, err := configure(logger, os.Args)
91         if err != nil {
92                 log.Fatal(err)
93         }
94         if cluster == nil {
95                 return
96         }
97
98         log.Printf("keepproxy %s started", version)
99
100         if err := run(logger, cluster); err != nil {
101                 log.Fatal(err)
102         }
103
104         log.Println("shutting down")
105 }
106
107 func run(logger log.FieldLogger, cluster *arvados.Cluster) error {
108         client, err := arvados.NewClientFromConfig(cluster)
109         if err != nil {
110                 return err
111         }
112         client.AuthToken = cluster.SystemRootToken
113
114         arv, err := arvadosclient.New(client)
115         if err != nil {
116                 return fmt.Errorf("Error setting up arvados client %v", err)
117         }
118
119         // If a config file is available, use the keepstores defined there
120         // instead of the legacy autodiscover mechanism via the API server
121         for k := range cluster.Services.Keepstore.InternalURLs {
122                 arv.KeepServiceURIs = append(arv.KeepServiceURIs, k.String())
123         }
124
125         if cluster.SystemLogs.LogLevel == "debug" {
126                 keepclient.DebugPrintf = log.Printf
127         }
128         kc, err := keepclient.MakeKeepClient(arv)
129         if err != nil {
130                 return fmt.Errorf("Error setting up keep client %v", err)
131         }
132         keepclient.RefreshServiceDiscoveryOnSIGHUP()
133
134         if cluster.Collections.DefaultReplication > 0 {
135                 kc.Want_replicas = cluster.Collections.DefaultReplication
136         }
137
138         var listen arvados.URL
139         for listen = range cluster.Services.Keepproxy.InternalURLs {
140                 break
141         }
142
143         var lErr error
144         listener, lErr = net.Listen("tcp", listen.Host)
145         if lErr != nil {
146                 return fmt.Errorf("listen(%s): %v", listen.Host, lErr)
147         }
148
149         if _, err := daemon.SdNotify(false, "READY=1"); err != nil {
150                 log.Printf("Error notifying init daemon: %v", err)
151         }
152         log.Println("listening at", listener.Addr())
153
154         // Shut down the server gracefully (by closing the listener)
155         // if SIGTERM is received.
156         term := make(chan os.Signal, 1)
157         go func(sig <-chan os.Signal) {
158                 s := <-sig
159                 log.Println("caught signal:", s)
160                 listener.Close()
161         }(term)
162         signal.Notify(term, syscall.SIGTERM)
163         signal.Notify(term, syscall.SIGINT)
164
165         // Start serving requests.
166         router = MakeRESTRouter(kc, time.Duration(keepclient.DefaultProxyRequestTimeout), cluster.ManagementToken)
167         return http.Serve(listener, httpserver.AddRequestIDs(httpserver.LogRequests(router)))
168 }
169
// ApiTokenCache remembers keys (tokens) that were recently accepted,
// so callers can skip re-validating them for a while.
//
// tokens maps each key to the Unix time, in seconds, at which the
// entry stops being trusted. expireTime is the lifetime, in seconds,
// given to a newly remembered key. lock guards tokens.
type ApiTokenCache struct {
	tokens     map[string]int64
	lock       sync.Mutex
	expireTime int64
}

// RememberToken caches the token and sets its expiry time. If the
// token already has an expiry time, that time is not updated.
//
// The map is created on first use, so a zero-value ApiTokenCache is
// safe to use.
func (c *ApiTokenCache) RememberToken(token string) {
	c.lock.Lock()
	defer c.lock.Unlock()

	if c.tokens == nil {
		c.tokens = make(map[string]int64)
	}
	if c.tokens[token] == 0 {
		c.tokens[token] = time.Now().Unix() + c.expireTime
	}
}

// RecallToken reports whether the token is cached and has not yet
// expired. An expired entry is removed from the cache, so the map
// does not accumulate an entry for every token ever seen.
func (c *ApiTokenCache) RecallToken(token string) bool {
	c.lock.Lock()
	defer c.lock.Unlock()

	expiry, known := c.tokens[token]
	if !known {
		// Unknown token
		return false
	}
	if time.Now().Unix() < expiry {
		// Token is known and still valid
		return true
	}
	// Token is expired: evict it rather than leaving a dead entry.
	delete(c.tokens, token)
	return false
}
206
// GetRemoteAddress returns a description of where the request came
// from: the connection's remote address, preceded by the contents of
// the X-Forwarded-For header (and a comma) when that header is set.
func GetRemoteAddress(req *http.Request) string {
	forwarded := req.Header.Get("X-Forwarded-For")
	if forwarded == "" {
		return req.RemoteAddr
	}
	return forwarded + "," + req.RemoteAddr
}
213
214 func CheckAuthorizationHeader(kc *keepclient.KeepClient, cache *ApiTokenCache, req *http.Request) (pass bool, tok string) {
215         parts := strings.SplitN(req.Header.Get("Authorization"), " ", 2)
216         if len(parts) < 2 || !(parts[0] == "OAuth2" || parts[0] == "Bearer") || len(parts[1]) == 0 {
217                 return false, ""
218         }
219         tok = parts[1]
220
221         // Tokens are validated differently depending on what kind of
222         // operation is being performed. For example, tokens in
223         // collection-sharing links permit GET requests, but not
224         // PUT requests.
225         var op string
226         if req.Method == "GET" || req.Method == "HEAD" {
227                 op = "read"
228         } else {
229                 op = "write"
230         }
231
232         if cache.RecallToken(op + ":" + tok) {
233                 // Valid in the cache, short circuit
234                 return true, tok
235         }
236
237         var err error
238         arv := *kc.Arvados
239         arv.ApiToken = tok
240         arv.RequestID = req.Header.Get("X-Request-Id")
241         if op == "read" {
242                 err = arv.Call("HEAD", "keep_services", "", "accessible", nil, nil)
243         } else {
244                 err = arv.Call("HEAD", "users", "", "current", nil, nil)
245         }
246         if err != nil {
247                 log.Printf("%s: CheckAuthorizationHeader error: %v", GetRemoteAddress(req), err)
248                 return false, ""
249         }
250
251         // Success!  Update cache
252         cache.RememberToken(op + ":" + tok)
253
254         return true, tok
255 }
256
257 // We need to make a private copy of the default http transport early
258 // in initialization, then make copies of our private copy later. It
259 // won't be safe to copy http.DefaultTransport itself later, because
260 // its private mutexes might have already been used. (Without this,
261 // the test suite sometimes panics "concurrent map writes" in
262 // net/http.(*Transport).removeIdleConnLocked().)
263 var defaultTransport = *(http.DefaultTransport.(*http.Transport))
264
// proxyHandler is the HTTP handler for the proxy. It bundles the
// request router with the state its handler methods share.
type proxyHandler struct {
	// Handler is the router that incoming requests are dispatched
	// through; embedding it makes proxyHandler an http.Handler.
	http.Handler
	// KeepClient is the template client; makeKeepClient copies it
	// for each request.
	*keepclient.KeepClient
	// ApiTokenCache remembers recently accepted tokens.
	*ApiTokenCache
	// timeout is the Timeout of the HTTP client built for each
	// request.
	timeout time.Duration
	// transport is the Transport of the HTTP client built for each
	// request.
	transport *http.Transport
}
272
273 // MakeRESTRouter returns an http.Handler that passes GET and PUT
274 // requests to the appropriate handlers.
275 func MakeRESTRouter(kc *keepclient.KeepClient, timeout time.Duration, mgmtToken string) http.Handler {
276         rest := mux.NewRouter()
277
278         transport := defaultTransport
279         transport.DialContext = (&net.Dialer{
280                 Timeout:   keepclient.DefaultConnectTimeout,
281                 KeepAlive: keepclient.DefaultKeepAlive,
282                 DualStack: true,
283         }).DialContext
284         transport.TLSClientConfig = arvadosclient.MakeTLSConfig(kc.Arvados.ApiInsecure)
285         transport.TLSHandshakeTimeout = keepclient.DefaultTLSHandshakeTimeout
286
287         h := &proxyHandler{
288                 Handler:    rest,
289                 KeepClient: kc,
290                 timeout:    timeout,
291                 transport:  &transport,
292                 ApiTokenCache: &ApiTokenCache{
293                         tokens:     make(map[string]int64),
294                         expireTime: 300,
295                 },
296         }
297
298         rest.HandleFunc(`/{locator:[0-9a-f]{32}\+.*}`, h.Get).Methods("GET", "HEAD")
299         rest.HandleFunc(`/{locator:[0-9a-f]{32}}`, h.Get).Methods("GET", "HEAD")
300
301         // List all blocks
302         rest.HandleFunc(`/index`, h.Index).Methods("GET")
303
304         // List blocks whose hash has the given prefix
305         rest.HandleFunc(`/index/{prefix:[0-9a-f]{0,32}}`, h.Index).Methods("GET")
306
307         rest.HandleFunc(`/{locator:[0-9a-f]{32}\+.*}`, h.Put).Methods("PUT")
308         rest.HandleFunc(`/{locator:[0-9a-f]{32}}`, h.Put).Methods("PUT")
309         rest.HandleFunc(`/`, h.Put).Methods("POST")
310         rest.HandleFunc(`/{any}`, h.Options).Methods("OPTIONS")
311         rest.HandleFunc(`/`, h.Options).Methods("OPTIONS")
312
313         rest.Handle("/_health/{check}", &health.Handler{
314                 Token:  mgmtToken,
315                 Prefix: "/_health/",
316         }).Methods("GET")
317
318         rest.NotFoundHandler = InvalidPathHandler{}
319         return h
320 }
321
322 var errLoopDetected = errors.New("loop detected")
323
324 func (*proxyHandler) checkLoop(resp http.ResponseWriter, req *http.Request) error {
325         if via := req.Header.Get("Via"); strings.Index(via, " "+viaAlias) >= 0 {
326                 log.Printf("proxy loop detected (request has Via: %q): perhaps keepproxy is misidentified by gateway config as an external client, or its keep_services record does not have service_type=proxy?", via)
327                 http.Error(resp, errLoopDetected.Error(), http.StatusInternalServerError)
328                 return errLoopDetected
329         }
330         return nil
331 }
332
// SetCorsHeaders adds the Access-Control-* response headers that
// allow cross-origin requests from any origin.
func SetCorsHeaders(resp http.ResponseWriter) {
	hdr := resp.Header()
	for _, kv := range [...][2]string{
		{"Access-Control-Allow-Methods", "GET, HEAD, POST, PUT, OPTIONS"},
		{"Access-Control-Allow-Origin", "*"},
		{"Access-Control-Allow-Headers", "Authorization, Content-Length, Content-Type, X-Keep-Desired-Replicas"},
		{"Access-Control-Max-Age", "86486400"},
	} {
		hdr.Set(kv[0], kv[1])
	}
}
339
340 type InvalidPathHandler struct{}
341
342 func (InvalidPathHandler) ServeHTTP(resp http.ResponseWriter, req *http.Request) {
343         log.Printf("%s: %s %s unroutable", GetRemoteAddress(req), req.Method, req.URL.Path)
344         http.Error(resp, "Bad request", http.StatusBadRequest)
345 }
346
347 func (h *proxyHandler) Options(resp http.ResponseWriter, req *http.Request) {
348         log.Printf("%s: %s %s", GetRemoteAddress(req), req.Method, req.URL.Path)
349         SetCorsHeaders(resp)
350 }
351
// Errors reported to clients by the request handlers.
var (
	// BadAuthorizationHeader means the request's Authorization
	// header was absent, malformed, or carried a rejected token.
	BadAuthorizationHeader = errors.New("Missing or invalid Authorization header")
	// ContentLengthMismatch means the number of bytes copied to the
	// client differed from the expected content length.
	ContentLengthMismatch = errors.New("Actual length != expected content length")
	// MethodNotSupported means the handler was invoked with an HTTP
	// method it does not implement.
	MethodNotSupported = errors.New("Method not supported")
)

// removeHint matches a "+K@xxxxx" locator hint (five lowercase
// alphanumerics after "K@"), capturing the "+" that follows it, if
// any, so that replacing a match with "$1" removes just the hint.
//
// MustCompile is used so that an invalid pattern fails at program
// start instead of leaving a nil regexp to panic at first use.
var removeHint = regexp.MustCompile(`\+K@[a-z0-9]{5}(\+|$)`)
357
358 func (h *proxyHandler) Get(resp http.ResponseWriter, req *http.Request) {
359         if err := h.checkLoop(resp, req); err != nil {
360                 return
361         }
362         SetCorsHeaders(resp)
363         resp.Header().Set("Via", req.Proto+" "+viaAlias)
364
365         locator := mux.Vars(req)["locator"]
366         var err error
367         var status int
368         var expectLength, responseLength int64
369         var proxiedURI = "-"
370
371         defer func() {
372                 log.Println(GetRemoteAddress(req), req.Method, req.URL.Path, status, expectLength, responseLength, proxiedURI, err)
373                 if status != http.StatusOK {
374                         http.Error(resp, err.Error(), status)
375                 }
376         }()
377
378         kc := h.makeKeepClient(req)
379
380         var pass bool
381         var tok string
382         if pass, tok = CheckAuthorizationHeader(kc, h.ApiTokenCache, req); !pass {
383                 status, err = http.StatusForbidden, BadAuthorizationHeader
384                 return
385         }
386
387         // Copy ArvadosClient struct and use the client's API token
388         arvclient := *kc.Arvados
389         arvclient.ApiToken = tok
390         kc.Arvados = &arvclient
391
392         var reader io.ReadCloser
393
394         locator = removeHint.ReplaceAllString(locator, "$1")
395
396         switch req.Method {
397         case "HEAD":
398                 expectLength, proxiedURI, err = kc.Ask(locator)
399         case "GET":
400                 reader, expectLength, proxiedURI, err = kc.Get(locator)
401                 if reader != nil {
402                         defer reader.Close()
403                 }
404         default:
405                 status, err = http.StatusNotImplemented, MethodNotSupported
406                 return
407         }
408
409         if expectLength == -1 {
410                 log.Println("Warning:", GetRemoteAddress(req), req.Method, proxiedURI, "Content-Length not provided")
411         }
412
413         switch respErr := err.(type) {
414         case nil:
415                 status = http.StatusOK
416                 resp.Header().Set("Content-Length", fmt.Sprint(expectLength))
417                 switch req.Method {
418                 case "HEAD":
419                         responseLength = 0
420                 case "GET":
421                         responseLength, err = io.Copy(resp, reader)
422                         if err == nil && expectLength > -1 && responseLength != expectLength {
423                                 err = ContentLengthMismatch
424                         }
425                 }
426         case keepclient.Error:
427                 if respErr == keepclient.BlockNotFound {
428                         status = http.StatusNotFound
429                 } else if respErr.Temporary() {
430                         status = http.StatusBadGateway
431                 } else {
432                         status = 422
433                 }
434         default:
435                 status = http.StatusInternalServerError
436         }
437 }
438
// Errors reported by Put when the request's length information is
// unusable.
var (
	// LengthRequiredError means the Content-Length header was
	// missing, unparseable, or negative.
	LengthRequiredError = errors.New(http.StatusText(http.StatusLengthRequired))
	// LengthMismatchError means the size in the locator disagrees
	// with the Content-Length header.
	LengthMismatchError = errors.New("Locator size hint does not match Content-Length header")
)
441
442 func (h *proxyHandler) Put(resp http.ResponseWriter, req *http.Request) {
443         if err := h.checkLoop(resp, req); err != nil {
444                 return
445         }
446         SetCorsHeaders(resp)
447         resp.Header().Set("Via", "HTTP/1.1 "+viaAlias)
448
449         kc := h.makeKeepClient(req)
450
451         var err error
452         var expectLength int64
453         var status = http.StatusInternalServerError
454         var wroteReplicas int
455         var locatorOut string = "-"
456
457         defer func() {
458                 log.Println(GetRemoteAddress(req), req.Method, req.URL.Path, status, expectLength, kc.Want_replicas, wroteReplicas, locatorOut, err)
459                 if status != http.StatusOK {
460                         http.Error(resp, err.Error(), status)
461                 }
462         }()
463
464         locatorIn := mux.Vars(req)["locator"]
465
466         // Check if the client specified storage classes
467         if req.Header.Get("X-Keep-Storage-Classes") != "" {
468                 var scl []string
469                 for _, sc := range strings.Split(req.Header.Get("X-Keep-Storage-Classes"), ",") {
470                         scl = append(scl, strings.Trim(sc, " "))
471                 }
472                 kc.StorageClasses = scl
473         }
474
475         _, err = fmt.Sscanf(req.Header.Get("Content-Length"), "%d", &expectLength)
476         if err != nil || expectLength < 0 {
477                 err = LengthRequiredError
478                 status = http.StatusLengthRequired
479                 return
480         }
481
482         if locatorIn != "" {
483                 var loc *keepclient.Locator
484                 if loc, err = keepclient.MakeLocator(locatorIn); err != nil {
485                         status = http.StatusBadRequest
486                         return
487                 } else if loc.Size > 0 && int64(loc.Size) != expectLength {
488                         err = LengthMismatchError
489                         status = http.StatusBadRequest
490                         return
491                 }
492         }
493
494         var pass bool
495         var tok string
496         if pass, tok = CheckAuthorizationHeader(kc, h.ApiTokenCache, req); !pass {
497                 err = BadAuthorizationHeader
498                 status = http.StatusForbidden
499                 return
500         }
501
502         // Copy ArvadosClient struct and use the client's API token
503         arvclient := *kc.Arvados
504         arvclient.ApiToken = tok
505         kc.Arvados = &arvclient
506
507         // Check if the client specified the number of replicas
508         if req.Header.Get("X-Keep-Desired-Replicas") != "" {
509                 var r int
510                 _, err := fmt.Sscanf(req.Header.Get(keepclient.X_Keep_Desired_Replicas), "%d", &r)
511                 if err == nil {
512                         kc.Want_replicas = r
513                 }
514         }
515
516         // Now try to put the block through
517         if locatorIn == "" {
518                 bytes, err2 := ioutil.ReadAll(req.Body)
519                 if err2 != nil {
520                         err = fmt.Errorf("Error reading request body: %s", err2)
521                         status = http.StatusInternalServerError
522                         return
523                 }
524                 locatorOut, wroteReplicas, err = kc.PutB(bytes)
525         } else {
526                 locatorOut, wroteReplicas, err = kc.PutHR(locatorIn, req.Body, expectLength)
527         }
528
529         // Tell the client how many successful PUTs we accomplished
530         resp.Header().Set(keepclient.X_Keep_Replicas_Stored, fmt.Sprintf("%d", wroteReplicas))
531
532         switch err.(type) {
533         case nil:
534                 status = http.StatusOK
535                 _, err = io.WriteString(resp, locatorOut)
536
537         case keepclient.OversizeBlockError:
538                 // Too much data
539                 status = http.StatusRequestEntityTooLarge
540
541         case keepclient.InsufficientReplicasError:
542                 if wroteReplicas > 0 {
543                         // At least one write is considered success.  The
544                         // client can decide if getting less than the number of
545                         // replications it asked for is a fatal error.
546                         status = http.StatusOK
547                         _, err = io.WriteString(resp, locatorOut)
548                 } else {
549                         status = http.StatusServiceUnavailable
550                 }
551
552         default:
553                 status = http.StatusBadGateway
554         }
555 }
556
557 // ServeHTTP implementation for IndexHandler
558 // Supports only GET requests for /index/{prefix:[0-9a-f]{0,32}}
559 // For each keep server found in LocalRoots:
560 //   Invokes GetIndex using keepclient
561 //   Expects "complete" response (terminating with blank new line)
562 //   Aborts on any errors
563 // Concatenates responses from all those keep servers and returns
564 func (h *proxyHandler) Index(resp http.ResponseWriter, req *http.Request) {
565         SetCorsHeaders(resp)
566
567         prefix := mux.Vars(req)["prefix"]
568         var err error
569         var status int
570
571         defer func() {
572                 if status != http.StatusOK {
573                         http.Error(resp, err.Error(), status)
574                 }
575         }()
576
577         kc := h.makeKeepClient(req)
578         ok, token := CheckAuthorizationHeader(kc, h.ApiTokenCache, req)
579         if !ok {
580                 status, err = http.StatusForbidden, BadAuthorizationHeader
581                 return
582         }
583
584         // Copy ArvadosClient struct and use the client's API token
585         arvclient := *kc.Arvados
586         arvclient.ApiToken = token
587         kc.Arvados = &arvclient
588
589         // Only GET method is supported
590         if req.Method != "GET" {
591                 status, err = http.StatusNotImplemented, MethodNotSupported
592                 return
593         }
594
595         // Get index from all LocalRoots and write to resp
596         var reader io.Reader
597         for uuid := range kc.LocalRoots() {
598                 reader, err = kc.GetIndex(uuid, prefix)
599                 if err != nil {
600                         status = http.StatusBadGateway
601                         return
602                 }
603
604                 _, err = io.Copy(resp, reader)
605                 if err != nil {
606                         status = http.StatusBadGateway
607                         return
608                 }
609         }
610
611         // Got index from all the keep servers and wrote to resp
612         status = http.StatusOK
613         resp.Write([]byte("\n"))
614 }
615
616 func (h *proxyHandler) makeKeepClient(req *http.Request) *keepclient.KeepClient {
617         kc := *h.KeepClient
618         kc.RequestID = req.Header.Get("X-Request-Id")
619         kc.HTTPClient = &proxyClient{
620                 client: &http.Client{
621                         Timeout:   h.timeout,
622                         Transport: h.transport,
623                 },
624                 proto: req.Proto,
625         }
626         return &kc
627 }