Merge branch '2960-keepstore-streaming'
[arvados.git] / services / keepproxy / keepproxy.go
index 7c1360ad71a2e945b2aaee72d4f6aadabaf664ba..39ffd45cbe37b69f663dc6093acd4dfe221c74a1 100644 (file)
 //
 // SPDX-License-Identifier: AGPL-3.0
 
-package main
+package keepproxy
 
 import (
+       "context"
        "errors"
-       "flag"
        "fmt"
        "io"
        "io/ioutil"
        "net"
        "net/http"
-       "os"
-       "os/signal"
        "regexp"
        "strings"
-       "syscall"
        "time"
 
-       "git.arvados.org/arvados.git/lib/cmd"
-       "git.arvados.org/arvados.git/lib/config"
+       "git.arvados.org/arvados.git/lib/service"
        "git.arvados.org/arvados.git/sdk/go/arvados"
        "git.arvados.org/arvados.git/sdk/go/arvadosclient"
+       "git.arvados.org/arvados.git/sdk/go/ctxlog"
        "git.arvados.org/arvados.git/sdk/go/health"
        "git.arvados.org/arvados.git/sdk/go/httpserver"
        "git.arvados.org/arvados.git/sdk/go/keepclient"
-       "github.com/coreos/go-systemd/daemon"
-       "github.com/ghodss/yaml"
        "github.com/gorilla/mux"
        lru "github.com/hashicorp/golang-lru"
-       log "github.com/sirupsen/logrus"
-)
-
-var version = "dev"
-
-var (
-       listener net.Listener
-       router   http.Handler
+       "github.com/prometheus/client_golang/prometheus"
+       "github.com/sirupsen/logrus"
 )
 
 const rfc3339NanoFixed = "2006-01-02T15:04:05.000000000Z07:00"
 
-func configure(logger log.FieldLogger, args []string) (*arvados.Cluster, error) {
-       prog := args[0]
-       flags := flag.NewFlagSet(prog, flag.ContinueOnError)
-
-       dumpConfig := flags.Bool("dump-config", false, "write current configuration to stdout and exit")
-       getVersion := flags.Bool("version", false, "Print version information and exit.")
-
-       loader := config.NewLoader(os.Stdin, logger)
-       loader.SetupFlags(flags)
-       args = loader.MungeLegacyConfigArgs(logger, args[1:], "-legacy-keepproxy-config")
-
-       if ok, code := cmd.ParseFlags(flags, prog, args, "", os.Stderr); !ok {
-               os.Exit(code)
-       } else if *getVersion {
-               fmt.Printf("keepproxy %s\n", version)
-               return nil, nil
-       }
-
-       cfg, err := loader.Load()
-       if err != nil {
-               return nil, err
-       }
-       cluster, err := cfg.GetCluster("")
-       if err != nil {
-               return nil, err
-       }
+var Command = service.Command(arvados.ServiceNameKeepproxy, newHandlerOrErrorHandler)
 
-       if *dumpConfig {
-               out, err := yaml.Marshal(cfg)
-               if err != nil {
-                       return nil, err
-               }
-               if _, err := os.Stdout.Write(out); err != nil {
-                       return nil, err
-               }
-               return nil, nil
-       }
-       return cluster, nil
-}
-
-func main() {
-       logger := log.New()
-       logger.Formatter = &log.JSONFormatter{
-               TimestampFormat: rfc3339NanoFixed,
-       }
-
-       cluster, err := configure(logger, os.Args)
-       if err != nil {
-               log.Fatal(err)
-       }
-       if cluster == nil {
-               return
-       }
-
-       log.Printf("keepproxy %s started", version)
-
-       if err := run(logger, cluster); err != nil {
-               log.Fatal(err)
-       }
-
-       log.Println("shutting down")
-}
-
-func run(logger log.FieldLogger, cluster *arvados.Cluster) error {
+func newHandlerOrErrorHandler(ctx context.Context, cluster *arvados.Cluster, token string, reg *prometheus.Registry) service.Handler {
        client, err := arvados.NewClientFromConfig(cluster)
        if err != nil {
-               return err
+               return service.ErrorHandler(ctx, cluster, fmt.Errorf("Error setting up arvados client: %w", err))
        }
-       client.AuthToken = cluster.SystemRootToken
-
        arv, err := arvadosclient.New(client)
        if err != nil {
-               return fmt.Errorf("Error setting up arvados client %v", err)
-       }
-
-       // If a config file is available, use the keepstores defined there
-       // instead of the legacy autodiscover mechanism via the API server
-       for k := range cluster.Services.Keepstore.InternalURLs {
-               arv.KeepServiceURIs = append(arv.KeepServiceURIs, strings.TrimRight(k.String(), "/"))
-       }
-
-       if cluster.SystemLogs.LogLevel == "debug" {
-               keepclient.DebugPrintf = log.Printf
+               return service.ErrorHandler(ctx, cluster, fmt.Errorf("Error setting up arvados client: %w", err))
        }
        kc, err := keepclient.MakeKeepClient(arv)
        if err != nil {
-               return fmt.Errorf("Error setting up keep client %v", err)
+               return service.ErrorHandler(ctx, cluster, fmt.Errorf("Error setting up keep client: %w", err))
        }
        keepclient.RefreshServiceDiscoveryOnSIGHUP()
-
-       if cluster.Collections.DefaultReplication > 0 {
-               kc.Want_replicas = cluster.Collections.DefaultReplication
-       }
-
-       var listen arvados.URL
-       for listen = range cluster.Services.Keepproxy.InternalURLs {
-               break
-       }
-
-       var lErr error
-       listener, lErr = net.Listen("tcp", listen.Host)
-       if lErr != nil {
-               return fmt.Errorf("listen(%s): %v", listen.Host, lErr)
-       }
-
-       if _, err := daemon.SdNotify(false, "READY=1"); err != nil {
-               log.Printf("Error notifying init daemon: %v", err)
-       }
-       log.Println("listening at", listener.Addr())
-
-       // Shut down the server gracefully (by closing the listener)
-       // if SIGTERM is received.
-       term := make(chan os.Signal, 1)
-       go func(sig <-chan os.Signal) {
-               s := <-sig
-               log.Println("caught signal:", s)
-               listener.Close()
-       }(term)
-       signal.Notify(term, syscall.SIGTERM)
-       signal.Notify(term, syscall.SIGINT)
-
-       // Start serving requests.
-       router, err = MakeRESTRouter(kc, time.Duration(keepclient.DefaultProxyRequestTimeout), cluster, logger)
+       router, err := newHandler(ctx, kc, time.Duration(keepclient.DefaultProxyRequestTimeout), cluster)
        if err != nil {
-               return err
+               return service.ErrorHandler(ctx, cluster, err)
        }
-       return http.Serve(listener, httpserver.AddRequestIDs(httpserver.LogRequests(router)))
+       return router
 }
 
-type TokenCacheEntry struct {
+type tokenCacheEntry struct {
        expire int64
        user   *arvados.User
 }
 
-type APITokenCache struct {
+type apiTokenCache struct {
        tokens     *lru.TwoQueueCache
        expireTime int64
 }
 
 // RememberToken caches the token and set an expire time.  If the
 // token is already in the cache, it is not updated.
-func (cache *APITokenCache) RememberToken(token string, user *arvados.User) {
+func (cache *apiTokenCache) RememberToken(token string, user *arvados.User) {
        now := time.Now().Unix()
        _, ok := cache.tokens.Get(token)
        if !ok {
-               cache.tokens.Add(token, TokenCacheEntry{
+               cache.tokens.Add(token, tokenCacheEntry{
                        expire: now + cache.expireTime,
                        user:   user,
                })
@@ -196,13 +79,13 @@ func (cache *APITokenCache) RememberToken(token string, user *arvados.User) {
 
 // RecallToken checks if the cached token is known and still believed to be
 // valid.
-func (cache *APITokenCache) RecallToken(token string) (bool, *arvados.User) {
+func (cache *apiTokenCache) RecallToken(token string) (bool, *arvados.User) {
        val, ok := cache.tokens.Get(token)
        if !ok {
                return false, nil
        }
 
-       cacheEntry := val.(TokenCacheEntry)
+       cacheEntry := val.(tokenCacheEntry)
        now := time.Now().Unix()
        if now < cacheEntry.expire {
                // Token is known and still valid
@@ -214,18 +97,15 @@ func (cache *APITokenCache) RecallToken(token string) (bool, *arvados.User) {
        }
 }
 
-// GetRemoteAddress returns a string with the remote address for the request.
-// If the X-Forwarded-For header is set and has a non-zero length, it returns a
-// string made from a comma separated list of all the remote addresses,
-// starting with the one(s) from the X-Forwarded-For header.
-func GetRemoteAddress(req *http.Request) string {
-       if xff := req.Header.Get("X-Forwarded-For"); xff != "" {
-               return xff + "," + req.RemoteAddr
-       }
-       return req.RemoteAddr
+func (h *proxyHandler) Done() <-chan struct{} {
+       return nil
 }
 
-func (h *proxyHandler) CheckAuthorizationHeader(req *http.Request) (pass bool, tok string, user *arvados.User) {
+func (h *proxyHandler) CheckHealth() error {
+       return nil
+}
+
+func (h *proxyHandler) checkAuthorizationHeader(req *http.Request) (pass bool, tok string, user *arvados.User) {
        parts := strings.SplitN(req.Header.Get("Authorization"), " ", 2)
        if len(parts) < 2 || !(parts[0] == "OAuth2" || parts[0] == "Bearer") || len(parts[1]) == 0 {
                return false, "", nil
@@ -243,7 +123,7 @@ func (h *proxyHandler) CheckAuthorizationHeader(req *http.Request) (pass bool, t
                op = "write"
        }
 
-       if ok, user := h.APITokenCache.RecallToken(op + ":" + tok); ok {
+       if ok, user := h.apiTokenCache.RecallToken(op + ":" + tok); ok {
                // Valid in the cache, short circuit
                return true, tok, user
        }
@@ -267,7 +147,7 @@ func (h *proxyHandler) CheckAuthorizationHeader(req *http.Request) (pass bool, t
                }
        }
        if err != nil {
-               log.Printf("%s: CheckAuthorizationHeader error: %v", GetRemoteAddress(req), err)
+               ctxlog.FromContext(req.Context()).WithError(err).Info("checkAuthorizationHeader error")
                return false, "", nil
        }
 
@@ -290,42 +170,53 @@ func (h *proxyHandler) CheckAuthorizationHeader(req *http.Request) (pass bool, t
        }
 
        // Success!  Update cache
-       h.APITokenCache.RememberToken(op+":"+tok, user)
+       h.apiTokenCache.RememberToken(op+":"+tok, user)
 
        return true, tok, user
 }
 
-// We need to make a private copy of the default http transport early
-// in initialization, then make copies of our private copy later. It
-// won't be safe to copy http.DefaultTransport itself later, because
-// its private mutexes might have already been used. (Without this,
-// the test suite sometimes panics "concurrent map writes" in
-// net/http.(*Transport).removeIdleConnLocked().)
-var defaultTransport = *(http.DefaultTransport.(*http.Transport))
+// We can't copy the default http transport because http.Transport has
+// a mutex field, so we make our own using the values of the exported
+// fields.
+var defaultTransport = http.Transport{
+       Proxy:                 http.DefaultTransport.(*http.Transport).Proxy,
+       DialContext:           http.DefaultTransport.(*http.Transport).DialContext,
+       ForceAttemptHTTP2:     http.DefaultTransport.(*http.Transport).ForceAttemptHTTP2,
+       MaxIdleConns:          http.DefaultTransport.(*http.Transport).MaxIdleConns,
+       IdleConnTimeout:       http.DefaultTransport.(*http.Transport).IdleConnTimeout,
+       TLSHandshakeTimeout:   http.DefaultTransport.(*http.Transport).TLSHandshakeTimeout,
+       ExpectContinueTimeout: http.DefaultTransport.(*http.Transport).ExpectContinueTimeout,
+}
 
 type proxyHandler struct {
        http.Handler
        *keepclient.KeepClient
-       *APITokenCache
+       *apiTokenCache
        timeout   time.Duration
        transport *http.Transport
-       logger    log.FieldLogger
        cluster   *arvados.Cluster
 }
 
-// MakeRESTRouter returns an http.Handler that passes GET and PUT
-// requests to the appropriate handlers.
-func MakeRESTRouter(kc *keepclient.KeepClient, timeout time.Duration, cluster *arvados.Cluster, logger log.FieldLogger) (http.Handler, error) {
+func newHandler(ctx context.Context, kc *keepclient.KeepClient, timeout time.Duration, cluster *arvados.Cluster) (service.Handler, error) {
        rest := mux.NewRouter()
 
-       transport := defaultTransport
-       transport.DialContext = (&net.Dialer{
-               Timeout:   keepclient.DefaultConnectTimeout,
-               KeepAlive: keepclient.DefaultKeepAlive,
-               DualStack: true,
-       }).DialContext
-       transport.TLSClientConfig = arvadosclient.MakeTLSConfig(kc.Arvados.ApiInsecure)
-       transport.TLSHandshakeTimeout = keepclient.DefaultTLSHandshakeTimeout
+       // We can't copy the default http transport because
+       // http.Transport has a mutex field, so we copy the fields
+       // that we know have non-zero values in http.DefaultTransport.
+       transport := &http.Transport{
+               Proxy:                 http.DefaultTransport.(*http.Transport).Proxy,
+               ForceAttemptHTTP2:     http.DefaultTransport.(*http.Transport).ForceAttemptHTTP2,
+               MaxIdleConns:          http.DefaultTransport.(*http.Transport).MaxIdleConns,
+               IdleConnTimeout:       http.DefaultTransport.(*http.Transport).IdleConnTimeout,
+               ExpectContinueTimeout: http.DefaultTransport.(*http.Transport).ExpectContinueTimeout,
+               DialContext: (&net.Dialer{
+                       Timeout:   keepclient.DefaultConnectTimeout,
+                       KeepAlive: keepclient.DefaultKeepAlive,
+                       DualStack: true,
+               }).DialContext,
+               TLSClientConfig:     arvadosclient.MakeTLSConfig(kc.Arvados.ApiInsecure),
+               TLSHandshakeTimeout: keepclient.DefaultTLSHandshakeTimeout,
+       }
 
        cacheQ, err := lru.New2Q(500)
        if err != nil {
@@ -336,12 +227,11 @@ func MakeRESTRouter(kc *keepclient.KeepClient, timeout time.Duration, cluster *a
                Handler:    rest,
                KeepClient: kc,
                timeout:    timeout,
-               transport:  &transport,
-               APITokenCache: &APITokenCache{
+               transport:  transport,
+               apiTokenCache: &apiTokenCache{
                        tokens:     cacheQ,
                        expireTime: 300,
                },
-               logger:  logger,
                cluster: cluster,
        }
 
@@ -365,7 +255,7 @@ func MakeRESTRouter(kc *keepclient.KeepClient, timeout time.Duration, cluster *a
                Prefix: "/_health/",
        }).Methods("GET")
 
-       rest.NotFoundHandler = InvalidPathHandler{}
+       rest.NotFoundHandler = invalidPathHandler{}
        return h, nil
 }
 
@@ -373,30 +263,28 @@ var errLoopDetected = errors.New("loop detected")
 
 func (h *proxyHandler) checkLoop(resp http.ResponseWriter, req *http.Request) error {
        if via := req.Header.Get("Via"); strings.Index(via, " "+viaAlias) >= 0 {
-               h.logger.Printf("proxy loop detected (request has Via: %q): perhaps keepproxy is misidentified by gateway config as an external client, or its keep_services record does not have service_type=proxy?", via)
+               ctxlog.FromContext(req.Context()).Printf("proxy loop detected (request has Via: %q): perhaps keepproxy is misidentified by gateway config as an external client, or its keep_services record does not have service_type=proxy?", via)
                http.Error(resp, errLoopDetected.Error(), http.StatusInternalServerError)
                return errLoopDetected
        }
        return nil
 }
 
-func SetCorsHeaders(resp http.ResponseWriter) {
+func setCORSHeaders(resp http.ResponseWriter) {
        resp.Header().Set("Access-Control-Allow-Methods", "GET, HEAD, POST, PUT, OPTIONS")
        resp.Header().Set("Access-Control-Allow-Origin", "*")
        resp.Header().Set("Access-Control-Allow-Headers", "Authorization, Content-Length, Content-Type, X-Keep-Desired-Replicas")
        resp.Header().Set("Access-Control-Max-Age", "86486400")
 }
 
-type InvalidPathHandler struct{}
+type invalidPathHandler struct{}
 
-func (InvalidPathHandler) ServeHTTP(resp http.ResponseWriter, req *http.Request) {
-       log.Printf("%s: %s %s unroutable", GetRemoteAddress(req), req.Method, req.URL.Path)
+func (invalidPathHandler) ServeHTTP(resp http.ResponseWriter, req *http.Request) {
        http.Error(resp, "Bad request", http.StatusBadRequest)
 }
 
 func (h *proxyHandler) Options(resp http.ResponseWriter, req *http.Request) {
-       log.Printf("%s: %s %s", GetRemoteAddress(req), req.Method, req.URL.Path)
-       SetCorsHeaders(resp)
+       setCORSHeaders(resp)
 }
 
 var errBadAuthorizationHeader = errors.New("Missing or invalid Authorization header, or method not allowed")
@@ -409,31 +297,41 @@ func (h *proxyHandler) Get(resp http.ResponseWriter, req *http.Request) {
        if err := h.checkLoop(resp, req); err != nil {
                return
        }
-       SetCorsHeaders(resp)
+       setCORSHeaders(resp)
        resp.Header().Set("Via", req.Proto+" "+viaAlias)
 
        locator := mux.Vars(req)["locator"]
        var err error
        var status int
        var expectLength, responseLength int64
-       var proxiedURI = "-"
 
+       logger := ctxlog.FromContext(req.Context())
        defer func() {
-               log.Println(GetRemoteAddress(req), req.Method, req.URL.Path, status, expectLength, responseLength, proxiedURI, err)
+               httpserver.SetResponseLogFields(req.Context(), logrus.Fields{
+                       "locator":        locator,
+                       "expectLength":   expectLength,
+                       "responseLength": responseLength,
+                       "err":            err,
+               })
                if status != http.StatusOK {
                        http.Error(resp, err.Error(), status)
                }
        }()
 
        kc := h.makeKeepClient(req)
+       kc.DiskCacheSize = keepclient.DiskCacheDisabled
 
        var pass bool
        var tok string
        var user *arvados.User
-       if pass, tok, user = h.CheckAuthorizationHeader(req); !pass {
+       if pass, tok, user = h.checkAuthorizationHeader(req); !pass {
                status, err = http.StatusForbidden, errBadAuthorizationHeader
                return
        }
+       httpserver.SetResponseLogFields(req.Context(), logrus.Fields{
+               "userUUID":     user.UUID,
+               "userFullName": user.FullName,
+       })
 
        // Copy ArvadosClient struct and use the client's API token
        arvclient := *kc.Arvados
@@ -444,23 +342,11 @@ func (h *proxyHandler) Get(resp http.ResponseWriter, req *http.Request) {
 
        locator = removeHint.ReplaceAllString(locator, "$1")
 
-       if locator != "" {
-               parts := strings.SplitN(locator, "+", 3)
-               if len(parts) >= 2 {
-                       logger := h.logger
-                       if user != nil {
-                               logger = logger.WithField("user_uuid", user.UUID).
-                                       WithField("user_full_name", user.FullName)
-                       }
-                       logger.WithField("locator", fmt.Sprintf("%s+%s", parts[0], parts[1])).Infof("Block download")
-               }
-       }
-
        switch req.Method {
        case "HEAD":
-               expectLength, proxiedURI, err = kc.Ask(locator)
+               expectLength, _, err = kc.Ask(locator)
        case "GET":
-               reader, expectLength, proxiedURI, err = kc.Get(locator)
+               reader, expectLength, _, err = kc.Get(locator)
                if reader != nil {
                        defer reader.Close()
                }
@@ -470,7 +356,7 @@ func (h *proxyHandler) Get(resp http.ResponseWriter, req *http.Request) {
        }
 
        if expectLength == -1 {
-               log.Println("Warning:", GetRemoteAddress(req), req.Method, proxiedURI, "Content-Length not provided")
+               logger.Warn("Content-Length not provided")
        }
 
        switch respErr := err.(type) {
@@ -506,7 +392,7 @@ func (h *proxyHandler) Put(resp http.ResponseWriter, req *http.Request) {
        if err := h.checkLoop(resp, req); err != nil {
                return
        }
-       SetCorsHeaders(resp)
+       setCORSHeaders(resp)
        resp.Header().Set("Via", "HTTP/1.1 "+viaAlias)
 
        kc := h.makeKeepClient(req)
@@ -518,7 +404,13 @@ func (h *proxyHandler) Put(resp http.ResponseWriter, req *http.Request) {
        var locatorOut string = "-"
 
        defer func() {
-               log.Println(GetRemoteAddress(req), req.Method, req.URL.Path, status, expectLength, kc.Want_replicas, wroteReplicas, locatorOut, err)
+               httpserver.SetResponseLogFields(req.Context(), logrus.Fields{
+                       "expectLength":  expectLength,
+                       "wantReplicas":  kc.Want_replicas,
+                       "wroteReplicas": wroteReplicas,
+                       "locator":       strings.SplitN(locatorOut, "+A", 2)[0],
+                       "err":           err,
+               })
                if status != http.StatusOK {
                        http.Error(resp, err.Error(), status)
                }
@@ -557,11 +449,15 @@ func (h *proxyHandler) Put(resp http.ResponseWriter, req *http.Request) {
        var pass bool
        var tok string
        var user *arvados.User
-       if pass, tok, user = h.CheckAuthorizationHeader(req); !pass {
+       if pass, tok, user = h.checkAuthorizationHeader(req); !pass {
                err = errBadAuthorizationHeader
                status = http.StatusForbidden
                return
        }
+       httpserver.SetResponseLogFields(req.Context(), logrus.Fields{
+               "userUUID":     user.UUID,
+               "userFullName": user.FullName,
+       })
 
        // Copy ArvadosClient struct and use the client's API token
        arvclient := *kc.Arvados
@@ -590,18 +486,6 @@ func (h *proxyHandler) Put(resp http.ResponseWriter, req *http.Request) {
                locatorOut, wroteReplicas, err = kc.PutHR(locatorIn, req.Body, expectLength)
        }
 
-       if locatorOut != "" {
-               parts := strings.SplitN(locatorOut, "+", 3)
-               if len(parts) >= 2 {
-                       logger := h.logger
-                       if user != nil {
-                               logger = logger.WithField("user_uuid", user.UUID).
-                                       WithField("user_full_name", user.FullName)
-                       }
-                       logger.WithField("locator", fmt.Sprintf("%s+%s", parts[0], parts[1])).Infof("Block upload")
-               }
-       }
-
        // Tell the client how many successful PUTs we accomplished
        resp.Header().Set(keepclient.XKeepReplicasStored, fmt.Sprintf("%d", wroteReplicas))
 
@@ -638,12 +522,12 @@ func (h *proxyHandler) Put(resp http.ResponseWriter, req *http.Request) {
 // ServeHTTP implementation for IndexHandler
 // Supports only GET requests for /index/{prefix:[0-9a-f]{0,32}}
 // For each keep server found in LocalRoots:
-//   Invokes GetIndex using keepclient
-//   Expects "complete" response (terminating with blank new line)
-//   Aborts on any errors
+// - Invokes GetIndex using keepclient
+// - Expects "complete" response (terminating with blank new line)
+// - Aborts on any errors
 // Concatenates responses from all those keep servers and returns
 func (h *proxyHandler) Index(resp http.ResponseWriter, req *http.Request) {
-       SetCorsHeaders(resp)
+       setCORSHeaders(resp)
 
        prefix := mux.Vars(req)["prefix"]
        var err error
@@ -656,7 +540,7 @@ func (h *proxyHandler) Index(resp http.ResponseWriter, req *http.Request) {
        }()
 
        kc := h.makeKeepClient(req)
-       ok, token, _ := h.CheckAuthorizationHeader(req)
+       ok, token, _ := h.checkAuthorizationHeader(req)
        if !ok {
                status, err = http.StatusForbidden, errBadAuthorizationHeader
                return
@@ -695,7 +579,7 @@ func (h *proxyHandler) Index(resp http.ResponseWriter, req *http.Request) {
 }
 
 func (h *proxyHandler) makeKeepClient(req *http.Request) *keepclient.KeepClient {
-       kc := *h.KeepClient
+       kc := h.KeepClient.Clone()
        kc.RequestID = req.Header.Get("X-Request-Id")
        kc.HTTPClient = &proxyClient{
                client: &http.Client{
@@ -704,5 +588,5 @@ func (h *proxyHandler) makeKeepClient(req *http.Request) *keepclient.KeepClient
                },
                proto: req.Proto,
        }
-       return &kc
+       return kc
 }