17840: Deduplicate flag-parsing code.
[arvados.git] / services / keepproxy / keepproxy.go
index 68500748148be5a1cdf744311481d5205a7c8b76..7c1360ad71a2e945b2aaee72d4f6aadabaf664ba 100644 (file)
@@ -16,19 +16,20 @@ import (
        "os/signal"
        "regexp"
        "strings"
-       "sync"
        "syscall"
        "time"
 
-       "git.curoverse.com/arvados.git/lib/config"
-       "git.curoverse.com/arvados.git/sdk/go/arvados"
-       "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
-       "git.curoverse.com/arvados.git/sdk/go/health"
-       "git.curoverse.com/arvados.git/sdk/go/httpserver"
-       "git.curoverse.com/arvados.git/sdk/go/keepclient"
+       "git.arvados.org/arvados.git/lib/cmd"
+       "git.arvados.org/arvados.git/lib/config"
+       "git.arvados.org/arvados.git/sdk/go/arvados"
+       "git.arvados.org/arvados.git/sdk/go/arvadosclient"
+       "git.arvados.org/arvados.git/sdk/go/health"
+       "git.arvados.org/arvados.git/sdk/go/httpserver"
+       "git.arvados.org/arvados.git/sdk/go/keepclient"
        "github.com/coreos/go-systemd/daemon"
        "github.com/ghodss/yaml"
        "github.com/gorilla/mux"
+       lru "github.com/hashicorp/golang-lru"
        log "github.com/sirupsen/logrus"
 )
 
@@ -42,20 +43,19 @@ var (
 const rfc3339NanoFixed = "2006-01-02T15:04:05.000000000Z07:00"
 
 func configure(logger log.FieldLogger, args []string) (*arvados.Cluster, error) {
-       flags := flag.NewFlagSet(args[0], flag.ExitOnError)
-       flags.Usage = usage
+       prog := args[0]
+       flags := flag.NewFlagSet(prog, flag.ContinueOnError)
 
        dumpConfig := flags.Bool("dump-config", false, "write current configuration to stdout and exit")
        getVersion := flags.Bool("version", false, "Print version information and exit.")
 
        loader := config.NewLoader(os.Stdin, logger)
        loader.SetupFlags(flags)
-
        args = loader.MungeLegacyConfigArgs(logger, args[1:], "-legacy-keepproxy-config")
-       flags.Parse(args)
 
-       // Print version information if requested
-       if *getVersion {
+       if ok, code := cmd.ParseFlags(flags, prog, args, "", os.Stderr); !ok {
+               os.Exit(code)
+       } else if *getVersion {
                fmt.Printf("keepproxy %s\n", version)
                return nil, nil
        }
@@ -117,6 +117,12 @@ func run(logger log.FieldLogger, cluster *arvados.Cluster) error {
                return fmt.Errorf("Error setting up arvados client %v", err)
        }
 
+       // If a config file is available, use the keepstores defined there
+       // instead of the legacy autodiscover mechanism via the API server
+       for k := range cluster.Services.Keepstore.InternalURLs {
+               arv.KeepServiceURIs = append(arv.KeepServiceURIs, strings.TrimRight(k.String(), "/"))
+       }
+
        if cluster.SystemLogs.LogLevel == "debug" {
                keepclient.DebugPrintf = log.Printf
        }
@@ -158,47 +164,60 @@ func run(logger log.FieldLogger, cluster *arvados.Cluster) error {
        signal.Notify(term, syscall.SIGINT)
 
        // Start serving requests.
-       router = MakeRESTRouter(kc, time.Duration(cluster.API.KeepServiceRequestTimeout), cluster.SystemRootToken)
+       router, err = MakeRESTRouter(kc, time.Duration(keepclient.DefaultProxyRequestTimeout), cluster, logger)
+       if err != nil {
+               return err
+       }
        return http.Serve(listener, httpserver.AddRequestIDs(httpserver.LogRequests(router)))
 }
 
-type ApiTokenCache struct {
-       tokens     map[string]int64
-       lock       sync.Mutex
-       expireTime int64
+type TokenCacheEntry struct {
+       expire int64
+       user   *arvados.User
 }
 
-// Cache the token and set an expire time.  If we already have an expire time
-// on the token, it is not updated.
-func (this *ApiTokenCache) RememberToken(token string) {
-       this.lock.Lock()
-       defer this.lock.Unlock()
+type APITokenCache struct {
+       tokens     *lru.TwoQueueCache
+       expireTime int64
+}
 
+// RememberToken caches the token and set an expire time.  If the
+// token is already in the cache, it is not updated.
+func (cache *APITokenCache) RememberToken(token string, user *arvados.User) {
        now := time.Now().Unix()
-       if this.tokens[token] == 0 {
-               this.tokens[token] = now + this.expireTime
+       _, ok := cache.tokens.Get(token)
+       if !ok {
+               cache.tokens.Add(token, TokenCacheEntry{
+                       expire: now + cache.expireTime,
+                       user:   user,
+               })
        }
 }
 
-// Check if the cached token is known and still believed to be valid.
-func (this *ApiTokenCache) RecallToken(token string) bool {
-       this.lock.Lock()
-       defer this.lock.Unlock()
+// RecallToken checks if the cached token is known and still believed to be
+// valid.
+func (cache *APITokenCache) RecallToken(token string) (bool, *arvados.User) {
+       val, ok := cache.tokens.Get(token)
+       if !ok {
+               return false, nil
+       }
 
+       cacheEntry := val.(TokenCacheEntry)
        now := time.Now().Unix()
-       if this.tokens[token] == 0 {
-               // Unknown token
-               return false
-       } else if now < this.tokens[token] {
+       if now < cacheEntry.expire {
                // Token is known and still valid
-               return true
+               return true, cacheEntry.user
        } else {
                // Token is expired
-               this.tokens[token] = 0
-               return false
+               cache.tokens.Remove(token)
+               return false, nil
        }
 }
 
+// GetRemoteAddress returns a string with the remote address for the request.
+// If the X-Forwarded-For header is set and has a non-zero length, it returns a
+// string made from a comma separated list of all the remote addresses,
+// starting with the one(s) from the X-Forwarded-For header.
 func GetRemoteAddress(req *http.Request) string {
        if xff := req.Header.Get("X-Forwarded-For"); xff != "" {
                return xff + "," + req.RemoteAddr
@@ -206,10 +225,10 @@ func GetRemoteAddress(req *http.Request) string {
        return req.RemoteAddr
 }
 
-func CheckAuthorizationHeader(kc *keepclient.KeepClient, cache *ApiTokenCache, req *http.Request) (pass bool, tok string) {
+func (h *proxyHandler) CheckAuthorizationHeader(req *http.Request) (pass bool, tok string, user *arvados.User) {
        parts := strings.SplitN(req.Header.Get("Authorization"), " ", 2)
        if len(parts) < 2 || !(parts[0] == "OAuth2" || parts[0] == "Bearer") || len(parts[1]) == 0 {
-               return false, ""
+               return false, "", nil
        }
        tok = parts[1]
 
@@ -224,29 +243,56 @@ func CheckAuthorizationHeader(kc *keepclient.KeepClient, cache *ApiTokenCache, r
                op = "write"
        }
 
-       if cache.RecallToken(op + ":" + tok) {
+       if ok, user := h.APITokenCache.RecallToken(op + ":" + tok); ok {
                // Valid in the cache, short circuit
-               return true, tok
+               return true, tok, user
        }
 
        var err error
-       arv := *kc.Arvados
+       arv := *h.KeepClient.Arvados
        arv.ApiToken = tok
        arv.RequestID = req.Header.Get("X-Request-Id")
-       if op == "read" {
-               err = arv.Call("HEAD", "keep_services", "", "accessible", nil, nil)
-       } else {
-               err = arv.Call("HEAD", "users", "", "current", nil, nil)
+       user = &arvados.User{}
+       userCurrentError := arv.Call("GET", "users", "", "current", nil, user)
+       err = userCurrentError
+       if err != nil && op == "read" {
+               apiError, ok := err.(arvadosclient.APIServerError)
+               if ok && apiError.HttpStatusCode == http.StatusForbidden {
+                       // If it was a scoped "sharing" token it will
+                       // return 403 instead of 401 for the current
+                       // user check.  If it is a download operation
+                       // and they have permission to read the
+                       // keep_services table, we can allow it.
+                       err = arv.Call("HEAD", "keep_services", "", "accessible", nil, nil)
+               }
        }
        if err != nil {
                log.Printf("%s: CheckAuthorizationHeader error: %v", GetRemoteAddress(req), err)
-               return false, ""
+               return false, "", nil
+       }
+
+       if userCurrentError == nil && user.IsAdmin {
+               // checking userCurrentError is probably redundant,
+               // IsAdmin would be false anyway. But can't hurt.
+               if op == "read" && !h.cluster.Collections.KeepproxyPermission.Admin.Download {
+                       return false, "", nil
+               }
+               if op == "write" && !h.cluster.Collections.KeepproxyPermission.Admin.Upload {
+                       return false, "", nil
+               }
+       } else {
+               if op == "read" && !h.cluster.Collections.KeepproxyPermission.User.Download {
+                       return false, "", nil
+               }
+               if op == "write" && !h.cluster.Collections.KeepproxyPermission.User.Upload {
+                       return false, "", nil
+               }
        }
 
        // Success!  Update cache
-       cache.RememberToken(op + ":" + tok)
+       h.APITokenCache.RememberToken(op+":"+tok, user)
 
-       return true, tok
+       return true, tok, user
 }
 
 // We need to make a private copy of the default http transport early
@@ -260,14 +306,16 @@ var defaultTransport = *(http.DefaultTransport.(*http.Transport))
 type proxyHandler struct {
        http.Handler
        *keepclient.KeepClient
-       *ApiTokenCache
+       *APITokenCache
        timeout   time.Duration
        transport *http.Transport
+       logger    log.FieldLogger
+       cluster   *arvados.Cluster
 }
 
 // MakeRESTRouter returns an http.Handler that passes GET and PUT
 // requests to the appropriate handlers.
-func MakeRESTRouter(kc *keepclient.KeepClient, timeout time.Duration, mgmtToken string) http.Handler {
+func MakeRESTRouter(kc *keepclient.KeepClient, timeout time.Duration, cluster *arvados.Cluster, logger log.FieldLogger) (http.Handler, error) {
        rest := mux.NewRouter()
 
        transport := defaultTransport
@@ -279,15 +327,22 @@ func MakeRESTRouter(kc *keepclient.KeepClient, timeout time.Duration, mgmtToken
        transport.TLSClientConfig = arvadosclient.MakeTLSConfig(kc.Arvados.ApiInsecure)
        transport.TLSHandshakeTimeout = keepclient.DefaultTLSHandshakeTimeout
 
+       cacheQ, err := lru.New2Q(500)
+       if err != nil {
+               return nil, fmt.Errorf("Error from lru.New2Q: %v", err)
+       }
+
        h := &proxyHandler{
                Handler:    rest,
                KeepClient: kc,
                timeout:    timeout,
                transport:  &transport,
-               ApiTokenCache: &ApiTokenCache{
-                       tokens:     make(map[string]int64),
+               APITokenCache: &APITokenCache{
+                       tokens:     cacheQ,
                        expireTime: 300,
                },
+               logger:  logger,
+               cluster: cluster,
        }
 
        rest.HandleFunc(`/{locator:[0-9a-f]{32}\+.*}`, h.Get).Methods("GET", "HEAD")
@@ -306,19 +361,19 @@ func MakeRESTRouter(kc *keepclient.KeepClient, timeout time.Duration, mgmtToken
        rest.HandleFunc(`/`, h.Options).Methods("OPTIONS")
 
        rest.Handle("/_health/{check}", &health.Handler{
-               Token:  mgmtToken,
+               Token:  cluster.ManagementToken,
                Prefix: "/_health/",
        }).Methods("GET")
 
        rest.NotFoundHandler = InvalidPathHandler{}
-       return h
+       return h, nil
 }
 
 var errLoopDetected = errors.New("loop detected")
 
-func (*proxyHandler) checkLoop(resp http.ResponseWriter, req *http.Request) error {
+func (*proxyHandler) checkLoop(resp http.ResponseWriter, req *http.Request) error {
        if via := req.Header.Get("Via"); strings.Index(via, " "+viaAlias) >= 0 {
-               log.Printf("proxy loop detected (request has Via: %q): perhaps keepproxy is misidentified by gateway config as an external client, or its keep_services record does not have service_type=proxy?", via)
+               h.logger.Printf("proxy loop detected (request has Via: %q): perhaps keepproxy is misidentified by gateway config as an external client, or its keep_services record does not have service_type=proxy?", via)
                http.Error(resp, errLoopDetected.Error(), http.StatusInternalServerError)
                return errLoopDetected
        }
@@ -344,9 +399,9 @@ func (h *proxyHandler) Options(resp http.ResponseWriter, req *http.Request) {
        SetCorsHeaders(resp)
 }
 
-var BadAuthorizationHeader = errors.New("Missing or invalid Authorization header")
-var ContentLengthMismatch = errors.New("Actual length != expected content length")
-var MethodNotSupported = errors.New("Method not supported")
+var errBadAuthorizationHeader = errors.New("Missing or invalid Authorization header, or method not allowed")
+var errContentLengthMismatch = errors.New("Actual length != expected content length")
+var errMethodNotSupported = errors.New("Method not supported")
 
 var removeHint, _ = regexp.Compile("\\+K@[a-z0-9]{5}(\\+|$)")
 
@@ -374,8 +429,9 @@ func (h *proxyHandler) Get(resp http.ResponseWriter, req *http.Request) {
 
        var pass bool
        var tok string
-       if pass, tok = CheckAuthorizationHeader(kc, h.ApiTokenCache, req); !pass {
-               status, err = http.StatusForbidden, BadAuthorizationHeader
+       var user *arvados.User
+       if pass, tok, user = h.CheckAuthorizationHeader(req); !pass {
+               status, err = http.StatusForbidden, errBadAuthorizationHeader
                return
        }
 
@@ -388,6 +444,18 @@ func (h *proxyHandler) Get(resp http.ResponseWriter, req *http.Request) {
 
        locator = removeHint.ReplaceAllString(locator, "$1")
 
+       if locator != "" {
+               parts := strings.SplitN(locator, "+", 3)
+               if len(parts) >= 2 {
+                       logger := h.logger
+                       if user != nil {
+                               logger = logger.WithField("user_uuid", user.UUID).
+                                       WithField("user_full_name", user.FullName)
+                       }
+                       logger.WithField("locator", fmt.Sprintf("%s+%s", parts[0], parts[1])).Infof("Block download")
+               }
+       }
+
        switch req.Method {
        case "HEAD":
                expectLength, proxiedURI, err = kc.Ask(locator)
@@ -397,7 +465,7 @@ func (h *proxyHandler) Get(resp http.ResponseWriter, req *http.Request) {
                        defer reader.Close()
                }
        default:
-               status, err = http.StatusNotImplemented, MethodNotSupported
+               status, err = http.StatusNotImplemented, errMethodNotSupported
                return
        }
 
@@ -415,7 +483,7 @@ func (h *proxyHandler) Get(resp http.ResponseWriter, req *http.Request) {
                case "GET":
                        responseLength, err = io.Copy(resp, reader)
                        if err == nil && expectLength > -1 && responseLength != expectLength {
-                               err = ContentLengthMismatch
+                               err = errContentLengthMismatch
                        }
                }
        case keepclient.Error:
@@ -431,8 +499,8 @@ func (h *proxyHandler) Get(resp http.ResponseWriter, req *http.Request) {
        }
 }
 
-var LengthRequiredError = errors.New(http.StatusText(http.StatusLengthRequired))
-var LengthMismatchError = errors.New("Locator size hint does not match Content-Length header")
+var errLengthRequired = errors.New(http.StatusText(http.StatusLengthRequired))
+var errLengthMismatch = errors.New("Locator size hint does not match Content-Length header")
 
 func (h *proxyHandler) Put(resp http.ResponseWriter, req *http.Request) {
        if err := h.checkLoop(resp, req); err != nil {
@@ -464,12 +532,12 @@ func (h *proxyHandler) Put(resp http.ResponseWriter, req *http.Request) {
                for _, sc := range strings.Split(req.Header.Get("X-Keep-Storage-Classes"), ",") {
                        scl = append(scl, strings.Trim(sc, " "))
                }
-               kc.StorageClasses = scl
+               kc.SetStorageClasses(scl)
        }
 
        _, err = fmt.Sscanf(req.Header.Get("Content-Length"), "%d", &expectLength)
        if err != nil || expectLength < 0 {
-               err = LengthRequiredError
+               err = errLengthRequired
                status = http.StatusLengthRequired
                return
        }
@@ -480,7 +548,7 @@ func (h *proxyHandler) Put(resp http.ResponseWriter, req *http.Request) {
                        status = http.StatusBadRequest
                        return
                } else if loc.Size > 0 && int64(loc.Size) != expectLength {
-                       err = LengthMismatchError
+                       err = errLengthMismatch
                        status = http.StatusBadRequest
                        return
                }
@@ -488,8 +556,9 @@ func (h *proxyHandler) Put(resp http.ResponseWriter, req *http.Request) {
 
        var pass bool
        var tok string
-       if pass, tok = CheckAuthorizationHeader(kc, h.ApiTokenCache, req); !pass {
-               err = BadAuthorizationHeader
+       var user *arvados.User
+       if pass, tok, user = h.CheckAuthorizationHeader(req); !pass {
+               err = errBadAuthorizationHeader
                status = http.StatusForbidden
                return
        }
@@ -500,9 +569,9 @@ func (h *proxyHandler) Put(resp http.ResponseWriter, req *http.Request) {
        kc.Arvados = &arvclient
 
        // Check if the client specified the number of replicas
-       if req.Header.Get("X-Keep-Desired-Replicas") != "" {
+       if desiredReplicas := req.Header.Get(keepclient.XKeepDesiredReplicas); desiredReplicas != "" {
                var r int
-               _, err := fmt.Sscanf(req.Header.Get(keepclient.X_Keep_Desired_Replicas), "%d", &r)
+               _, err := fmt.Sscanf(desiredReplicas, "%d", &r)
                if err == nil {
                        kc.Want_replicas = r
                }
@@ -521,29 +590,46 @@ func (h *proxyHandler) Put(resp http.ResponseWriter, req *http.Request) {
                locatorOut, wroteReplicas, err = kc.PutHR(locatorIn, req.Body, expectLength)
        }
 
+       if locatorOut != "" {
+               parts := strings.SplitN(locatorOut, "+", 3)
+               if len(parts) >= 2 {
+                       logger := h.logger
+                       if user != nil {
+                               logger = logger.WithField("user_uuid", user.UUID).
+                                       WithField("user_full_name", user.FullName)
+                       }
+                       logger.WithField("locator", fmt.Sprintf("%s+%s", parts[0], parts[1])).Infof("Block upload")
+               }
+       }
+
        // Tell the client how many successful PUTs we accomplished
-       resp.Header().Set(keepclient.X_Keep_Replicas_Stored, fmt.Sprintf("%d", wroteReplicas))
+       resp.Header().Set(keepclient.XKeepReplicasStored, fmt.Sprintf("%d", wroteReplicas))
 
        switch err.(type) {
        case nil:
                status = http.StatusOK
+               if len(kc.StorageClasses) > 0 {
+                       // A successful PUT request with storage classes means that all
+                       // storage classes were fulfilled, so the client will get a
+                       // confirmation via the X-Storage-Classes-Confirmed header.
+                       hdr := ""
+                       isFirst := true
+                       for _, sc := range kc.StorageClasses {
+                               if isFirst {
+                                       hdr = fmt.Sprintf("%s=%d", sc, wroteReplicas)
+                                       isFirst = false
+                               } else {
+                                       hdr += fmt.Sprintf(", %s=%d", sc, wroteReplicas)
+                               }
+                       }
+                       resp.Header().Set(keepclient.XKeepStorageClassesConfirmed, hdr)
+               }
                _, err = io.WriteString(resp, locatorOut)
-
        case keepclient.OversizeBlockError:
                // Too much data
                status = http.StatusRequestEntityTooLarge
-
        case keepclient.InsufficientReplicasError:
-               if wroteReplicas > 0 {
-                       // At least one write is considered success.  The
-                       // client can decide if getting less than the number of
-                       // replications it asked for is a fatal error.
-                       status = http.StatusOK
-                       _, err = io.WriteString(resp, locatorOut)
-               } else {
-                       status = http.StatusServiceUnavailable
-               }
-
+               status = http.StatusServiceUnavailable
        default:
                status = http.StatusBadGateway
        }
@@ -570,9 +656,9 @@ func (h *proxyHandler) Index(resp http.ResponseWriter, req *http.Request) {
        }()
 
        kc := h.makeKeepClient(req)
-       ok, token := CheckAuthorizationHeader(kc, h.ApiTokenCache, req)
+       ok, token, _ := h.CheckAuthorizationHeader(req)
        if !ok {
-               status, err = http.StatusForbidden, BadAuthorizationHeader
+               status, err = http.StatusForbidden, errBadAuthorizationHeader
                return
        }
 
@@ -583,7 +669,7 @@ func (h *proxyHandler) Index(resp http.ResponseWriter, req *http.Request) {
 
        // Only GET method is supported
        if req.Method != "GET" {
-               status, err = http.StatusNotImplemented, MethodNotSupported
+               status, err = http.StatusNotImplemented, errMethodNotSupported
                return
        }