Merge remote-tracking branch 'origin/master' into 14645-fuse-operations-reporting
[arvados.git] / services / keepstore / handlers.go
index d84ede6ef6b599fbac4aea3c94f43ba8009ff035..51dd73a513c1d4c729a6743aaabe0cefa1202c4b 100644 (file)
@@ -20,11 +20,11 @@ import (
        "sync"
        "time"
 
-       "github.com/gorilla/mux"
-
        "git.curoverse.com/arvados.git/sdk/go/arvados"
        "git.curoverse.com/arvados.git/sdk/go/health"
        "git.curoverse.com/arvados.git/sdk/go/httpserver"
+       "github.com/gorilla/mux"
+       "github.com/prometheus/client_golang/prometheus"
 )
 
 type router struct {
@@ -32,14 +32,16 @@ type router struct {
        limiter     httpserver.RequestCounter
        cluster     *arvados.Cluster
        remoteProxy remoteProxy
+       metrics     *nodeMetrics
 }
 
 // MakeRESTRouter returns a new router that forwards all Keep requests
 // to the appropriate handlers.
-func MakeRESTRouter(cluster *arvados.Cluster) http.Handler {
+func MakeRESTRouter(cluster *arvados.Cluster, reg *prometheus.Registry) http.Handler {
        rtr := &router{
                Router:  mux.NewRouter(),
                cluster: cluster,
+               metrics: &nodeMetrics{reg: reg},
        }
 
        rtr.HandleFunc(
@@ -86,10 +88,14 @@ func MakeRESTRouter(cluster *arvados.Cluster) http.Handler {
        rtr.NotFoundHandler = http.HandlerFunc(BadRequestHandler)
 
        rtr.limiter = httpserver.NewRequestLimiter(theConfig.MaxRequests, rtr)
+       rtr.metrics.setupBufferPoolMetrics(bufs)
+       rtr.metrics.setupWorkQueueMetrics(pullq, "pull")
+       rtr.metrics.setupWorkQueueMetrics(trashq, "trash")
+       rtr.metrics.setupRequestMetrics(rtr.limiter)
 
-       stack := httpserver.Instrument(nil, nil,
+       instrumented := httpserver.Instrument(rtr.metrics.reg, nil,
                httpserver.AddRequestIDs(httpserver.LogRequests(nil, rtr.limiter)))
-       return stack.ServeAPI(stack)
+       return instrumented.ServeAPI(theConfig.ManagementToken, instrumented)
 }
 
 // BadRequestHandler is a HandleFunc to address bad requests.
@@ -669,6 +675,11 @@ func GetBlock(ctx context.Context, hash string, buf []byte, resp http.ResponseWr
                        if !os.IsNotExist(err) {
                                log.Printf("%s: Get(%s): %s", vol, hash, err)
                        }
+                       // If a volume returns a transient error, report that error to the
+                       // caller instead of "Not found" so the caller knows it can retry.
+                       if err == VolumeBusyError {
+                               errorToCaller = err.(*KeepError)
+                       }
                        continue
                }
                // Check the file checksum.