X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/79a9d03d3470ef013a908051a2bccddc680ecb5c..5b863886118890cc81b728a3a606ea823c836f2b:/services/keepstore/handlers.go diff --git a/services/keepstore/handlers.go b/services/keepstore/handlers.go index d84ede6ef6..51dd73a513 100644 --- a/services/keepstore/handlers.go +++ b/services/keepstore/handlers.go @@ -20,11 +20,11 @@ import ( "sync" "time" - "github.com/gorilla/mux" - "git.curoverse.com/arvados.git/sdk/go/arvados" "git.curoverse.com/arvados.git/sdk/go/health" "git.curoverse.com/arvados.git/sdk/go/httpserver" + "github.com/gorilla/mux" + "github.com/prometheus/client_golang/prometheus" ) type router struct { @@ -32,14 +32,16 @@ type router struct { limiter httpserver.RequestCounter cluster *arvados.Cluster remoteProxy remoteProxy + metrics *nodeMetrics } // MakeRESTRouter returns a new router that forwards all Keep requests // to the appropriate handlers. -func MakeRESTRouter(cluster *arvados.Cluster) http.Handler { +func MakeRESTRouter(cluster *arvados.Cluster, reg *prometheus.Registry) http.Handler { rtr := &router{ Router: mux.NewRouter(), cluster: cluster, + metrics: &nodeMetrics{reg: reg}, } rtr.HandleFunc( @@ -86,10 +88,14 @@ func MakeRESTRouter(cluster *arvados.Cluster) http.Handler { rtr.NotFoundHandler = http.HandlerFunc(BadRequestHandler) rtr.limiter = httpserver.NewRequestLimiter(theConfig.MaxRequests, rtr) + rtr.metrics.setupBufferPoolMetrics(bufs) + rtr.metrics.setupWorkQueueMetrics(pullq, "pull") + rtr.metrics.setupWorkQueueMetrics(trashq, "trash") + rtr.metrics.setupRequestMetrics(rtr.limiter) - stack := httpserver.Instrument(nil, nil, + instrumented := httpserver.Instrument(rtr.metrics.reg, nil, httpserver.AddRequestIDs(httpserver.LogRequests(nil, rtr.limiter))) - return stack.ServeAPI(stack) + return instrumented.ServeAPI(theConfig.ManagementToken, instrumented) } // BadRequestHandler is a HandleFunc to address bad requests. @@ -669,6 +675,11 @@ func GetBlock(ctx context.Context, hash string, buf []byte, resp http.ResponseWr if !os.IsNotExist(err) { log.Printf("%s: Get(%s): %s", vol, hash, err) } + // If some volume returns a transient error, return it to the caller + // instead of "Not found" so it can retry. + if err == VolumeBusyError { + errorToCaller = err.(*KeepError) + } continue } // Check the file checksum.