X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/f0553505e32ee00999d1d680da14260a9a0f6b99..5b863886118890cc81b728a3a606ea823c836f2b:/services/keepstore/handlers.go diff --git a/services/keepstore/handlers.go b/services/keepstore/handlers.go index e4f025d6b1..51dd73a513 100644 --- a/services/keepstore/handlers.go +++ b/services/keepstore/handlers.go @@ -32,17 +32,16 @@ type router struct { limiter httpserver.RequestCounter cluster *arvados.Cluster remoteProxy remoteProxy - registry *prometheus.Registry - metrics nodeMetrics + metrics *nodeMetrics } // MakeRESTRouter returns a new router that forwards all Keep requests // to the appropriate handlers. -func MakeRESTRouter(cluster *arvados.Cluster) http.Handler { +func MakeRESTRouter(cluster *arvados.Cluster, reg *prometheus.Registry) http.Handler { rtr := &router{ - Router: mux.NewRouter(), - cluster: cluster, - registry: prometheus.NewRegistry(), + Router: mux.NewRouter(), + cluster: cluster, + metrics: &nodeMetrics{reg: reg}, } rtr.HandleFunc( @@ -89,13 +88,12 @@ func MakeRESTRouter(cluster *arvados.Cluster) http.Handler { rtr.NotFoundHandler = http.HandlerFunc(BadRequestHandler) rtr.limiter = httpserver.NewRequestLimiter(theConfig.MaxRequests, rtr) - rtr.metrics = nodeMetrics{ - reg: rtr.registry, - rc: rtr.limiter, - } - rtr.metrics.setup() + rtr.metrics.setupBufferPoolMetrics(bufs) + rtr.metrics.setupWorkQueueMetrics(pullq, "pull") + rtr.metrics.setupWorkQueueMetrics(trashq, "trash") + rtr.metrics.setupRequestMetrics(rtr.limiter) - instrumented := httpserver.Instrument(rtr.registry, nil, + instrumented := httpserver.Instrument(rtr.metrics.reg, nil, httpserver.AddRequestIDs(httpserver.LogRequests(nil, rtr.limiter))) return instrumented.ServeAPI(theConfig.ManagementToken, instrumented) } @@ -677,6 +675,11 @@ func GetBlock(ctx context.Context, hash string, buf []byte, resp http.ResponseWr if !os.IsNotExist(err) { log.Printf("%s: Get(%s): %s", vol, hash, err) } + // If some volume returns a transient error, return it to the caller + // instead of "Not found" so it can retry. + if err == VolumeBusyError { + errorToCaller = err.(*KeepError) + } continue } // Check the file checksum.