15003: Merge branch 'master' into 15003-preprocess-config
[arvados.git] / services / keepstore / handlers.go
index e4f025d6b1d71cd0e490da6f5c6525b97273874a..9a4d02df850fab836cdafaa4e21abb070b492782 100644 (file)
@@ -32,17 +32,16 @@ type router struct {
        limiter     httpserver.RequestCounter
        cluster     *arvados.Cluster
        remoteProxy remoteProxy
-       registry    *prometheus.Registry
-       metrics     nodeMetrics
+       metrics     *nodeMetrics
 }
 
 // MakeRESTRouter returns a new router that forwards all Keep requests
 // to the appropriate handlers.
-func MakeRESTRouter(cluster *arvados.Cluster) http.Handler {
+func MakeRESTRouter(cluster *arvados.Cluster, reg *prometheus.Registry) http.Handler {
        rtr := &router{
-               Router:   mux.NewRouter(),
-               cluster:  cluster,
-               registry: prometheus.NewRegistry(),
+               Router:  mux.NewRouter(),
+               cluster: cluster,
+               metrics: &nodeMetrics{reg: reg},
        }
 
        rtr.HandleFunc(
@@ -89,13 +88,12 @@ func MakeRESTRouter(cluster *arvados.Cluster) http.Handler {
        rtr.NotFoundHandler = http.HandlerFunc(BadRequestHandler)
 
        rtr.limiter = httpserver.NewRequestLimiter(theConfig.MaxRequests, rtr)
-       rtr.metrics = nodeMetrics{
-               reg: rtr.registry,
-               rc:  rtr.limiter,
-       }
-       rtr.metrics.setup()
+       rtr.metrics.setupBufferPoolMetrics(bufs)
+       rtr.metrics.setupWorkQueueMetrics(pullq, "pull")
+       rtr.metrics.setupWorkQueueMetrics(trashq, "trash")
+       rtr.metrics.setupRequestMetrics(rtr.limiter)
 
-       instrumented := httpserver.Instrument(rtr.registry, nil,
+       instrumented := httpserver.Instrument(rtr.metrics.reg, nil,
                httpserver.AddRequestIDs(httpserver.LogRequests(nil, rtr.limiter)))
        return instrumented.ServeAPI(theConfig.ManagementToken, instrumented)
 }
@@ -279,13 +277,19 @@ func (rtr *router) IndexHandler(resp http.ResponseWriter, req *http.Request) {
 
        for _, v := range vols {
                if err := v.IndexTo(prefix, resp); err != nil {
-                       // The only errors returned by IndexTo are
-                       // write errors returned by resp.Write(),
-                       // which probably means the client has
-                       // disconnected and this error will never be
-                       // reported to the client -- but it will
-                       // appear in our own error log.
-                       http.Error(resp, err.Error(), http.StatusInternalServerError)
+                       // We can't send an error message to the
+                       // client because we might have already sent
+                       // headers and index content. All we can do is
+                       // log the error in our own logs, and (in
+                       // cases where headers haven't been sent yet)
+                       // set a 500 status.
+                       //
+                       // If headers have already been sent, the
+                       // client must notice the lack of trailing
+                       // newline as an indication that the response
+                       // is incomplete.
+                       log.Printf("index error from volume %s: %s", v, err)
+                       http.Error(resp, "", http.StatusInternalServerError)
                        return
                }
        }
@@ -677,6 +681,11 @@ func GetBlock(ctx context.Context, hash string, buf []byte, resp http.ResponseWr
                        if !os.IsNotExist(err) {
                                log.Printf("%s: Get(%s): %s", vol, hash, err)
                        }
+                       // If a volume returns a transient error (VolumeBusyError),
+                       // report that error to the caller instead of "Not found",
+                       // so the caller knows it can retry.
+                       if err == VolumeBusyError {
+                               errorToCaller = err.(*KeepError)
+                       }
                        continue
                }
                // Check the file checksum.