X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/3b1598bb557a30f9a896ed36988702c5ae9f2ba9..554fe927169e928d91c2d8c4bed158aef4d4d746:/services/keepstore/handlers.go diff --git a/services/keepstore/handlers.go b/services/keepstore/handlers.go index 2b96dbc582..adaaa361e9 100644 --- a/services/keepstore/handlers.go +++ b/services/keepstore/handlers.go @@ -9,26 +9,35 @@ package main import ( "container/list" + "context" "crypto/md5" "encoding/json" "fmt" "github.com/gorilla/mux" "io" - "log" "net/http" "os" "regexp" "runtime" "strconv" + "strings" "sync" "time" + + "git.curoverse.com/arvados.git/sdk/go/httpserver" + log "github.com/Sirupsen/logrus" ) -// MakeRESTRouter returns a new mux.Router that forwards all Keep -// requests to the appropriate handlers. -// -func MakeRESTRouter() *mux.Router { +type router struct { + *mux.Router + limiter httpserver.RequestCounter +} + +// MakeRESTRouter returns a new router that forwards all Keep requests +// to the appropriate handlers. +func MakeRESTRouter() *router { rest := mux.NewRouter() + rtr := &router{Router: rest} rest.HandleFunc( `/{hash:[0-9a-f]{32}}`, GetBlockHandler).Methods("GET", "HEAD") @@ -44,8 +53,11 @@ func MakeRESTRouter() *mux.Router { // Privileged client only. rest.HandleFunc(`/index/{prefix:[0-9a-f]{0,32}}`, IndexHandler).Methods("GET", "HEAD") + // Internals/debugging info (runtime.MemStats) + rest.HandleFunc(`/debug.json`, rtr.DebugHandler).Methods("GET", "HEAD") + // List volumes: path, device number, bytes used/avail. - rest.HandleFunc(`/status.json`, StatusHandler).Methods("GET", "HEAD") + rest.HandleFunc(`/status.json`, rtr.StatusHandler).Methods("GET", "HEAD") // Replace the current pull queue. rest.HandleFunc(`/pull`, PullHandler).Methods("PUT") @@ -53,11 +65,14 @@ func MakeRESTRouter() *mux.Router { // Replace the current trash queue. rest.HandleFunc(`/trash`, TrashHandler).Methods("PUT") + // Untrash moves blocks from trash back into store + rest.HandleFunc(`/untrash/{hash:[0-9a-f]{32}}`, UntrashHandler).Methods("PUT") + // Any request which does not match any of these routes gets // 400 Bad Request. rest.NotFoundHandler = http.HandlerFunc(BadRequestHandler) - return rest + return rtr } // BadRequestHandler is a HandleFunc to address bad requests. @@ -67,30 +82,88 @@ func BadRequestHandler(w http.ResponseWriter, r *http.Request) { // GetBlockHandler is a HandleFunc to address Get block requests. func GetBlockHandler(resp http.ResponseWriter, req *http.Request) { - if enforcePermissions { + ctx, cancel := contextForResponse(context.TODO(), resp) + defer cancel() + + if theConfig.RequireSignatures { locator := req.URL.Path[1:] // strip leading slash - if err := VerifySignature(locator, GetApiToken(req)); err != nil { + if err := VerifySignature(locator, GetAPIToken(req)); err != nil { http.Error(resp, err.Error(), err.(*KeepError).HTTPCode) return } } - block, err := GetBlock(mux.Vars(req)["hash"]) + // TODO: Probe volumes to check whether the block _might_ + // exist. Some volumes/types could support a quick existence + // check without causing other operations to suffer. If all + // volumes support that, and assure us the block definitely + // isn't here, we can return 404 now instead of waiting for a + // buffer. + + buf, err := getBufferWithContext(ctx, bufs, BlockSize) + if err != nil { + http.Error(resp, err.Error(), http.StatusServiceUnavailable) + return + } + defer bufs.Put(buf) + + size, err := GetBlock(ctx, mux.Vars(req)["hash"], buf, resp) if err != nil { - // This type assertion is safe because the only errors - // GetBlock can return are DiskHashError or NotFoundError. - http.Error(resp, err.Error(), err.(*KeepError).HTTPCode) + code := http.StatusInternalServerError + if err, ok := err.(*KeepError); ok { + code = err.HTTPCode + } + http.Error(resp, err.Error(), code) return } - defer bufs.Put(block) - resp.Header().Set("Content-Length", strconv.Itoa(len(block))) + resp.Header().Set("Content-Length", strconv.Itoa(size)) resp.Header().Set("Content-Type", "application/octet-stream") - resp.Write(block) + resp.Write(buf[:size]) +} + +// Return a new context that gets cancelled by resp's CloseNotifier. +func contextForResponse(parent context.Context, resp http.ResponseWriter) (context.Context, context.CancelFunc) { + ctx, cancel := context.WithCancel(parent) + if cn, ok := resp.(http.CloseNotifier); ok { + go func(c <-chan bool) { + select { + case <-c: + theConfig.debugLogf("cancel context") + cancel() + case <-ctx.Done(): + } + }(cn.CloseNotify()) + } + return ctx, cancel +} + +// Get a buffer from the pool -- but give up and return a non-nil +// error if ctx ends before we get a buffer. +func getBufferWithContext(ctx context.Context, bufs *bufferPool, bufSize int) ([]byte, error) { + bufReady := make(chan []byte) + go func() { + bufReady <- bufs.Get(bufSize) + }() + select { + case buf := <-bufReady: + return buf, nil + case <-ctx.Done(): + go func() { + // Even if closeNotifier happened first, we + // need to keep waiting for our buf so we can + // return it to the pool. + bufs.Put(<-bufReady) + }() + return nil, ErrClientDisconnect + } } // PutBlockHandler is a HandleFunc to address Put block requests. func PutBlockHandler(resp http.ResponseWriter, req *http.Request) { + ctx, cancel := contextForResponse(context.TODO(), resp) + defer cancel() + hash := mux.Vars(req)["hash"] // Detect as many error conditions as possible before reading @@ -112,38 +185,47 @@ func PutBlockHandler(resp http.ResponseWriter, req *http.Request) { return } - buf := bufs.Get(int(req.ContentLength)) - _, err := io.ReadFull(req.Body, buf) + buf, err := getBufferWithContext(ctx, bufs, int(req.ContentLength)) + if err != nil { + http.Error(resp, err.Error(), http.StatusServiceUnavailable) + return + } + + _, err = io.ReadFull(req.Body, buf) if err != nil { http.Error(resp, err.Error(), 500) bufs.Put(buf) return } - err = PutBlock(buf, hash) + replication, err := PutBlock(ctx, buf, hash) bufs.Put(buf) if err != nil { - ke := err.(*KeepError) - http.Error(resp, ke.Error(), ke.HTTPCode) + code := http.StatusInternalServerError + if err, ok := err.(*KeepError); ok { + code = err.HTTPCode + } + http.Error(resp, err.Error(), code) return } // Success; add a size hint, sign the locator if possible, and // return it to the client. returnHash := fmt.Sprintf("%s+%d", hash, req.ContentLength) - apiToken := GetApiToken(req) - if PermissionSecret != nil && apiToken != "" { - expiry := time.Now().Add(blobSignatureTTL) + apiToken := GetAPIToken(req) + if theConfig.blobSigningKey != nil && apiToken != "" { + expiry := time.Now().Add(theConfig.BlobSignatureTTL.Duration()) returnHash = SignLocator(returnHash, apiToken, expiry) } + resp.Header().Set("X-Keep-Replicas-Stored", strconv.Itoa(replication)) resp.Write([]byte(returnHash + "\n")) } // IndexHandler is a HandleFunc to address /index and /index/{prefix} requests. func IndexHandler(resp http.ResponseWriter, req *http.Request) { // Reject unauthorized requests. - if !IsDataManagerToken(GetApiToken(req)) { + if !IsSystemAuth(GetAPIToken(req)) { http.Error(resp, UnauthorizedError.Error(), UnauthorizedError.HTTPCode) return } @@ -167,18 +249,6 @@ func IndexHandler(resp http.ResponseWriter, req *http.Request) { resp.Write([]byte{'\n'}) } -// StatusHandler -// Responds to /status.json requests with the current node status, -// described in a JSON structure. -// -// The data given in a status.json response includes: -// volumes - a list of Keep volumes currently in use by this server -// each volume is an object with the following fields: -// * mount_point -// * device_num (an integer identifying the underlying filesystem) -// * bytes_free -// * bytes_used - // PoolStatus struct type PoolStatus struct { Alloc uint64 `json:"BytesAllocated"` @@ -186,22 +256,43 @@ type PoolStatus struct { Len int `json:"BuffersInUse"` } +type volumeStatusEnt struct { + Label string + Status *VolumeStatus `json:",omitempty"` + VolumeStats *ioStats `json:",omitempty"` + InternalStats interface{} `json:",omitempty"` +} + // NodeStatus struct type NodeStatus struct { - Volumes []*VolumeStatus `json:"volumes"` - BufferPool PoolStatus - PullQueue WorkQueueStatus - TrashQueue WorkQueueStatus - Memory runtime.MemStats + Volumes []*volumeStatusEnt + BufferPool PoolStatus + PullQueue WorkQueueStatus + TrashQueue WorkQueueStatus + RequestsCurrent int + RequestsMax int } var st NodeStatus var stLock sync.Mutex +// DebugHandler addresses /debug.json requests. +func (rtr *router) DebugHandler(resp http.ResponseWriter, req *http.Request) { + type debugStats struct { + MemStats runtime.MemStats + } + var ds debugStats + runtime.ReadMemStats(&ds.MemStats) + err := json.NewEncoder(resp).Encode(&ds) + if err != nil { + http.Error(resp, err.Error(), 500) + } +} + // StatusHandler addresses /status.json requests. -func StatusHandler(resp http.ResponseWriter, req *http.Request) { +func (rtr *router) StatusHandler(resp http.ResponseWriter, req *http.Request) { stLock.Lock() - readNodeStatus(&st) + rtr.readNodeStatus(&st) jstat, err := json.Marshal(&st) stLock.Unlock() if err == nil { @@ -214,23 +305,33 @@ func StatusHandler(resp http.ResponseWriter, req *http.Request) { } // populate the given NodeStatus struct with current values. -func readNodeStatus(st *NodeStatus) { +func (rtr *router) readNodeStatus(st *NodeStatus) { vols := KeepVM.AllReadable() if cap(st.Volumes) < len(vols) { - st.Volumes = make([]*VolumeStatus, len(vols)) + st.Volumes = make([]*volumeStatusEnt, len(vols)) } st.Volumes = st.Volumes[:0] for _, vol := range vols { - if s := vol.Status(); s != nil { - st.Volumes = append(st.Volumes, s) + var internalStats interface{} + if vol, ok := vol.(InternalStatser); ok { + internalStats = vol.InternalStats() } + st.Volumes = append(st.Volumes, &volumeStatusEnt{ + Label: vol.String(), + Status: vol.Status(), + InternalStats: internalStats, + //VolumeStats: KeepVM.VolumeStats(vol), + }) } st.BufferPool.Alloc = bufs.Alloc() st.BufferPool.Cap = bufs.Cap() st.BufferPool.Len = bufs.Len() st.PullQueue = getWorkQueueStatus(pullq) st.TrashQueue = getWorkQueueStatus(trashq) - runtime.ReadMemStats(&st.Memory) + if rtr.limiter != nil { + st.RequestsCurrent = rtr.limiter.Current() + st.RequestsMax = rtr.limiter.Max() + } } // return a WorkQueueStatus for the given queue. If q is nil (which @@ -275,13 +376,13 @@ func DeleteHandler(resp http.ResponseWriter, req *http.Request) { hash := mux.Vars(req)["hash"] // Confirm that this user is an admin and has a token with unlimited scope. - var tok = GetApiToken(req) + var tok = GetAPIToken(req) if tok == "" || !CanDelete(tok) { http.Error(resp, PermissionError.Error(), PermissionError.HTTPCode) return } - if neverDelete { + if !theConfig.EnableDelete { http.Error(resp, MethodDisabledError.Error(), MethodDisabledError.HTTPCode) return } @@ -294,7 +395,7 @@ func DeleteHandler(resp http.ResponseWriter, req *http.Request) { Failed int `json:"copies_failed"` } for _, vol := range KeepVM.AllWritable() { - if err := vol.Delete(hash); err == nil { + if err := vol.Trash(hash); err == nil { result.Deleted++ } else if os.IsNotExist(err) { continue @@ -366,7 +467,7 @@ type PullRequest struct { // PullHandler processes "PUT /pull" requests for the data manager. func PullHandler(resp http.ResponseWriter, req *http.Request) { // Reject unauthorized requests. - if !IsDataManagerToken(GetApiToken(req)) { + if !IsSystemAuth(GetAPIToken(req)) { http.Error(resp, UnauthorizedError.Error(), UnauthorizedError.HTTPCode) return } @@ -402,7 +503,7 @@ type TrashRequest struct { // TrashHandler processes /trash requests. func TrashHandler(resp http.ResponseWriter, req *http.Request) { // Reject unauthorized requests. - if !IsDataManagerToken(GetApiToken(req)) { + if !IsSystemAuth(GetAPIToken(req)) { http.Error(resp, UnauthorizedError.Error(), UnauthorizedError.HTTPCode) return } @@ -429,7 +530,53 @@ func TrashHandler(resp http.ResponseWriter, req *http.Request) { trashq.ReplaceQueue(tlist) } -// ============================== +// UntrashHandler processes "PUT /untrash/{hash:[0-9a-f]{32}}" requests for the data manager. +func UntrashHandler(resp http.ResponseWriter, req *http.Request) { + // Reject unauthorized requests. + if !IsSystemAuth(GetAPIToken(req)) { + http.Error(resp, UnauthorizedError.Error(), UnauthorizedError.HTTPCode) + return + } + + hash := mux.Vars(req)["hash"] + + if len(KeepVM.AllWritable()) == 0 { + http.Error(resp, "No writable volumes", http.StatusNotFound) + return + } + + var untrashedOn, failedOn []string + var numNotFound int + for _, vol := range KeepVM.AllWritable() { + err := vol.Untrash(hash) + + if os.IsNotExist(err) { + numNotFound++ + } else if err != nil { + log.Printf("Error untrashing %v on volume %v", hash, vol.String()) + failedOn = append(failedOn, vol.String()) + } else { + log.Printf("Untrashed %v on volume %v", hash, vol.String()) + untrashedOn = append(untrashedOn, vol.String()) + } + } + + if numNotFound == len(KeepVM.AllWritable()) { + http.Error(resp, "Block not found on any of the writable volumes", http.StatusNotFound) + return + } + + if len(failedOn) == len(KeepVM.AllWritable()) { + http.Error(resp, "Failed to untrash on all writable volumes", http.StatusInternalServerError) + } else { + respBody := "Successfully untrashed on: " + strings.Join(untrashedOn, ",") + if len(failedOn) > 0 { + respBody += "; Failed to untrash on: " + strings.Join(failedOn, ",") + } + resp.Write([]byte(respBody)) + } +} + // GetBlock and PutBlock implement lower-level code for handling // blocks by rooting through volumes connected to the local machine. // Once the handler has determined that system policy permits the @@ -440,24 +587,26 @@ func TrashHandler(resp http.ResponseWriter, req *http.Request) { // should be the only part of the code that cares about which volume a // block is stored on, so it should be responsible for figuring out // which volume to check for fetching blocks, storing blocks, etc. -// ============================== -// GetBlock fetches and returns the block identified by "hash". -// -// On success, GetBlock returns a byte slice with the block data, and -// a nil error. +// GetBlock fetches the block identified by "hash" into the provided +// buf, and returns the data size. // // If the block cannot be found on any volume, returns NotFoundError. // // If the block found does not have the correct MD5 hash, returns // DiskHashError. // -func GetBlock(hash string) ([]byte, error) { +func GetBlock(ctx context.Context, hash string, buf []byte, resp http.ResponseWriter) (int, error) { // Attempt to read the requested hash from a keep volume. errorToCaller := NotFoundError for _, vol := range KeepVM.AllReadable() { - buf, err := vol.Get(hash) + size, err := vol.Get(ctx, hash, buf) + select { + case <-ctx.Done(): + return 0, ErrClientDisconnect + default: + } if err != nil { // IsNotExist is an expected error and may be // ignored. All other errors are logged. In @@ -471,28 +620,27 @@ func GetBlock(hash string) ([]byte, error) { } // Check the file checksum. // - filehash := fmt.Sprintf("%x", md5.Sum(buf)) + filehash := fmt.Sprintf("%x", md5.Sum(buf[:size])) if filehash != hash { // TODO: Try harder to tell a sysadmin about // this. log.Printf("%s: checksum mismatch for request %s (actual %s)", vol, hash, filehash) errorToCaller = DiskHashError - bufs.Put(buf) continue } if errorToCaller == DiskHashError { log.Printf("%s: checksum mismatch for request %s but a good copy was found on another volume and returned", vol, hash) } - return buf, nil + return size, nil } - return nil, errorToCaller + return 0, errorToCaller } // PutBlock Stores the BLOCK (identified by the content id HASH) in Keep. // -// PutBlock(block, hash) +// PutBlock(ctx, block, hash) // Stores the BLOCK (identified by the content id HASH) in Keep. // // The MD5 checksum of the block must be identical to the content id HASH. @@ -517,40 +665,48 @@ func GetBlock(hash string) ([]byte, error) { // all writes failed). The text of the error message should // provide as much detail as possible. // -func PutBlock(block []byte, hash string) error { +func PutBlock(ctx context.Context, block []byte, hash string) (int, error) { // Check that BLOCK's checksum matches HASH. blockhash := fmt.Sprintf("%x", md5.Sum(block)) if blockhash != hash { log.Printf("%s: MD5 checksum %s did not match request", hash, blockhash) - return RequestHashError + return 0, RequestHashError } // If we already have this data, it's intact on disk, and we // can update its timestamp, return success. If we have // different data with the same hash, return failure. - if err := CompareAndTouch(hash, block); err == nil || err == CollisionError { - return err + if n, err := CompareAndTouch(ctx, hash, block); err == nil || err == CollisionError { + return n, err + } else if ctx.Err() != nil { + return 0, ErrClientDisconnect } // Choose a Keep volume to write to. // If this volume fails, try all of the volumes in order. if vol := KeepVM.NextWritable(); vol != nil { - if err := vol.Put(hash, block); err == nil { - return nil // success! + if err := vol.Put(ctx, hash, block); err == nil { + return vol.Replication(), nil // success! + } + if ctx.Err() != nil { + return 0, ErrClientDisconnect } } writables := KeepVM.AllWritable() if len(writables) == 0 { log.Print("No writable volumes.") - return FullError + return 0, FullError } allFull := true for _, vol := range writables { - err := vol.Put(hash, block) + err := vol.Put(ctx, hash, block) + if ctx.Err() != nil { + return 0, ErrClientDisconnect + } if err == nil { - return nil // success! + return vol.Replication(), nil // success! } if err != FullError { // The volume is not full but the @@ -563,27 +719,31 @@ func PutBlock(block []byte, hash string) error { if allFull { log.Print("All volumes are full.") - return FullError + return 0, FullError } // Already logged the non-full errors. - return GenericError + return 0, GenericError } -// CompareAndTouch returns nil if one of the volumes already has the -// given content and it successfully updates the relevant block's -// modification time in order to protect it from premature garbage -// collection. -func CompareAndTouch(hash string, buf []byte) error { +// CompareAndTouch returns the current replication level if one of the +// volumes already has the given content and it successfully updates +// the relevant block's modification time in order to protect it from +// premature garbage collection. Otherwise, it returns a non-nil +// error. +func CompareAndTouch(ctx context.Context, hash string, buf []byte) (int, error) { var bestErr error = NotFoundError for _, vol := range KeepVM.AllWritable() { - if err := vol.Compare(hash, buf); err == CollisionError { + err := vol.Compare(ctx, hash, buf) + if ctx.Err() != nil { + return 0, ctx.Err() + } else if err == CollisionError { // Stop if we have a block with same hash but // different content. (It will be impossible // to tell which one is wanted if we have // both, so there's no point writing it even // on a different volume.) log.Printf("%s: Compare(%s): %s", vol, hash, err) - return err + return 0, err } else if os.IsNotExist(err) { // Block does not exist. This is the only // "normal" error: we don't log anything. @@ -601,9 +761,9 @@ func CompareAndTouch(hash string, buf []byte) error { continue } // Compare and Touch both worked --> done. - return nil + return vol.Replication(), nil } - return bestErr + return 0, bestErr } var validLocatorRe = regexp.MustCompile(`^[0-9a-f]{32}$`) @@ -618,10 +778,10 @@ func IsValidLocator(loc string) bool { var authRe = regexp.MustCompile(`^OAuth2\s+(.*)`) -// GetApiToken returns the OAuth2 token from the Authorization +// GetAPIToken returns the OAuth2 token from the Authorization // header of a HTTP request, or an empty string if no matching // token is found. -func GetApiToken(req *http.Request) string { +func GetAPIToken(req *http.Request) string { if auth, ok := req.Header["Authorization"]; ok { if match := authRe.FindStringSubmatch(auth[0]); match != nil { return match[1] @@ -650,7 +810,7 @@ func CanDelete(apiToken string) bool { } // Blocks may be deleted only when Keep has been configured with a // data manager. - if IsDataManagerToken(apiToken) { + if IsSystemAuth(apiToken) { return true } // TODO(twp): look up apiToken with the API server @@ -659,8 +819,8 @@ func CanDelete(apiToken string) bool { return false } -// IsDataManagerToken returns true if apiToken represents the data -// manager's token. -func IsDataManagerToken(apiToken string) bool { - return dataManagerToken != "" && apiToken == dataManagerToken +// IsSystemAuth returns true if the given token is allowed to perform +// system level actions like deleting data. +func IsSystemAuth(token string) bool { + return token != "" && token == theConfig.systemAuthToken }