+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
package main
// REST handlers for Keep are implemented here.
// StatusHandler (GET /status.json)
import (
- "bytes"
"container/list"
+ "context"
"crypto/md5"
"encoding/json"
"fmt"
- "github.com/gorilla/mux"
"io"
- "log"
"net/http"
"os"
"regexp"
"runtime"
"strconv"
"strings"
- "syscall"
+ "sync"
"time"
+
+ "github.com/gorilla/mux"
+
+ "git.curoverse.com/arvados.git/sdk/go/health"
+ "git.curoverse.com/arvados.git/sdk/go/httpserver"
)
-// MakeRESTRouter returns a new mux.Router that forwards all Keep
-// requests to the appropriate handlers.
-//
-func MakeRESTRouter() *mux.Router {
- rest := mux.NewRouter()
+type router struct {
+ *mux.Router
+ limiter httpserver.RequestCounter
+}
- rest.HandleFunc(
+// MakeRESTRouter returns a new router that forwards all Keep requests
+// to the appropriate handlers.
+func MakeRESTRouter() http.Handler {
+ rtr := &router{Router: mux.NewRouter()}
+
+ rtr.HandleFunc(
`/{hash:[0-9a-f]{32}}`, GetBlockHandler).Methods("GET", "HEAD")
- rest.HandleFunc(
+ rtr.HandleFunc(
`/{hash:[0-9a-f]{32}}+{hints}`,
GetBlockHandler).Methods("GET", "HEAD")
- rest.HandleFunc(`/{hash:[0-9a-f]{32}}`, PutBlockHandler).Methods("PUT")
- rest.HandleFunc(`/{hash:[0-9a-f]{32}}`, DeleteHandler).Methods("DELETE")
+ rtr.HandleFunc(`/{hash:[0-9a-f]{32}}`, PutBlockHandler).Methods("PUT")
+ rtr.HandleFunc(`/{hash:[0-9a-f]{32}}`, DeleteHandler).Methods("DELETE")
// List all blocks stored here. Privileged client only.
- rest.HandleFunc(`/index`, IndexHandler).Methods("GET", "HEAD")
+ rtr.HandleFunc(`/index`, rtr.IndexHandler).Methods("GET", "HEAD")
// List blocks stored here whose hash has the given prefix.
// Privileged client only.
- rest.HandleFunc(`/index/{prefix:[0-9a-f]{0,32}}`, IndexHandler).Methods("GET", "HEAD")
+ rtr.HandleFunc(`/index/{prefix:[0-9a-f]{0,32}}`, rtr.IndexHandler).Methods("GET", "HEAD")
+
+ // Internals/debugging info (runtime.MemStats)
+ rtr.HandleFunc(`/debug.json`, rtr.DebugHandler).Methods("GET", "HEAD")
// List volumes: path, device number, bytes used/avail.
- rest.HandleFunc(`/status.json`, StatusHandler).Methods("GET", "HEAD")
+ rtr.HandleFunc(`/status.json`, rtr.StatusHandler).Methods("GET", "HEAD")
+
+ // List mounts: UUID, readonly, tier, device ID, ...
+ rtr.HandleFunc(`/mounts`, rtr.MountsHandler).Methods("GET")
+ rtr.HandleFunc(`/mounts/{uuid}/blocks`, rtr.IndexHandler).Methods("GET")
+ rtr.HandleFunc(`/mounts/{uuid}/blocks/`, rtr.IndexHandler).Methods("GET")
// Replace the current pull queue.
- rest.HandleFunc(`/pull`, PullHandler).Methods("PUT")
+ rtr.HandleFunc(`/pull`, PullHandler).Methods("PUT")
// Replace the current trash queue.
- rest.HandleFunc(`/trash`, TrashHandler).Methods("PUT")
+ rtr.HandleFunc(`/trash`, TrashHandler).Methods("PUT")
+
+ // Untrash moves blocks from trash back into store
+ rtr.HandleFunc(`/untrash/{hash:[0-9a-f]{32}}`, UntrashHandler).Methods("PUT")
+
+ rtr.Handle("/_health/{check}", &health.Handler{
+ Token: theConfig.ManagementToken,
+ Prefix: "/_health/",
+ }).Methods("GET")
// Any request which does not match any of these routes gets
// 400 Bad Request.
- rest.NotFoundHandler = http.HandlerFunc(BadRequestHandler)
+ rtr.NotFoundHandler = http.HandlerFunc(BadRequestHandler)
+
+ theConfig.metrics.setup()
+
+ rtr.limiter = httpserver.NewRequestLimiter(theConfig.MaxRequests, rtr)
- return rest
+ mux := http.NewServeMux()
+ mux.Handle("/", theConfig.metrics.Instrument(
+ httpserver.AddRequestIDs(httpserver.LogRequests(rtr.limiter))))
+ mux.HandleFunc("/metrics.json", theConfig.metrics.exportJSON)
+ mux.Handle("/metrics", theConfig.metrics.exportProm)
+
+ return mux
}
+// BadRequestHandler responds with the generic 400 Bad Request error.
+// It is installed as the router's NotFoundHandler, so any request
+// matching no route gets this response.
func BadRequestHandler(w http.ResponseWriter, r *http.Request) {
	http.Error(w, BadRequestError.Error(), BadRequestError.HTTPCode)
}
+// GetBlockHandler is a HandleFunc to address Get block requests.
func GetBlockHandler(resp http.ResponseWriter, req *http.Request) {
- hash := mux.Vars(req)["hash"]
-
- hints := mux.Vars(req)["hints"]
-
- // Parse the locator string and hints from the request.
- // TODO(twp): implement a Locator type.
- var signature, timestamp string
- if hints != "" {
- signature_pat, _ := regexp.Compile("^A([[:xdigit:]]+)@([[:xdigit:]]{8})$")
- for _, hint := range strings.Split(hints, "+") {
- if match, _ := regexp.MatchString("^[[:digit:]]+$", hint); match {
- // Server ignores size hints
- } else if m := signature_pat.FindStringSubmatch(hint); m != nil {
- signature = m[1]
- timestamp = m[2]
- } else if match, _ := regexp.MatchString("^[[:upper:]]", hint); match {
- // Any unknown hint that starts with an uppercase letter is
- // presumed to be valid and ignored, to permit forward compatibility.
- } else {
- // Unknown format; not a valid locator.
- http.Error(resp, BadRequestError.Error(), BadRequestError.HTTPCode)
- return
- }
- }
- }
+ ctx, cancel := contextForResponse(context.TODO(), resp)
+ defer cancel()
- // If permission checking is in effect, verify this
- // request's permission signature.
- if enforce_permissions {
- if signature == "" || timestamp == "" {
- http.Error(resp, PermissionError.Error(), PermissionError.HTTPCode)
- return
- } else if IsExpired(timestamp) {
- http.Error(resp, ExpiredError.Error(), ExpiredError.HTTPCode)
+ if theConfig.RequireSignatures {
+ locator := req.URL.Path[1:] // strip leading slash
+ if err := VerifySignature(locator, GetAPIToken(req)); err != nil {
+ http.Error(resp, err.Error(), err.(*KeepError).HTTPCode)
return
- } else {
- req_locator := req.URL.Path[1:] // strip leading slash
- if !VerifySignature(req_locator, GetApiToken(req)) {
- http.Error(resp, PermissionError.Error(), PermissionError.HTTPCode)
- return
- }
}
}
- block, err := GetBlock(hash, false)
+ // TODO: Probe volumes to check whether the block _might_
+ // exist. Some volumes/types could support a quick existence
+ // check without causing other operations to suffer. If all
+ // volumes support that, and assure us the block definitely
+ // isn't here, we can return 404 now instead of waiting for a
+ // buffer.
- // Garbage collect after each GET. Fixes #2865.
- // TODO(twp): review Keep memory usage and see if there's
- // a better way to do this than blindly garbage collecting
- // after every block.
- defer runtime.GC()
+ buf, err := getBufferWithContext(ctx, bufs, BlockSize)
+ if err != nil {
+ http.Error(resp, err.Error(), http.StatusServiceUnavailable)
+ return
+ }
+ defer bufs.Put(buf)
+ size, err := GetBlock(ctx, mux.Vars(req)["hash"], buf, resp)
if err != nil {
- // This type assertion is safe because the only errors
- // GetBlock can return are DiskHashError or NotFoundError.
- http.Error(resp, err.Error(), err.(*KeepError).HTTPCode)
+ code := http.StatusInternalServerError
+ if err, ok := err.(*KeepError); ok {
+ code = err.HTTPCode
+ }
+ http.Error(resp, err.Error(), code)
return
}
- resp.Header().Set("Content-Length", fmt.Sprintf("%d", len(block)))
+ resp.Header().Set("Content-Length", strconv.Itoa(size))
+ resp.Header().Set("Content-Type", "application/octet-stream")
+ resp.Write(buf[:size])
+}
- _, err = resp.Write(block)
+// Return a new context that gets cancelled by resp's CloseNotifier.
+func contextForResponse(parent context.Context, resp http.ResponseWriter) (context.Context, context.CancelFunc) {
+ ctx, cancel := context.WithCancel(parent)
+ if cn, ok := resp.(http.CloseNotifier); ok {
+ go func(c <-chan bool) {
+ select {
+ case <-c:
+ theConfig.debugLogf("cancel context")
+ cancel()
+ case <-ctx.Done():
+ }
+ }(cn.CloseNotify())
+ }
+ return ctx, cancel
+}
- return
+// Get a buffer from the pool -- but give up and return a non-nil
+// error if ctx ends before we get a buffer.
+func getBufferWithContext(ctx context.Context, bufs *bufferPool, bufSize int) ([]byte, error) {
+ bufReady := make(chan []byte)
+ go func() {
+ bufReady <- bufs.Get(bufSize)
+ }()
+ select {
+ case buf := <-bufReady:
+ return buf, nil
+ case <-ctx.Done():
+ go func() {
+ // Even if closeNotifier happened first, we
+ // need to keep waiting for our buf so we can
+ // return it to the pool.
+ bufs.Put(<-bufReady)
+ }()
+ return nil, ErrClientDisconnect
+ }
}
+// PutBlockHandler is a HandleFunc to address Put block requests.
func PutBlockHandler(resp http.ResponseWriter, req *http.Request) {
-	// Garbage collect after each PUT. Fixes #2865.
-	// See also GetBlockHandler.
-	defer runtime.GC()
+	// Cancel work if the client disconnects mid-request.
+	ctx, cancel := contextForResponse(context.TODO(), resp)
+	defer cancel()
	hash := mux.Vars(req)["hash"]
	// NOTE(review): the diff hunk context between the line above and
	// the "return }" below appears to be missing (presumably a
	// Content-Length validity check) — confirm against the full file.
	return
}
-	if req.ContentLength > BLOCKSIZE {
+	if req.ContentLength > BlockSize {
		http.Error(resp, TooLongError.Error(), TooLongError.HTTPCode)
		return
	}
	// NOTE(review): more hunk context missing here.
	return
}
-	buf := make([]byte, req.ContentLength)
-	nread, err := io.ReadFull(req.Body, buf)
+	// Acquire a pooled buffer sized to the request body.
+	buf, err := getBufferWithContext(ctx, bufs, int(req.ContentLength))
	if err != nil {
-		http.Error(resp, err.Error(), 500)
+		http.Error(resp, err.Error(), http.StatusServiceUnavailable)
		return
-	} else if int64(nread) < req.ContentLength {
-		http.Error(resp, "request truncated", 500)
+	}
+
+	// Read the full request body into the pooled buffer; on failure
+	// the buffer must be returned to the pool before bailing out.
+	_, err = io.ReadFull(req.Body, buf)
+	if err != nil {
+		http.Error(resp, err.Error(), 500)
+		bufs.Put(buf)
		return
	}
-	err = PutBlock(buf, hash)
+	// Store the block; the buffer is returned to the pool regardless
+	// of the outcome.
+	replication, err := PutBlock(ctx, buf, hash)
+	bufs.Put(buf)
+
	if err != nil {
-		ke := err.(*KeepError)
-		http.Error(resp, ke.Error(), ke.HTTPCode)
+		code := http.StatusInternalServerError
+		if err, ok := err.(*KeepError); ok {
+			code = err.HTTPCode
+		}
+		http.Error(resp, err.Error(), code)
		return
	}
	// Success; add a size hint, sign the locator if possible, and
	// return it to the client.
-	return_hash := fmt.Sprintf("%s+%d", hash, len(buf))
-	api_token := GetApiToken(req)
-	if PermissionSecret != nil && api_token != "" {
-		expiry := time.Now().Add(blob_signature_ttl)
-		return_hash = SignLocator(return_hash, api_token, expiry)
-	}
-	resp.Write([]byte(return_hash + "\n"))
+	returnHash := fmt.Sprintf("%s+%d", hash, req.ContentLength)
+	apiToken := GetAPIToken(req)
+	if theConfig.blobSigningKey != nil && apiToken != "" {
+		expiry := time.Now().Add(theConfig.BlobSignatureTTL.Duration())
+		returnHash = SignLocator(returnHash, apiToken, expiry)
+	}
+	resp.Header().Set("X-Keep-Replicas-Stored", strconv.Itoa(replication))
+	resp.Write([]byte(returnHash + "\n"))
}
-// IndexHandler
-// A HandleFunc to address /index and /index/{prefix} requests.
-//
-func IndexHandler(resp http.ResponseWriter, req *http.Request) {
- // Reject unauthorized requests.
- if !IsDataManagerToken(GetApiToken(req)) {
+// IndexHandler responds to "/index", "/index/{prefix}", and
+// "/mounts/{uuid}/blocks" requests.
+func (rtr *router) IndexHandler(resp http.ResponseWriter, req *http.Request) {
+ if !IsSystemAuth(GetAPIToken(req)) {
http.Error(resp, UnauthorizedError.Error(), UnauthorizedError.HTTPCode)
return
}
prefix := mux.Vars(req)["prefix"]
+ if prefix == "" {
+ req.ParseForm()
+ prefix = req.Form.Get("prefix")
+ }
- for _, vol := range KeepVM.AllReadable() {
- if err := vol.IndexTo(prefix, resp); err != nil {
+ uuid := mux.Vars(req)["uuid"]
+
+ var vols []Volume
+ if uuid == "" {
+ vols = KeepVM.AllReadable()
+ } else if v := KeepVM.Lookup(uuid, false); v == nil {
+ http.Error(resp, "mount not found", http.StatusNotFound)
+ return
+ } else {
+ vols = []Volume{v}
+ }
+
+ for _, v := range vols {
+ if err := v.IndexTo(prefix, resp); err != nil {
// The only errors returned by IndexTo are
// write errors returned by resp.Write(),
// which probably means the client has
return
}
}
+ // An empty line at EOF is the only way the client can be
+ // assured the entire index was received.
+ resp.Write([]byte{'\n'})
}
-// StatusHandler
-// Responds to /status.json requests with the current node status,
-// described in a JSON structure.
-//
-// The data given in a status.json response includes:
-// volumes - a list of Keep volumes currently in use by this server
-// each volume is an object with the following fields:
-// * mount_point
-// * device_num (an integer identifying the underlying filesystem)
-// * bytes_free
-// * bytes_used
-//
-type VolumeStatus struct {
- MountPoint string `json:"mount_point"`
- DeviceNum uint64 `json:"device_num"`
- BytesFree uint64 `json:"bytes_free"`
- BytesUsed uint64 `json:"bytes_used"`
+// MountsHandler responds to "GET /mounts" requests.
+func (rtr *router) MountsHandler(resp http.ResponseWriter, req *http.Request) {
+ err := json.NewEncoder(resp).Encode(KeepVM.Mounts())
+ if err != nil {
+ http.Error(resp, err.Error(), http.StatusInternalServerError)
+ }
}
+// PoolStatus reports keepstore buffer pool usage and limits.
+type PoolStatus struct {
+	Alloc uint64 `json:"BytesAllocatedCumulative"`
+	Cap   int    `json:"BuffersMax"`
+	Len   int    `json:"BuffersInUse"`
+}
+
+// volumeStatusEnt is one per-volume entry in NodeStatus.Volumes.
+type volumeStatusEnt struct {
+	Label          string
+	Status         *VolumeStatus `json:",omitempty"`
+	VolumeStats    *ioStats      `json:",omitempty"`
+	InternalStats  interface{}   `json:",omitempty"`
+}
+
+// NodeStatus is the response body served by the /status.json endpoint.
type NodeStatus struct {
-	Volumes []*VolumeStatus `json:"volumes"`
+	Volumes         []*volumeStatusEnt
+	BufferPool      PoolStatus
+	PullQueue       WorkQueueStatus
+	TrashQueue      WorkQueueStatus
+	RequestsCurrent int
+	RequestsMax     int
+	Version         string
+}
+
+// st is a single NodeStatus reused across /status.json requests;
+// stLock guards it while it is refreshed and serialized.
+var st NodeStatus
+var stLock sync.Mutex
+
+// DebugHandler addresses /debug.json requests.
+func (rtr *router) DebugHandler(resp http.ResponseWriter, req *http.Request) {
+ type debugStats struct {
+ MemStats runtime.MemStats
+ }
+ var ds debugStats
+ runtime.ReadMemStats(&ds.MemStats)
+ err := json.NewEncoder(resp).Encode(&ds)
+ if err != nil {
+ http.Error(resp, err.Error(), 500)
+ }
}
-func StatusHandler(resp http.ResponseWriter, req *http.Request) {
- st := GetNodeStatus()
- if jstat, err := json.Marshal(st); err == nil {
+// StatusHandler addresses /status.json requests.
+func (rtr *router) StatusHandler(resp http.ResponseWriter, req *http.Request) {
+ stLock.Lock()
+ rtr.readNodeStatus(&st)
+ jstat, err := json.Marshal(&st)
+ stLock.Unlock()
+ if err == nil {
resp.Write(jstat)
} else {
- log.Printf("json.Marshal: %s\n", err)
- log.Printf("NodeStatus = %v\n", st)
+ log.Printf("json.Marshal: %s", err)
+ log.Printf("NodeStatus = %v", &st)
http.Error(resp, err.Error(), 500)
}
}
-// GetNodeStatus
-// Returns a NodeStatus struct describing this Keep
-// node's current status.
-//
-func GetNodeStatus() *NodeStatus {
- st := new(NodeStatus)
-
- st.Volumes = make([]*VolumeStatus, len(KeepVM.AllReadable()))
- for i, vol := range KeepVM.AllReadable() {
- st.Volumes[i] = vol.Status()
+// populate the given NodeStatus struct with current values.
+func (rtr *router) readNodeStatus(st *NodeStatus) {
+ st.Version = version
+ vols := KeepVM.AllReadable()
+ if cap(st.Volumes) < len(vols) {
+ st.Volumes = make([]*volumeStatusEnt, len(vols))
+ }
+ st.Volumes = st.Volumes[:0]
+ for _, vol := range vols {
+ var internalStats interface{}
+ if vol, ok := vol.(InternalStatser); ok {
+ internalStats = vol.InternalStats()
+ }
+ st.Volumes = append(st.Volumes, &volumeStatusEnt{
+ Label: vol.String(),
+ Status: vol.Status(),
+ InternalStats: internalStats,
+ //VolumeStats: KeepVM.VolumeStats(vol),
+ })
+ }
+ st.BufferPool.Alloc = bufs.Alloc()
+ st.BufferPool.Cap = bufs.Cap()
+ st.BufferPool.Len = bufs.Len()
+ st.PullQueue = getWorkQueueStatus(pullq)
+ st.TrashQueue = getWorkQueueStatus(trashq)
+ if rtr.limiter != nil {
+ st.RequestsCurrent = rtr.limiter.Current()
+ st.RequestsMax = rtr.limiter.Max()
}
- return st
}
-// GetVolumeStatus
-// Returns a VolumeStatus describing the requested volume.
-//
-func GetVolumeStatus(volume string) *VolumeStatus {
- var fs syscall.Statfs_t
- var devnum uint64
-
- if fi, err := os.Stat(volume); err == nil {
- devnum = fi.Sys().(*syscall.Stat_t).Dev
- } else {
- log.Printf("GetVolumeStatus: os.Stat: %s\n", err)
- return nil
+// return a WorkQueueStatus for the given queue. If q is nil (which
+// should never happen except in test suites), return a zero status
+// value instead of crashing.
+func getWorkQueueStatus(q *WorkQueue) WorkQueueStatus {
+ if q == nil {
+ // This should only happen during tests.
+ return WorkQueueStatus{}
}
-
- err := syscall.Statfs(volume, &fs)
- if err != nil {
- log.Printf("GetVolumeStatus: statfs: %s\n", err)
- return nil
- }
- // These calculations match the way df calculates disk usage:
- // "free" space is measured by fs.Bavail, but "used" space
- // uses fs.Blocks - fs.Bfree.
- free := fs.Bavail * uint64(fs.Bsize)
- used := (fs.Blocks - fs.Bfree) * uint64(fs.Bsize)
- return &VolumeStatus{volume, devnum, free, used}
+ return q.Status()
}
// DeleteHandler processes DELETE requests.
	// NOTE(review): the function signature and several hunk context
	// lines (e.g. the result struct declaration whose Failed field
	// appears below) are missing from this fragment — confirm
	// against the full file.
	hash := mux.Vars(req)["hash"]
	// Confirm that this user is an admin and has a token with unlimited scope.
-	var tok = GetApiToken(req)
+	var tok = GetAPIToken(req)
	if tok == "" || !CanDelete(tok) {
		http.Error(resp, PermissionError.Error(), PermissionError.HTTPCode)
		return
	}
-	if never_delete {
+	// Deletion must be explicitly enabled in the config.
+	if !theConfig.EnableDelete {
		http.Error(resp, MethodDisabledError.Error(), MethodDisabledError.HTTPCode)
		return
	}
		Failed int `json:"copies_failed"`
	}
	for _, vol := range KeepVM.AllWritable() {
-		if err := vol.Delete(hash); err == nil {
+		if err := vol.Trash(hash); err == nil {
			result.Deleted++
		} else if os.IsNotExist(err) {
			continue
	if body, err := json.Marshal(result); err == nil {
		resp.Write(body)
	} else {
-		log.Printf("json.Marshal: %s (result = %v)\n", err, result)
+		log.Printf("json.Marshal: %s (result = %v)", err, result)
		http.Error(resp, err.Error(), 500)
	}
}
If the JSON body cannot be unmarshalled, respond with 400 Bad Request.
*/
+// PullRequest consists of a block locator and an ordered list of
+// servers from which the block can be pulled.
type PullRequest struct {
	Locator string `json:"locator"`
	Servers []string `json:"servers"`
+
+	// Destination mount, or "" for "anywhere"
+	MountUUID string
}
+// PullHandler processes "PUT /pull" requests for the data manager.
func PullHandler(resp http.ResponseWriter, req *http.Request) {
	// Reject unauthorized requests.
-	if !IsDataManagerToken(GetApiToken(req)) {
+	if !IsSystemAuth(GetAPIToken(req)) {
		http.Error(resp, UnauthorizedError.Error(), UnauthorizedError.HTTPCode)
		return
	}

	// Parse the request body into a list of PullRequests.
	var pr []PullRequest
	r := json.NewDecoder(req.Body)
	if err := r.Decode(&pr); err != nil {
-		http.Error(resp, BadRequestError.Error(), BadRequestError.HTTPCode)
+		http.Error(resp, err.Error(), BadRequestError.HTTPCode)
		return
	}
	// NOTE(review): the hunk context constructing plist (a
	// container/list of the decoded requests) is missing from this
	// fragment — confirm against the full file.
	pullq.ReplaceQueue(plist)
}
+// TrashRequest consists of a block locator and its Mtime.
type TrashRequest struct {
	Locator string `json:"locator"`
	BlockMtime int64 `json:"block_mtime"`
+
+	// Target mount, or "" for "everywhere"
+	MountUUID string
}
+// TrashHandler processes /trash requests.
func TrashHandler(resp http.ResponseWriter, req *http.Request) {
	// Reject unauthorized requests.
-	if !IsDataManagerToken(GetApiToken(req)) {
+	if !IsSystemAuth(GetAPIToken(req)) {
		http.Error(resp, UnauthorizedError.Error(), UnauthorizedError.HTTPCode)
		return
	}

	// Parse the request body into a list of TrashRequests.
	var trash []TrashRequest
	r := json.NewDecoder(req.Body)
	if err := r.Decode(&trash); err != nil {
-		http.Error(resp, BadRequestError.Error(), BadRequestError.HTTPCode)
+		http.Error(resp, err.Error(), BadRequestError.HTTPCode)
		return
	}
	// NOTE(review): the hunk context constructing tlist from the
	// decoded requests is missing from this fragment — confirm
	// against the full file.
	trashq.ReplaceQueue(tlist)
}
-// ==============================
+// UntrashHandler processes "PUT /untrash/{hash:[0-9a-f]{32}}" requests for the data manager.
+func UntrashHandler(resp http.ResponseWriter, req *http.Request) {
+ // Reject unauthorized requests.
+ if !IsSystemAuth(GetAPIToken(req)) {
+ http.Error(resp, UnauthorizedError.Error(), UnauthorizedError.HTTPCode)
+ return
+ }
+
+ hash := mux.Vars(req)["hash"]
+
+ if len(KeepVM.AllWritable()) == 0 {
+ http.Error(resp, "No writable volumes", http.StatusNotFound)
+ return
+ }
+
+ var untrashedOn, failedOn []string
+ var numNotFound int
+ for _, vol := range KeepVM.AllWritable() {
+ err := vol.Untrash(hash)
+
+ if os.IsNotExist(err) {
+ numNotFound++
+ } else if err != nil {
+ log.Printf("Error untrashing %v on volume %v", hash, vol.String())
+ failedOn = append(failedOn, vol.String())
+ } else {
+ log.Printf("Untrashed %v on volume %v", hash, vol.String())
+ untrashedOn = append(untrashedOn, vol.String())
+ }
+ }
+
+ if numNotFound == len(KeepVM.AllWritable()) {
+ http.Error(resp, "Block not found on any of the writable volumes", http.StatusNotFound)
+ return
+ }
+
+ if len(failedOn) == len(KeepVM.AllWritable()) {
+ http.Error(resp, "Failed to untrash on all writable volumes", http.StatusInternalServerError)
+ } else {
+ respBody := "Successfully untrashed on: " + strings.Join(untrashedOn, ",")
+ if len(failedOn) > 0 {
+ respBody += "; Failed to untrash on: " + strings.Join(failedOn, ",")
+ }
+ resp.Write([]byte(respBody))
+ }
+}
+
// GetBlock and PutBlock implement lower-level code for handling
// blocks by rooting through volumes connected to the local machine.
// Once the handler has determined that system policy permits the
// block is stored on, so it should be responsible for figuring out
// which volume to check for fetching blocks, storing blocks, etc.
-// ==============================
-// GetBlock fetches and returns the block identified by "hash". If
-// the update_timestamp argument is true, GetBlock also updates the
-// block's file modification time (for the sake of PutBlock, which
-// must update the file's timestamp when the block already exists).
-//
-// On success, GetBlock returns a byte slice with the block data, and
-// a nil error.
+// GetBlock fetches the block identified by "hash" into the provided
+// buf, and returns the data size.
//
// If the block cannot be found on any volume, returns NotFoundError.
//
// If the block found does not have the correct MD5 hash, returns
// DiskHashError.
//
-
-func GetBlock(hash string, update_timestamp bool) ([]byte, error) {
+func GetBlock(ctx context.Context, hash string, buf []byte, resp http.ResponseWriter) (int, error) {
// Attempt to read the requested hash from a keep volume.
- error_to_caller := NotFoundError
-
- var vols []Volume
- if update_timestamp {
- // Pointless to find the block on an unwritable volume
- // because Touch() will fail -- this is as good as
- // "not found" for purposes of callers who need to
- // update_timestamp.
- vols = KeepVM.AllWritable()
- } else {
- vols = KeepVM.AllReadable()
- }
+ errorToCaller := NotFoundError
- for _, vol := range vols {
- buf, err := vol.Get(hash)
+ for _, vol := range KeepVM.AllReadable() {
+ size, err := vol.Get(ctx, hash, buf)
+ select {
+ case <-ctx.Done():
+ return 0, ErrClientDisconnect
+ default:
+ }
if err != nil {
// IsNotExist is an expected error and may be
// ignored. All other errors are logged. In
// volumes. If all volumes report IsNotExist,
// we return a NotFoundError.
if !os.IsNotExist(err) {
- log.Printf("GetBlock: reading %s: %s\n", hash, err)
+ log.Printf("%s: Get(%s): %s", vol, hash, err)
}
continue
}
// Check the file checksum.
//
- filehash := fmt.Sprintf("%x", md5.Sum(buf))
+ filehash := fmt.Sprintf("%x", md5.Sum(buf[:size]))
if filehash != hash {
// TODO: Try harder to tell a sysadmin about
// this.
- log.Printf("%s: checksum mismatch for request %s (actual %s)\n",
+ log.Printf("%s: checksum mismatch for request %s (actual %s)",
vol, hash, filehash)
- error_to_caller = DiskHashError
+ errorToCaller = DiskHashError
continue
}
- if error_to_caller == DiskHashError {
+ if errorToCaller == DiskHashError {
log.Printf("%s: checksum mismatch for request %s but a good copy was found on another volume and returned",
vol, hash)
}
- if update_timestamp {
- if err := vol.Touch(hash); err != nil {
- error_to_caller = GenericError
- log.Printf("%s: Touch %s failed: %s",
- vol, hash, error_to_caller)
- continue
- }
- }
- return buf, nil
+ return size, nil
}
- return nil, error_to_caller
+ return 0, errorToCaller
}
-/* PutBlock(block, hash)
- Stores the BLOCK (identified by the content id HASH) in Keep.
-
- The MD5 checksum of the block must be identical to the content id HASH.
- If not, an error is returned.
-
- PutBlock stores the BLOCK on the first Keep volume with free space.
- A failure code is returned to the user only if all volumes fail.
-
- On success, PutBlock returns nil.
- On failure, it returns a KeepError with one of the following codes:
-
- 500 Collision
- A different block with the same hash already exists on this
- Keep server.
- 422 MD5Fail
- The MD5 hash of the BLOCK does not match the argument HASH.
- 503 Full
- There was not enough space left in any Keep volume to store
- the object.
- 500 Fail
- The object could not be stored for some other reason (e.g.
- all writes failed). The text of the error message should
- provide as much detail as possible.
-*/
-
-func PutBlock(block []byte, hash string) error {
+// PutBlock Stores the BLOCK (identified by the content id HASH) in Keep.
+//
+// PutBlock(ctx, block, hash)
+// Stores the BLOCK (identified by the content id HASH) in Keep.
+//
+// The MD5 checksum of the block must be identical to the content id HASH.
+// If not, an error is returned.
+//
+// PutBlock stores the BLOCK on the first Keep volume with free space.
+// A failure code is returned to the user only if all volumes fail.
+//
+// On success, PutBlock returns nil.
+// On failure, it returns a KeepError with one of the following codes:
+//
+// 500 Collision
+// A different block with the same hash already exists on this
+// Keep server.
+// 422 MD5Fail
+// The MD5 hash of the BLOCK does not match the argument HASH.
+// 503 Full
+// There was not enough space left in any Keep volume to store
+// the object.
+// 500 Fail
+// The object could not be stored for some other reason (e.g.
+// all writes failed). The text of the error message should
+// provide as much detail as possible.
+//
+func PutBlock(ctx context.Context, block []byte, hash string) (int, error) {
// Check that BLOCK's checksum matches HASH.
blockhash := fmt.Sprintf("%x", md5.Sum(block))
if blockhash != hash {
log.Printf("%s: MD5 checksum %s did not match request", hash, blockhash)
- return RequestHashError
- }
-
- // If we already have a block on disk under this identifier, return
- // success (but check for MD5 collisions). While fetching the block,
- // update its timestamp.
- // The only errors that GetBlock can return are DiskHashError and NotFoundError.
- // In either case, we want to write our new (good) block to disk,
- // so there is nothing special to do if err != nil.
- //
- if oldblock, err := GetBlock(hash, true); err == nil {
- if bytes.Compare(block, oldblock) == 0 {
- // The block already exists; return success.
- return nil
- } else {
- return CollisionError
- }
+ return 0, RequestHashError
+ }
+
+ // If we already have this data, it's intact on disk, and we
+ // can update its timestamp, return success. If we have
+ // different data with the same hash, return failure.
+ if n, err := CompareAndTouch(ctx, hash, block); err == nil || err == CollisionError {
+ return n, err
+ } else if ctx.Err() != nil {
+ return 0, ErrClientDisconnect
}
// Choose a Keep volume to write to.
// If this volume fails, try all of the volumes in order.
if vol := KeepVM.NextWritable(); vol != nil {
- if err := vol.Put(hash, block); err == nil {
- return nil // success!
+ if err := vol.Put(ctx, hash, block); err == nil {
+ return vol.Replication(), nil // success!
+ }
+ if ctx.Err() != nil {
+ return 0, ErrClientDisconnect
}
}
writables := KeepVM.AllWritable()
if len(writables) == 0 {
log.Print("No writable volumes.")
- return FullError
+ return 0, FullError
}
allFull := true
for _, vol := range writables {
- err := vol.Put(hash, block)
+ err := vol.Put(ctx, hash, block)
+ if ctx.Err() != nil {
+ return 0, ErrClientDisconnect
+ }
if err == nil {
- return nil // success!
+ return vol.Replication(), nil // success!
}
if err != FullError {
// The volume is not full but the
// write did not succeed. Report the
// error and continue trying.
allFull = false
- log.Printf("%s: Write(%s): %s\n", vol, hash, err)
+ log.Printf("%s: Write(%s): %s", vol, hash, err)
}
}
if allFull {
log.Print("All volumes are full.")
- return FullError
- } else {
- // Already logged the non-full errors.
- return GenericError
+ return 0, FullError
+ }
+ // Already logged the non-full errors.
+ return 0, GenericError
+}
+
+// CompareAndTouch returns the current replication level if one of the
+// volumes already has the given content and it successfully updates
+// the relevant block's modification time in order to protect it from
+// premature garbage collection. Otherwise, it returns a non-nil
+// error.
+func CompareAndTouch(ctx context.Context, hash string, buf []byte) (int, error) {
+ var bestErr error = NotFoundError
+ for _, vol := range KeepVM.AllWritable() {
+ err := vol.Compare(ctx, hash, buf)
+ if ctx.Err() != nil {
+ return 0, ctx.Err()
+ } else if err == CollisionError {
+ // Stop if we have a block with same hash but
+ // different content. (It will be impossible
+ // to tell which one is wanted if we have
+ // both, so there's no point writing it even
+ // on a different volume.)
+ log.Printf("%s: Compare(%s): %s", vol, hash, err)
+ return 0, err
+ } else if os.IsNotExist(err) {
+ // Block does not exist. This is the only
+ // "normal" error: we don't log anything.
+ continue
+ } else if err != nil {
+ // Couldn't open file, data is corrupt on
+ // disk, etc.: log this abnormal condition,
+ // and try the next volume.
+ log.Printf("%s: Compare(%s): %s", vol, hash, err)
+ continue
+ }
+ if err := vol.Touch(hash); err != nil {
+ log.Printf("%s: Touch %s failed: %s", vol, hash, err)
+ bestErr = err
+ continue
+ }
+ // Compare and Touch both worked --> done.
+ return vol.Replication(), nil
}
+ return 0, bestErr
}
// validLocatorRe matches a bare 32-hex-digit MD5 locator.
var validLocatorRe = regexp.MustCompile(`^[0-9a-f]{32}$`)

// IsValidLocator returns true if the specified string is a valid Keep
// locator. When Keep is extended to support hash types other than
// MD5, this should be updated to cover those as well.
func IsValidLocator(loc string) bool {
	return validLocatorRe.MatchString(loc)
}
-// GetApiToken returns the OAuth2 token from the Authorization
+var authRe = regexp.MustCompile(`^OAuth2\s+(.*)`)
+
+// GetAPIToken returns the OAuth2 token from the Authorization
// header of a HTTP request, or an empty string if no matching
// token is found.
-func GetApiToken(req *http.Request) string {
+func GetAPIToken(req *http.Request) string {
if auth, ok := req.Header["Authorization"]; ok {
- if pat, err := regexp.Compile(`^OAuth2\s+(.*)`); err != nil {
- log.Println(err)
- } else if match := pat.FindStringSubmatch(auth[0]); match != nil {
+ if match := authRe.FindStringSubmatch(auth[0]); match != nil {
return match[1]
}
}
}
// IsExpired returns true if the given Unix timestamp (expressed as a
// hexadecimal string) is in the past, or if timestampHex cannot be
// parsed as a hexadecimal string.
func IsExpired(timestampHex string) bool {
	ts, err := strconv.ParseInt(timestampHex, 16, 0)
	if err != nil {
		// Unparseable timestamps are treated as expired.
		log.Printf("IsExpired: %s", err)
		return true
	}
	return time.Now().After(time.Unix(ts, 0))
}
-// CanDelete returns true if the user identified by api_token is
+// CanDelete returns true if the user identified by apiToken is
// allowed to delete blocks.
-func CanDelete(api_token string) bool {
- if api_token == "" {
+func CanDelete(apiToken string) bool {
+ if apiToken == "" {
return false
}
// Blocks may be deleted only when Keep has been configured with a
// data manager.
- if IsDataManagerToken(api_token) {
+ if IsSystemAuth(apiToken) {
return true
}
- // TODO(twp): look up api_token with the API server
+ // TODO(twp): look up apiToken with the API server
// return true if is_admin is true and if the token
// has unlimited scope
return false
}
-// IsDataManagerToken returns true if api_token represents the data
-// manager's token.
-func IsDataManagerToken(api_token string) bool {
- return data_manager_token != "" && api_token == data_manager_token
+// IsSystemAuth returns true if the given token is allowed to perform
+// system level actions like deleting data.
+func IsSystemAuth(token string) bool {
+ return token != "" && token == theConfig.systemAuthToken
}