import (
"bytes"
+ "context"
"crypto/md5"
"errors"
"fmt"
"io/ioutil"
"net"
"net/http"
+ "os"
+ "path/filepath"
"regexp"
"strconv"
"strings"
"sync"
"time"
+ "git.arvados.org/arvados.git/sdk/go/arvados"
"git.arvados.org/arvados.git/sdk/go/arvadosclient"
- "git.arvados.org/arvados.git/sdk/go/asyncbuf"
"git.arvados.org/arvados.git/sdk/go/httpserver"
)
DefaultProxyConnectTimeout = 30 * time.Second
DefaultProxyTLSHandshakeTimeout = 10 * time.Second
DefaultProxyKeepAlive = 120 * time.Second
+
+ rootCacheDir = "/var/cache/arvados/keep"
+ userCacheDir = ".cache/arvados/keep" // relative to HOME
)
// Error interface with an error and boolean indicating whether the error is temporary
multipleResponseError
}
-type InsufficientReplicasError error
+type InsufficientReplicasError struct{ error }
-type OversizeBlockError error
+type OversizeBlockError struct{ error }
-var ErrOversizeBlock = OversizeBlockError(errors.New("Exceeded maximum block size (" + strconv.Itoa(BLOCKSIZE) + ")"))
+var ErrOversizeBlock = OversizeBlockError{error: errors.New("Exceeded maximum block size (" + strconv.Itoa(BLOCKSIZE) + ")")}
var MissingArvadosApiHost = errors.New("Missing required environment variable ARVADOS_API_HOST")
var MissingArvadosApiToken = errors.New("Missing required environment variable ARVADOS_API_TOKEN")
var InvalidLocatorError = errors.New("Invalid locator")
// ErrIncompleteIndex is returned when the Index response does not end with a new empty line
var ErrIncompleteIndex = errors.New("Got incomplete index")
-const XKeepDesiredReplicas = "X-Keep-Desired-Replicas"
-const XKeepReplicasStored = "X-Keep-Replicas-Stored"
+const (
+ XKeepDesiredReplicas = "X-Keep-Desired-Replicas"
+ XKeepReplicasStored = "X-Keep-Replicas-Stored"
+ XKeepStorageClasses = "X-Keep-Storage-Classes"
+ XKeepStorageClassesConfirmed = "X-Keep-Storage-Classes-Confirmed"
+)
type HTTPClient interface {
Do(*http.Request) (*http.Response, error)
}
+const DiskCacheDisabled = arvados.ByteSizeOrPercent(1)
+
// KeepClient holds information about Arvados and Keep servers.
type KeepClient struct {
- Arvados *arvadosclient.ArvadosClient
- Want_replicas int
- localRoots map[string]string
- writableLocalRoots map[string]string
- gatewayRoots map[string]string
- lock sync.RWMutex
- HTTPClient HTTPClient
- Retries int
- BlockCache *BlockCache
- RequestID string
- StorageClasses []string
+ Arvados *arvadosclient.ArvadosClient
+ Want_replicas int
+ localRoots map[string]string
+ writableLocalRoots map[string]string
+ gatewayRoots map[string]string
+ lock sync.RWMutex
+ HTTPClient HTTPClient
+ Retries int
+ RequestID string
+ StorageClasses []string
+ DefaultStorageClasses []string // Set by cluster's exported config
+ DiskCacheSize arvados.ByteSizeOrPercent // See also DiskCacheDisabled
// set to 1 if all writable services are of disk type, otherwise 0
replicasPerService int
// Disable automatic discovery of keep services
disableDiscovery bool
+
+ gatewayStack arvados.KeepGateway
+}
+
+func (kc *KeepClient) Clone() *KeepClient {
+ kc.lock.Lock()
+ defer kc.lock.Unlock()
+ return &KeepClient{
+ Arvados: kc.Arvados,
+ Want_replicas: kc.Want_replicas,
+ localRoots: kc.localRoots,
+ writableLocalRoots: kc.writableLocalRoots,
+ gatewayRoots: kc.gatewayRoots,
+ HTTPClient: kc.HTTPClient,
+ Retries: kc.Retries,
+ RequestID: kc.RequestID,
+ StorageClasses: kc.StorageClasses,
+ DefaultStorageClasses: kc.DefaultStorageClasses,
+ DiskCacheSize: kc.DiskCacheSize,
+ replicasPerService: kc.replicasPerService,
+ foundNonDiskSvc: kc.foundNonDiskSvc,
+ disableDiscovery: kc.disableDiscovery,
+ }
+}
+
+func (kc *KeepClient) loadDefaultClasses() error {
+ scData, err := kc.Arvados.ClusterConfig("StorageClasses")
+ if err != nil {
+ return err
+ }
+ classes := scData.(map[string]interface{})
+ for scName := range classes {
+ scConf, _ := classes[scName].(map[string]interface{})
+ isDefault, ok := scConf["Default"].(bool)
+ if ok && isDefault {
+ kc.DefaultStorageClasses = append(kc.DefaultStorageClasses, scName)
+ }
+ }
+ return nil
}
-// MakeKeepClient creates a new KeepClient, calls
+// MakeKeepClient creates a new KeepClient, loads default storage classes, calls
// DiscoverKeepServices(), and returns when the client is ready to
// use.
func MakeKeepClient(arv *arvadosclient.ArvadosClient) (*KeepClient, error) {
defaultReplicationLevel = int(v)
}
}
- return &KeepClient{
+ kc := &KeepClient{
Arvados: arv,
Want_replicas: defaultReplicationLevel,
Retries: 2,
}
+ err = kc.loadDefaultClasses()
+ if err != nil {
+ DebugPrintf("DEBUG: Unable to load the default storage classes cluster config")
+ }
+ return kc
}
// PutHR puts a block given the block hash, a reader, and the number of bytes
// Returns an InsufficientReplicasError if 0 <= replicas <
// kc.Wants_replicas.
func (kc *KeepClient) PutHR(hash string, r io.Reader, dataBytes int64) (string, int, error) {
- // Buffer for reads from 'r'
- var bufsize int
- if dataBytes > 0 {
- if dataBytes > BLOCKSIZE {
- return "", 0, ErrOversizeBlock
- }
- bufsize = int(dataBytes)
- } else {
- bufsize = BLOCKSIZE
- }
-
- buf := asyncbuf.NewBuffer(make([]byte, 0, bufsize))
- go func() {
- _, err := io.Copy(buf, HashCheckingReader{r, md5.New(), hash})
- buf.CloseWithError(err)
- }()
- return kc.putReplicas(hash, buf.NewReader, dataBytes)
+ resp, err := kc.BlockWrite(context.Background(), arvados.BlockWriteOptions{
+ Hash: hash,
+ Reader: r,
+ DataSize: int(dataBytes),
+ })
+ return resp.Locator, resp.Replicas, err
}
// PutHB writes a block to Keep. The hash of the bytes is given in
//
// Return values are the same as for PutHR.
func (kc *KeepClient) PutHB(hash string, buf []byte) (string, int, error) {
- newReader := func() io.Reader { return bytes.NewBuffer(buf) }
- return kc.putReplicas(hash, newReader, int64(len(buf)))
+ resp, err := kc.BlockWrite(context.Background(), arvados.BlockWriteOptions{
+ Hash: hash,
+ Data: buf,
+ })
+ return resp.Locator, resp.Replicas, err
}
// PutB writes a block to Keep. It computes the hash itself.
//
// Return values are the same as for PutHR.
func (kc *KeepClient) PutB(buffer []byte) (string, int, error) {
- hash := fmt.Sprintf("%x", md5.Sum(buffer))
- return kc.PutHB(hash, buffer)
+ resp, err := kc.BlockWrite(context.Background(), arvados.BlockWriteOptions{
+ Data: buffer,
+ })
+ return resp.Locator, resp.Replicas, err
}
// PutR writes a block to Keep. It first reads all data from r into a buffer
return nil, 0, "", nil, err
}
+// attempt to create dir/subdir/ and its parents, up to but not
+// including dir itself, using mode 0700.
+func makedirs(dir, subdir string) {
+ for _, part := range strings.Split(subdir, string(os.PathSeparator)) {
+ dir = filepath.Join(dir, part)
+ os.Mkdir(dir, 0700)
+ }
+}
+
+// upstreamGateway creates/returns the KeepGateway stack used to read
+// and write data: a disk-backed cache on top of an http backend.
+func (kc *KeepClient) upstreamGateway() arvados.KeepGateway {
+ kc.lock.Lock()
+ defer kc.lock.Unlock()
+ if kc.gatewayStack != nil {
+ return kc.gatewayStack
+ }
+ var cachedir string
+ if os.Geteuid() == 0 {
+ cachedir = rootCacheDir
+ makedirs("/", cachedir)
+ } else {
+ home := "/" + os.Getenv("HOME")
+ makedirs(home, userCacheDir)
+ cachedir = filepath.Join(home, userCacheDir)
+ }
+ backend := &keepViaHTTP{kc}
+ if kc.DiskCacheSize == DiskCacheDisabled {
+ kc.gatewayStack = backend
+ } else {
+ kc.gatewayStack = &arvados.DiskCache{
+ Dir: cachedir,
+ MaxSize: kc.DiskCacheSize,
+ KeepGateway: backend,
+ }
+ }
+ return kc.gatewayStack
+}
+
// LocalLocator returns a locator equivalent to the one supplied, but
// with a valid signature from the local cluster. If the given locator
// already has a local signature, it is returned unchanged.
func (kc *KeepClient) LocalLocator(locator string) (string, error) {
- if !strings.Contains(locator, "+R") {
- // Either it has +A, or it's unsigned and we assume
- // it's a local locator on a site with signatures
- // disabled.
- return locator, nil
- }
- sighdr := fmt.Sprintf("local, time=%s", time.Now().UTC().Format(time.RFC3339))
- _, _, url, hdr, err := kc.getOrHead("HEAD", locator, http.Header{"X-Keep-Signature": []string{sighdr}})
- if err != nil {
- return "", err
- }
- loc := hdr.Get("X-Keep-Locator")
- if loc == "" {
- return "", fmt.Errorf("missing X-Keep-Locator header in HEAD response from %s", url)
- }
- return loc, nil
+ return kc.upstreamGateway().LocalLocator(locator)
}
// Get retrieves a block, given a locator. Returns a reader, the
// ReadAt retrieves a portion of block from the cache if it's
// present, otherwise from the network.
func (kc *KeepClient) ReadAt(locator string, p []byte, off int) (int, error) {
- return kc.cache().ReadAt(kc, locator, p, off)
+ return kc.upstreamGateway().ReadAt(locator, p, off)
+}
+
+// BlockWrite writes a full block to upstream servers and saves a copy
+// in the local cache.
+func (kc *KeepClient) BlockWrite(ctx context.Context, req arvados.BlockWriteOptions) (arvados.BlockWriteResponse, error) {
+ return kc.upstreamGateway().BlockWrite(ctx, req)
}
// Ask verifies that a block with the given hash is available and
return found
}
-func (kc *KeepClient) cache() *BlockCache {
- if kc.BlockCache != nil {
- return kc.BlockCache
- }
- return DefaultBlockCache
-}
-
-func (kc *KeepClient) ClearBlockCache() {
- kc.cache().Clear()
+func (kc *KeepClient) SetStorageClasses(sc []string) {
+ // make a copy so the caller can't mess with it.
+ kc.StorageClasses = append([]string{}, sc...)
}
var (