X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/63cfe7a9b391e1a07c6c6c8b0fab10b1c875f1be..8d73164d5ca50e7af1b3752b4251eaf9f11a9fc8:/sdk/go/keepclient/keepclient.go diff --git a/sdk/go/keepclient/keepclient.go b/sdk/go/keepclient/keepclient.go index 76ea17517f..169f1457e2 100644 --- a/sdk/go/keepclient/keepclient.go +++ b/sdk/go/keepclient/keepclient.go @@ -1,3 +1,7 @@ +// Copyright (C) The Arvados Authors. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + /* Provides low-level Get/Put primitives for accessing Arvados Keep blocks. */ package keepclient @@ -17,7 +21,8 @@ import ( "time" "git.curoverse.com/arvados.git/sdk/go/arvadosclient" - "git.curoverse.com/arvados.git/sdk/go/streamer" + "git.curoverse.com/arvados.git/sdk/go/asyncbuf" + "git.curoverse.com/arvados.git/sdk/go/httpserver" ) // A Keep "block" is 64MB. @@ -88,19 +93,24 @@ type HTTPClient interface { type KeepClient struct { Arvados *arvadosclient.ArvadosClient Want_replicas int - localRoots *map[string]string - writableLocalRoots *map[string]string - gatewayRoots *map[string]string + localRoots map[string]string + writableLocalRoots map[string]string + gatewayRoots map[string]string lock sync.RWMutex HTTPClient HTTPClient Retries int BlockCache *BlockCache + RequestID string + StorageClasses []string // set to 1 if all writable services are of disk type, otherwise 0 replicasPerService int // Any non-disk typed services found in the list of keepservers? foundNonDiskSvc bool + + // Disable automatic discovery of keep services + disableDiscovery bool } // MakeKeepClient creates a new KeepClient, calls @@ -108,12 +118,11 @@ type KeepClient struct { // use. func MakeKeepClient(arv *arvadosclient.ArvadosClient) (*KeepClient, error) { kc := New(arv) - return kc, kc.DiscoverKeepServers() + return kc, kc.discoverServices() } -// New creates a new KeepClient. The caller must call -// DiscoverKeepServers() before using the returned client to read or -// write data. +// New creates a new KeepClient. Service discovery will occur on the +// next read/write operation. func New(arv *arvadosclient.ArvadosClient) *KeepClient { defaultReplicationLevel := 2 value, err := arv.Discovery("defaultCollectionReplication") @@ -150,10 +159,12 @@ func (kc *KeepClient) PutHR(hash string, r io.Reader, dataBytes int64) (string, bufsize = BLOCKSIZE } - t := streamer.AsyncStreamFromReader(bufsize, HashCheckingReader{r, md5.New(), hash}) - defer t.Close() - - return kc.putReplicas(hash, t, dataBytes) + buf := asyncbuf.NewBuffer(make([]byte, 0, bufsize)) + go func() { + _, err := io.Copy(buf, HashCheckingReader{r, md5.New(), hash}) + buf.CloseWithError(err) + }() + return kc.putReplicas(hash, buf.NewReader, dataBytes) } // PutHB writes a block to Keep. The hash of the bytes is given in @@ -161,9 +172,8 @@ func (kc *KeepClient) PutHR(hash string, r io.Reader, dataBytes int64) (string, // // Return values are the same as for PutHR. func (kc *KeepClient) PutHB(hash string, buf []byte) (string, int, error) { - t := streamer.AsyncStreamFromSlice(buf) - defer t.Close() - return kc.putReplicas(hash, t, int64(len(buf))) + newReader := func() io.Reader { return bytes.NewBuffer(buf) } + return kc.putReplicas(hash, newReader, int64(len(buf))) } // PutB writes a block to Keep. It computes the hash itself. @@ -193,6 +203,17 @@ func (kc *KeepClient) getOrHead(method string, locator string) (io.ReadCloser, i return ioutil.NopCloser(bytes.NewReader(nil)), 0, "", nil } + reqid := kc.getRequestID() + + var expectLength int64 + if parts := strings.SplitN(locator, "+", 3); len(parts) < 2 { + expectLength = -1 + } else if n, err := strconv.ParseInt(parts[1], 10, 64); err != nil { + expectLength = -1 + } else { + expectLength = n + } + var errs []string tries_remaining := 1 + kc.Retries @@ -216,14 +237,17 @@ func (kc *KeepClient) getOrHead(method string, locator string) (io.ReadCloser, i errs = append(errs, fmt.Sprintf("%s: %v", url, err)) continue } - req.Header.Add("Authorization", fmt.Sprintf("OAuth2 %s", kc.Arvados.ApiToken)) + req.Header.Add("Authorization", "OAuth2 "+kc.Arvados.ApiToken) + req.Header.Add("X-Request-Id", reqid) resp, err := kc.httpClient().Do(req) if err != nil { // Probably a network error, may be transient, // can try again. errs = append(errs, fmt.Sprintf("%s: %v", url, err)) retryList = append(retryList, host) - } else if resp.StatusCode != http.StatusOK { + continue + } + if resp.StatusCode != http.StatusOK { var respbody []byte respbody, _ = ioutil.ReadAll(&io.LimitedReader{R: resp.Body, N: 4096}) resp.Body.Close() @@ -240,20 +264,29 @@ func (kc *KeepClient) getOrHead(method string, locator string) (io.ReadCloser, i } else if resp.StatusCode == 404 { count404++ } - } else { - // Success. - if method == "GET" { - return HashCheckingReader{ - Reader: resp.Body, - Hash: md5.New(), - Check: locator[0:32], - }, resp.ContentLength, url, nil - } else { + continue + } + if expectLength < 0 { + if resp.ContentLength < 0 { resp.Body.Close() - return nil, resp.ContentLength, url, nil + return nil, 0, "", fmt.Errorf("error reading %q: no size hint, no Content-Length header in response", locator) } + expectLength = resp.ContentLength + } else if resp.ContentLength >= 0 && expectLength != resp.ContentLength { + resp.Body.Close() + return nil, 0, "", fmt.Errorf("error reading %q: size hint %d != Content-Length %d", locator, expectLength, resp.ContentLength) + } + // Success + if method == "GET" { + return HashCheckingReader{ + Reader: resp.Body, + Hash: md5.New(), + Check: locator[0:32], + }, expectLength, url, nil + } else { + resp.Body.Close() + return nil, expectLength, url, nil } - } serversToTry = retryList } @@ -282,6 +315,12 @@ func (kc *KeepClient) Get(locator string) (io.ReadCloser, int64, string, error) return kc.getOrHead("GET", locator) } +// ReadAt() retrieves a portion of block from the cache if it's +// present, otherwise from the network. +func (kc *KeepClient) ReadAt(locator string, p []byte, off int) (int, error) { + return kc.cache().ReadAt(kc, locator, p, off) +} + // Ask() verifies that a block with the given hash is available and // readable, according to at least one Keep service. Unlike Get, it // does not retrieve the data or verify that the data content matches @@ -317,7 +356,8 @@ func (kc *KeepClient) GetIndex(keepServiceUUID, prefix string) (io.Reader, error return nil, err } - req.Header.Add("Authorization", fmt.Sprintf("OAuth2 %s", kc.Arvados.ApiToken)) + req.Header.Add("Authorization", "OAuth2 "+kc.Arvados.ApiToken) + req.Header.Set("X-Request-Id", kc.getRequestID()) resp, err := kc.httpClient().Do(req) if err != nil { return nil, err @@ -349,55 +389,47 @@ func (kc *KeepClient) GetIndex(keepServiceUUID, prefix string) (io.Reader, error // LocalRoots() returns the map of local (i.e., disk and proxy) Keep // services: uuid -> baseURI. func (kc *KeepClient) LocalRoots() map[string]string { + kc.discoverServices() kc.lock.RLock() defer kc.lock.RUnlock() - return *kc.localRoots + return kc.localRoots } // GatewayRoots() returns the map of Keep remote gateway services: // uuid -> baseURI. func (kc *KeepClient) GatewayRoots() map[string]string { + kc.discoverServices() kc.lock.RLock() defer kc.lock.RUnlock() - return *kc.gatewayRoots + return kc.gatewayRoots } // WritableLocalRoots() returns the map of writable local Keep services: // uuid -> baseURI. func (kc *KeepClient) WritableLocalRoots() map[string]string { + kc.discoverServices() kc.lock.RLock() defer kc.lock.RUnlock() - return *kc.writableLocalRoots + return kc.writableLocalRoots } -// SetServiceRoots updates the localRoots and gatewayRoots maps, -// without risk of disrupting operations that are already in progress. +// SetServiceRoots disables service discovery and updates the +// localRoots and gatewayRoots maps, without disrupting operations +// that are already in progress. // -// The KeepClient makes its own copy of the supplied maps, so the -// caller can reuse/modify them after SetServiceRoots returns, but -// they should not be modified by any other goroutine while -// SetServiceRoots is running. -func (kc *KeepClient) SetServiceRoots(newLocals, newWritableLocals, newGateways map[string]string) { - locals := make(map[string]string) - for uuid, root := range newLocals { - locals[uuid] = root - } - - writables := make(map[string]string) - for uuid, root := range newWritableLocals { - writables[uuid] = root - } - - gateways := make(map[string]string) - for uuid, root := range newGateways { - gateways[uuid] = root - } +// The supplied maps must not be modified after calling +// SetServiceRoots. +func (kc *KeepClient) SetServiceRoots(locals, writables, gateways map[string]string) { + kc.disableDiscovery = true + kc.setServiceRoots(locals, writables, gateways) +} +func (kc *KeepClient) setServiceRoots(locals, writables, gateways map[string]string) { kc.lock.Lock() defer kc.lock.Unlock() - kc.localRoots = &locals - kc.writableLocalRoots = &writables - kc.gatewayRoots = &gateways + kc.localRoots = locals + kc.writableLocalRoots = writables + kc.gatewayRoots = gateways } // getSortedRoots returns a list of base URIs of Keep services, in the @@ -436,6 +468,10 @@ func (kc *KeepClient) cache() *BlockCache { } } +func (kc *KeepClient) ClearBlockCache() { + kc.cache().Clear() +} + var ( // There are four global http.Client objects for the four // possible permutations of TLS behavior (verify/skip-verify) @@ -510,6 +546,16 @@ func (kc *KeepClient) httpClient() HTTPClient { return c } +var reqIDGen = httpserver.IDGenerator{Prefix: "req-"} + +func (kc *KeepClient) getRequestID() string { + if kc.RequestID != "" { + return kc.RequestID + } else { + return reqIDGen.Next() + } +} + type Locator struct { Hash string Size int // -1 if data size is not known