1 // Copyright (C) The Arvados Authors. All rights reserved.
3 // SPDX-License-Identifier: Apache-2.0
33 "git.arvados.org/arvados.git/sdk/go/httpserver"
34 "github.com/hashicorp/go-retryablehttp"
37 // A Client is an HTTP client with an API endpoint and a set of
38 // Arvados credentials.
40 // It offers methods for accessing individual Arvados APIs, and
41 // methods that implement common patterns like fetching multiple pages
42 // of results using List APIs.
44 // HTTP client used to make requests. If nil,
45 // DefaultSecureClient or InsecureHTTPClient will be used.
46 Client *http.Client `json:"-"`
48 // Protocol scheme: "http", "https", or "" (https)
51 // Hostname (or host:port) of Arvados API server.
54 // User authentication token.
57 // Accept unverified certificates. This works only if the
58 // Client field is nil: otherwise, it has no effect.
61 // Override keep service discovery with a list of base
62 // URIs. (Currently there are no Client methods for
63 // discovering keep services so this is just a convenience for
64 // callers who use a Client to initialize an
65 // arvadosclient.ArvadosClient.)
66 KeepServiceURIs []string `json:",omitempty"`
68 // HTTP headers to add/override in outgoing requests.
69 SendHeader http.Header
71 // Timeout for requests. NewClientFromConfig and
72 // NewClientFromEnv return a Client with a default 5 minute
73 // timeout. Within this time, retryable errors are
74 // automatically retried with exponential backoff.
76 // To disable automatic retries, set Timeout to zero and use a
77 // context deadline to establish a maximum request time.
80 // Maximum disk cache size in bytes or percent of total
81 // filesystem size. If zero, use default, currently 10% of
83 DiskCacheSize ByteSizeOrPercent
87 defaultRequestID string
89 // APIHost and AuthToken were loaded from ARVADOS_* env vars
90 // (used to customize "no host/token" error messages)
93 // Track/limit concurrent outgoing API calls. Note this
94 // differs from an outgoing connection limit (a feature
95 // provided by http.Transport) when concurrent calls are
96 // multiplexed on a single http2 connection.
98 // getRequestLimiter() should always be used, because this can
100 requestLimiter *requestLimiter
105 // InsecureHTTPClient is the default http.Client used by a Client with
106 // Insecure==true and Client==nil.
// The transport configured here sets InsecureSkipVerify, so TLS
// connections made through this client skip verification of the
// server's certificate chain and host name.
107 var InsecureHTTPClient = &http.Client{
108 Transport: &http.Transport{
109 TLSClientConfig: &tls.Config{
110 InsecureSkipVerify: true}}}
112 // DefaultSecureClient is the default http.Client used by a Client otherwise.
// NOTE(review): no fields are set, so this http.Client has no Timeout
// of its own -- presumably request time is bounded by the context
// deadline that (*Client).Do derives from Client.Timeout; confirm.
113 var DefaultSecureClient = &http.Client{}
115 // NewClientFromConfig creates a new Client that uses the endpoints in
116 // the given cluster.
118 // AuthToken is left empty for the caller to populate.
119 func NewClientFromConfig(cluster *Cluster) (*Client, error) {
120 ctrlURL := cluster.Services.Controller.ExternalURL
121 if ctrlURL.Host == "" {
122 return nil, fmt.Errorf("no host in config Services.Controller.ExternalURL: %v", ctrlURL)
125 if srvaddr := os.Getenv("ARVADOS_SERVER_ADDRESS"); srvaddr != "" {
126 // When this client is used to make a request to
127 // https://{ctrlhost}:port/ (any port), it dials the
128 // indicated port on ARVADOS_SERVER_ADDRESS instead.
130 // This is invoked by arvados-server boot to ensure
131 // that server->server traffic (e.g.,
132 // keepproxy->controller) only hits local interfaces,
133 // even if the Controller.ExternalURL host is a load
134 // balancer / gateway and not a local interface
135 // address (e.g., when running on a cloud VM).
137 // This avoids unnecessary delay/cost of routing
138 // external traffic, and also allows controller to
139 // recognize other services as internal clients based
140 // on the connection source address.
141 divertedHost := (*url.URL)(&cluster.Services.Controller.ExternalURL).Hostname()
142 var dialer net.Dialer
144 Transport: &http.Transport{
145 TLSClientConfig: &tls.Config{InsecureSkipVerify: cluster.TLS.Insecure},
146 DialContext: func(ctx context.Context, network, addr string) (net.Conn, error) {
147 host, port, err := net.SplitHostPort(addr)
148 if err == nil && network == "tcp" && host == divertedHost {
149 addr = net.JoinHostPort(srvaddr, port)
151 return dialer.DialContext(ctx, network, addr)
158 Scheme: ctrlURL.Scheme,
159 APIHost: ctrlURL.Host,
160 Insecure: cluster.TLS.Insecure,
161 Timeout: 5 * time.Minute,
162 DiskCacheSize: cluster.Collections.WebDAVCache.DiskCacheSize,
163 requestLimiter: &requestLimiter{maxlimit: int64(cluster.API.MaxConcurrentRequests / 4)},
167 // NewClientFromEnv creates a new Client that uses the default HTTP
168 // client, and loads API endpoint and credentials from ARVADOS_*
169 // environment variables (if set) and
170 // $HOME/.config/arvados/settings.conf (if readable).
172 // If a config exists in both locations, the environment variable is
175 // If there is an error (other than ENOENT) reading settings.conf,
176 // NewClientFromEnv logs the error to log.Default(), then proceeds as
177 // if settings.conf did not exist.
179 // Space characters are trimmed when reading the settings file, so
180 // these are equivalent:
182 // ARVADOS_API_HOST=localhost\n
183 // ARVADOS_API_HOST=localhost\r\n
184 // ARVADOS_API_HOST = localhost \n
185 // \tARVADOS_API_HOST = localhost\n
186 func NewClientFromEnv() *Client {
187 vars := map[string]string{}
188 home := os.Getenv("HOME")
189 conffile := home + "/.config/arvados/settings.conf"
191 // no $HOME => just use env vars
192 } else if settings, err := os.ReadFile(conffile); errors.Is(err, fs.ErrNotExist) {
193 // no config file => just use env vars
194 } else if err != nil {
195 // config file unreadable => log message, then use env vars
196 log.Printf("continuing without loading %s: %s", conffile, err)
198 for _, line := range bytes.Split(settings, []byte{'\n'}) {
199 kv := bytes.SplitN(line, []byte{'='}, 2)
200 k := string(bytes.TrimSpace(kv[0]))
201 if len(kv) != 2 || !strings.HasPrefix(k, "ARVADOS_") {
202 // Same behavior as python sdk:
203 // silently skip leading # (comments),
204 // blank lines, typos, and non-Arvados
208 vars[k] = string(bytes.TrimSpace(kv[1]))
211 for _, env := range os.Environ() {
212 if !strings.HasPrefix(env, "ARVADOS_") {
215 kv := strings.SplitN(env, "=", 2)
221 for _, s := range strings.Split(vars["ARVADOS_KEEP_SERVICES"], " ") {
224 } else if u, err := url.Parse(s); err != nil {
225 log.Printf("ARVADOS_KEEP_SERVICES: %q: %s", s, err)
226 } else if !u.IsAbs() {
227 log.Printf("ARVADOS_KEEP_SERVICES: %q: not an absolute URI", s)
229 svcs = append(svcs, s)
233 if s := strings.ToLower(vars["ARVADOS_API_HOST_INSECURE"]); s == "1" || s == "yes" || s == "true" {
238 APIHost: vars["ARVADOS_API_HOST"],
239 AuthToken: vars["ARVADOS_API_TOKEN"],
241 KeepServiceURIs: svcs,
242 Timeout: 5 * time.Minute,
// reqIDGen is configured with the "req-" prefix. Do calls its Next
// method to obtain an X-Request-Id value when neither the request
// header, the context, nor the client's defaultRequestID supplies one.
247 var reqIDGen = httpserver.IDGenerator{Prefix: "req-"}
// nopCancelFunc is a context.CancelFunc that does nothing. Do uses it
// as the initial value of its cancel func.
249 var nopCancelFunc context.CancelFunc = func() {}
// reqErrorRe matches error text containing "net/http: invalid header ".
// Do's CheckRetry callback tests request errors against it --
// presumably so such errors are not retried; confirm against the
// callback body.
251 var reqErrorRe = regexp.MustCompile(`net/http: invalid header `)
253 // Do augments (*http.Client).Do(): adds Authorization and X-Request-Id
254 // headers, delays in order to comply with rate-limiting restrictions,
255 // and retries failed requests when appropriate.
256 func (c *Client) Do(req *http.Request) (*http.Response, error) {
258 if auth, _ := ctx.Value(contextKeyAuthorization{}).(string); auth != "" {
259 req.Header.Add("Authorization", auth)
260 } else if c.AuthToken != "" {
261 req.Header.Add("Authorization", "OAuth2 "+c.AuthToken)
264 if req.Header.Get("X-Request-Id") == "" {
266 if ctxreqid, _ := ctx.Value(contextKeyRequestID{}).(string); ctxreqid != "" {
268 } else if c.defaultRequestID != "" {
269 reqid = c.defaultRequestID
271 reqid = reqIDGen.Next()
273 if req.Header == nil {
274 req.Header = http.Header{"X-Request-Id": {reqid}}
276 req.Header.Set("X-Request-Id", reqid)
280 rreq, err := retryablehttp.FromRequest(req)
285 cancel := nopCancelFunc
286 var lastResp *http.Response
287 var lastRespBody io.ReadCloser
289 var checkRetryCalled int
291 rclient := retryablehttp.NewClient()
292 rclient.HTTPClient = c.httpClient()
293 rclient.Backoff = exponentialBackoff
295 rclient.RetryWaitMax = c.Timeout / 10
296 rclient.RetryMax = 32
297 ctx, cancel = context.WithDeadline(ctx, time.Now().Add(c.Timeout))
298 rreq = rreq.WithContext(ctx)
302 rclient.CheckRetry = func(ctx context.Context, resp *http.Response, respErr error) (bool, error) {
304 if c.getRequestLimiter().Report(resp, respErr) {
305 c.last503.Store(time.Now())
310 // This check can be removed when
311 // https://github.com/hashicorp/go-retryablehttp/pull/210
312 // (or equivalent) is merged and we update go.mod.
313 // Until then, it is needed to pass
314 // TestNonRetryableStdlibError.
315 if respErr != nil && reqErrorRe.MatchString(respErr.Error()) {
318 retrying, err := retryablehttp.DefaultRetryPolicy(ctx, resp, respErr)
320 lastResp, lastRespBody, lastErr = resp, nil, respErr
322 // Save the response and body so we
323 // can return it instead of "deadline
324 // exceeded". retryablehttp.Client
325 // will drain and discard resp.body,
326 // so we need to stash it separately.
327 buf, err := ioutil.ReadAll(resp.Body)
329 lastRespBody = io.NopCloser(bytes.NewReader(buf))
331 lastResp, lastErr = nil, err
339 limiter := c.getRequestLimiter()
341 if ctx.Err() != nil {
344 return nil, ctx.Err()
346 resp, err := rclient.Do(rreq)
347 if (errors.Is(err, context.DeadlineExceeded) || errors.Is(err, context.Canceled)) && (lastResp != nil || lastErr != nil) {
350 if checkRetryCalled > 0 && err != nil {
351 // Mimic retryablehttp's "giving up after X
352 // attempts" message, even if we gave up
353 // because of time rather than maxretries.
354 err = fmt.Errorf("%s %s giving up after %d attempt(s): %w", req.Method, req.URL.String(), checkRetryCalled, err)
357 resp.Body = lastRespBody
365 // We need to call cancel() eventually, but we can't use
366 // "defer cancel()" because the context has to stay alive
367 // until the caller has finished reading the response body.
368 resp.Body = cancelOnClose{
369 ReadCloser: resp.Body,
378 // Last503 returns the time of the most recent HTTP 503 (Service
379 // Unavailable) response. Zero time indicates never.
380 func (c *Client) Last503() time.Time {
381 t, _ := c.last503.Load().(time.Time)
385 // globalRequestLimiter entries (one for each APIHost) don't have a
386 // hard limit on outgoing connections, but do add a delay and reduce
387 // concurrency after 503 errors.
389 globalRequestLimiter = map[string]*requestLimiter{}
390 globalRequestLimiterLock sync.Mutex
393 // Get this client's requestLimiter, or a global requestLimiter
394 // singleton for c's APIHost if this client doesn't have its own.
395 func (c *Client) getRequestLimiter() *requestLimiter {
396 if c.requestLimiter != nil {
397 return c.requestLimiter
399 globalRequestLimiterLock.Lock()
400 defer globalRequestLimiterLock.Unlock()
401 limiter := globalRequestLimiter[c.APIHost]
403 limiter = &requestLimiter{}
404 globalRequestLimiter[c.APIHost] = limiter
409 // cancelOnClose calls a provided CancelFunc when its wrapped
410 // ReadCloser's Close() method is called.
411 type cancelOnClose struct {
413 cancel context.CancelFunc
416 func (coc cancelOnClose) Close() error {
417 err := coc.ReadCloser.Close()
422 func isRedirectStatus(code int) bool {
424 case http.StatusMovedPermanently, http.StatusFound, http.StatusSeeOther, http.StatusTemporaryRedirect, http.StatusPermanentRedirect:
// minExponentialBackoffBase is the floor that exponentialBackoff
// applies to its min argument whenever attemptNum > 0.
431 const minExponentialBackoffBase = time.Second
433 // Implements retryablehttp.Backoff using the server-provided
434 // Retry-After header if available, otherwise nearly-full jitter
435 // exponential backoff (similar to
436 // https://aws.amazon.com/blogs/architecture/exponential-backoff-and-jitter/),
437 // in all cases respecting the provided min and max.
438 func exponentialBackoff(min, max time.Duration, attemptNum int, resp *http.Response) time.Duration {
439 if attemptNum > 0 && min < minExponentialBackoffBase {
440 min = minExponentialBackoffBase
443 if resp != nil && (resp.StatusCode == http.StatusTooManyRequests || resp.StatusCode == http.StatusServiceUnavailable) {
444 if s := resp.Header.Get("Retry-After"); s != "" {
445 if sleep, err := strconv.ParseInt(s, 10, 64); err == nil {
446 t = time.Second * time.Duration(sleep)
447 } else if stamp, err := time.Parse(time.RFC1123, s); err == nil {
448 t = stamp.Sub(time.Now())
453 jitter := mathrand.New(mathrand.NewSource(int64(time.Now().Nanosecond()))).Float64()
454 t = min + time.Duration((math.Pow(2, float64(attemptNum))*float64(min)-float64(min))*jitter)
465 // DoAndDecode performs req and unmarshals the response (which must be
466 // JSON) into dst. Use this instead of RequestAndDecode if you need
467 // more control of the http.Request object.
469 // If the response status indicates an HTTP redirect, the Location
470 // header value is unmarshalled to dst as a RedirectLocation
472 func (c *Client) DoAndDecode(dst interface{}, req *http.Request) error {
473 resp, err := c.Do(req)
477 defer resp.Body.Close()
478 buf, err := ioutil.ReadAll(resp.Body)
483 case resp.StatusCode == http.StatusNoContent:
485 case resp.StatusCode == http.StatusOK && dst == nil:
487 case resp.StatusCode == http.StatusOK:
488 return json.Unmarshal(buf, dst)
490 // If the caller uses a client with a custom CheckRedirect
491 // func, Do() might return the 3xx response instead of
493 case isRedirectStatus(resp.StatusCode) && dst == nil:
495 case isRedirectStatus(resp.StatusCode):
496 // Copy the redirect target URL to dst.RedirectLocation.
497 buf, err := json.Marshal(map[string]string{"redirect_location": resp.Header.Get("Location")})
501 return json.Unmarshal(buf, dst)
504 return newTransactionError(req, resp, buf)
508 // Convert an arbitrary struct to url.Values. For example,
510 // Foo{Bar: []int{1,2,3}, Baz: "waz"}
514 // url.Values{`Bar`:`[1,2,3]`,`Baz`:`waz`}
516 // params itself is returned if it is already a url.Values.
517 func anythingToValues(params interface{}) (url.Values, error) {
518 if v, ok := params.(url.Values); ok {
521 // TODO: Do this more efficiently, possibly using
522 // json.Decode/Encode, so the whole thing doesn't have to get
523 // encoded, decoded, and re-encoded.
524 j, err := json.Marshal(params)
528 var generic map[string]interface{}
529 dec := json.NewDecoder(bytes.NewBuffer(j))
531 err = dec.Decode(&generic)
535 urlValues := url.Values{}
536 for k, v := range generic {
537 if v, ok := v.(string); ok {
541 if v, ok := v.(json.Number); ok {
542 urlValues.Set(k, v.String())
545 if v, ok := v.(bool); ok {
547 urlValues.Set(k, "true")
549 // "foo=false", "foo=0", and "foo="
550 // are all taken as true strings, so
551 // don't send false values at all --
552 // rely on the default being false.
556 j, err := json.Marshal(v)
560 if bytes.Equal(j, []byte("null")) {
561 // don't add it to urlValues at all
564 urlValues.Set(k, string(j))
566 return urlValues, nil
569 // RequestAndDecode performs an API request and unmarshals the
570 // response (which must be JSON) into dst. Method and body arguments
571 // are the same as for http.NewRequest(). The given path is added to
572 // the server's scheme/host/port to form the request URL. The given
573 // params are passed via POST form or query string.
575 // path must not contain a query string.
576 func (c *Client) RequestAndDecode(dst interface{}, method, path string, body io.Reader, params interface{}) error {
577 return c.RequestAndDecodeContext(context.Background(), dst, method, path, body, params)
580 // RequestAndDecodeContext does the same as RequestAndDecode, but attaches ctx to the outgoing request.
581 func (c *Client) RequestAndDecodeContext(ctx context.Context, dst interface{}, method, path string, body io.Reader, params interface{}) error {
582 if body, ok := body.(io.Closer); ok {
583 // Ensure body is closed even if we error out early
588 return errors.New("ARVADOS_API_HOST and/or ARVADOS_API_TOKEN environment variables are not set")
590 return errors.New("arvados.Client cannot perform request: APIHost is not set")
592 urlString := c.apiURL(path)
593 urlValues, err := anythingToValues(params)
598 if urlValues == nil {
599 urlValues = url.Values{}
601 urlValues["select"] = []string{`["uuid"]`}
603 if urlValues == nil {
605 } else if body != nil || ((method == "GET" || method == "HEAD") && len(urlValues.Encode()) < 1000) {
606 // Send params in query part of URL
607 u, err := url.Parse(urlString)
611 u.RawQuery = urlValues.Encode()
612 urlString = u.String()
614 body = strings.NewReader(urlValues.Encode())
616 req, err := http.NewRequest(method, urlString, body)
620 if (method == "GET" || method == "HEAD") && body != nil {
621 req.Header.Set("X-Http-Method-Override", method)
624 req = req.WithContext(ctx)
625 req.Header.Set("Content-type", "application/x-www-form-urlencoded")
626 for k, v := range c.SendHeader {
629 return c.DoAndDecode(dst, req)
632 type resource interface {
633 resourceName() string
636 // UpdateBody returns an io.Reader suitable for use as an http.Request
637 // Body for a create or update API call.
638 func (c *Client) UpdateBody(rsc resource) io.Reader {
639 j, err := json.Marshal(rsc)
641 // Return a reader that returns errors.
643 w.CloseWithError(err)
646 v := url.Values{rsc.resourceName(): {string(j)}}
647 return bytes.NewBufferString(v.Encode())
650 // WithRequestID returns a new shallow copy of c that sends the given
651 // X-Request-Id value (instead of a new randomly generated one) with
652 // each subsequent request that doesn't provide its own via context or
654 func (c *Client) WithRequestID(reqid string) *Client {
656 cc.defaultRequestID = reqid
660 func (c *Client) httpClient() *http.Client {
662 case c.Client != nil:
665 return InsecureHTTPClient
667 return DefaultSecureClient
671 func (c *Client) apiURL(path string) string {
676 // Double slashes in URLs tend to cause subtle hidden problems
677 // (e.g., they can behave differently when a load balancer is
678 // in the picture). Here we ensure exactly one "/" regardless
679 // of whether the given APIHost or path has a superfluous one.
680 return scheme + "://" + strings.TrimSuffix(c.APIHost, "/") + "/" + strings.TrimPrefix(path, "/")
683 // DiscoveryDocument is the Arvados server's description of itself.
684 type DiscoveryDocument struct {
685 BasePath string `json:"basePath"`
686 DefaultCollectionReplication int `json:"defaultCollectionReplication"`
687 BlobSignatureTTL int64 `json:"blobSignatureTtl"`
688 GitURL string `json:"gitUrl"`
689 Schemas map[string]Schema `json:"schemas"`
690 Resources map[string]Resource `json:"resources"`
691 Revision string `json:"revision"`
694 type Resource struct {
695 Methods map[string]ResourceMethod `json:"methods"`
698 type ResourceMethod struct {
699 HTTPMethod string `json:"httpMethod"`
700 Path string `json:"path"`
701 Response MethodResponse `json:"response"`
704 type MethodResponse struct {
705 Ref string `json:"$ref"`
709 UUIDPrefix string `json:"uuidPrefix"`
712 // DiscoveryDocument returns a *DiscoveryDocument. The returned object
713 // should not be modified: the same object may be returned by
715 func (c *Client) DiscoveryDocument() (*DiscoveryDocument, error) {
719 var dd DiscoveryDocument
720 err := c.RequestAndDecode(&dd, "GET", "discovery/v1/apis/arvados/v1/rest", nil, nil)
// pdhRegexp matches a string consisting of exactly 32 lowercase hex
// digits, a "+", and one or more decimal digits (presumably a
// collection's portable data hash -- confirm). modelForUUID maps any
// matching identifier to the "Collection" model.
728 var pdhRegexp = regexp.MustCompile(`^[0-9a-f]{32}\+\d+$`)
730 func (c *Client) modelForUUID(dd *DiscoveryDocument, uuid string) (string, error) {
731 if pdhRegexp.MatchString(uuid) {
732 return "Collection", nil
735 return "", fmt.Errorf("invalid UUID: %q", uuid)
739 for m, s := range dd.Schemas {
740 if s.UUIDPrefix == infix {
746 return "", fmt.Errorf("unrecognized type portion %q in UUID %q", infix, uuid)
751 func (c *Client) KindForUUID(uuid string) (string, error) {
752 dd, err := c.DiscoveryDocument()
756 model, err := c.modelForUUID(dd, uuid)
760 return "arvados#" + strings.ToLower(model[:1]) + model[1:], nil
763 func (c *Client) PathForUUID(method, uuid string) (string, error) {
764 dd, err := c.DiscoveryDocument()
768 model, err := c.modelForUUID(dd, uuid)
773 for r, rsc := range dd.Resources {
774 if rsc.Methods["get"].Response.Ref == model {
780 return "", fmt.Errorf("no resource for model: %q", model)
782 m, ok := dd.Resources[resource].Methods[method]
784 return "", fmt.Errorf("no method %q for resource %q", method, resource)
786 path := dd.BasePath + strings.Replace(m.Path, "{uuid}", uuid, -1)
// maxUUIDInt is 36^15. RandomUUID passes it to rand.Int as the
// exclusive upper bound when drawing a random number.
793 var maxUUIDInt = (&big.Int{}).Exp(big.NewInt(36), big.NewInt(15), nil)
795 func RandomUUID(clusterID, infix string) string {
796 n, err := rand.Int(rand.Reader, maxUUIDInt)
804 return clusterID + "-" + infix + "-" + nstr