17529: Return RequestLimitExceeded as a RateLimitError.
[arvados.git] / lib / cloud / ec2 / ec2.go
index c329c1f88a4c0b8a2b3c3b49142f3ce60d17e871..b7afdf0ecdfdae4ced0f1f869af81e17989a3329 100644 (file)
@@ -14,11 +14,16 @@ import (
        "fmt"
        "math/big"
        "sync"
+       "sync/atomic"
+       "time"
 
        "git.arvados.org/arvados.git/lib/cloud"
        "git.arvados.org/arvados.git/sdk/go/arvados"
        "github.com/aws/aws-sdk-go/aws"
        "github.com/aws/aws-sdk-go/aws/credentials"
+       "github.com/aws/aws-sdk-go/aws/credentials/ec2rolecreds"
+       "github.com/aws/aws-sdk-go/aws/ec2metadata"
+       "github.com/aws/aws-sdk-go/aws/request"
        "github.com/aws/aws-sdk-go/aws/session"
        "github.com/aws/aws-sdk-go/service/ec2"
        "github.com/sirupsen/logrus"
@@ -28,6 +33,11 @@ import (
 // Driver is the ec2 implementation of the cloud.Driver interface.
 var Driver = cloud.DriverFunc(newEC2InstanceSet)
 
+const (
+       throttleDelayMin = time.Second
+       throttleDelayMax = time.Minute
+)
+
 type ec2InstanceSetConfig struct {
        AccessKeyID      string
        SecretAccessKey  string
@@ -54,6 +64,7 @@ type ec2InstanceSet struct {
        client        ec2Interface
        keysMtx       sync.Mutex
        keys          map[string]string
+       throttleDelay atomic.Value
 }
 
 func newEC2InstanceSet(config json.RawMessage, instanceSetID cloud.InstanceSetID, _ cloud.SharedResourceTags, logger logrus.FieldLogger) (prv cloud.InstanceSet, err error) {
@@ -65,12 +76,19 @@ func newEC2InstanceSet(config json.RawMessage, instanceSetID cloud.InstanceSetID
        if err != nil {
                return nil, err
        }
-       awsConfig := aws.NewConfig().
-               WithCredentials(credentials.NewStaticCredentials(
-                       instanceSet.ec2config.AccessKeyID,
-                       instanceSet.ec2config.SecretAccessKey,
-                       "")).
-               WithRegion(instanceSet.ec2config.Region)
+
+       sess, err := session.NewSession()
+       if err != nil {
+               return nil, err
+       }
+       // First try any static credentials, fall back to an IAM instance profile/role
+       creds := credentials.NewChainCredentials(
+               []credentials.Provider{
+                       &credentials.StaticProvider{Value: credentials.Value{AccessKeyID: instanceSet.ec2config.AccessKeyID, SecretAccessKey: instanceSet.ec2config.SecretAccessKey}},
+                       &ec2rolecreds.EC2RoleProvider{Client: ec2metadata.New(sess)},
+               })
+
+       awsConfig := aws.NewConfig().WithCredentials(creds).WithRegion(instanceSet.ec2config.Region)
        instanceSet.client = ec2.New(session.Must(session.NewSession(awsConfig)))
        instanceSet.keys = make(map[string]string)
        if instanceSet.ec2config.EBSVolumeType == "" {
@@ -103,10 +121,10 @@ func awsKeyFingerprint(pk ssh.PublicKey) (md5fp string, sha1fp string, err error
        sha1pkix := sha1.Sum([]byte(pkix))
        md5fp = ""
        sha1fp = ""
-       for i := 0; i < len(md5pkix); i += 1 {
+       for i := 0; i < len(md5pkix); i++ {
                md5fp += fmt.Sprintf(":%02x", md5pkix[i])
        }
-       for i := 0; i < len(sha1pkix); i += 1 {
+       for i := 0; i < len(sha1pkix); i++ {
                sha1fp += fmt.Sprintf(":%02x", sha1pkix[i])
        }
        return md5fp[1:], sha1fp[1:], nil
@@ -212,8 +230,23 @@ func (instanceSet *ec2InstanceSet) Create(
 
        rsv, err := instanceSet.client.RunInstances(&rii)
 
-       if err != nil {
+       if request.IsErrorThrottle(err) {
+               // Back off exponentially until a create call either
+               // succeeds or returns a non-throttle error.
+               d, _ := instanceSet.throttleDelay.Load().(time.Duration)
+               d = d*3/2 + time.Second
+               if d < throttleDelayMin {
+                       d = throttleDelayMin
+               } else if d > throttleDelayMax {
+                       d = throttleDelayMax
+               }
+               instanceSet.throttleDelay.Store(d)
+               return nil, rateLimitError{error: err, earliestRetry: time.Now().Add(d)}
+       } else if err != nil {
+               instanceSet.throttleDelay.Store(time.Duration(0))
                return nil, err
+       } else {
+               instanceSet.throttleDelay.Store(time.Duration(0))
        }
 
        return &ec2Instance{
@@ -251,7 +284,7 @@ func (instanceSet *ec2InstanceSet) Instances(tags cloud.InstanceTags) (instances
        }
 }
 
-func (az *ec2InstanceSet) Stop() {
+func (instanceSet *ec2InstanceSet) Stop() {
 }
 
 type ec2Instance struct {
@@ -308,9 +341,8 @@ func (inst *ec2Instance) Destroy() error {
 func (inst *ec2Instance) Address() string {
        if inst.instance.PrivateIpAddress != nil {
                return *inst.instance.PrivateIpAddress
-       } else {
-               return ""
        }
+       return ""
 }
 
 func (inst *ec2Instance) RemoteUser() string {
@@ -320,3 +352,12 @@ func (inst *ec2Instance) RemoteUser() string {
 func (inst *ec2Instance) VerifyHostKey(ssh.PublicKey, *ssh.Client) error {
        return cloud.ErrNotImplemented
 }
+
+type rateLimitError struct {
+       error
+       earliestRetry time.Time
+}
+
+func (err rateLimitError) EarliestRetry() time.Time {
+       return err.earliestRetry
+}