X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/78c908ca43839aa38bb45ef9a9959e1005e39494..ff11506c916cb2cd8abd1905e16c4d4f5ddd4240:/lib/cloud/ec2/ec2.go diff --git a/lib/cloud/ec2/ec2.go b/lib/cloud/ec2/ec2.go index e2ad6b42b2..b7afdf0ecd 100644 --- a/lib/cloud/ec2/ec2.go +++ b/lib/cloud/ec2/ec2.go @@ -14,27 +14,35 @@ import ( "fmt" "math/big" "sync" + "sync/atomic" + "time" - "git.curoverse.com/arvados.git/lib/cloud" - "git.curoverse.com/arvados.git/sdk/go/arvados" + "git.arvados.org/arvados.git/lib/cloud" + "git.arvados.org/arvados.git/sdk/go/arvados" "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/aws/credentials" + "github.com/aws/aws-sdk-go/aws/credentials/ec2rolecreds" + "github.com/aws/aws-sdk-go/aws/ec2metadata" + "github.com/aws/aws-sdk-go/aws/request" "github.com/aws/aws-sdk-go/aws/session" "github.com/aws/aws-sdk-go/service/ec2" "github.com/sirupsen/logrus" "golang.org/x/crypto/ssh" ) -const tagKeyInstanceSetID = "arvados-dispatch-id" - // Driver is the ec2 implementation of the cloud.Driver interface. var Driver = cloud.DriverFunc(newEC2InstanceSet) +const ( + throttleDelayMin = time.Second + throttleDelayMax = time.Minute +) + type ec2InstanceSetConfig struct { AccessKeyID string SecretAccessKey string Region string - SecurityGroupIDs []string + SecurityGroupIDs arvados.StringSet SubnetID string AdminUsername string EBSVolumeType string @@ -56,9 +64,10 @@ type ec2InstanceSet struct { client ec2Interface keysMtx sync.Mutex keys map[string]string + throttleDelay atomic.Value } -func newEC2InstanceSet(config json.RawMessage, instanceSetID cloud.InstanceSetID, logger logrus.FieldLogger) (prv cloud.InstanceSet, err error) { +func newEC2InstanceSet(config json.RawMessage, instanceSetID cloud.InstanceSetID, _ cloud.SharedResourceTags, logger logrus.FieldLogger) (prv cloud.InstanceSet, err error) { instanceSet := &ec2InstanceSet{ instanceSetID: instanceSetID, logger: logger, @@ -67,12 +76,19 @@ func newEC2InstanceSet(config json.RawMessage, instanceSetID cloud.InstanceSetID if err != nil { return nil, err } - awsConfig := aws.NewConfig(). - WithCredentials(credentials.NewStaticCredentials( - instanceSet.ec2config.AccessKeyID, - instanceSet.ec2config.SecretAccessKey, - "")). - WithRegion(instanceSet.ec2config.Region) + + sess, err := session.NewSession() + if err != nil { + return nil, err + } + // First try any static credentials, fall back to an IAM instance profile/role + creds := credentials.NewChainCredentials( + []credentials.Provider{ + &credentials.StaticProvider{Value: credentials.Value{AccessKeyID: instanceSet.ec2config.AccessKeyID, SecretAccessKey: instanceSet.ec2config.SecretAccessKey}}, + &ec2rolecreds.EC2RoleProvider{Client: ec2metadata.New(sess)}, + }) + + awsConfig := aws.NewConfig().WithCredentials(creds).WithRegion(instanceSet.ec2config.Region) instanceSet.client = ec2.New(session.Must(session.NewSession(awsConfig))) instanceSet.keys = make(map[string]string) if instanceSet.ec2config.EBSVolumeType == "" { @@ -105,10 +121,10 @@ func awsKeyFingerprint(pk ssh.PublicKey) (md5fp string, sha1fp string, err error sha1pkix := sha1.Sum([]byte(pkix)) md5fp = "" sha1fp = "" - for i := 0; i < len(md5pkix); i += 1 { + for i := 0; i < len(md5pkix); i++ { md5fp += fmt.Sprintf(":%02x", md5pkix[i]) } - for i := 0; i < len(sha1pkix); i += 1 { + for i := 0; i < len(sha1pkix); i++ { sha1fp += fmt.Sprintf(":%02x", sha1pkix[i]) } return md5fp[1:], sha1fp[1:], nil @@ -130,7 +146,7 @@ func (instanceSet *ec2InstanceSet) Create( var ok bool if keyname, ok = instanceSet.keys[md5keyFingerprint]; !ok { keyout, err := instanceSet.client.DescribeKeyPairs(&ec2.DescribeKeyPairsInput{ - Filters: []*ec2.Filter{&ec2.Filter{ + Filters: []*ec2.Filter{{ Name: aws.String("fingerprint"), Values: []*string{&md5keyFingerprint, &sha1keyFingerprint}, }}, @@ -155,12 +171,7 @@ func (instanceSet *ec2InstanceSet) Create( } instanceSet.keysMtx.Unlock() - ec2tags := []*ec2.Tag{ - &ec2.Tag{ - Key: aws.String(tagKeyInstanceSetID), - Value: aws.String(string(instanceSet.instanceSetID)), - }, - } + ec2tags := []*ec2.Tag{} for k, v := range newTags { ec2tags = append(ec2tags, &ec2.Tag{ Key: aws.String(k), @@ -168,6 +179,11 @@ func (instanceSet *ec2InstanceSet) Create( }) } + var groups []string + for sg := range instanceSet.ec2config.SecurityGroupIDs { + groups = append(groups, sg) + } + rii := ec2.RunInstancesInput{ ImageId: aws.String(string(imageID)), InstanceType: &instanceType.ProviderType, @@ -176,17 +192,17 @@ func (instanceSet *ec2InstanceSet) Create( KeyName: &keyname, NetworkInterfaces: []*ec2.InstanceNetworkInterfaceSpecification{ - &ec2.InstanceNetworkInterfaceSpecification{ + { AssociatePublicIpAddress: aws.Bool(false), DeleteOnTermination: aws.Bool(true), DeviceIndex: aws.Int64(0), - Groups: aws.StringSlice(instanceSet.ec2config.SecurityGroupIDs), + Groups: aws.StringSlice(groups), SubnetId: &instanceSet.ec2config.SubnetID, }}, DisableApiTermination: aws.Bool(false), InstanceInitiatedShutdownBehavior: aws.String("terminate"), TagSpecifications: []*ec2.TagSpecification{ - &ec2.TagSpecification{ + { ResourceType: aws.String("instance"), Tags: ec2tags, }}, @@ -194,7 +210,7 @@ func (instanceSet *ec2InstanceSet) Create( } if instanceType.AddedScratch > 0 { - rii.BlockDeviceMappings = []*ec2.BlockDeviceMapping{&ec2.BlockDeviceMapping{ + rii.BlockDeviceMappings = []*ec2.BlockDeviceMapping{{ DeviceName: aws.String("/dev/xvdt"), Ebs: &ec2.EbsBlockDevice{ DeleteOnTermination: aws.Bool(true), @@ -214,8 +230,23 @@ func (instanceSet *ec2InstanceSet) Create( rsv, err := instanceSet.client.RunInstances(&rii) - if err != nil { + if request.IsErrorThrottle(err) { + // Back off exponentially until a create call either + // succeeds or returns a non-throttle error. + d, _ := instanceSet.throttleDelay.Load().(time.Duration) + d = d*3/2 + time.Second + if d < throttleDelayMin { + d = throttleDelayMin + } else if d > throttleDelayMax { + d = throttleDelayMax + } + instanceSet.throttleDelay.Store(d) + return nil, rateLimitError{error: err, earliestRetry: time.Now().Add(d)} + } else if err != nil { + instanceSet.throttleDelay.Store(time.Duration(0)) return nil, err + } else { + instanceSet.throttleDelay.Store(time.Duration(0)) } return &ec2Instance{ @@ -224,13 +255,15 @@ func (instanceSet *ec2InstanceSet) Create( }, nil } -func (instanceSet *ec2InstanceSet) Instances(cloud.InstanceTags) (instances []cloud.Instance, err error) { - dii := &ec2.DescribeInstancesInput{ - Filters: []*ec2.Filter{&ec2.Filter{ - Name: aws.String("tag:" + tagKeyInstanceSetID), - Values: []*string{aws.String(string(instanceSet.instanceSetID))}, - }}} - +func (instanceSet *ec2InstanceSet) Instances(tags cloud.InstanceTags) (instances []cloud.Instance, err error) { + var filters []*ec2.Filter + for k, v := range tags { + filters = append(filters, &ec2.Filter{ + Name: aws.String("tag:" + k), + Values: []*string{aws.String(v)}, + }) + } + dii := &ec2.DescribeInstancesInput{Filters: filters} for { dio, err := instanceSet.client.DescribeInstances(dii) if err != nil { @@ -251,7 +284,7 @@ func (instanceSet *ec2InstanceSet) Instances(cloud.InstanceTags) (instances []cl } } -func (az *ec2InstanceSet) Stop() { +func (instanceSet *ec2InstanceSet) Stop() { } type ec2Instance struct { @@ -272,12 +305,7 @@ func (inst *ec2Instance) ProviderType() string { } func (inst *ec2Instance) SetTags(newTags cloud.InstanceTags) error { - ec2tags := []*ec2.Tag{ - &ec2.Tag{ - Key: aws.String(tagKeyInstanceSetID), - Value: aws.String(string(inst.provider.instanceSetID)), - }, - } + var ec2tags []*ec2.Tag for k, v := range newTags { ec2tags = append(ec2tags, &ec2.Tag{ Key: aws.String(k), @@ -313,9 +341,8 @@ func (inst *ec2Instance) Destroy() error { func (inst *ec2Instance) Address() string { if inst.instance.PrivateIpAddress != nil { return *inst.instance.PrivateIpAddress - } else { - return "" } + return "" } func (inst *ec2Instance) RemoteUser() string { @@ -325,3 +352,12 @@ func (inst *ec2Instance) RemoteUser() string { func (inst *ec2Instance) VerifyHostKey(ssh.PublicKey, *ssh.Client) error { return cloud.ErrNotImplemented } + +type rateLimitError struct { + error + earliestRetry time.Time +} + +func (err rateLimitError) EarliestRetry() time.Time { + return err.earliestRetry +}