20755: Allow cloud drivers to register their own metrics.
[arvados.git] / lib / cloud / ec2 / ec2.go
1 // Copyright (C) The Arvados Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: AGPL-3.0
4
5 package ec2
6
7 import (
8         "crypto/md5"
9         "crypto/rsa"
10         "crypto/sha1"
11         "crypto/x509"
12         "encoding/base64"
13         "encoding/json"
14         "fmt"
15         "math/big"
16         "strconv"
17         "strings"
18         "sync"
19         "sync/atomic"
20         "time"
21
22         "git.arvados.org/arvados.git/lib/cloud"
23         "git.arvados.org/arvados.git/sdk/go/arvados"
24         "github.com/aws/aws-sdk-go/aws"
25         "github.com/aws/aws-sdk-go/aws/awserr"
26         "github.com/aws/aws-sdk-go/aws/credentials"
27         "github.com/aws/aws-sdk-go/aws/credentials/ec2rolecreds"
28         "github.com/aws/aws-sdk-go/aws/ec2metadata"
29         "github.com/aws/aws-sdk-go/aws/request"
30         "github.com/aws/aws-sdk-go/aws/session"
31         "github.com/aws/aws-sdk-go/service/ec2"
32         "github.com/prometheus/client_golang/prometheus"
33         "github.com/sirupsen/logrus"
34         "golang.org/x/crypto/ssh"
35 )
36
37 // Driver is the ec2 implementation of the cloud.Driver interface.
// It is built by passing newEC2InstanceSet to cloud.DriverFunc.
38 var Driver = cloud.DriverFunc(newEC2InstanceSet)
39
// Bounds applied by wrapError to the backoff delay it computes after
// a throttling error.
40 const (
41         throttleDelayMin = time.Second // smallest backoff delay
42         throttleDelayMax = time.Minute // largest backoff delay
43 )
44
// ec2InstanceSetConfig is the driver configuration. newEC2InstanceSet
// fills it in by JSON-decoding the config blob it is given.
45 type ec2InstanceSetConfig struct {
46         AccessKeyID             string // static credential, tried before the EC2 role provider
47         SecretAccessKey         string // static credential, tried before the EC2 role provider
48         Region                  string // region set on the AWS client config
49         SecurityGroupIDs        arvados.StringSet // keys are attached as groups on the new instance's network interface
50         SubnetID                sliceOrSingleString // zero or more subnets; Create rotates through them on subnet-specific errors
51         AdminUsername           string // returned by ec2Instance.RemoteUser
52         EBSVolumeType           string // volume type for added scratch space; defaults to "gp2" when empty
53         EBSPrice                float64 // multiplied by scratch GiB and treated as a monthly cost in PriceHistory
54         IAMInstanceProfile      string // if non-empty, used as the instance profile name in RunInstances
55         SpotPriceUpdateInterval arvados.Duration // staleness threshold for spot price data; spot price lookups are skipped unless > 0
56 }
57
// sliceOrSingleString is a list of strings that may be written in
// JSON either as an array of strings or as one bare string.
type sliceOrSingleString []string

// UnmarshalJSON decodes either a JSON array of strings or a single
// JSON string, which becomes a one-element list. Empty input, an
// empty array, and an empty string all yield a nil slice. On a
// decoding error the receiver is left untouched.
func (ss *sliceOrSingleString) UnmarshalJSON(data []byte) error {
	if len(data) == 0 {
		*ss = nil
		return nil
	}

	var decoded []string
	if data[0] == '[' {
		if err := json.Unmarshal(data, &decoded); err != nil {
			return err
		}
	} else {
		var single string
		if err := json.Unmarshal(data, &single); err != nil {
			return err
		}
		if single != "" {
			decoded = []string{single}
		}
	}

	// Normalize "no entries" to nil regardless of how it was spelled.
	if len(decoded) == 0 {
		decoded = nil
	}
	*ss = decoded
	return nil
}
90
// ec2Interface lists the EC2 client methods this driver calls.
// newEC2InstanceSet stores the client returned by ec2.New in a field
// of this type.
91 type ec2Interface interface {
92         DescribeKeyPairs(input *ec2.DescribeKeyPairsInput) (*ec2.DescribeKeyPairsOutput, error)
93         ImportKeyPair(input *ec2.ImportKeyPairInput) (*ec2.ImportKeyPairOutput, error)
94         RunInstances(input *ec2.RunInstancesInput) (*ec2.Reservation, error)
95         DescribeInstances(input *ec2.DescribeInstancesInput) (*ec2.DescribeInstancesOutput, error)
96         DescribeInstanceStatusPages(input *ec2.DescribeInstanceStatusInput, fn func(*ec2.DescribeInstanceStatusOutput, bool) bool) error
97         DescribeSpotPriceHistoryPages(input *ec2.DescribeSpotPriceHistoryInput, fn func(*ec2.DescribeSpotPriceHistoryOutput, bool) bool) error
98         CreateTags(input *ec2.CreateTagsInput) (*ec2.CreateTagsOutput, error)
99         TerminateInstances(input *ec2.TerminateInstancesInput) (*ec2.TerminateInstancesOutput, error)
100 }
101
// ec2InstanceSet is the value newEC2InstanceSet returns as a
// cloud.InstanceSet. It holds the parsed configuration, the EC2
// client, and caches for key pair names and spot prices.
102 type ec2InstanceSet struct {
103         ec2config              ec2InstanceSetConfig // parsed driver configuration
104         currentSubnetIDIndex   int32 // index into ec2config.SubnetID tried first by Create; accessed via sync/atomic
105         instanceSetID          cloud.InstanceSetID // as passed to newEC2InstanceSet
106         logger                 logrus.FieldLogger
107         client                 ec2Interface // EC2 API client
108         keysMtx                sync.Mutex // held for the duration of getKeyName; guards keys
109         keys                   map[string]string // MD5 key fingerprint -> EC2 key pair name
110         throttleDelayCreate    atomic.Value // time.Duration backoff state kept by wrapError for RunInstances
111         throttleDelayInstances atomic.Value // time.Duration backoff state kept by wrapError for DescribeInstances
112
113         prices        map[priceKey][]cloud.InstancePrice // price history per key; lazily created by updateSpotPrices
114         pricesLock    sync.Mutex // guards prices and pricesUpdated
115         pricesUpdated map[priceKey]time.Time // time each key last received data in updateSpotPrices
116 }
117
118 func newEC2InstanceSet(config json.RawMessage, instanceSetID cloud.InstanceSetID, _ cloud.SharedResourceTags, logger logrus.FieldLogger, reg *prometheus.Registry) (prv cloud.InstanceSet, err error) {
119         instanceSet := &ec2InstanceSet{
120                 instanceSetID: instanceSetID,
121                 logger:        logger,
122         }
123         err = json.Unmarshal(config, &instanceSet.ec2config)
124         if err != nil {
125                 return nil, err
126         }
127
128         sess, err := session.NewSession()
129         if err != nil {
130                 return nil, err
131         }
132         // First try any static credentials, fall back to an IAM instance profile/role
133         creds := credentials.NewChainCredentials(
134                 []credentials.Provider{
135                         &credentials.StaticProvider{Value: credentials.Value{AccessKeyID: instanceSet.ec2config.AccessKeyID, SecretAccessKey: instanceSet.ec2config.SecretAccessKey}},
136                         &ec2rolecreds.EC2RoleProvider{Client: ec2metadata.New(sess)},
137                 })
138
139         awsConfig := aws.NewConfig().WithCredentials(creds).WithRegion(instanceSet.ec2config.Region)
140         instanceSet.client = ec2.New(session.Must(session.NewSession(awsConfig)))
141         instanceSet.keys = make(map[string]string)
142         if instanceSet.ec2config.EBSVolumeType == "" {
143                 instanceSet.ec2config.EBSVolumeType = "gp2"
144         }
145         return instanceSet, nil
146 }
147
148 func awsKeyFingerprint(pk ssh.PublicKey) (md5fp string, sha1fp string, err error) {
149         // AWS key fingerprints don't use the usual key fingerprint
150         // you get from ssh-keygen or ssh.FingerprintLegacyMD5()
151         // (you can get that from md5.Sum(pk.Marshal())
152         //
153         // AWS uses the md5 or sha1 of the PKIX DER encoding of the
154         // public key, so calculate those fingerprints here.
155         var rsaPub struct {
156                 Name string
157                 E    *big.Int
158                 N    *big.Int
159         }
160         if err := ssh.Unmarshal(pk.Marshal(), &rsaPub); err != nil {
161                 return "", "", fmt.Errorf("agent: Unmarshal failed to parse public key: %v", err)
162         }
163         rsaPk := rsa.PublicKey{
164                 E: int(rsaPub.E.Int64()),
165                 N: rsaPub.N,
166         }
167         pkix, _ := x509.MarshalPKIXPublicKey(&rsaPk)
168         md5pkix := md5.Sum([]byte(pkix))
169         sha1pkix := sha1.Sum([]byte(pkix))
170         md5fp = ""
171         sha1fp = ""
172         for i := 0; i < len(md5pkix); i++ {
173                 md5fp += fmt.Sprintf(":%02x", md5pkix[i])
174         }
175         for i := 0; i < len(sha1pkix); i++ {
176                 sha1fp += fmt.Sprintf(":%02x", sha1pkix[i])
177         }
178         return md5fp[1:], sha1fp[1:], nil
179 }
180
181 func (instanceSet *ec2InstanceSet) Create(
182         instanceType arvados.InstanceType,
183         imageID cloud.ImageID,
184         newTags cloud.InstanceTags,
185         initCommand cloud.InitCommand,
186         publicKey ssh.PublicKey) (cloud.Instance, error) {
187
188         ec2tags := []*ec2.Tag{}
189         for k, v := range newTags {
190                 ec2tags = append(ec2tags, &ec2.Tag{
191                         Key:   aws.String(k),
192                         Value: aws.String(v),
193                 })
194         }
195
196         var groups []string
197         for sg := range instanceSet.ec2config.SecurityGroupIDs {
198                 groups = append(groups, sg)
199         }
200
201         rii := ec2.RunInstancesInput{
202                 ImageId:      aws.String(string(imageID)),
203                 InstanceType: &instanceType.ProviderType,
204                 MaxCount:     aws.Int64(1),
205                 MinCount:     aws.Int64(1),
206
207                 NetworkInterfaces: []*ec2.InstanceNetworkInterfaceSpecification{
208                         {
209                                 AssociatePublicIpAddress: aws.Bool(false),
210                                 DeleteOnTermination:      aws.Bool(true),
211                                 DeviceIndex:              aws.Int64(0),
212                                 Groups:                   aws.StringSlice(groups),
213                         }},
214                 DisableApiTermination:             aws.Bool(false),
215                 InstanceInitiatedShutdownBehavior: aws.String("terminate"),
216                 TagSpecifications: []*ec2.TagSpecification{
217                         {
218                                 ResourceType: aws.String("instance"),
219                                 Tags:         ec2tags,
220                         }},
221                 UserData: aws.String(base64.StdEncoding.EncodeToString([]byte("#!/bin/sh\n" + initCommand + "\n"))),
222         }
223
224         if publicKey != nil {
225                 keyname, err := instanceSet.getKeyName(publicKey)
226                 if err != nil {
227                         return nil, err
228                 }
229                 rii.KeyName = &keyname
230         }
231
232         if instanceType.AddedScratch > 0 {
233                 rii.BlockDeviceMappings = []*ec2.BlockDeviceMapping{{
234                         DeviceName: aws.String("/dev/xvdt"),
235                         Ebs: &ec2.EbsBlockDevice{
236                                 DeleteOnTermination: aws.Bool(true),
237                                 VolumeSize:          aws.Int64((int64(instanceType.AddedScratch) + (1<<30 - 1)) >> 30),
238                                 VolumeType:          &instanceSet.ec2config.EBSVolumeType,
239                         }}}
240         }
241
242         if instanceType.Preemptible {
243                 rii.InstanceMarketOptions = &ec2.InstanceMarketOptionsRequest{
244                         MarketType: aws.String("spot"),
245                         SpotOptions: &ec2.SpotMarketOptions{
246                                 InstanceInterruptionBehavior: aws.String("terminate"),
247                                 MaxPrice:                     aws.String(fmt.Sprintf("%v", instanceType.Price)),
248                         }}
249         }
250
251         if instanceSet.ec2config.IAMInstanceProfile != "" {
252                 rii.IamInstanceProfile = &ec2.IamInstanceProfileSpecification{
253                         Name: aws.String(instanceSet.ec2config.IAMInstanceProfile),
254                 }
255         }
256
257         var rsv *ec2.Reservation
258         var err error
259         subnets := instanceSet.ec2config.SubnetID
260         currentSubnetIDIndex := int(atomic.LoadInt32(&instanceSet.currentSubnetIDIndex))
261         for tryOffset := 0; ; tryOffset++ {
262                 tryIndex := 0
263                 if len(subnets) > 0 {
264                         tryIndex = (currentSubnetIDIndex + tryOffset) % len(subnets)
265                         rii.NetworkInterfaces[0].SubnetId = aws.String(subnets[tryIndex])
266                 }
267                 rsv, err = instanceSet.client.RunInstances(&rii)
268                 if isErrorSubnetSpecific(err) &&
269                         tryOffset < len(subnets)-1 {
270                         instanceSet.logger.WithError(err).WithField("SubnetID", subnets[tryIndex]).
271                                 Warn("RunInstances failed, trying next subnet")
272                         continue
273                 }
274                 // Succeeded, or exhausted all subnets, or got a
275                 // non-subnet-related error.
276                 //
277                 // We intentionally update currentSubnetIDIndex even
278                 // in the non-retryable-failure case here to avoid a
279                 // situation where successive calls to Create() keep
280                 // returning errors for the same subnet (perhaps
281                 // "subnet full") and never reveal the errors for the
282                 // other configured subnets (perhaps "subnet ID
283                 // invalid").
284                 atomic.StoreInt32(&instanceSet.currentSubnetIDIndex, int32(tryIndex))
285                 break
286         }
287         err = wrapError(err, &instanceSet.throttleDelayCreate)
288         if err != nil {
289                 return nil, err
290         }
291         return &ec2Instance{
292                 provider: instanceSet,
293                 instance: rsv.Instances[0],
294         }, nil
295 }
296
297 func (instanceSet *ec2InstanceSet) getKeyName(publicKey ssh.PublicKey) (string, error) {
298         instanceSet.keysMtx.Lock()
299         defer instanceSet.keysMtx.Unlock()
300         md5keyFingerprint, sha1keyFingerprint, err := awsKeyFingerprint(publicKey)
301         if err != nil {
302                 return "", fmt.Errorf("Could not make key fingerprint: %v", err)
303         }
304         if keyname, ok := instanceSet.keys[md5keyFingerprint]; ok {
305                 return keyname, nil
306         }
307         keyout, err := instanceSet.client.DescribeKeyPairs(&ec2.DescribeKeyPairsInput{
308                 Filters: []*ec2.Filter{{
309                         Name:   aws.String("fingerprint"),
310                         Values: []*string{&md5keyFingerprint, &sha1keyFingerprint},
311                 }},
312         })
313         if err != nil {
314                 return "", fmt.Errorf("Could not search for keypair: %v", err)
315         }
316         if len(keyout.KeyPairs) > 0 {
317                 return *(keyout.KeyPairs[0].KeyName), nil
318         }
319         keyname := "arvados-dispatch-keypair-" + md5keyFingerprint
320         _, err = instanceSet.client.ImportKeyPair(&ec2.ImportKeyPairInput{
321                 KeyName:           &keyname,
322                 PublicKeyMaterial: ssh.MarshalAuthorizedKey(publicKey),
323         })
324         if err != nil {
325                 return "", fmt.Errorf("Could not import keypair: %v", err)
326         }
327         instanceSet.keys[md5keyFingerprint] = keyname
328         return keyname, nil
329 }
330
331 func (instanceSet *ec2InstanceSet) Instances(tags cloud.InstanceTags) (instances []cloud.Instance, err error) {
332         var filters []*ec2.Filter
333         for k, v := range tags {
334                 filters = append(filters, &ec2.Filter{
335                         Name:   aws.String("tag:" + k),
336                         Values: []*string{aws.String(v)},
337                 })
338         }
339         needAZs := false
340         dii := &ec2.DescribeInstancesInput{Filters: filters}
341         for {
342                 dio, err := instanceSet.client.DescribeInstances(dii)
343                 err = wrapError(err, &instanceSet.throttleDelayInstances)
344                 if err != nil {
345                         return nil, err
346                 }
347
348                 for _, rsv := range dio.Reservations {
349                         for _, inst := range rsv.Instances {
350                                 if *inst.State.Name != "shutting-down" && *inst.State.Name != "terminated" {
351                                         instances = append(instances, &ec2Instance{
352                                                 provider: instanceSet,
353                                                 instance: inst,
354                                         })
355                                         if aws.StringValue(inst.InstanceLifecycle) == "spot" {
356                                                 needAZs = true
357                                         }
358                                 }
359                         }
360                 }
361                 if dio.NextToken == nil {
362                         break
363                 }
364                 dii.NextToken = dio.NextToken
365         }
366         if needAZs && instanceSet.ec2config.SpotPriceUpdateInterval > 0 {
367                 az := map[string]string{}
368                 err := instanceSet.client.DescribeInstanceStatusPages(&ec2.DescribeInstanceStatusInput{
369                         IncludeAllInstances: aws.Bool(true),
370                 }, func(page *ec2.DescribeInstanceStatusOutput, lastPage bool) bool {
371                         for _, ent := range page.InstanceStatuses {
372                                 az[*ent.InstanceId] = *ent.AvailabilityZone
373                         }
374                         return true
375                 })
376                 if err != nil {
377                         instanceSet.logger.Warnf("error getting instance statuses: %s", err)
378                 }
379                 for _, inst := range instances {
380                         inst := inst.(*ec2Instance)
381                         inst.availabilityZone = az[*inst.instance.InstanceId]
382                 }
383                 instanceSet.updateSpotPrices(instances)
384         }
385         return instances, err
386 }
387
// priceKey is the map key for ec2InstanceSet.prices and
// ec2InstanceSet.pricesUpdated.
388 type priceKey struct {
389         instanceType     string // EC2 instance type name
390         spot             bool // true for spot pricing (the only kind updateSpotPrices records)
391         availabilityZone string
392 }
393
394 // Refresh recent spot instance pricing data for the given instances,
395 // unless we already have recent pricing data for all relevant types.
396 func (instanceSet *ec2InstanceSet) updateSpotPrices(instances []cloud.Instance) {
397         if len(instances) == 0 {
398                 return
399         }
400
401         instanceSet.pricesLock.Lock()
402         defer instanceSet.pricesLock.Unlock()
403         if instanceSet.prices == nil {
404                 instanceSet.prices = map[priceKey][]cloud.InstancePrice{}
405                 instanceSet.pricesUpdated = map[priceKey]time.Time{}
406         }
407
408         updateTime := time.Now()
409         staleTime := updateTime.Add(-instanceSet.ec2config.SpotPriceUpdateInterval.Duration())
410         needUpdate := false
411         allTypes := map[string]bool{}
412
413         for _, inst := range instances {
414                 ec2inst := inst.(*ec2Instance).instance
415                 if aws.StringValue(ec2inst.InstanceLifecycle) == "spot" {
416                         pk := priceKey{
417                                 instanceType:     *ec2inst.InstanceType,
418                                 spot:             true,
419                                 availabilityZone: inst.(*ec2Instance).availabilityZone,
420                         }
421                         if instanceSet.pricesUpdated[pk].Before(staleTime) {
422                                 needUpdate = true
423                         }
424                         allTypes[*ec2inst.InstanceType] = true
425                 }
426         }
427         if !needUpdate {
428                 return
429         }
430         var typeFilterValues []*string
431         for instanceType := range allTypes {
432                 typeFilterValues = append(typeFilterValues, aws.String(instanceType))
433         }
434         // Get 3x update interval worth of pricing data. (Ideally the
435         // AWS API would tell us "we have shown you all of the price
436         // changes up to time T", but it doesn't, so we'll just ask
437         // for 3 intervals worth of data on each update, de-duplicate
438         // the data points, and not worry too much about occasionally
439         // missing some data points when our lookups fail twice in a
440         // row.
441         dsphi := &ec2.DescribeSpotPriceHistoryInput{
442                 StartTime: aws.Time(updateTime.Add(-3 * instanceSet.ec2config.SpotPriceUpdateInterval.Duration())),
443                 Filters: []*ec2.Filter{
444                         &ec2.Filter{Name: aws.String("instance-type"), Values: typeFilterValues},
445                         &ec2.Filter{Name: aws.String("product-description"), Values: []*string{aws.String("Linux/UNIX")}},
446                 },
447         }
448         err := instanceSet.client.DescribeSpotPriceHistoryPages(dsphi, func(page *ec2.DescribeSpotPriceHistoryOutput, lastPage bool) bool {
449                 for _, ent := range page.SpotPriceHistory {
450                         if ent.InstanceType == nil || ent.SpotPrice == nil || ent.Timestamp == nil {
451                                 // bogus record?
452                                 continue
453                         }
454                         price, err := strconv.ParseFloat(*ent.SpotPrice, 64)
455                         if err != nil {
456                                 // bogus record?
457                                 continue
458                         }
459                         pk := priceKey{
460                                 instanceType:     *ent.InstanceType,
461                                 spot:             true,
462                                 availabilityZone: *ent.AvailabilityZone,
463                         }
464                         instanceSet.prices[pk] = append(instanceSet.prices[pk], cloud.InstancePrice{
465                                 StartTime: *ent.Timestamp,
466                                 Price:     price,
467                         })
468                         instanceSet.pricesUpdated[pk] = updateTime
469                 }
470                 return true
471         })
472         if err != nil {
473                 instanceSet.logger.Warnf("error retrieving spot instance prices: %s", err)
474         }
475
476         expiredTime := updateTime.Add(-64 * instanceSet.ec2config.SpotPriceUpdateInterval.Duration())
477         for pk, last := range instanceSet.pricesUpdated {
478                 if last.Before(expiredTime) {
479                         delete(instanceSet.pricesUpdated, pk)
480                         delete(instanceSet.prices, pk)
481                 }
482         }
483         for pk, prices := range instanceSet.prices {
484                 instanceSet.prices[pk] = cloud.NormalizePriceHistory(prices)
485         }
486 }
487
488 func (instanceSet *ec2InstanceSet) Stop() {
489 }
490
// ec2Instance is the cloud.Instance value returned by Create and
// Instances.
491 type ec2Instance struct {
492         provider         *ec2InstanceSet // instance set that created or listed this instance
493         instance         *ec2.Instance // instance record as returned by the EC2 API
494         availabilityZone string // sometimes available for spot instances
495 }
496
497 func (inst *ec2Instance) ID() cloud.InstanceID {
498         return cloud.InstanceID(*inst.instance.InstanceId)
499 }
500
501 func (inst *ec2Instance) String() string {
502         return *inst.instance.InstanceId
503 }
504
505 func (inst *ec2Instance) ProviderType() string {
506         return *inst.instance.InstanceType
507 }
508
509 func (inst *ec2Instance) SetTags(newTags cloud.InstanceTags) error {
510         var ec2tags []*ec2.Tag
511         for k, v := range newTags {
512                 ec2tags = append(ec2tags, &ec2.Tag{
513                         Key:   aws.String(k),
514                         Value: aws.String(v),
515                 })
516         }
517
518         _, err := inst.provider.client.CreateTags(&ec2.CreateTagsInput{
519                 Resources: []*string{inst.instance.InstanceId},
520                 Tags:      ec2tags,
521         })
522
523         return err
524 }
525
526 func (inst *ec2Instance) Tags() cloud.InstanceTags {
527         tags := make(map[string]string)
528
529         for _, t := range inst.instance.Tags {
530                 tags[*t.Key] = *t.Value
531         }
532
533         return tags
534 }
535
536 func (inst *ec2Instance) Destroy() error {
537         _, err := inst.provider.client.TerminateInstances(&ec2.TerminateInstancesInput{
538                 InstanceIds: []*string{inst.instance.InstanceId},
539         })
540         return err
541 }
542
543 func (inst *ec2Instance) Address() string {
544         if inst.instance.PrivateIpAddress != nil {
545                 return *inst.instance.PrivateIpAddress
546         }
547         return ""
548 }
549
550 func (inst *ec2Instance) RemoteUser() string {
551         return inst.provider.ec2config.AdminUsername
552 }
553
554 func (inst *ec2Instance) VerifyHostKey(ssh.PublicKey, *ssh.Client) error {
555         return cloud.ErrNotImplemented
556 }
557
558 // PriceHistory returns the price history for this specific instance.
559 //
560 // AWS documentation is elusive about whether the hourly cost of a
561 // given spot instance changes as the current spot price changes for
562 // the corresponding instance type and availability zone. Our
563 // implementation assumes the answer is yes, based on the following
564 // hints.
565 //
566 // https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/spot-requests.html
567 // says: "After your Spot Instance is running, if the Spot price rises
568 // above your maximum price, Amazon EC2 interrupts your Spot
569 // Instance." (This doesn't address what happens when the spot price
570 // rises *without* exceeding your maximum price.)
571 //
572 // https://docs.aws.amazon.com/whitepapers/latest/cost-optimization-leveraging-ec2-spot-instances/how-spot-instances-work.html
573 // says: "You pay the Spot price that's in effect, billed to the
574 // nearest second." (But it's not explicitly stated whether "the price
575 // in effect" changes over time for a given instance.)
576 //
577 // The same page also says, in a discussion about the effect of
578 // specifying a maximum price: "Note that you never pay more than the
579 // Spot price that is in effect when your Spot Instance is running."
580 // (The use of the phrase "is running", as opposed to "was launched",
581 // hints that pricing is dynamic.)
582 func (inst *ec2Instance) PriceHistory(instType arvados.InstanceType) []cloud.InstancePrice {
583         inst.provider.pricesLock.Lock()
584         defer inst.provider.pricesLock.Unlock()
585         // Note updateSpotPrices currently populates
586         // inst.provider.prices only for spot instances, so if
587         // spot==false here, we will return no data.
588         pk := priceKey{
589                 instanceType:     *inst.instance.InstanceType,
590                 spot:             aws.StringValue(inst.instance.InstanceLifecycle) == "spot",
591                 availabilityZone: inst.availabilityZone,
592         }
593         var prices []cloud.InstancePrice
594         for _, price := range inst.provider.prices[pk] {
595                 // ceil(added scratch space in GiB)
596                 gib := (instType.AddedScratch + 1<<30 - 1) >> 30
597                 monthly := inst.provider.ec2config.EBSPrice * float64(gib)
598                 hourly := monthly / 30 / 24
599                 price.Price += hourly
600                 prices = append(prices, price)
601         }
602         return prices
603 }
604
// rateLimitError wraps an error together with the earliest time at
// which the failed operation should be retried. wrapError returns it
// for throttling errors.
type rateLimitError struct {
	error
	earliestRetry time.Time
}

// EarliestRetry returns the time before which the caller should not
// retry.
func (rle rateLimitError) EarliestRetry() time.Time {
	retryAt := rle.earliestRetry
	return retryAt
}
613
// isCodeCapacity is the set of AWS error codes that isErrorCapacity
// recognizes. Only key presence is consulted there.
614 var isCodeCapacity = map[string]bool{
615         "InstanceLimitExceeded":             true,
616         "InsufficientAddressCapacity":       true,
617         "InsufficientFreeAddressesInSubnet": true,
618         "InsufficientInstanceCapacity":      true,
619         "InsufficientVolumeCapacity":        true,
620         "MaxSpotInstanceCountExceeded":      true,
621         "VcpuLimitExceeded":                 true,
622 }
623
624 // isErrorCapacity returns whether the error is to be throttled based on its code.
625 // Returns false if error is nil.
626 func isErrorCapacity(err error) bool {
627         if aerr, ok := err.(awserr.Error); ok && aerr != nil {
628                 if _, ok := isCodeCapacity[aerr.Code()]; ok {
629                         return true
630                 }
631         }
632         return false
633 }
634
635 // isErrorSubnetSpecific returns true if the problem encountered by
636 // RunInstances might be avoided by trying a different subnet.
637 func isErrorSubnetSpecific(err error) bool {
638         aerr, ok := err.(awserr.Error)
639         if !ok {
640                 return false
641         }
642         code := aerr.Code()
643         return strings.Contains(code, "Subnet") ||
644                 code == "InsufficientInstanceCapacity" ||
645                 code == "InsufficientVolumeCapacity"
646 }
647
// ec2QuotaError wraps an error that wrapError has classified as a
// capacity error.
type ec2QuotaError struct {
	error
}

// IsQuotaError always returns true.
func (qe *ec2QuotaError) IsQuotaError() bool {
	return true
}
655
656 func wrapError(err error, throttleValue *atomic.Value) error {
657         if request.IsErrorThrottle(err) {
658                 // Back off exponentially until an upstream call
659                 // either succeeds or returns a non-throttle error.
660                 d, _ := throttleValue.Load().(time.Duration)
661                 d = d*3/2 + time.Second
662                 if d < throttleDelayMin {
663                         d = throttleDelayMin
664                 } else if d > throttleDelayMax {
665                         d = throttleDelayMax
666                 }
667                 throttleValue.Store(d)
668                 return rateLimitError{error: err, earliestRetry: time.Now().Add(d)}
669         } else if isErrorCapacity(err) {
670                 return &ec2QuotaError{err}
671         } else if err != nil {
672                 throttleValue.Store(time.Duration(0))
673                 return err
674         }
675         throttleValue.Store(time.Duration(0))
676         return nil
677 }