lib/cloud/ec2/ec2.go

   1 // Copyright (C) The Arvados Authors. All rights reserved.
   2 //
   3 // SPDX-License-Identifier: AGPL-3.0
   4
   5 package ec2
   6
   7 import (
   8         "crypto/md5"
   9         "crypto/rsa"
  10         "crypto/sha1"
  11         "crypto/x509"
  12         "encoding/base64"
  13         "encoding/json"
  14         "fmt"
  15         "math/big"
  16         "strconv"
  17         "strings"
  18         "sync"
  19         "sync/atomic"
  20         "time"
  21
  22         "git.arvados.org/arvados.git/lib/cloud"
  23         "git.arvados.org/arvados.git/sdk/go/arvados"
  24         "github.com/aws/aws-sdk-go/aws"
  25         "github.com/aws/aws-sdk-go/aws/awserr"
  26         "github.com/aws/aws-sdk-go/aws/credentials"
  27         "github.com/aws/aws-sdk-go/aws/credentials/ec2rolecreds"
  28         "github.com/aws/aws-sdk-go/aws/ec2metadata"
  29         "github.com/aws/aws-sdk-go/aws/request"
  30         "github.com/aws/aws-sdk-go/aws/session"
  31         "github.com/aws/aws-sdk-go/service/ec2"
  32         "github.com/prometheus/client_golang/prometheus"
  33         "github.com/sirupsen/logrus"
  34         "golang.org/x/crypto/ssh"
  35 )
  36
  37 // Driver is the ec2 implementation of the cloud.Driver interface.
  38 var Driver = cloud.DriverFunc(newEC2InstanceSet)
  39
  40 const (
  41         throttleDelayMin = time.Second
  42         throttleDelayMax = time.Minute
  43 )
  44
  45 type ec2InstanceSetConfig struct {
  46         AccessKeyID             string
  47         SecretAccessKey         string
  48         Region                  string
  49         SecurityGroupIDs        arvados.StringSet
  50         SubnetID                sliceOrSingleString
  51         AdminUsername           string
  52         EBSVolumeType           string
  53         EBSPrice                float64
  54         IAMInstanceProfile      string
  55         SpotPriceUpdateInterval arvados.Duration
  56 }
  57
  58 type sliceOrSingleString []string
  59
  60 // UnmarshalJSON unmarshals an array of strings, and also accepts ""
  61 // as [], and "foo" as ["foo"].
  62 func (ss *sliceOrSingleString) UnmarshalJSON(data []byte) error {
  63         if len(data) == 0 {
  64                 *ss = nil
  65         } else if data[0] == '[' {
  66                 var slice []string
  67                 err := json.Unmarshal(data, &slice)
  68                 if err != nil {
  69                         return err
  70                 }
  71                 if len(slice) == 0 {
  72                         *ss = nil
  73                 } else {
  74                         *ss = slice
  75                 }
  76         } else {
  77                 var str string
  78                 err := json.Unmarshal(data, &str)
  79                 if err != nil {
  80                         return err
  81                 }
  82                 if str == "" {
  83                         *ss = nil
  84                 } else {
  85                         *ss = []string{str}
  86                 }
  87         }
  88         return nil
  89 }
  90
  91 type ec2Interface interface {
  92         DescribeKeyPairs(input *ec2.DescribeKeyPairsInput) (*ec2.DescribeKeyPairsOutput, error)
  93         ImportKeyPair(input *ec2.ImportKeyPairInput) (*ec2.ImportKeyPairOutput, error)
  94         RunInstances(input *ec2.RunInstancesInput) (*ec2.Reservation, error)
  95         DescribeInstances(input *ec2.DescribeInstancesInput) (*ec2.DescribeInstancesOutput, error)
  96         DescribeInstanceStatusPages(input *ec2.DescribeInstanceStatusInput, fn func(*ec2.DescribeInstanceStatusOutput, bool) bool) error
  97         DescribeSpotPriceHistoryPages(input *ec2.DescribeSpotPriceHistoryInput, fn func(*ec2.DescribeSpotPriceHistoryOutput, bool) bool) error
  98         CreateTags(input *ec2.CreateTagsInput) (*ec2.CreateTagsOutput, error)
  99         TerminateInstances(input *ec2.TerminateInstancesInput) (*ec2.TerminateInstancesOutput, error)
 100 }
 101
 102 type ec2InstanceSet struct {
 103         ec2config              ec2InstanceSetConfig
 104         currentSubnetIDIndex   int32
 105         instanceSetID          cloud.InstanceSetID
 106         logger                 logrus.FieldLogger
 107         client                 ec2Interface
 108         keysMtx                sync.Mutex
 109         keys                   map[string]string
 110         throttleDelayCreate    atomic.Value
 111         throttleDelayInstances atomic.Value
 112
 113         prices        map[priceKey][]cloud.InstancePrice
 114         pricesLock    sync.Mutex
 115         pricesUpdated map[priceKey]time.Time
 116
 117         mInstances      *prometheus.GaugeVec
 118         mInstanceStarts *prometheus.CounterVec
 119 }
 120
 121 func newEC2InstanceSet(config json.RawMessage, instanceSetID cloud.InstanceSetID, _ cloud.SharedResourceTags, logger logrus.FieldLogger, reg *prometheus.Registry) (prv cloud.InstanceSet, err error) {
 122         instanceSet := &ec2InstanceSet{
 123                 instanceSetID: instanceSetID,
 124                 logger:        logger,
 125         }
 126         err = json.Unmarshal(config, &instanceSet.ec2config)
 127         if err != nil {
 128                 return nil, err
 129         }
 130
 131         sess, err := session.NewSession()
 132         if err != nil {
 133                 return nil, err
 134         }
 135         // First try any static credentials, fall back to an IAM instance profile/role
 136         creds := credentials.NewChainCredentials(
 137                 []credentials.Provider{
 138                         &credentials.StaticProvider{Value: credentials.Value{AccessKeyID: instanceSet.ec2config.AccessKeyID, SecretAccessKey: instanceSet.ec2config.SecretAccessKey}},
 139                         &ec2rolecreds.EC2RoleProvider{Client: ec2metadata.New(sess)},
 140                 })
 141
 142         awsConfig := aws.NewConfig().WithCredentials(creds).WithRegion(instanceSet.ec2config.Region)
 143         instanceSet.client = ec2.New(session.Must(session.NewSession(awsConfig)))
 144         instanceSet.keys = make(map[string]string)
 145         if instanceSet.ec2config.EBSVolumeType == "" {
 146                 instanceSet.ec2config.EBSVolumeType = "gp2"
 147         }
 148
 149         // Set up metrics
 150         instanceSet.mInstances = prometheus.NewGaugeVec(prometheus.GaugeOpts{
 151                 Namespace: "arvados",
 152                 Subsystem: "dispatchcloud",
 153                 Name:      "ec2_instances",
 154                 Help:      "Number of instances running",
 155         }, []string{"subnet_id"})
 156         instanceSet.mInstanceStarts = prometheus.NewCounterVec(prometheus.CounterOpts{
 157                 Namespace: "arvados",
 158                 Subsystem: "dispatchcloud",
 159                 Name:      "ec2_instance_starts_total",
 160                 Help:      "Number of attempts to start a new instance",
 161         }, []string{"subnet_id", "success"})
 162         // Initialize all of the series we'll be reporting.  Otherwise
 163         // the {subnet=A, success=0} series doesn't appear in metrics
 164         // at all until there's a failure in subnet A.
 165         for _, subnet := range instanceSet.ec2config.SubnetID {
 166                 instanceSet.mInstanceStarts.WithLabelValues(subnet, "0").Add(0)
 167                 instanceSet.mInstanceStarts.WithLabelValues(subnet, "1").Add(0)
 168         }
 169         if len(instanceSet.ec2config.SubnetID) == 0 {
 170                 instanceSet.mInstanceStarts.WithLabelValues("", "0").Add(0)
 171                 instanceSet.mInstanceStarts.WithLabelValues("", "1").Add(0)
 172         }
 173         if reg != nil {
 174                 reg.MustRegister(instanceSet.mInstances)
 175                 reg.MustRegister(instanceSet.mInstanceStarts)
 176         }
 177
 178         return instanceSet, nil
 179 }
 180
 181 func awsKeyFingerprint(pk ssh.PublicKey) (md5fp string, sha1fp string, err error) {
 182         // AWS key fingerprints don't use the usual key fingerprint
 183         // you get from ssh-keygen or ssh.FingerprintLegacyMD5()
 184         // (you can get that from md5.Sum(pk.Marshal())
 185         //
 186         // AWS uses the md5 or sha1 of the PKIX DER encoding of the
 187         // public key, so calculate those fingerprints here.
 188         var rsaPub struct {
 189                 Name string
 190                 E    *big.Int
 191                 N    *big.Int
 192         }
 193         if err := ssh.Unmarshal(pk.Marshal(), &rsaPub); err != nil {
 194                 return "", "", fmt.Errorf("agent: Unmarshal failed to parse public key: %v", err)
 195         }
 196         rsaPk := rsa.PublicKey{
 197                 E: int(rsaPub.E.Int64()),
 198                 N: rsaPub.N,
 199         }
 200         pkix, _ := x509.MarshalPKIXPublicKey(&rsaPk)
 201         md5pkix := md5.Sum([]byte(pkix))
 202         sha1pkix := sha1.Sum([]byte(pkix))
 203         md5fp = ""
 204         sha1fp = ""
 205         for i := 0; i < len(md5pkix); i++ {
 206                 md5fp += fmt.Sprintf(":%02x", md5pkix[i])
 207         }
 208         for i := 0; i < len(sha1pkix); i++ {
 209                 sha1fp += fmt.Sprintf(":%02x", sha1pkix[i])
 210         }
 211         return md5fp[1:], sha1fp[1:], nil
 212 }
 213
 214 func (instanceSet *ec2InstanceSet) Create(
 215         instanceType arvados.InstanceType,
 216         imageID cloud.ImageID,
 217         newTags cloud.InstanceTags,
 218         initCommand cloud.InitCommand,
 219         publicKey ssh.PublicKey) (cloud.Instance, error) {
 220
 221         ec2tags := []*ec2.Tag{}
 222         for k, v := range newTags {
 223                 ec2tags = append(ec2tags, &ec2.Tag{
 224                         Key:   aws.String(k),
 225                         Value: aws.String(v),
 226                 })
 227         }
 228
 229         var groups []string
 230         for sg := range instanceSet.ec2config.SecurityGroupIDs {
 231                 groups = append(groups, sg)
 232         }
 233
 234         rii := ec2.RunInstancesInput{
 235                 ImageId:      aws.String(string(imageID)),
 236                 InstanceType: &instanceType.ProviderType,
 237                 MaxCount:     aws.Int64(1),
 238                 MinCount:     aws.Int64(1),
 239
 240                 NetworkInterfaces: []*ec2.InstanceNetworkInterfaceSpecification{
 241                         {
 242                                 AssociatePublicIpAddress: aws.Bool(false),
 243                                 DeleteOnTermination:      aws.Bool(true),
 244                                 DeviceIndex:              aws.Int64(0),
 245                                 Groups:                   aws.StringSlice(groups),
 246                         }},
 247                 DisableApiTermination:             aws.Bool(false),
 248                 InstanceInitiatedShutdownBehavior: aws.String("terminate"),
 249                 TagSpecifications: []*ec2.TagSpecification{
 250                         {
 251                                 ResourceType: aws.String("instance"),
 252                                 Tags:         ec2tags,
 253                         }},
 254                 UserData: aws.String(base64.StdEncoding.EncodeToString([]byte("#!/bin/sh\n" + initCommand + "\n"))),
 255         }
 256
 257         if publicKey != nil {
 258                 keyname, err := instanceSet.getKeyName(publicKey)
 259                 if err != nil {
 260                         return nil, err
 261                 }
 262                 rii.KeyName = &keyname
 263         }
 264
 265         if instanceType.AddedScratch > 0 {
 266                 rii.BlockDeviceMappings = []*ec2.BlockDeviceMapping{{
 267                         DeviceName: aws.String("/dev/xvdt"),
 268                         Ebs: &ec2.EbsBlockDevice{
 269                                 DeleteOnTermination: aws.Bool(true),
 270                                 VolumeSize:          aws.Int64((int64(instanceType.AddedScratch) + (1<<30 - 1)) >> 30),
 271                                 VolumeType:          &instanceSet.ec2config.EBSVolumeType,
 272                         }}}
 273         }
 274
 275         if instanceType.Preemptible {
 276                 rii.InstanceMarketOptions = &ec2.InstanceMarketOptionsRequest{
 277                         MarketType: aws.String("spot"),
 278                         SpotOptions: &ec2.SpotMarketOptions{
 279                                 InstanceInterruptionBehavior: aws.String("terminate"),
 280                                 MaxPrice:                     aws.String(fmt.Sprintf("%v", instanceType.Price)),
 281                         }}
 282         }
 283
 284         if instanceSet.ec2config.IAMInstanceProfile != "" {
 285                 rii.IamInstanceProfile = &ec2.IamInstanceProfileSpecification{
 286                         Name: aws.String(instanceSet.ec2config.IAMInstanceProfile),
 287                 }
 288         }
 289
 290         var rsv *ec2.Reservation
 291         var err error
 292         subnets := instanceSet.ec2config.SubnetID
 293         currentSubnetIDIndex := int(atomic.LoadInt32(&instanceSet.currentSubnetIDIndex))
 294         for tryOffset := 0; ; tryOffset++ {
 295                 tryIndex := 0
 296                 trySubnet := ""
 297                 if len(subnets) > 0 {
 298                         tryIndex = (currentSubnetIDIndex + tryOffset) % len(subnets)
 299                         trySubnet = subnets[tryIndex]
 300                         rii.NetworkInterfaces[0].SubnetId = aws.String(trySubnet)
 301                 }
 302                 rsv, err = instanceSet.client.RunInstances(&rii)
 303                 instanceSet.mInstanceStarts.WithLabelValues(trySubnet, boolLabelValue[err == nil]).Add(1)
 304                 if isErrorSubnetSpecific(err) &&
 305                         tryOffset < len(subnets)-1 {
 306                         instanceSet.logger.WithError(err).WithField("SubnetID", subnets[tryIndex]).
 307                                 Warn("RunInstances failed, trying next subnet")
 308                         continue
 309                 }
 310                 // Succeeded, or exhausted all subnets, or got a
 311                 // non-subnet-related error.
 312                 //
 313                 // We intentionally update currentSubnetIDIndex even
 314                 // in the non-retryable-failure case here to avoid a
 315                 // situation where successive calls to Create() keep
 316                 // returning errors for the same subnet (perhaps
 317                 // "subnet full") and never reveal the errors for the
 318                 // other configured subnets (perhaps "subnet ID
 319                 // invalid").
 320                 atomic.StoreInt32(&instanceSet.currentSubnetIDIndex, int32(tryIndex))
 321                 break
 322         }
 323         err = wrapError(err, &instanceSet.throttleDelayCreate)
 324         if err != nil {
 325                 return nil, err
 326         }
 327         return &ec2Instance{
 328                 provider: instanceSet,
 329                 instance: rsv.Instances[0],
 330         }, nil
 331 }
 332
 333 func (instanceSet *ec2InstanceSet) getKeyName(publicKey ssh.PublicKey) (string, error) {
 334         instanceSet.keysMtx.Lock()
 335         defer instanceSet.keysMtx.Unlock()
 336         md5keyFingerprint, sha1keyFingerprint, err := awsKeyFingerprint(publicKey)
 337         if err != nil {
 338                 return "", fmt.Errorf("Could not make key fingerprint: %v", err)
 339         }
 340         if keyname, ok := instanceSet.keys[md5keyFingerprint]; ok {
 341                 return keyname, nil
 342         }
 343         keyout, err := instanceSet.client.DescribeKeyPairs(&ec2.DescribeKeyPairsInput{
 344                 Filters: []*ec2.Filter{{
 345                         Name:   aws.String("fingerprint"),
 346                         Values: []*string{&md5keyFingerprint, &sha1keyFingerprint},
 347                 }},
 348         })
 349         if err != nil {
 350                 return "", fmt.Errorf("Could not search for keypair: %v", err)
 351         }
 352         if len(keyout.KeyPairs) > 0 {
 353                 return *(keyout.KeyPairs[0].KeyName), nil
 354         }
 355         keyname := "arvados-dispatch-keypair-" + md5keyFingerprint
 356         _, err = instanceSet.client.ImportKeyPair(&ec2.ImportKeyPairInput{
 357                 KeyName:           &keyname,
 358                 PublicKeyMaterial: ssh.MarshalAuthorizedKey(publicKey),
 359         })
 360         if err != nil {
 361                 return "", fmt.Errorf("Could not import keypair: %v", err)
 362         }
 363         instanceSet.keys[md5keyFingerprint] = keyname
 364         return keyname, nil
 365 }
 366
 367 func (instanceSet *ec2InstanceSet) Instances(tags cloud.InstanceTags) (instances []cloud.Instance, err error) {
 368         var filters []*ec2.Filter
 369         for k, v := range tags {
 370                 filters = append(filters, &ec2.Filter{
 371                         Name:   aws.String("tag:" + k),
 372                         Values: []*string{aws.String(v)},
 373                 })
 374         }
 375         needAZs := false
 376         dii := &ec2.DescribeInstancesInput{Filters: filters}
 377         for {
 378                 dio, err := instanceSet.client.DescribeInstances(dii)
 379                 err = wrapError(err, &instanceSet.throttleDelayInstances)
 380                 if err != nil {
 381                         return nil, err
 382                 }
 383
 384                 for _, rsv := range dio.Reservations {
 385                         for _, inst := range rsv.Instances {
 386                                 if *inst.State.Name != "shutting-down" && *inst.State.Name != "terminated" {
 387                                         instances = append(instances, &ec2Instance{
 388                                                 provider: instanceSet,
 389                                                 instance: inst,
 390                                         })
 391                                         if aws.StringValue(inst.InstanceLifecycle) == "spot" {
 392                                                 needAZs = true
 393                                         }
 394                                 }
 395                         }
 396                 }
 397                 if dio.NextToken == nil {
 398                         break
 399                 }
 400                 dii.NextToken = dio.NextToken
 401         }
 402         if needAZs && instanceSet.ec2config.SpotPriceUpdateInterval > 0 {
 403                 az := map[string]string{}
 404                 err := instanceSet.client.DescribeInstanceStatusPages(&ec2.DescribeInstanceStatusInput{
 405                         IncludeAllInstances: aws.Bool(true),
 406                 }, func(page *ec2.DescribeInstanceStatusOutput, lastPage bool) bool {
 407                         for _, ent := range page.InstanceStatuses {
 408                                 az[*ent.InstanceId] = *ent.AvailabilityZone
 409                         }
 410                         return true
 411                 })
 412                 if err != nil {
 413                         instanceSet.logger.Warnf("error getting instance statuses: %s", err)
 414                 }
 415                 for _, inst := range instances {
 416                         inst := inst.(*ec2Instance)
 417                         inst.availabilityZone = az[*inst.instance.InstanceId]
 418                 }
 419                 instanceSet.updateSpotPrices(instances)
 420         }
 421
 422         // Count instances in each subnet, and report in metrics.
 423         subnetInstances := map[string]int{"": 0}
 424         for _, subnet := range instanceSet.ec2config.SubnetID {
 425                 subnetInstances[subnet] = 0
 426         }
 427         for _, inst := range instances {
 428                 subnet := inst.(*ec2Instance).instance.SubnetId
 429                 if subnet != nil {
 430                         subnetInstances[*subnet]++
 431                 } else {
 432                         subnetInstances[""]++
 433                 }
 434         }
 435         for subnet, count := range subnetInstances {
 436                 instanceSet.mInstances.WithLabelValues(subnet).Set(float64(count))
 437         }
 438
 439         return instances, err
 440 }
 441
 442 type priceKey struct {
 443         instanceType     string
 444         spot             bool
 445         availabilityZone string
 446 }
 447
 448 // Refresh recent spot instance pricing data for the given instances,
 449 // unless we already have recent pricing data for all relevant types.
 450 func (instanceSet *ec2InstanceSet) updateSpotPrices(instances []cloud.Instance) {
 451         if len(instances) == 0 {
 452                 return
 453         }
 454
 455         instanceSet.pricesLock.Lock()
 456         defer instanceSet.pricesLock.Unlock()
 457         if instanceSet.prices == nil {
 458                 instanceSet.prices = map[priceKey][]cloud.InstancePrice{}
 459                 instanceSet.pricesUpdated = map[priceKey]time.Time{}
 460         }
 461
 462         updateTime := time.Now()
 463         staleTime := updateTime.Add(-instanceSet.ec2config.SpotPriceUpdateInterval.Duration())
 464         needUpdate := false
 465         allTypes := map[string]bool{}
 466
 467         for _, inst := range instances {
 468                 ec2inst := inst.(*ec2Instance).instance
 469                 if aws.StringValue(ec2inst.InstanceLifecycle) == "spot" {
 470                         pk := priceKey{
 471                                 instanceType:     *ec2inst.InstanceType,
 472                                 spot:             true,
 473                                 availabilityZone: inst.(*ec2Instance).availabilityZone,
 474                         }
 475                         if instanceSet.pricesUpdated[pk].Before(staleTime) {
 476                                 needUpdate = true
 477                         }
 478                         allTypes[*ec2inst.InstanceType] = true
 479                 }
 480         }
 481         if !needUpdate {
 482                 return
 483         }
 484         var typeFilterValues []*string
 485         for instanceType := range allTypes {
 486                 typeFilterValues = append(typeFilterValues, aws.String(instanceType))
 487         }
 488         // Get 3x update interval worth of pricing data. (Ideally the
 489         // AWS API would tell us "we have shown you all of the price
 490         // changes up to time T", but it doesn't, so we'll just ask
 491         // for 3 intervals worth of data on each update, de-duplicate
 492         // the data points, and not worry too much about occasionally
 493         // missing some data points when our lookups fail twice in a
 494         // row.
 495         dsphi := &ec2.DescribeSpotPriceHistoryInput{
 496                 StartTime: aws.Time(updateTime.Add(-3 * instanceSet.ec2config.SpotPriceUpdateInterval.Duration())),
 497                 Filters: []*ec2.Filter{
 498                         &ec2.Filter{Name: aws.String("instance-type"), Values: typeFilterValues},
 499                         &ec2.Filter{Name: aws.String("product-description"), Values: []*string{aws.String("Linux/UNIX")}},
 500                 },
 501         }
 502         err := instanceSet.client.DescribeSpotPriceHistoryPages(dsphi, func(page *ec2.DescribeSpotPriceHistoryOutput, lastPage bool) bool {
 503                 for _, ent := range page.SpotPriceHistory {
 504                         if ent.InstanceType == nil || ent.SpotPrice == nil || ent.Timestamp == nil {
 505                                 // bogus record?
 506                                 continue
 507                         }
 508                         price, err := strconv.ParseFloat(*ent.SpotPrice, 64)
 509                         if err != nil {
 510                                 // bogus record?
 511                                 continue
 512                         }
 513                         pk := priceKey{
 514                                 instanceType:     *ent.InstanceType,
 515                                 spot:             true,
 516                                 availabilityZone: *ent.AvailabilityZone,
 517                         }
 518                         instanceSet.prices[pk] = append(instanceSet.prices[pk], cloud.InstancePrice{
 519                                 StartTime: *ent.Timestamp,
 520                                 Price:     price,
 521                         })
 522                         instanceSet.pricesUpdated[pk] = updateTime
 523                 }
 524                 return true
 525         })
 526         if err != nil {
 527                 instanceSet.logger.Warnf("error retrieving spot instance prices: %s", err)
 528         }
 529
 530         expiredTime := updateTime.Add(-64 * instanceSet.ec2config.SpotPriceUpdateInterval.Duration())
 531         for pk, last := range instanceSet.pricesUpdated {
 532                 if last.Before(expiredTime) {
 533                         delete(instanceSet.pricesUpdated, pk)
 534                         delete(instanceSet.prices, pk)
 535                 }
 536         }
 537         for pk, prices := range instanceSet.prices {
 538                 instanceSet.prices[pk] = cloud.NormalizePriceHistory(prices)
 539         }
 540 }
 541
 542 func (instanceSet *ec2InstanceSet) Stop() {
 543 }
 544
 545 type ec2Instance struct {
 546         provider         *ec2InstanceSet
 547         instance         *ec2.Instance
 548         availabilityZone string // sometimes available for spot instances
 549 }
 550
 551 func (inst *ec2Instance) ID() cloud.InstanceID {
 552         return cloud.InstanceID(*inst.instance.InstanceId)
 553 }
 554
 555 func (inst *ec2Instance) String() string {
 556         return *inst.instance.InstanceId
 557 }
 558
 559 func (inst *ec2Instance) ProviderType() string {
 560         return *inst.instance.InstanceType
 561 }
 562
 563 func (inst *ec2Instance) SetTags(newTags cloud.InstanceTags) error {
 564         var ec2tags []*ec2.Tag
 565         for k, v := range newTags {
 566                 ec2tags = append(ec2tags, &ec2.Tag{
 567                         Key:   aws.String(k),
 568                         Value: aws.String(v),
 569                 })
 570         }
 571
 572         _, err := inst.provider.client.CreateTags(&ec2.CreateTagsInput{
 573                 Resources: []*string{inst.instance.InstanceId},
 574                 Tags:      ec2tags,
 575         })
 576
 577         return err
 578 }
 579
 580 func (inst *ec2Instance) Tags() cloud.InstanceTags {
 581         tags := make(map[string]string)
 582
 583         for _, t := range inst.instance.Tags {
 584                 tags[*t.Key] = *t.Value
 585         }
 586
 587         return tags
 588 }
 589
 590 func (inst *ec2Instance) Destroy() error {
 591         _, err := inst.provider.client.TerminateInstances(&ec2.TerminateInstancesInput{
 592                 InstanceIds: []*string{inst.instance.InstanceId},
 593         })
 594         return err
 595 }
 596
 597 func (inst *ec2Instance) Address() string {
 598         if inst.instance.PrivateIpAddress != nil {
 599                 return *inst.instance.PrivateIpAddress
 600         }
 601         return ""
 602 }
 603
 604 func (inst *ec2Instance) RemoteUser() string {
 605         return inst.provider.ec2config.AdminUsername
 606 }
 607
 608 func (inst *ec2Instance) VerifyHostKey(ssh.PublicKey, *ssh.Client) error {
 609         return cloud.ErrNotImplemented
 610 }
 611
 612 // PriceHistory returns the price history for this specific instance.
 613 //
 614 // AWS documentation is elusive about whether the hourly cost of a
 615 // given spot instance changes as the current spot price changes for
 616 // the corresponding instance type and availability zone. Our
 617 // implementation assumes the answer is yes, based on the following
 618 // hints.
 619 //
 620 // https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/spot-requests.html
 621 // says: "After your Spot Instance is running, if the Spot price rises
 622 // above your maximum price, Amazon EC2 interrupts your Spot
 623 // Instance." (This doesn't address what happens when the spot price
 624 // rises *without* exceeding your maximum price.)
 625 //
 626 // https://docs.aws.amazon.com/whitepapers/latest/cost-optimization-leveraging-ec2-spot-instances/how-spot-instances-work.html
 627 // says: "You pay the Spot price that's in effect, billed to the
 628 // nearest second." (But it's not explicitly stated whether "the price
 629 // in effect" changes over time for a given instance.)
 630 //
 631 // The same page also says, in a discussion about the effect of
 632 // specifying a maximum price: "Note that you never pay more than the
 633 // Spot price that is in effect when your Spot Instance is running."
 634 // (The use of the phrase "is running", as opposed to "was launched",
 635 // hints that pricing is dynamic.)
 636 func (inst *ec2Instance) PriceHistory(instType arvados.InstanceType) []cloud.InstancePrice {
 637         inst.provider.pricesLock.Lock()
 638         defer inst.provider.pricesLock.Unlock()
 639         // Note updateSpotPrices currently populates
 640         // inst.provider.prices only for spot instances, so if
 641         // spot==false here, we will return no data.
 642         pk := priceKey{
 643                 instanceType:     *inst.instance.InstanceType,
 644                 spot:             aws.StringValue(inst.instance.InstanceLifecycle) == "spot",
 645                 availabilityZone: inst.availabilityZone,
 646         }
 647         var prices []cloud.InstancePrice
 648         for _, price := range inst.provider.prices[pk] {
 649                 // ceil(added scratch space in GiB)
 650                 gib := (instType.AddedScratch + 1<<30 - 1) >> 30
 651                 monthly := inst.provider.ec2config.EBSPrice * float64(gib)
 652                 hourly := monthly / 30 / 24
 653                 price.Price += hourly
 654                 prices = append(prices, price)
 655         }
 656         return prices
 657 }
 658
 659 type rateLimitError struct {
 660         error
 661         earliestRetry time.Time
 662 }
 663
 664 func (err rateLimitError) EarliestRetry() time.Time {
 665         return err.earliestRetry
 666 }
 667
 668 type capacityError struct {
 669         error
 670         isInstanceTypeSpecific bool
 671 }
 672
 673 func (er *capacityError) IsCapacityError() bool {
 674         return true
 675 }
 676
 677 func (er *capacityError) IsInstanceTypeSpecific() bool {
 678         return er.isInstanceTypeSpecific
 679 }
 680
 681 var isCodeQuota = map[string]bool{
 682         "InstanceLimitExceeded":             true,
 683         "InsufficientAddressCapacity":       true,
 684         "InsufficientFreeAddressesInSubnet": true,
 685         "InsufficientVolumeCapacity":        true,
 686         "MaxSpotInstanceCountExceeded":      true,
 687         "VcpuLimitExceeded":                 true,
 688 }
 689
 690 // isErrorQuota returns whether the error indicates we have reached
 691 // some usage quota/limit -- i.e., immediately retrying with an equal
 692 // or larger instance type will probably not work.
 693 //
 694 // Returns false if error is nil.
 695 func isErrorQuota(err error) bool {
 696         if aerr, ok := err.(awserr.Error); ok && aerr != nil {
 697                 if _, ok := isCodeQuota[aerr.Code()]; ok {
 698                         return true
 699                 }
 700         }
 701         return false
 702 }
 703
 704 // isErrorSubnetSpecific returns true if the problem encountered by
 705 // RunInstances might be avoided by trying a different subnet.
 706 func isErrorSubnetSpecific(err error) bool {
 707         aerr, ok := err.(awserr.Error)
 708         if !ok {
 709                 return false
 710         }
 711         code := aerr.Code()
 712         return strings.Contains(code, "Subnet") ||
 713                 code == "InsufficientInstanceCapacity" ||
 714                 code == "InsufficientVolumeCapacity"
 715 }
 716
 717 type ec2QuotaError struct {
 718         error
 719 }
 720
 721 func (er *ec2QuotaError) IsQuotaError() bool {
 722         return true
 723 }
 724
 725 func wrapError(err error, throttleValue *atomic.Value) error {
 726         if request.IsErrorThrottle(err) {
 727                 // Back off exponentially until an upstream call
 728                 // either succeeds or returns a non-throttle error.
 729                 d, _ := throttleValue.Load().(time.Duration)
 730                 d = d*3/2 + time.Second
 731                 if d < throttleDelayMin {
 732                         d = throttleDelayMin
 733                 } else if d > throttleDelayMax {
 734                         d = throttleDelayMax
 735                 }
 736                 throttleValue.Store(d)
 737                 return rateLimitError{error: err, earliestRetry: time.Now().Add(d)}
 738         } else if isErrorQuota(err) {
 739                 return &ec2QuotaError{err}
 740         } else if aerr, ok := err.(awserr.Error); ok && aerr != nil && aerr.Code() == "InsufficientInstanceCapacity" {
 741                 return &capacityError{err, true}
 742         } else if err != nil {
 743                 throttleValue.Store(time.Duration(0))
 744                 return err
 745         }
 746         throttleValue.Store(time.Duration(0))
 747         return nil
 748 }
 749
 750 var boolLabelValue = map[bool]string{false: "0", true: "1"}