X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/c29c2feb691b5abbffaf8f177222d2973ad1b19f..9d1ff3299a57d0e820bf7975f0f3e6080b22f0a5:/lib/cloud/ec2/ec2.go diff --git a/lib/cloud/ec2/ec2.go b/lib/cloud/ec2/ec2.go index 816df48d90..6251f18df0 100644 --- a/lib/cloud/ec2/ec2.go +++ b/lib/cloud/ec2/ec2.go @@ -13,6 +13,7 @@ import ( "encoding/json" "fmt" "math/big" + "regexp" "strconv" "strings" "sync" @@ -251,6 +252,12 @@ func (instanceSet *ec2InstanceSet) Create( ResourceType: aws.String("instance"), Tags: ec2tags, }}, + MetadataOptions: &ec2.InstanceMetadataOptionsRequest{ + // Require IMDSv2, as described at + // https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/configuring-IMDS-new-instances.html + HttpEndpoint: aws.String(ec2.InstanceMetadataEndpointStateEnabled), + HttpTokens: aws.String(ec2.HttpTokensStateRequired), + }, UserData: aws.String(base64.StdEncoding.EncodeToString([]byte("#!/bin/sh\n" + initCommand + "\n"))), } @@ -288,7 +295,7 @@ func (instanceSet *ec2InstanceSet) Create( } var rsv *ec2.Reservation - var err error + var errToReturn error subnets := instanceSet.ec2config.SubnetID currentSubnetIDIndex := int(atomic.LoadInt32(&instanceSet.currentSubnetIDIndex)) for tryOffset := 0; ; tryOffset++ { @@ -299,8 +306,15 @@ func (instanceSet *ec2InstanceSet) Create( trySubnet = subnets[tryIndex] rii.NetworkInterfaces[0].SubnetId = aws.String(trySubnet) } + var err error rsv, err = instanceSet.client.RunInstances(&rii) instanceSet.mInstanceStarts.WithLabelValues(trySubnet, boolLabelValue[err == nil]).Add(1) + if !isErrorCapacity(errToReturn) || isErrorCapacity(err) { + // We want to return the last capacity error, + // if any; otherwise the last non-capacity + // error. + errToReturn = err + } if isErrorSubnetSpecific(err) && tryOffset < len(subnets)-1 { instanceSet.logger.WithError(err).WithField("SubnetID", subnets[tryIndex]). @@ -320,9 +334,8 @@ func (instanceSet *ec2InstanceSet) Create( atomic.StoreInt32(&instanceSet.currentSubnetIDIndex, int32(tryIndex)) break } - err = wrapError(err, &instanceSet.throttleDelayCreate) - if err != nil { - return nil, err + if rsv == nil || len(rsv.Instances) == 0 { + return nil, wrapError(errToReturn, &instanceSet.throttleDelayCreate) } return &ec2Instance{ provider: instanceSet, @@ -701,6 +714,8 @@ func isErrorQuota(err error) bool { return false } +var reSubnetSpecificInvalidParameterMessage = regexp.MustCompile(`(?ms).*( subnet |sufficient free [Ii]pv[46] addresses).*`) + // isErrorSubnetSpecific returns true if the problem encountered by // RunInstances might be avoided by trying a different subnet. func isErrorSubnetSpecific(err error) bool { @@ -711,7 +726,27 @@ func isErrorSubnetSpecific(err error) bool { code := aerr.Code() return strings.Contains(code, "Subnet") || code == "InsufficientInstanceCapacity" || - code == "InsufficientVolumeCapacity" + code == "InsufficientVolumeCapacity" || + code == "Unsupported" || + // See TestIsErrorSubnetSpecific for examples of why + // we look for substrings in code/message instead of + // only using specific codes here. + (strings.Contains(code, "InvalidParameter") && + reSubnetSpecificInvalidParameterMessage.MatchString(aerr.Message())) +} + +// isErrorCapacity returns true if the error indicates lack of +// capacity (either temporary or permanent) to run a specific instance +// type -- i.e., retrying with a different instance type might +// succeed. +func isErrorCapacity(err error) bool { + aerr, ok := err.(awserr.Error) + if !ok { + return false + } + code := aerr.Code() + return code == "InsufficientInstanceCapacity" || + (code == "Unsupported" && strings.Contains(aerr.Message(), "requested instance type")) } type ec2QuotaError struct { @@ -737,7 +772,7 @@ func wrapError(err error, throttleValue *atomic.Value) error { return rateLimitError{error: err, earliestRetry: time.Now().Add(d)} } else if isErrorQuota(err) { return &ec2QuotaError{err} - } else if aerr, ok := err.(awserr.Error); ok && aerr != nil && aerr.Code() == "InsufficientInstanceCapacity" { + } else if isErrorCapacity(err) { return &capacityError{err, true} } else if err != nil { throttleValue.Store(time.Duration(0))