X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/4d80b93ac628a19c1ab590dfc19b7ad58bbce950..b541c9d898d3dde983de2e0ea80a40e17d4c9b9f:/lib/cloud/ec2/ec2.go

diff --git a/lib/cloud/ec2/ec2.go b/lib/cloud/ec2/ec2.go
index aa190a055a..6251f18df0 100644
--- a/lib/cloud/ec2/ec2.go
+++ b/lib/cloud/ec2/ec2.go
@@ -13,7 +13,9 @@ import (
 	"encoding/json"
 	"fmt"
 	"math/big"
+	"regexp"
 	"strconv"
+	"strings"
 	"sync"
 	"sync/atomic"
 	"time"
@@ -28,6 +30,7 @@ import (
 	"github.com/aws/aws-sdk-go/aws/request"
 	"github.com/aws/aws-sdk-go/aws/session"
 	"github.com/aws/aws-sdk-go/service/ec2"
+	"github.com/prometheus/client_golang/prometheus"
 	"github.com/sirupsen/logrus"
 	"golang.org/x/crypto/ssh"
 )
@@ -45,7 +48,7 @@ type ec2InstanceSetConfig struct {
 	SecretAccessKey         string
 	Region                  string
 	SecurityGroupIDs        arvados.StringSet
-	SubnetID                string
+	SubnetID                sliceOrSingleString
 	AdminUsername           string
 	EBSVolumeType           string
 	EBSPrice                float64
@@ -53,6 +56,39 @@ type ec2InstanceSetConfig struct {
 	SpotPriceUpdateInterval arvados.Duration
 }
 
+type sliceOrSingleString []string
+
+// UnmarshalJSON unmarshals an array of strings, and also accepts ""
+// as [], and "foo" as ["foo"].
+func (ss *sliceOrSingleString) UnmarshalJSON(data []byte) error {
+	if len(data) == 0 {
+		*ss = nil
+	} else if data[0] == '[' {
+		var slice []string
+		err := json.Unmarshal(data, &slice)
+		if err != nil {
+			return err
+		}
+		if len(slice) == 0 {
+			*ss = nil
+		} else {
+			*ss = slice
+		}
+	} else {
+		var str string
+		err := json.Unmarshal(data, &str)
+		if err != nil {
+			return err
+		}
+		if str == "" {
+			*ss = nil
+		} else {
+			*ss = []string{str}
+		}
+	}
+	return nil
+}
+
 type ec2Interface interface {
 	DescribeKeyPairs(input *ec2.DescribeKeyPairsInput) (*ec2.DescribeKeyPairsOutput, error)
 	ImportKeyPair(input *ec2.ImportKeyPairInput) (*ec2.ImportKeyPairOutput, error)
@@ -66,6 +102,7 @@ type ec2Interface interface {
 
 type ec2InstanceSet struct {
 	ec2config              ec2InstanceSetConfig
+	currentSubnetIDIndex   int32
 	instanceSetID          cloud.InstanceSetID
 	logger                 logrus.FieldLogger
 	client                 ec2Interface
@@ -77,9 +114,12 @@ type ec2InstanceSet struct {
 	prices        map[priceKey][]cloud.InstancePrice
 	pricesLock    sync.Mutex
 	pricesUpdated map[priceKey]time.Time
+
+	mInstances      *prometheus.GaugeVec
+	mInstanceStarts *prometheus.CounterVec
 }
 
-func newEC2InstanceSet(config json.RawMessage, instanceSetID cloud.InstanceSetID, _ cloud.SharedResourceTags, logger logrus.FieldLogger) (prv cloud.InstanceSet, err error) {
+func newEC2InstanceSet(config json.RawMessage, instanceSetID cloud.InstanceSetID, _ cloud.SharedResourceTags, logger logrus.FieldLogger, reg *prometheus.Registry) (prv cloud.InstanceSet, err error) {
 	instanceSet := &ec2InstanceSet{
 		instanceSetID: instanceSetID,
 		logger:        logger,
@@ -106,6 +146,36 @@ func newEC2InstanceSet(config json.RawMessage, instanceSetID cloud.InstanceSetID
 	if instanceSet.ec2config.EBSVolumeType == "" {
 		instanceSet.ec2config.EBSVolumeType = "gp2"
 	}
+
+	// Set up metrics
+	instanceSet.mInstances = prometheus.NewGaugeVec(prometheus.GaugeOpts{
+		Namespace: "arvados",
+		Subsystem: "dispatchcloud",
+		Name:      "ec2_instances",
+		Help:      "Number of instances running",
+	}, []string{"subnet_id"})
+	instanceSet.mInstanceStarts = prometheus.NewCounterVec(prometheus.CounterOpts{
+		Namespace: "arvados",
+		Subsystem: "dispatchcloud",
+		Name:      "ec2_instance_starts_total",
+		Help:      "Number of attempts to start a new instance",
+	}, []string{"subnet_id", "success"})
+	// Initialize all of the series we'll be reporting.  Otherwise
+	// the {subnet_id=A, success=0} series doesn't appear in metrics
+	// at all until there's a failure in subnet A.
+	for _, subnet := range instanceSet.ec2config.SubnetID {
+		instanceSet.mInstanceStarts.WithLabelValues(subnet, "0").Add(0)
+		instanceSet.mInstanceStarts.WithLabelValues(subnet, "1").Add(0)
+	}
+	if len(instanceSet.ec2config.SubnetID) == 0 {
+		instanceSet.mInstanceStarts.WithLabelValues("", "0").Add(0)
+		instanceSet.mInstanceStarts.WithLabelValues("", "1").Add(0)
+	}
+	if reg != nil {
+		reg.MustRegister(instanceSet.mInstances)
+		reg.MustRegister(instanceSet.mInstanceStarts)
+	}
+
 	return instanceSet, nil
 }
 
@@ -149,40 +219,6 @@ func (instanceSet *ec2InstanceSet) Create(
 	initCommand cloud.InitCommand,
 	publicKey ssh.PublicKey) (cloud.Instance, error) {
 
-	md5keyFingerprint, sha1keyFingerprint, err := awsKeyFingerprint(publicKey)
-	if err != nil {
-		return nil, fmt.Errorf("Could not make key fingerprint: %v", err)
-	}
-	instanceSet.keysMtx.Lock()
-	var keyname string
-	var ok bool
-	if keyname, ok = instanceSet.keys[md5keyFingerprint]; !ok {
-		keyout, err := instanceSet.client.DescribeKeyPairs(&ec2.DescribeKeyPairsInput{
-			Filters: []*ec2.Filter{{
-				Name:   aws.String("fingerprint"),
-				Values: []*string{&md5keyFingerprint, &sha1keyFingerprint},
-			}},
-		})
-		if err != nil {
-			return nil, fmt.Errorf("Could not search for keypair: %v", err)
-		}
-
-		if len(keyout.KeyPairs) > 0 {
-			keyname = *(keyout.KeyPairs[0].KeyName)
-		} else {
-			keyname = "arvados-dispatch-keypair-" + md5keyFingerprint
-			_, err := instanceSet.client.ImportKeyPair(&ec2.ImportKeyPairInput{
-				KeyName:           &keyname,
-				PublicKeyMaterial: ssh.MarshalAuthorizedKey(publicKey),
-			})
-			if err != nil {
-				return nil, fmt.Errorf("Could not import keypair: %v", err)
-			}
-		}
-		instanceSet.keys[md5keyFingerprint] = keyname
-	}
-	instanceSet.keysMtx.Unlock()
-
 	ec2tags := []*ec2.Tag{}
 	for k, v := range newTags {
 		ec2tags = append(ec2tags, &ec2.Tag{
@@ -201,7 +237,6 @@ func (instanceSet *ec2InstanceSet) Create(
 		InstanceType: &instanceType.ProviderType,
 		MaxCount:     aws.Int64(1),
 		MinCount:     aws.Int64(1),
-		KeyName:      &keyname,
 
 		NetworkInterfaces: []*ec2.InstanceNetworkInterfaceSpecification{
 			{
@@ -209,7 +244,6 @@ func (instanceSet *ec2InstanceSet) Create(
 				DeleteOnTermination:      aws.Bool(true),
 				DeviceIndex:              aws.Int64(0),
 				Groups:                   aws.StringSlice(groups),
-				SubnetId:                 &instanceSet.ec2config.SubnetID,
 			}},
 		DisableApiTermination:             aws.Bool(false),
 		InstanceInitiatedShutdownBehavior: aws.String("terminate"),
@@ -218,9 +252,23 @@ func (instanceSet *ec2InstanceSet) Create(
 				ResourceType: aws.String("instance"),
 				Tags:         ec2tags,
 			}},
+		MetadataOptions: &ec2.InstanceMetadataOptionsRequest{
+			// Require IMDSv2, as described at
+			// https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/configuring-IMDS-new-instances.html
+			HttpEndpoint: aws.String(ec2.InstanceMetadataEndpointStateEnabled),
+			HttpTokens:   aws.String(ec2.HttpTokensStateRequired),
+		},
 		UserData: aws.String(base64.StdEncoding.EncodeToString([]byte("#!/bin/sh\n" + initCommand + "\n"))),
 	}
 
+	if publicKey != nil {
+		keyname, err := instanceSet.getKeyName(publicKey)
+		if err != nil {
+			return nil, err
+		}
+		rii.KeyName = &keyname
+	}
+
 	if instanceType.AddedScratch > 0 {
 		rii.BlockDeviceMappings = []*ec2.BlockDeviceMapping{{
 			DeviceName: aws.String("/dev/xvdt"),
@@ -246,10 +294,48 @@ func (instanceSet *ec2InstanceSet) Create(
 		}
 	}
 
-	rsv, err := instanceSet.client.RunInstances(&rii)
-	err = wrapError(err, &instanceSet.throttleDelayCreate)
-	if err != nil {
-		return nil, err
+	var rsv *ec2.Reservation
+	var errToReturn error
+	subnets := instanceSet.ec2config.SubnetID
+	currentSubnetIDIndex := int(atomic.LoadInt32(&instanceSet.currentSubnetIDIndex))
+	for tryOffset := 0; ; tryOffset++ {
+		tryIndex := 0
+		trySubnet := ""
+		if len(subnets) > 0 {
+			tryIndex = (currentSubnetIDIndex + tryOffset) % len(subnets)
+			trySubnet = subnets[tryIndex]
+			rii.NetworkInterfaces[0].SubnetId = aws.String(trySubnet)
+		}
+		var err error
+		rsv, err = instanceSet.client.RunInstances(&rii)
+		instanceSet.mInstanceStarts.WithLabelValues(trySubnet, boolLabelValue[err == nil]).Add(1)
+		if !isErrorCapacity(errToReturn) || isErrorCapacity(err) {
+			// We want to return the last capacity error,
+			// if any; otherwise the last non-capacity
+			// error.
+			errToReturn = err
+		}
+		if isErrorSubnetSpecific(err) &&
+			tryOffset < len(subnets)-1 {
+			instanceSet.logger.WithError(err).WithField("SubnetID", subnets[tryIndex]).
+				Warn("RunInstances failed, trying next subnet")
+			continue
+		}
+		// Succeeded, or exhausted all subnets, or got a
+		// non-subnet-related error.
+		//
+		// We intentionally update currentSubnetIDIndex even
+		// in the non-retryable-failure case here to avoid a
+		// situation where successive calls to Create() keep
+		// returning errors for the same subnet (perhaps
+		// "subnet full") and never reveal the errors for the
+		// other configured subnets (perhaps "subnet ID
+		// invalid").
+		atomic.StoreInt32(&instanceSet.currentSubnetIDIndex, int32(tryIndex))
+		break
+	}
+	if rsv == nil || len(rsv.Instances) == 0 {
+		return nil, wrapError(errToReturn, &instanceSet.throttleDelayCreate)
 	}
 	return &ec2Instance{
 		provider: instanceSet,
@@ -257,6 +343,40 @@ func (instanceSet *ec2InstanceSet) Create(
 	}, nil
 }
 
+func (instanceSet *ec2InstanceSet) getKeyName(publicKey ssh.PublicKey) (string, error) {
+	instanceSet.keysMtx.Lock()
+	defer instanceSet.keysMtx.Unlock()
+	md5keyFingerprint, sha1keyFingerprint, err := awsKeyFingerprint(publicKey)
+	if err != nil {
+		return "", fmt.Errorf("Could not make key fingerprint: %v", err)
+	}
+	if keyname, ok := instanceSet.keys[md5keyFingerprint]; ok {
+		return keyname, nil
+	}
+	keyout, err := instanceSet.client.DescribeKeyPairs(&ec2.DescribeKeyPairsInput{
+		Filters: []*ec2.Filter{{
+			Name:   aws.String("fingerprint"),
+			Values: []*string{&md5keyFingerprint, &sha1keyFingerprint},
+		}},
+	})
+	if err != nil {
+		return "", fmt.Errorf("Could not search for keypair: %v", err)
+	}
+	if len(keyout.KeyPairs) > 0 {
+		return *(keyout.KeyPairs[0].KeyName), nil
+	}
+	keyname := "arvados-dispatch-keypair-" + md5keyFingerprint
+	_, err = instanceSet.client.ImportKeyPair(&ec2.ImportKeyPairInput{
+		KeyName:           &keyname,
+		PublicKeyMaterial: ssh.MarshalAuthorizedKey(publicKey),
+	})
+	if err != nil {
+		return "", fmt.Errorf("Could not import keypair: %v", err)
+	}
+	instanceSet.keys[md5keyFingerprint] = keyname
+	return keyname, nil
+}
+
 func (instanceSet *ec2InstanceSet) Instances(tags cloud.InstanceTags) (instances []cloud.Instance, err error) {
 	var filters []*ec2.Filter
 	for k, v := range tags {
@@ -311,6 +431,24 @@ func (instanceSet *ec2InstanceSet) Instances(tags cloud.InstanceTags) (instances
 		}
 		instanceSet.updateSpotPrices(instances)
 	}
+
+	// Count instances in each subnet, and report in metrics.
+	subnetInstances := map[string]int{"": 0}
+	for _, subnet := range instanceSet.ec2config.SubnetID {
+		subnetInstances[subnet] = 0
+	}
+	for _, inst := range instances {
+		subnet := inst.(*ec2Instance).instance.SubnetId
+		if subnet != nil {
+			subnetInstances[*subnet]++
+		} else {
+			subnetInstances[""]++
+		}
+	}
+	for subnet, count := range subnetInstances {
+		instanceSet.mInstances.WithLabelValues(subnet).Set(float64(count))
+	}
+
 	return instances, err
 }
 
@@ -540,24 +678,77 @@ func (err rateLimitError) EarliestRetry() time.Time {
 	return err.earliestRetry
 }
 
-var isCodeCapacity = map[string]bool{
-	"InsufficientInstanceCapacity":      true,
-	"VcpuLimitExceeded":                 true,
-	"MaxSpotInstanceCountExceeded":      true,
+type capacityError struct {
+	error
+	isInstanceTypeSpecific bool
+}
+
+func (er *capacityError) IsCapacityError() bool {
+	return true
+}
+
+func (er *capacityError) IsInstanceTypeSpecific() bool {
+	return er.isInstanceTypeSpecific
+}
+
+var isCodeQuota = map[string]bool{
+	"InstanceLimitExceeded":             true,
+	"InsufficientAddressCapacity":       true,
 	"InsufficientFreeAddressesInSubnet": true,
+	"InsufficientVolumeCapacity":        true,
+	"MaxSpotInstanceCountExceeded":      true,
+	"VcpuLimitExceeded":                 true,
 }
 
-// isErrorCapacity returns whether the error is to be throttled based on its code.
+// isErrorQuota returns whether the error indicates we have reached
+// some usage quota/limit -- i.e., immediately retrying with an equal
+// or larger instance type will probably not work.
+//
 // Returns false if error is nil.
-func isErrorCapacity(err error) bool {
+func isErrorQuota(err error) bool {
 	if aerr, ok := err.(awserr.Error); ok && aerr != nil {
-		if _, ok := isCodeCapacity[aerr.Code()]; ok {
+		if _, ok := isCodeQuota[aerr.Code()]; ok {
 			return true
 		}
 	}
 	return false
 }
 
+var reSubnetSpecificInvalidParameterMessage = regexp.MustCompile(`(?ms).*( subnet |sufficient free [Ii]pv[46] addresses).*`)
+
+// isErrorSubnetSpecific returns true if the problem encountered by
+// RunInstances might be avoided by trying a different subnet.
+func isErrorSubnetSpecific(err error) bool {
+	aerr, ok := err.(awserr.Error)
+	if !ok {
+		return false
+	}
+	code := aerr.Code()
+	return strings.Contains(code, "Subnet") ||
+		code == "InsufficientInstanceCapacity" ||
+		code == "InsufficientVolumeCapacity" ||
+		code == "Unsupported" ||
+		// See TestIsErrorSubnetSpecific for examples of why
+		// we look for substrings in code/message instead of
+		// only using specific codes here.
+		(strings.Contains(code, "InvalidParameter") &&
+			reSubnetSpecificInvalidParameterMessage.MatchString(aerr.Message()))
+}
+
+// isErrorCapacity returns true if the error indicates lack of
+// capacity (either temporary or permanent) to run a specific instance
+// type -- i.e., retrying with a different instance type might
+// succeed.
+func isErrorCapacity(err error) bool {
+	aerr, ok := err.(awserr.Error)
+	if !ok {
+		return false
+	}
+	code := aerr.Code()
+	return code == "InsufficientInstanceCapacity" ||
+		(code == "Unsupported" && strings.Contains(aerr.Message(), "requested instance type"))
+}
+
 type ec2QuotaError struct {
 	error
 }
@@ -579,8 +770,10 @@ func wrapError(err error, throttleValue *atomic.Value) error {
 		}
 		throttleValue.Store(d)
 		return rateLimitError{error: err, earliestRetry: time.Now().Add(d)}
-	} else if isErrorCapacity(err) {
+	} else if isErrorQuota(err) {
 		return &ec2QuotaError{err}
+	} else if isErrorCapacity(err) {
+		return &capacityError{err, true}
 	} else if err != nil {
 		throttleValue.Store(time.Duration(0))
 		return err
@@ -588,3 +781,5 @@ func wrapError(err error, throttleValue *atomic.Value) error {
 	throttleValue.Store(time.Duration(0))
 	return nil
 }
+
+var boolLabelValue = map[bool]string{false: "0", true: "1"}