b7afdf0ecdfdae4ced0f1f869af81e17989a3329
[arvados.git] / lib / cloud / ec2 / ec2.go
1 // Copyright (C) The Arvados Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: AGPL-3.0
4
5 package ec2
6
7 import (
8         "crypto/md5"
9         "crypto/rsa"
10         "crypto/sha1"
11         "crypto/x509"
12         "encoding/base64"
13         "encoding/json"
14         "fmt"
15         "math/big"
16         "sync"
17         "sync/atomic"
18         "time"
19
20         "git.arvados.org/arvados.git/lib/cloud"
21         "git.arvados.org/arvados.git/sdk/go/arvados"
22         "github.com/aws/aws-sdk-go/aws"
23         "github.com/aws/aws-sdk-go/aws/credentials"
24         "github.com/aws/aws-sdk-go/aws/credentials/ec2rolecreds"
25         "github.com/aws/aws-sdk-go/aws/ec2metadata"
26         "github.com/aws/aws-sdk-go/aws/request"
27         "github.com/aws/aws-sdk-go/aws/session"
28         "github.com/aws/aws-sdk-go/service/ec2"
29         "github.com/sirupsen/logrus"
30         "golang.org/x/crypto/ssh"
31 )
32
33 // Driver is the ec2 implementation of the cloud.Driver interface.
34 var Driver = cloud.DriverFunc(newEC2InstanceSet)
35
// Bounds for the back-off delay that Create reports after a throttled
// RunInstances call.
const (
	// throttleDelayMin is the smallest back-off delay Create will use.
	throttleDelayMin time.Duration = time.Second
	// throttleDelayMax is the largest back-off delay Create will use.
	throttleDelayMax time.Duration = time.Minute
)
40
41 type ec2InstanceSetConfig struct {
42         AccessKeyID      string
43         SecretAccessKey  string
44         Region           string
45         SecurityGroupIDs arvados.StringSet
46         SubnetID         string
47         AdminUsername    string
48         EBSVolumeType    string
49 }
50
51 type ec2Interface interface {
52         DescribeKeyPairs(input *ec2.DescribeKeyPairsInput) (*ec2.DescribeKeyPairsOutput, error)
53         ImportKeyPair(input *ec2.ImportKeyPairInput) (*ec2.ImportKeyPairOutput, error)
54         RunInstances(input *ec2.RunInstancesInput) (*ec2.Reservation, error)
55         DescribeInstances(input *ec2.DescribeInstancesInput) (*ec2.DescribeInstancesOutput, error)
56         CreateTags(input *ec2.CreateTagsInput) (*ec2.CreateTagsOutput, error)
57         TerminateInstances(input *ec2.TerminateInstancesInput) (*ec2.TerminateInstancesOutput, error)
58 }
59
60 type ec2InstanceSet struct {
61         ec2config     ec2InstanceSetConfig
62         instanceSetID cloud.InstanceSetID
63         logger        logrus.FieldLogger
64         client        ec2Interface
65         keysMtx       sync.Mutex
66         keys          map[string]string
67         throttleDelay atomic.Value
68 }
69
70 func newEC2InstanceSet(config json.RawMessage, instanceSetID cloud.InstanceSetID, _ cloud.SharedResourceTags, logger logrus.FieldLogger) (prv cloud.InstanceSet, err error) {
71         instanceSet := &ec2InstanceSet{
72                 instanceSetID: instanceSetID,
73                 logger:        logger,
74         }
75         err = json.Unmarshal(config, &instanceSet.ec2config)
76         if err != nil {
77                 return nil, err
78         }
79
80         sess, err := session.NewSession()
81         if err != nil {
82                 return nil, err
83         }
84         // First try any static credentials, fall back to an IAM instance profile/role
85         creds := credentials.NewChainCredentials(
86                 []credentials.Provider{
87                         &credentials.StaticProvider{Value: credentials.Value{AccessKeyID: instanceSet.ec2config.AccessKeyID, SecretAccessKey: instanceSet.ec2config.SecretAccessKey}},
88                         &ec2rolecreds.EC2RoleProvider{Client: ec2metadata.New(sess)},
89                 })
90
91         awsConfig := aws.NewConfig().WithCredentials(creds).WithRegion(instanceSet.ec2config.Region)
92         instanceSet.client = ec2.New(session.Must(session.NewSession(awsConfig)))
93         instanceSet.keys = make(map[string]string)
94         if instanceSet.ec2config.EBSVolumeType == "" {
95                 instanceSet.ec2config.EBSVolumeType = "gp2"
96         }
97         return instanceSet, nil
98 }
99
100 func awsKeyFingerprint(pk ssh.PublicKey) (md5fp string, sha1fp string, err error) {
101         // AWS key fingerprints don't use the usual key fingerprint
102         // you get from ssh-keygen or ssh.FingerprintLegacyMD5()
103         // (you can get that from md5.Sum(pk.Marshal())
104         //
105         // AWS uses the md5 or sha1 of the PKIX DER encoding of the
106         // public key, so calculate those fingerprints here.
107         var rsaPub struct {
108                 Name string
109                 E    *big.Int
110                 N    *big.Int
111         }
112         if err := ssh.Unmarshal(pk.Marshal(), &rsaPub); err != nil {
113                 return "", "", fmt.Errorf("agent: Unmarshal failed to parse public key: %v", err)
114         }
115         rsaPk := rsa.PublicKey{
116                 E: int(rsaPub.E.Int64()),
117                 N: rsaPub.N,
118         }
119         pkix, _ := x509.MarshalPKIXPublicKey(&rsaPk)
120         md5pkix := md5.Sum([]byte(pkix))
121         sha1pkix := sha1.Sum([]byte(pkix))
122         md5fp = ""
123         sha1fp = ""
124         for i := 0; i < len(md5pkix); i++ {
125                 md5fp += fmt.Sprintf(":%02x", md5pkix[i])
126         }
127         for i := 0; i < len(sha1pkix); i++ {
128                 sha1fp += fmt.Sprintf(":%02x", sha1pkix[i])
129         }
130         return md5fp[1:], sha1fp[1:], nil
131 }
132
133 func (instanceSet *ec2InstanceSet) Create(
134         instanceType arvados.InstanceType,
135         imageID cloud.ImageID,
136         newTags cloud.InstanceTags,
137         initCommand cloud.InitCommand,
138         publicKey ssh.PublicKey) (cloud.Instance, error) {
139
140         md5keyFingerprint, sha1keyFingerprint, err := awsKeyFingerprint(publicKey)
141         if err != nil {
142                 return nil, fmt.Errorf("Could not make key fingerprint: %v", err)
143         }
144         instanceSet.keysMtx.Lock()
145         var keyname string
146         var ok bool
147         if keyname, ok = instanceSet.keys[md5keyFingerprint]; !ok {
148                 keyout, err := instanceSet.client.DescribeKeyPairs(&ec2.DescribeKeyPairsInput{
149                         Filters: []*ec2.Filter{{
150                                 Name:   aws.String("fingerprint"),
151                                 Values: []*string{&md5keyFingerprint, &sha1keyFingerprint},
152                         }},
153                 })
154                 if err != nil {
155                         return nil, fmt.Errorf("Could not search for keypair: %v", err)
156                 }
157
158                 if len(keyout.KeyPairs) > 0 {
159                         keyname = *(keyout.KeyPairs[0].KeyName)
160                 } else {
161                         keyname = "arvados-dispatch-keypair-" + md5keyFingerprint
162                         _, err := instanceSet.client.ImportKeyPair(&ec2.ImportKeyPairInput{
163                                 KeyName:           &keyname,
164                                 PublicKeyMaterial: ssh.MarshalAuthorizedKey(publicKey),
165                         })
166                         if err != nil {
167                                 return nil, fmt.Errorf("Could not import keypair: %v", err)
168                         }
169                 }
170                 instanceSet.keys[md5keyFingerprint] = keyname
171         }
172         instanceSet.keysMtx.Unlock()
173
174         ec2tags := []*ec2.Tag{}
175         for k, v := range newTags {
176                 ec2tags = append(ec2tags, &ec2.Tag{
177                         Key:   aws.String(k),
178                         Value: aws.String(v),
179                 })
180         }
181
182         var groups []string
183         for sg := range instanceSet.ec2config.SecurityGroupIDs {
184                 groups = append(groups, sg)
185         }
186
187         rii := ec2.RunInstancesInput{
188                 ImageId:      aws.String(string(imageID)),
189                 InstanceType: &instanceType.ProviderType,
190                 MaxCount:     aws.Int64(1),
191                 MinCount:     aws.Int64(1),
192                 KeyName:      &keyname,
193
194                 NetworkInterfaces: []*ec2.InstanceNetworkInterfaceSpecification{
195                         {
196                                 AssociatePublicIpAddress: aws.Bool(false),
197                                 DeleteOnTermination:      aws.Bool(true),
198                                 DeviceIndex:              aws.Int64(0),
199                                 Groups:                   aws.StringSlice(groups),
200                                 SubnetId:                 &instanceSet.ec2config.SubnetID,
201                         }},
202                 DisableApiTermination:             aws.Bool(false),
203                 InstanceInitiatedShutdownBehavior: aws.String("terminate"),
204                 TagSpecifications: []*ec2.TagSpecification{
205                         {
206                                 ResourceType: aws.String("instance"),
207                                 Tags:         ec2tags,
208                         }},
209                 UserData: aws.String(base64.StdEncoding.EncodeToString([]byte("#!/bin/sh\n" + initCommand + "\n"))),
210         }
211
212         if instanceType.AddedScratch > 0 {
213                 rii.BlockDeviceMappings = []*ec2.BlockDeviceMapping{{
214                         DeviceName: aws.String("/dev/xvdt"),
215                         Ebs: &ec2.EbsBlockDevice{
216                                 DeleteOnTermination: aws.Bool(true),
217                                 VolumeSize:          aws.Int64((int64(instanceType.AddedScratch) + (1<<30 - 1)) >> 30),
218                                 VolumeType:          &instanceSet.ec2config.EBSVolumeType,
219                         }}}
220         }
221
222         if instanceType.Preemptible {
223                 rii.InstanceMarketOptions = &ec2.InstanceMarketOptionsRequest{
224                         MarketType: aws.String("spot"),
225                         SpotOptions: &ec2.SpotMarketOptions{
226                                 InstanceInterruptionBehavior: aws.String("terminate"),
227                                 MaxPrice:                     aws.String(fmt.Sprintf("%v", instanceType.Price)),
228                         }}
229         }
230
231         rsv, err := instanceSet.client.RunInstances(&rii)
232
233         if request.IsErrorThrottle(err) {
234                 // Back off exponentially until a create call either
235                 // succeeds or returns a non-throttle error.
236                 d, _ := instanceSet.throttleDelay.Load().(time.Duration)
237                 d = d*3/2 + time.Second
238                 if d < throttleDelayMin {
239                         d = throttleDelayMin
240                 } else if d > throttleDelayMax {
241                         d = throttleDelayMax
242                 }
243                 instanceSet.throttleDelay.Store(d)
244                 return nil, rateLimitError{error: err, earliestRetry: time.Now().Add(d)}
245         } else if err != nil {
246                 instanceSet.throttleDelay.Store(time.Duration(0))
247                 return nil, err
248         } else {
249                 instanceSet.throttleDelay.Store(time.Duration(0))
250         }
251
252         return &ec2Instance{
253                 provider: instanceSet,
254                 instance: rsv.Instances[0],
255         }, nil
256 }
257
258 func (instanceSet *ec2InstanceSet) Instances(tags cloud.InstanceTags) (instances []cloud.Instance, err error) {
259         var filters []*ec2.Filter
260         for k, v := range tags {
261                 filters = append(filters, &ec2.Filter{
262                         Name:   aws.String("tag:" + k),
263                         Values: []*string{aws.String(v)},
264                 })
265         }
266         dii := &ec2.DescribeInstancesInput{Filters: filters}
267         for {
268                 dio, err := instanceSet.client.DescribeInstances(dii)
269                 if err != nil {
270                         return nil, err
271                 }
272
273                 for _, rsv := range dio.Reservations {
274                         for _, inst := range rsv.Instances {
275                                 if *inst.State.Name != "shutting-down" && *inst.State.Name != "terminated" {
276                                         instances = append(instances, &ec2Instance{instanceSet, inst})
277                                 }
278                         }
279                 }
280                 if dio.NextToken == nil {
281                         return instances, err
282                 }
283                 dii.NextToken = dio.NextToken
284         }
285 }
286
287 func (instanceSet *ec2InstanceSet) Stop() {
288 }
289
290 type ec2Instance struct {
291         provider *ec2InstanceSet
292         instance *ec2.Instance
293 }
294
295 func (inst *ec2Instance) ID() cloud.InstanceID {
296         return cloud.InstanceID(*inst.instance.InstanceId)
297 }
298
299 func (inst *ec2Instance) String() string {
300         return *inst.instance.InstanceId
301 }
302
303 func (inst *ec2Instance) ProviderType() string {
304         return *inst.instance.InstanceType
305 }
306
307 func (inst *ec2Instance) SetTags(newTags cloud.InstanceTags) error {
308         var ec2tags []*ec2.Tag
309         for k, v := range newTags {
310                 ec2tags = append(ec2tags, &ec2.Tag{
311                         Key:   aws.String(k),
312                         Value: aws.String(v),
313                 })
314         }
315
316         _, err := inst.provider.client.CreateTags(&ec2.CreateTagsInput{
317                 Resources: []*string{inst.instance.InstanceId},
318                 Tags:      ec2tags,
319         })
320
321         return err
322 }
323
324 func (inst *ec2Instance) Tags() cloud.InstanceTags {
325         tags := make(map[string]string)
326
327         for _, t := range inst.instance.Tags {
328                 tags[*t.Key] = *t.Value
329         }
330
331         return tags
332 }
333
334 func (inst *ec2Instance) Destroy() error {
335         _, err := inst.provider.client.TerminateInstances(&ec2.TerminateInstancesInput{
336                 InstanceIds: []*string{inst.instance.InstanceId},
337         })
338         return err
339 }
340
341 func (inst *ec2Instance) Address() string {
342         if inst.instance.PrivateIpAddress != nil {
343                 return *inst.instance.PrivateIpAddress
344         }
345         return ""
346 }
347
348 func (inst *ec2Instance) RemoteUser() string {
349         return inst.provider.ec2config.AdminUsername
350 }
351
352 func (inst *ec2Instance) VerifyHostKey(ssh.PublicKey, *ssh.Client) error {
353         return cloud.ErrNotImplemented
354 }
355
// rateLimitError wraps an error with the earliest time at which the
// failed operation should be retried. Create returns it when
// RunInstances is throttled.
type rateLimitError struct {
	// error is the underlying error; embedding it supplies the
	// Error method.
	error
	// earliestRetry is the time reported by EarliestRetry.
	earliestRetry time.Time
}

// EarliestRetry returns the time stored when the error was created.
func (e rateLimitError) EarliestRetry() time.Time {
	retryAt := e.earliestRetry
	return retryAt
}