Merge branch '20522-load-dispatch-priv-key'
[arvados.git] / lib / cloud / ec2 / ec2_test.go
1 // Copyright (C) The Arvados Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: AGPL-3.0
4 //
5 //
6 // How to manually run individual tests against the real cloud:
7 //
8 // $ go test -v git.arvados.org/arvados.git/lib/cloud/ec2 -live-ec2-cfg ec2config.yml -check.f=TestCreate
9 //
10 // Tests should be run individually, in the order they are listed in this file.
11 //
12 // Example ec2config.yml:
13 //
14 // ImageIDForTestSuite: ami-xxxxxxxxxxxxxxxxx
15 // DriverParameters:
16 //       AccessKeyID: XXXXXXXXXXXXXX
17 //       SecretAccessKey: xxxxxxxxxxxxxxxxxxxx
18 //       Region: us-east-1
19 //       SecurityGroupIDs: [sg-xxxxxxxx]
20 //       SubnetID: subnet-xxxxxxxx
21 //       AdminUsername: crunch
22
23 package ec2
24
25 import (
26         "encoding/json"
27         "flag"
28         "sync/atomic"
29         "testing"
30         "time"
31
32         "git.arvados.org/arvados.git/lib/cloud"
33         "git.arvados.org/arvados.git/lib/dispatchcloud/test"
34         "git.arvados.org/arvados.git/sdk/go/arvados"
35         "git.arvados.org/arvados.git/sdk/go/config"
36         "github.com/aws/aws-sdk-go/aws"
37         "github.com/aws/aws-sdk-go/aws/awserr"
38         "github.com/aws/aws-sdk-go/service/ec2"
39         "github.com/sirupsen/logrus"
40         check "gopkg.in/check.v1"
41 )
42
// live holds the value of the -live-ec2-cfg flag: the path of a
// config file. When it is non-empty, GetInstanceSet loads that file
// and builds its instance set with newEC2InstanceSet instead of the
// ec2stub client.
var live = flag.String("live-ec2-cfg", "", "Test with real EC2 API, provide config file")
44
45 // Gocheck boilerplate
46 func Test(t *testing.T) {
47         check.TestingT(t)
48 }
49
50 type EC2InstanceSetSuite struct{}
51
52 var _ = check.Suite(&EC2InstanceSetSuite{})
53
// testConfig is the layout of the config file named by the
// -live-ec2-cfg flag.
type testConfig struct {
	// ImageIDForTestSuite is returned by GetInstanceSet as the
	// image ID to use when creating instances.
	ImageIDForTestSuite string
	// DriverParameters is passed, undecoded, to
	// newEC2InstanceSet.
	DriverParameters json.RawMessage
}
58
59 type ec2stub struct {
60         c                     *check.C
61         reftime               time.Time
62         importKeyPairCalls    []*ec2.ImportKeyPairInput
63         describeKeyPairsCalls []*ec2.DescribeKeyPairsInput
64 }
65
66 func (e *ec2stub) ImportKeyPair(input *ec2.ImportKeyPairInput) (*ec2.ImportKeyPairOutput, error) {
67         e.importKeyPairCalls = append(e.importKeyPairCalls, input)
68         return nil, nil
69 }
70
71 func (e *ec2stub) DescribeKeyPairs(input *ec2.DescribeKeyPairsInput) (*ec2.DescribeKeyPairsOutput, error) {
72         e.describeKeyPairsCalls = append(e.describeKeyPairsCalls, input)
73         return &ec2.DescribeKeyPairsOutput{}, nil
74 }
75
76 func (e *ec2stub) RunInstances(input *ec2.RunInstancesInput) (*ec2.Reservation, error) {
77         return &ec2.Reservation{Instances: []*ec2.Instance{{
78                 InstanceId:   aws.String("i-123"),
79                 InstanceType: aws.String("t2.micro"),
80                 Tags:         input.TagSpecifications[0].Tags,
81         }}}, nil
82 }
83
84 func (e *ec2stub) DescribeInstances(input *ec2.DescribeInstancesInput) (*ec2.DescribeInstancesOutput, error) {
85         return &ec2.DescribeInstancesOutput{
86                 Reservations: []*ec2.Reservation{{
87                         Instances: []*ec2.Instance{{
88                                 InstanceId:        aws.String("i-123"),
89                                 InstanceLifecycle: aws.String("spot"),
90                                 InstanceType:      aws.String("t2.micro"),
91                                 PrivateIpAddress:  aws.String("10.1.2.3"),
92                                 State:             &ec2.InstanceState{Name: aws.String("running"), Code: aws.Int64(16)},
93                         }, {
94                                 InstanceId:        aws.String("i-124"),
95                                 InstanceLifecycle: aws.String("spot"),
96                                 InstanceType:      aws.String("t2.micro"),
97                                 PrivateIpAddress:  aws.String("10.1.2.4"),
98                                 State:             &ec2.InstanceState{Name: aws.String("running"), Code: aws.Int64(16)},
99                         }},
100                 }},
101         }, nil
102 }
103
104 func (e *ec2stub) DescribeInstanceStatusPages(input *ec2.DescribeInstanceStatusInput, fn func(*ec2.DescribeInstanceStatusOutput, bool) bool) error {
105         fn(&ec2.DescribeInstanceStatusOutput{
106                 InstanceStatuses: []*ec2.InstanceStatus{{
107                         InstanceId:       aws.String("i-123"),
108                         AvailabilityZone: aws.String("aa-east-1a"),
109                 }, {
110                         InstanceId:       aws.String("i-124"),
111                         AvailabilityZone: aws.String("aa-east-1a"),
112                 }},
113         }, true)
114         return nil
115 }
116
117 func (e *ec2stub) DescribeSpotPriceHistoryPages(input *ec2.DescribeSpotPriceHistoryInput, fn func(*ec2.DescribeSpotPriceHistoryOutput, bool) bool) error {
118         if !fn(&ec2.DescribeSpotPriceHistoryOutput{
119                 SpotPriceHistory: []*ec2.SpotPrice{
120                         &ec2.SpotPrice{
121                                 InstanceType:     aws.String("t2.micro"),
122                                 AvailabilityZone: aws.String("aa-east-1a"),
123                                 SpotPrice:        aws.String("0.005"),
124                                 Timestamp:        aws.Time(e.reftime.Add(-9 * time.Minute)),
125                         },
126                         &ec2.SpotPrice{
127                                 InstanceType:     aws.String("t2.micro"),
128                                 AvailabilityZone: aws.String("aa-east-1a"),
129                                 SpotPrice:        aws.String("0.015"),
130                                 Timestamp:        aws.Time(e.reftime.Add(-5 * time.Minute)),
131                         },
132                 },
133         }, false) {
134                 return nil
135         }
136         fn(&ec2.DescribeSpotPriceHistoryOutput{
137                 SpotPriceHistory: []*ec2.SpotPrice{
138                         &ec2.SpotPrice{
139                                 InstanceType:     aws.String("t2.micro"),
140                                 AvailabilityZone: aws.String("aa-east-1a"),
141                                 SpotPrice:        aws.String("0.01"),
142                                 Timestamp:        aws.Time(e.reftime.Add(-2 * time.Minute)),
143                         },
144                 },
145         }, true)
146         return nil
147 }
148
149 func (e *ec2stub) CreateTags(input *ec2.CreateTagsInput) (*ec2.CreateTagsOutput, error) {
150         return nil, nil
151 }
152
153 func (e *ec2stub) TerminateInstances(input *ec2.TerminateInstancesInput) (*ec2.TerminateInstancesOutput, error) {
154         return nil, nil
155 }
156
157 func GetInstanceSet(c *check.C) (*ec2InstanceSet, cloud.ImageID, arvados.Cluster) {
158         cluster := arvados.Cluster{
159                 InstanceTypes: arvados.InstanceTypeMap(map[string]arvados.InstanceType{
160                         "tiny": {
161                                 Name:         "tiny",
162                                 ProviderType: "t2.micro",
163                                 VCPUs:        1,
164                                 RAM:          4000000000,
165                                 Scratch:      10000000000,
166                                 Price:        .02,
167                                 Preemptible:  false,
168                         },
169                         "tiny-with-extra-scratch": {
170                                 Name:         "tiny-with-extra-scratch",
171                                 ProviderType: "t2.micro",
172                                 VCPUs:        1,
173                                 RAM:          4000000000,
174                                 Price:        .02,
175                                 Preemptible:  false,
176                                 AddedScratch: 20000000000,
177                         },
178                         "tiny-preemptible": {
179                                 Name:         "tiny-preemptible",
180                                 ProviderType: "t2.micro",
181                                 VCPUs:        1,
182                                 RAM:          4000000000,
183                                 Scratch:      10000000000,
184                                 Price:        .02,
185                                 Preemptible:  true,
186                         },
187                 })}
188         if *live != "" {
189                 var exampleCfg testConfig
190                 err := config.LoadFile(&exampleCfg, *live)
191                 c.Assert(err, check.IsNil)
192
193                 ap, err := newEC2InstanceSet(exampleCfg.DriverParameters, "test123", nil, logrus.StandardLogger())
194                 c.Assert(err, check.IsNil)
195                 return ap.(*ec2InstanceSet), cloud.ImageID(exampleCfg.ImageIDForTestSuite), cluster
196         }
197         ap := ec2InstanceSet{
198                 instanceSetID: "test123",
199                 logger:        logrus.StandardLogger(),
200                 client:        &ec2stub{c: c, reftime: time.Now().UTC()},
201                 keys:          make(map[string]string),
202         }
203         return &ap, cloud.ImageID("blob"), cluster
204 }
205
206 func (*EC2InstanceSetSuite) TestCreate(c *check.C) {
207         ap, img, cluster := GetInstanceSet(c)
208         pk, _ := test.LoadTestKey(c, "../../dispatchcloud/test/sshkey_dispatch")
209
210         inst, err := ap.Create(cluster.InstanceTypes["tiny"],
211                 img, map[string]string{
212                         "TestTagName": "test tag value",
213                 }, "umask 0600; echo -n test-file-data >/var/run/test-file", pk)
214         c.Assert(err, check.IsNil)
215
216         tags := inst.Tags()
217         c.Check(tags["TestTagName"], check.Equals, "test tag value")
218         c.Logf("inst.String()=%v Address()=%v Tags()=%v", inst.String(), inst.Address(), tags)
219
220         if *live == "" {
221                 c.Check(ap.client.(*ec2stub).describeKeyPairsCalls, check.HasLen, 1)
222                 c.Check(ap.client.(*ec2stub).importKeyPairCalls, check.HasLen, 1)
223         }
224 }
225
226 func (*EC2InstanceSetSuite) TestCreateWithExtraScratch(c *check.C) {
227         ap, img, cluster := GetInstanceSet(c)
228         inst, err := ap.Create(cluster.InstanceTypes["tiny-with-extra-scratch"],
229                 img, map[string]string{
230                         "TestTagName": "test tag value",
231                 }, "umask 0600; echo -n test-file-data >/var/run/test-file", nil)
232
233         c.Assert(err, check.IsNil)
234
235         tags := inst.Tags()
236         c.Check(tags["TestTagName"], check.Equals, "test tag value")
237         c.Logf("inst.String()=%v Address()=%v Tags()=%v", inst.String(), inst.Address(), tags)
238
239         if *live == "" {
240                 // Should not have called key pair APIs, because
241                 // publickey arg was nil
242                 c.Check(ap.client.(*ec2stub).describeKeyPairsCalls, check.HasLen, 0)
243                 c.Check(ap.client.(*ec2stub).importKeyPairCalls, check.HasLen, 0)
244         }
245 }
246
247 func (*EC2InstanceSetSuite) TestCreatePreemptible(c *check.C) {
248         ap, img, cluster := GetInstanceSet(c)
249         pk, _ := test.LoadTestKey(c, "../../dispatchcloud/test/sshkey_dispatch")
250
251         inst, err := ap.Create(cluster.InstanceTypes["tiny-preemptible"],
252                 img, map[string]string{
253                         "TestTagName": "test tag value",
254                 }, "umask 0600; echo -n test-file-data >/var/run/test-file", pk)
255
256         c.Assert(err, check.IsNil)
257
258         tags := inst.Tags()
259         c.Check(tags["TestTagName"], check.Equals, "test tag value")
260         c.Logf("inst.String()=%v Address()=%v Tags()=%v", inst.String(), inst.Address(), tags)
261
262 }
263
264 func (*EC2InstanceSetSuite) TestTagInstances(c *check.C) {
265         ap, _, _ := GetInstanceSet(c)
266         l, err := ap.Instances(nil)
267         c.Assert(err, check.IsNil)
268
269         for _, i := range l {
270                 tg := i.Tags()
271                 tg["TestTag2"] = "123 test tag 2"
272                 c.Check(i.SetTags(tg), check.IsNil)
273         }
274 }
275
276 func (*EC2InstanceSetSuite) TestListInstances(c *check.C) {
277         ap, _, _ := GetInstanceSet(c)
278         l, err := ap.Instances(nil)
279         c.Assert(err, check.IsNil)
280
281         for _, i := range l {
282                 tg := i.Tags()
283                 c.Logf("%v %v %v", i.String(), i.Address(), tg)
284         }
285 }
286
287 func (*EC2InstanceSetSuite) TestDestroyInstances(c *check.C) {
288         ap, _, _ := GetInstanceSet(c)
289         l, err := ap.Instances(nil)
290         c.Assert(err, check.IsNil)
291
292         for _, i := range l {
293                 c.Check(i.Destroy(), check.IsNil)
294         }
295 }
296
297 func (*EC2InstanceSetSuite) TestInstancePriceHistory(c *check.C) {
298         ap, img, cluster := GetInstanceSet(c)
299         pk, _ := test.LoadTestKey(c, "../../dispatchcloud/test/sshkey_dispatch")
300         tags := cloud.InstanceTags{"arvados-ec2-driver": "test"}
301
302         defer func() {
303                 instances, err := ap.Instances(tags)
304                 c.Assert(err, check.IsNil)
305                 for _, inst := range instances {
306                         c.Logf("cleanup: destroy instance %s", inst)
307                         c.Check(inst.Destroy(), check.IsNil)
308                 }
309         }()
310
311         ap.ec2config.SpotPriceUpdateInterval = arvados.Duration(time.Hour)
312         ap.ec2config.EBSPrice = 0.1 // $/GiB/month
313         inst1, err := ap.Create(cluster.InstanceTypes["tiny-preemptible"], img, tags, "true", pk)
314         c.Assert(err, check.IsNil)
315         defer inst1.Destroy()
316         inst2, err := ap.Create(cluster.InstanceTypes["tiny-preemptible"], img, tags, "true", pk)
317         c.Assert(err, check.IsNil)
318         defer inst2.Destroy()
319
320         // in live mode, we need to wait for the instances to reach
321         // running state before we can discover their availability
322         // zones and look up the appropriate prices.
323         var instances []cloud.Instance
324         for deadline := time.Now().Add(5 * time.Minute); ; {
325                 if deadline.Before(time.Now()) {
326                         c.Fatal("timed out")
327                 }
328                 instances, err = ap.Instances(tags)
329                 running := 0
330                 for _, inst := range instances {
331                         ec2i := inst.(*ec2Instance).instance
332                         if *ec2i.InstanceLifecycle == "spot" && *ec2i.State.Code&16 != 0 {
333                                 running++
334                         }
335                 }
336                 if running >= 2 {
337                         c.Logf("instances are running, and identifiable as spot instances")
338                         break
339                 }
340                 c.Logf("waiting for instances to reach running state so their availability zone becomes visible...")
341                 time.Sleep(10 * time.Second)
342         }
343
344         for _, inst := range instances {
345                 hist := inst.PriceHistory(arvados.InstanceType{})
346                 c.Logf("%s price history: %v", inst.ID(), hist)
347                 c.Check(len(hist) > 0, check.Equals, true)
348
349                 histWithScratch := inst.PriceHistory(arvados.InstanceType{AddedScratch: 640 << 30})
350                 c.Logf("%s price history with 640 GiB scratch: %v", inst.ID(), histWithScratch)
351
352                 for i, ip := range hist {
353                         c.Check(ip.Price, check.Not(check.Equals), 0.0)
354                         if i > 0 {
355                                 c.Check(ip.StartTime.Before(hist[i-1].StartTime), check.Equals, true)
356                         }
357                         c.Check(ip.Price < histWithScratch[i].Price, check.Equals, true)
358                 }
359         }
360 }
361
362 func (*EC2InstanceSetSuite) TestWrapError(c *check.C) {
363         retryError := awserr.New("Throttling", "", nil)
364         wrapped := wrapError(retryError, &atomic.Value{})
365         _, ok := wrapped.(cloud.RateLimitError)
366         c.Check(ok, check.Equals, true)
367
368         quotaError := awserr.New("InsufficientInstanceCapacity", "", nil)
369         wrapped = wrapError(quotaError, nil)
370         _, ok = wrapped.(cloud.QuotaError)
371         c.Check(ok, check.Equals, true)
372 }