Merge branch '21603-ec2-subnet-error'
[arvados.git] / lib / cloud / ec2 / ec2_test.go
1 // Copyright (C) The Arvados Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: AGPL-3.0
4 //
5 //
6 // How to manually run individual tests against the real cloud:
7 //
8 // $ go test -v git.arvados.org/arvados.git/lib/cloud/ec2 -live-ec2-cfg ec2config.yml -check.f=TestCreate
9 //
10 // Tests should be run individually and in the order they are listed in the file.
11 //
12 // Example ec2config.yml:
13 //
14 // ImageIDForTestSuite: ami-xxxxxxxxxxxxxxxxx
15 // DriverParameters:
16 //       AccessKeyID: XXXXXXXXXXXXXX
17 //       SecretAccessKey: xxxxxxxxxxxxxxxxxxxx
18 //       Region: us-east-1
19 //       SecurityGroupIDs: [sg-xxxxxxxx]
20 //       SubnetID: subnet-xxxxxxxx
21 //       AdminUsername: crunch
22
23 package ec2
24
25 import (
26         "encoding/json"
27         "errors"
28         "flag"
29         "fmt"
30         "sync/atomic"
31         "testing"
32         "time"
33
34         "git.arvados.org/arvados.git/lib/cloud"
35         "git.arvados.org/arvados.git/lib/dispatchcloud/test"
36         "git.arvados.org/arvados.git/sdk/go/arvados"
37         "git.arvados.org/arvados.git/sdk/go/arvadostest"
38         "git.arvados.org/arvados.git/sdk/go/config"
39         "git.arvados.org/arvados.git/sdk/go/ctxlog"
40         "github.com/aws/aws-sdk-go/aws"
41         "github.com/aws/aws-sdk-go/aws/awserr"
42         "github.com/aws/aws-sdk-go/service/ec2"
43         "github.com/ghodss/yaml"
44         "github.com/prometheus/client_golang/prometheus"
45         "github.com/sirupsen/logrus"
46         check "gopkg.in/check.v1"
47 )
48
// live is the value of the -live-ec2-cfg flag. When it is non-empty,
// the tests load that config file and talk to the real EC2 API
// instead of the in-memory stub.
var live = flag.String("live-ec2-cfg", "", "Test with real EC2 API, provide config file")
50
51 // Gocheck boilerplate
52 func Test(t *testing.T) {
53         check.TestingT(t)
54 }
55
56 type sliceOrStringSuite struct{}
57
58 var _ = check.Suite(&sliceOrStringSuite{})
59
60 func (s *sliceOrStringSuite) TestUnmarshal(c *check.C) {
61         var conf ec2InstanceSetConfig
62         for _, trial := range []struct {
63                 input  string
64                 output sliceOrSingleString
65         }{
66                 {``, nil},
67                 {`""`, nil},
68                 {`[]`, nil},
69                 {`"foo"`, sliceOrSingleString{"foo"}},
70                 {`["foo"]`, sliceOrSingleString{"foo"}},
71                 {`[foo]`, sliceOrSingleString{"foo"}},
72                 {`["foo", "bar"]`, sliceOrSingleString{"foo", "bar"}},
73                 {`[foo-bar, baz]`, sliceOrSingleString{"foo-bar", "baz"}},
74         } {
75                 c.Logf("trial: %+v", trial)
76                 err := yaml.Unmarshal([]byte("SubnetID: "+trial.input+"\n"), &conf)
77                 if !c.Check(err, check.IsNil) {
78                         continue
79                 }
80                 c.Check(conf.SubnetID, check.DeepEquals, trial.output)
81         }
82 }
83
84 type EC2InstanceSetSuite struct{}
85
86 var _ = check.Suite(&EC2InstanceSetSuite{})
87
// testConfig is the structure loaded from the file named by the
// -live-ec2-cfg flag.
type testConfig struct {
	// ImageIDForTestSuite is the image ID GetInstanceSet returns
	// in live mode.
	ImageIDForTestSuite string
	// DriverParameters is handed unparsed to newEC2InstanceSet.
	DriverParameters json.RawMessage
}
92
93 type ec2stub struct {
94         c                     *check.C
95         reftime               time.Time
96         importKeyPairCalls    []*ec2.ImportKeyPairInput
97         describeKeyPairsCalls []*ec2.DescribeKeyPairsInput
98         runInstancesCalls     []*ec2.RunInstancesInput
99         // {subnetID => error}: RunInstances returns error if subnetID
100         // matches.
101         subnetErrorOnRunInstances map[string]error
102 }
103
104 func (e *ec2stub) ImportKeyPair(input *ec2.ImportKeyPairInput) (*ec2.ImportKeyPairOutput, error) {
105         e.importKeyPairCalls = append(e.importKeyPairCalls, input)
106         return nil, nil
107 }
108
109 func (e *ec2stub) DescribeKeyPairs(input *ec2.DescribeKeyPairsInput) (*ec2.DescribeKeyPairsOutput, error) {
110         e.describeKeyPairsCalls = append(e.describeKeyPairsCalls, input)
111         return &ec2.DescribeKeyPairsOutput{}, nil
112 }
113
114 func (e *ec2stub) RunInstances(input *ec2.RunInstancesInput) (*ec2.Reservation, error) {
115         e.runInstancesCalls = append(e.runInstancesCalls, input)
116         if len(input.NetworkInterfaces) > 0 && input.NetworkInterfaces[0].SubnetId != nil {
117                 err := e.subnetErrorOnRunInstances[*input.NetworkInterfaces[0].SubnetId]
118                 if err != nil {
119                         return nil, err
120                 }
121         }
122         return &ec2.Reservation{Instances: []*ec2.Instance{{
123                 InstanceId:   aws.String("i-123"),
124                 InstanceType: aws.String("t2.micro"),
125                 Tags:         input.TagSpecifications[0].Tags,
126         }}}, nil
127 }
128
129 func (e *ec2stub) DescribeInstances(input *ec2.DescribeInstancesInput) (*ec2.DescribeInstancesOutput, error) {
130         return &ec2.DescribeInstancesOutput{
131                 Reservations: []*ec2.Reservation{{
132                         Instances: []*ec2.Instance{{
133                                 InstanceId:        aws.String("i-123"),
134                                 InstanceLifecycle: aws.String("spot"),
135                                 InstanceType:      aws.String("t2.micro"),
136                                 PrivateIpAddress:  aws.String("10.1.2.3"),
137                                 State:             &ec2.InstanceState{Name: aws.String("running"), Code: aws.Int64(16)},
138                         }, {
139                                 InstanceId:        aws.String("i-124"),
140                                 InstanceLifecycle: aws.String("spot"),
141                                 InstanceType:      aws.String("t2.micro"),
142                                 PrivateIpAddress:  aws.String("10.1.2.4"),
143                                 State:             &ec2.InstanceState{Name: aws.String("running"), Code: aws.Int64(16)},
144                         }},
145                 }},
146         }, nil
147 }
148
149 func (e *ec2stub) DescribeInstanceStatusPages(input *ec2.DescribeInstanceStatusInput, fn func(*ec2.DescribeInstanceStatusOutput, bool) bool) error {
150         fn(&ec2.DescribeInstanceStatusOutput{
151                 InstanceStatuses: []*ec2.InstanceStatus{{
152                         InstanceId:       aws.String("i-123"),
153                         AvailabilityZone: aws.String("aa-east-1a"),
154                 }, {
155                         InstanceId:       aws.String("i-124"),
156                         AvailabilityZone: aws.String("aa-east-1a"),
157                 }},
158         }, true)
159         return nil
160 }
161
162 func (e *ec2stub) DescribeSpotPriceHistoryPages(input *ec2.DescribeSpotPriceHistoryInput, fn func(*ec2.DescribeSpotPriceHistoryOutput, bool) bool) error {
163         if !fn(&ec2.DescribeSpotPriceHistoryOutput{
164                 SpotPriceHistory: []*ec2.SpotPrice{
165                         &ec2.SpotPrice{
166                                 InstanceType:     aws.String("t2.micro"),
167                                 AvailabilityZone: aws.String("aa-east-1a"),
168                                 SpotPrice:        aws.String("0.005"),
169                                 Timestamp:        aws.Time(e.reftime.Add(-9 * time.Minute)),
170                         },
171                         &ec2.SpotPrice{
172                                 InstanceType:     aws.String("t2.micro"),
173                                 AvailabilityZone: aws.String("aa-east-1a"),
174                                 SpotPrice:        aws.String("0.015"),
175                                 Timestamp:        aws.Time(e.reftime.Add(-5 * time.Minute)),
176                         },
177                 },
178         }, false) {
179                 return nil
180         }
181         fn(&ec2.DescribeSpotPriceHistoryOutput{
182                 SpotPriceHistory: []*ec2.SpotPrice{
183                         &ec2.SpotPrice{
184                                 InstanceType:     aws.String("t2.micro"),
185                                 AvailabilityZone: aws.String("aa-east-1a"),
186                                 SpotPrice:        aws.String("0.01"),
187                                 Timestamp:        aws.Time(e.reftime.Add(-2 * time.Minute)),
188                         },
189                 },
190         }, true)
191         return nil
192 }
193
194 func (e *ec2stub) CreateTags(input *ec2.CreateTagsInput) (*ec2.CreateTagsOutput, error) {
195         return nil, nil
196 }
197
198 func (e *ec2stub) TerminateInstances(input *ec2.TerminateInstancesInput) (*ec2.TerminateInstancesOutput, error) {
199         return nil, nil
200 }
201
// ec2stubError is a minimal error value carrying an error code and a
// message, used by the tests to simulate EC2 API failures.
type ec2stubError struct {
	code    string
	message string
}

// Code returns the error code.
func (e *ec2stubError) Code() string {
	return e.code
}

// Message returns the human-readable message.
func (e *ec2stubError) Message() string {
	return e.message
}

// Error formats the error as "code: message".
func (e *ec2stubError) Error() string {
	return fmt.Sprintf("%s: %s", e.code, e.message)
}

// OrigErr returns a fixed placeholder error.
func (e *ec2stubError) OrigErr() error {
	return errors.New("stub OrigErr")
}
211
212 // Ensure ec2stubError satisfies the aws.Error interface
213 var _ = awserr.Error(&ec2stubError{})
214
215 func GetInstanceSet(c *check.C, conf string) (*ec2InstanceSet, cloud.ImageID, arvados.Cluster, *prometheus.Registry) {
216         reg := prometheus.NewRegistry()
217         cluster := arvados.Cluster{
218                 InstanceTypes: arvados.InstanceTypeMap(map[string]arvados.InstanceType{
219                         "tiny": {
220                                 Name:         "tiny",
221                                 ProviderType: "t2.micro",
222                                 VCPUs:        1,
223                                 RAM:          4000000000,
224                                 Scratch:      10000000000,
225                                 Price:        .02,
226                                 Preemptible:  false,
227                         },
228                         "tiny-with-extra-scratch": {
229                                 Name:         "tiny-with-extra-scratch",
230                                 ProviderType: "t2.micro",
231                                 VCPUs:        1,
232                                 RAM:          4000000000,
233                                 Price:        .02,
234                                 Preemptible:  false,
235                                 AddedScratch: 20000000000,
236                         },
237                         "tiny-preemptible": {
238                                 Name:         "tiny-preemptible",
239                                 ProviderType: "t2.micro",
240                                 VCPUs:        1,
241                                 RAM:          4000000000,
242                                 Scratch:      10000000000,
243                                 Price:        .02,
244                                 Preemptible:  true,
245                         },
246                 })}
247         if *live != "" {
248                 var exampleCfg testConfig
249                 err := config.LoadFile(&exampleCfg, *live)
250                 c.Assert(err, check.IsNil)
251
252                 is, err := newEC2InstanceSet(exampleCfg.DriverParameters, "test123", nil, logrus.StandardLogger(), reg)
253                 c.Assert(err, check.IsNil)
254                 return is.(*ec2InstanceSet), cloud.ImageID(exampleCfg.ImageIDForTestSuite), cluster, reg
255         } else {
256                 is, err := newEC2InstanceSet(json.RawMessage(conf), "test123", nil, ctxlog.TestLogger(c), reg)
257                 c.Assert(err, check.IsNil)
258                 is.(*ec2InstanceSet).client = &ec2stub{c: c, reftime: time.Now().UTC()}
259                 return is.(*ec2InstanceSet), cloud.ImageID("blob"), cluster, reg
260         }
261 }
262
263 func (*EC2InstanceSetSuite) TestCreate(c *check.C) {
264         ap, img, cluster, _ := GetInstanceSet(c, "{}")
265         pk, _ := test.LoadTestKey(c, "../../dispatchcloud/test/sshkey_dispatch")
266
267         inst, err := ap.Create(cluster.InstanceTypes["tiny"],
268                 img, map[string]string{
269                         "TestTagName": "test tag value",
270                 }, "umask 0600; echo -n test-file-data >/var/run/test-file", pk)
271         c.Assert(err, check.IsNil)
272
273         tags := inst.Tags()
274         c.Check(tags["TestTagName"], check.Equals, "test tag value")
275         c.Logf("inst.String()=%v Address()=%v Tags()=%v", inst.String(), inst.Address(), tags)
276
277         if *live == "" {
278                 c.Check(ap.client.(*ec2stub).describeKeyPairsCalls, check.HasLen, 1)
279                 c.Check(ap.client.(*ec2stub).importKeyPairCalls, check.HasLen, 1)
280
281                 runcalls := ap.client.(*ec2stub).runInstancesCalls
282                 if c.Check(runcalls, check.HasLen, 1) {
283                         c.Check(runcalls[0].MetadataOptions.HttpEndpoint, check.DeepEquals, aws.String("enabled"))
284                         c.Check(runcalls[0].MetadataOptions.HttpTokens, check.DeepEquals, aws.String("required"))
285                 }
286         }
287 }
288
289 func (*EC2InstanceSetSuite) TestCreateWithExtraScratch(c *check.C) {
290         ap, img, cluster, _ := GetInstanceSet(c, "{}")
291         inst, err := ap.Create(cluster.InstanceTypes["tiny-with-extra-scratch"],
292                 img, map[string]string{
293                         "TestTagName": "test tag value",
294                 }, "umask 0600; echo -n test-file-data >/var/run/test-file", nil)
295
296         c.Assert(err, check.IsNil)
297
298         tags := inst.Tags()
299         c.Check(tags["TestTagName"], check.Equals, "test tag value")
300         c.Logf("inst.String()=%v Address()=%v Tags()=%v", inst.String(), inst.Address(), tags)
301
302         if *live == "" {
303                 // Should not have called key pair APIs, because
304                 // publickey arg was nil
305                 c.Check(ap.client.(*ec2stub).describeKeyPairsCalls, check.HasLen, 0)
306                 c.Check(ap.client.(*ec2stub).importKeyPairCalls, check.HasLen, 0)
307         }
308 }
309
310 func (*EC2InstanceSetSuite) TestCreatePreemptible(c *check.C) {
311         ap, img, cluster, _ := GetInstanceSet(c, "{}")
312         pk, _ := test.LoadTestKey(c, "../../dispatchcloud/test/sshkey_dispatch")
313
314         inst, err := ap.Create(cluster.InstanceTypes["tiny-preemptible"],
315                 img, map[string]string{
316                         "TestTagName": "test tag value",
317                 }, "umask 0600; echo -n test-file-data >/var/run/test-file", pk)
318
319         c.Assert(err, check.IsNil)
320
321         tags := inst.Tags()
322         c.Check(tags["TestTagName"], check.Equals, "test tag value")
323         c.Logf("inst.String()=%v Address()=%v Tags()=%v", inst.String(), inst.Address(), tags)
324
325 }
326
327 func (*EC2InstanceSetSuite) TestCreateFailoverSecondSubnet(c *check.C) {
328         if *live != "" {
329                 c.Skip("not applicable in live mode")
330                 return
331         }
332
333         ap, img, cluster, reg := GetInstanceSet(c, `{"SubnetID":["subnet-full","subnet-good"]}`)
334         ap.client.(*ec2stub).subnetErrorOnRunInstances = map[string]error{
335                 "subnet-full": &ec2stubError{
336                         code:    "InsufficientFreeAddressesInSubnet",
337                         message: "subnet is full",
338                 },
339         }
340         inst, err := ap.Create(cluster.InstanceTypes["tiny"], img, nil, "", nil)
341         c.Check(err, check.IsNil)
342         c.Check(inst, check.NotNil)
343         c.Check(ap.client.(*ec2stub).runInstancesCalls, check.HasLen, 2)
344         metrics := arvadostest.GatherMetricsAsString(reg)
345         c.Check(metrics, check.Matches, `(?ms).*`+
346                 `arvados_dispatchcloud_ec2_instance_starts_total{subnet_id="subnet-full",success="0"} 1\n`+
347                 `arvados_dispatchcloud_ec2_instance_starts_total{subnet_id="subnet-full",success="1"} 0\n`+
348                 `arvados_dispatchcloud_ec2_instance_starts_total{subnet_id="subnet-good",success="0"} 0\n`+
349                 `arvados_dispatchcloud_ec2_instance_starts_total{subnet_id="subnet-good",success="1"} 1\n`+
350                 `.*`)
351
352         // Next RunInstances call should try the working subnet first
353         inst, err = ap.Create(cluster.InstanceTypes["tiny"], img, nil, "", nil)
354         c.Check(err, check.IsNil)
355         c.Check(inst, check.NotNil)
356         c.Check(ap.client.(*ec2stub).runInstancesCalls, check.HasLen, 3)
357         metrics = arvadostest.GatherMetricsAsString(reg)
358         c.Check(metrics, check.Matches, `(?ms).*`+
359                 `arvados_dispatchcloud_ec2_instance_starts_total{subnet_id="subnet-full",success="0"} 1\n`+
360                 `arvados_dispatchcloud_ec2_instance_starts_total{subnet_id="subnet-full",success="1"} 0\n`+
361                 `arvados_dispatchcloud_ec2_instance_starts_total{subnet_id="subnet-good",success="0"} 0\n`+
362                 `arvados_dispatchcloud_ec2_instance_starts_total{subnet_id="subnet-good",success="1"} 2\n`+
363                 `.*`)
364 }
365
366 func (*EC2InstanceSetSuite) TestIsErrorSubnetSpecific(c *check.C) {
367         c.Check(isErrorSubnetSpecific(nil), check.Equals, false)
368         c.Check(isErrorSubnetSpecific(errors.New("misc error")), check.Equals, false)
369
370         c.Check(isErrorSubnetSpecific(&ec2stubError{
371                 code: "InsufficientInstanceCapacity",
372         }), check.Equals, true)
373
374         c.Check(isErrorSubnetSpecific(&ec2stubError{
375                 code: "InsufficientVolumeCapacity",
376         }), check.Equals, true)
377
378         c.Check(isErrorSubnetSpecific(&ec2stubError{
379                 code:    "InsufficientFreeAddressesInSubnet",
380                 message: "Not enough free addresses in subnet subnet-abcdefg\n\tstatus code: 400, request id: abcdef01-2345-6789-abcd-ef0123456789",
381         }), check.Equals, true)
382
383         // #21603: (Sometimes?) EC2 returns code InvalidParameterValue
384         // even though the code "InsufficientFreeAddressesInSubnet"
385         // seems like it must be meant for exactly this error.
386         c.Check(isErrorSubnetSpecific(&ec2stubError{
387                 code:    "InvalidParameterValue",
388                 message: "Not enough free addresses in subnet subnet-abcdefg\n\tstatus code: 400, request id: abcdef01-2345-6789-abcd-ef0123456789",
389         }), check.Equals, true)
390
391         // Similarly, AWS docs
392         // (https://repost.aws/knowledge-center/vpc-insufficient-ip-errors)
393         // suggest the following code/message combinations also exist.
394         c.Check(isErrorSubnetSpecific(&ec2stubError{
395                 code:    "Client.InvalidParameterValue",
396                 message: "There aren't sufficient free Ipv4 addresses or prefixes",
397         }), check.Equals, true)
398         c.Check(isErrorSubnetSpecific(&ec2stubError{
399                 code:    "InvalidParameterValue",
400                 message: "There aren't sufficient free Ipv4 addresses or prefixes",
401         }), check.Equals, true)
402         // Meanwhile, other AWS docs
403         // (https://docs.aws.amazon.com/AWSEC2/latest/APIReference/errors-overview.html)
404         // suggest Client.InvalidParameterValue is not a real code but
405         // ClientInvalidParameterValue is.
406         c.Check(isErrorSubnetSpecific(&ec2stubError{
407                 code:    "ClientInvalidParameterValue",
408                 message: "There aren't sufficient free Ipv4 addresses or prefixes",
409         }), check.Equals, true)
410
411         c.Check(isErrorSubnetSpecific(&ec2stubError{
412                 code:    "InvalidParameterValue",
413                 message: "Some other invalid parameter error",
414         }), check.Equals, false)
415 }
416
417 func (*EC2InstanceSetSuite) TestCreateAllSubnetsFailing(c *check.C) {
418         if *live != "" {
419                 c.Skip("not applicable in live mode")
420                 return
421         }
422
423         ap, img, cluster, reg := GetInstanceSet(c, `{"SubnetID":["subnet-full","subnet-broken"]}`)
424         ap.client.(*ec2stub).subnetErrorOnRunInstances = map[string]error{
425                 "subnet-full": &ec2stubError{
426                         code:    "InsufficientFreeAddressesInSubnet",
427                         message: "subnet is full",
428                 },
429                 "subnet-broken": &ec2stubError{
430                         code:    "InvalidSubnetId.NotFound",
431                         message: "bogus subnet id",
432                 },
433         }
434         _, err := ap.Create(cluster.InstanceTypes["tiny"], img, nil, "", nil)
435         c.Check(err, check.NotNil)
436         c.Check(err, check.ErrorMatches, `.*InvalidSubnetId\.NotFound.*`)
437         c.Check(ap.client.(*ec2stub).runInstancesCalls, check.HasLen, 2)
438         metrics := arvadostest.GatherMetricsAsString(reg)
439         c.Check(metrics, check.Matches, `(?ms).*`+
440                 `arvados_dispatchcloud_ec2_instance_starts_total{subnet_id="subnet-broken",success="0"} 1\n`+
441                 `arvados_dispatchcloud_ec2_instance_starts_total{subnet_id="subnet-broken",success="1"} 0\n`+
442                 `arvados_dispatchcloud_ec2_instance_starts_total{subnet_id="subnet-full",success="0"} 1\n`+
443                 `arvados_dispatchcloud_ec2_instance_starts_total{subnet_id="subnet-full",success="1"} 0\n`+
444                 `.*`)
445
446         _, err = ap.Create(cluster.InstanceTypes["tiny"], img, nil, "", nil)
447         c.Check(err, check.NotNil)
448         c.Check(err, check.ErrorMatches, `.*InsufficientFreeAddressesInSubnet.*`)
449         c.Check(ap.client.(*ec2stub).runInstancesCalls, check.HasLen, 4)
450         metrics = arvadostest.GatherMetricsAsString(reg)
451         c.Check(metrics, check.Matches, `(?ms).*`+
452                 `arvados_dispatchcloud_ec2_instance_starts_total{subnet_id="subnet-broken",success="0"} 2\n`+
453                 `arvados_dispatchcloud_ec2_instance_starts_total{subnet_id="subnet-broken",success="1"} 0\n`+
454                 `arvados_dispatchcloud_ec2_instance_starts_total{subnet_id="subnet-full",success="0"} 2\n`+
455                 `arvados_dispatchcloud_ec2_instance_starts_total{subnet_id="subnet-full",success="1"} 0\n`+
456                 `.*`)
457 }
458
459 func (*EC2InstanceSetSuite) TestCreateOneSubnetFailingCapacity(c *check.C) {
460         if *live != "" {
461                 c.Skip("not applicable in live mode")
462                 return
463         }
464         ap, img, cluster, reg := GetInstanceSet(c, `{"SubnetID":["subnet-full","subnet-broken"]}`)
465         ap.client.(*ec2stub).subnetErrorOnRunInstances = map[string]error{
466                 "subnet-full": &ec2stubError{
467                         code:    "InsufficientFreeAddressesInSubnet",
468                         message: "subnet is full",
469                 },
470                 "subnet-broken": &ec2stubError{
471                         code:    "InsufficientInstanceCapacity",
472                         message: "insufficient capacity",
473                 },
474         }
475         for i := 0; i < 3; i++ {
476                 _, err := ap.Create(cluster.InstanceTypes["tiny"], img, nil, "", nil)
477                 c.Check(err, check.NotNil)
478                 c.Check(err, check.ErrorMatches, `.*InsufficientInstanceCapacity.*`)
479         }
480         c.Check(ap.client.(*ec2stub).runInstancesCalls, check.HasLen, 6)
481         metrics := arvadostest.GatherMetricsAsString(reg)
482         c.Check(metrics, check.Matches, `(?ms).*`+
483                 `arvados_dispatchcloud_ec2_instance_starts_total{subnet_id="subnet-broken",success="0"} 3\n`+
484                 `arvados_dispatchcloud_ec2_instance_starts_total{subnet_id="subnet-broken",success="1"} 0\n`+
485                 `arvados_dispatchcloud_ec2_instance_starts_total{subnet_id="subnet-full",success="0"} 3\n`+
486                 `arvados_dispatchcloud_ec2_instance_starts_total{subnet_id="subnet-full",success="1"} 0\n`+
487                 `.*`)
488 }
489
490 func (*EC2InstanceSetSuite) TestTagInstances(c *check.C) {
491         ap, _, _, _ := GetInstanceSet(c, "{}")
492         l, err := ap.Instances(nil)
493         c.Assert(err, check.IsNil)
494
495         for _, i := range l {
496                 tg := i.Tags()
497                 tg["TestTag2"] = "123 test tag 2"
498                 c.Check(i.SetTags(tg), check.IsNil)
499         }
500 }
501
502 func (*EC2InstanceSetSuite) TestListInstances(c *check.C) {
503         ap, _, _, reg := GetInstanceSet(c, "{}")
504         l, err := ap.Instances(nil)
505         c.Assert(err, check.IsNil)
506
507         for _, i := range l {
508                 tg := i.Tags()
509                 c.Logf("%v %v %v", i.String(), i.Address(), tg)
510         }
511
512         metrics := arvadostest.GatherMetricsAsString(reg)
513         c.Check(metrics, check.Matches, `(?ms).*`+
514                 `arvados_dispatchcloud_ec2_instances{subnet_id="[^"]*"} \d+\n`+
515                 `.*`)
516 }
517
518 func (*EC2InstanceSetSuite) TestDestroyInstances(c *check.C) {
519         ap, _, _, _ := GetInstanceSet(c, "{}")
520         l, err := ap.Instances(nil)
521         c.Assert(err, check.IsNil)
522
523         for _, i := range l {
524                 c.Check(i.Destroy(), check.IsNil)
525         }
526 }
527
528 func (*EC2InstanceSetSuite) TestInstancePriceHistory(c *check.C) {
529         ap, img, cluster, _ := GetInstanceSet(c, "{}")
530         pk, _ := test.LoadTestKey(c, "../../dispatchcloud/test/sshkey_dispatch")
531         tags := cloud.InstanceTags{"arvados-ec2-driver": "test"}
532
533         defer func() {
534                 instances, err := ap.Instances(tags)
535                 c.Assert(err, check.IsNil)
536                 for _, inst := range instances {
537                         c.Logf("cleanup: destroy instance %s", inst)
538                         c.Check(inst.Destroy(), check.IsNil)
539                 }
540         }()
541
542         ap.ec2config.SpotPriceUpdateInterval = arvados.Duration(time.Hour)
543         ap.ec2config.EBSPrice = 0.1 // $/GiB/month
544         inst1, err := ap.Create(cluster.InstanceTypes["tiny-preemptible"], img, tags, "true", pk)
545         c.Assert(err, check.IsNil)
546         defer inst1.Destroy()
547         inst2, err := ap.Create(cluster.InstanceTypes["tiny-preemptible"], img, tags, "true", pk)
548         c.Assert(err, check.IsNil)
549         defer inst2.Destroy()
550
551         // in live mode, we need to wait for the instances to reach
552         // running state before we can discover their availability
553         // zones and look up the appropriate prices.
554         var instances []cloud.Instance
555         for deadline := time.Now().Add(5 * time.Minute); ; {
556                 if deadline.Before(time.Now()) {
557                         c.Fatal("timed out")
558                 }
559                 instances, err = ap.Instances(tags)
560                 running := 0
561                 for _, inst := range instances {
562                         ec2i := inst.(*ec2Instance).instance
563                         if *ec2i.InstanceLifecycle == "spot" && *ec2i.State.Code&16 != 0 {
564                                 running++
565                         }
566                 }
567                 if running >= 2 {
568                         c.Logf("instances are running, and identifiable as spot instances")
569                         break
570                 }
571                 c.Logf("waiting for instances to reach running state so their availability zone becomes visible...")
572                 time.Sleep(10 * time.Second)
573         }
574
575         for _, inst := range instances {
576                 hist := inst.PriceHistory(arvados.InstanceType{})
577                 c.Logf("%s price history: %v", inst.ID(), hist)
578                 c.Check(len(hist) > 0, check.Equals, true)
579
580                 histWithScratch := inst.PriceHistory(arvados.InstanceType{AddedScratch: 640 << 30})
581                 c.Logf("%s price history with 640 GiB scratch: %v", inst.ID(), histWithScratch)
582
583                 for i, ip := range hist {
584                         c.Check(ip.Price, check.Not(check.Equals), 0.0)
585                         if i > 0 {
586                                 c.Check(ip.StartTime.Before(hist[i-1].StartTime), check.Equals, true)
587                         }
588                         c.Check(ip.Price < histWithScratch[i].Price, check.Equals, true)
589                 }
590         }
591 }
592
593 func (*EC2InstanceSetSuite) TestWrapError(c *check.C) {
594         retryError := awserr.New("Throttling", "", nil)
595         wrapped := wrapError(retryError, &atomic.Value{})
596         _, ok := wrapped.(cloud.RateLimitError)
597         c.Check(ok, check.Equals, true)
598
599         quotaError := awserr.New("InstanceLimitExceeded", "", nil)
600         wrapped = wrapError(quotaError, nil)
601         _, ok = wrapped.(cloud.QuotaError)
602         c.Check(ok, check.Equals, true)
603
604         for _, trial := range []struct {
605                 code string
606                 msg  string
607         }{
608                 {"InsufficientInstanceCapacity", ""},
609                 {"Unsupported", "Your requested instance type (t3.micro) is not supported in your requested Availability Zone (us-east-1e). Please retry your request by not specifying an Availability Zone or choosing us-east-1a, us-east-1b, us-east-1c, us-east-1d, us-east-1f."},
610         } {
611                 capacityError := awserr.New(trial.code, trial.msg, nil)
612                 wrapped = wrapError(capacityError, nil)
613                 caperr, ok := wrapped.(cloud.CapacityError)
614                 c.Check(ok, check.Equals, true)
615                 c.Check(caperr.IsCapacityError(), check.Equals, true)
616                 c.Check(caperr.IsInstanceTypeSpecific(), check.Equals, true)
617         }
618 }