21705: Migrate lib/cloud/ec2 from aws-sdk-go to aws-sdk-go-v2.
[arvados.git] / lib / cloud / ec2 / ec2_test.go
1 // Copyright (C) The Arvados Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: AGPL-3.0
4 //
5 //
6 // How to manually run individual tests against the real cloud:
7 //
8 // $ go test -v git.arvados.org/arvados.git/lib/cloud/ec2 -live-ec2-cfg ec2config.yml -check.f=TestCreate
9 //
// Tests should be run individually and in the order they are listed in the file.
11 //
12 // Example ec2config.yml:
13 //
14 // ImageIDForTestSuite: ami-xxxxxxxxxxxxxxxxx
15 // DriverParameters:
16 //       AccessKeyID: XXXXXXXXXXXXXX
17 //       SecretAccessKey: xxxxxxxxxxxxxxxxxxxx
18 //       Region: us-east-1
19 //       SecurityGroupIDs: [sg-xxxxxxxx]
20 //       SubnetID: subnet-xxxxxxxx
21 //       AdminUsername: crunch
22
23 package ec2
24
25 import (
26         "context"
27         "encoding/json"
28         "errors"
29         "flag"
30         "sync/atomic"
31         "testing"
32         "time"
33
34         "git.arvados.org/arvados.git/lib/cloud"
35         "git.arvados.org/arvados.git/lib/dispatchcloud/test"
36         "git.arvados.org/arvados.git/sdk/go/arvados"
37         "git.arvados.org/arvados.git/sdk/go/arvadostest"
38         "git.arvados.org/arvados.git/sdk/go/config"
39         "git.arvados.org/arvados.git/sdk/go/ctxlog"
40         "github.com/aws/aws-sdk-go-v2/aws"
41         "github.com/aws/aws-sdk-go-v2/service/ec2"
42         "github.com/aws/aws-sdk-go-v2/service/ec2/types"
43         "github.com/aws/smithy-go"
44         "github.com/ghodss/yaml"
45         "github.com/prometheus/client_golang/prometheus"
46         "github.com/sirupsen/logrus"
47         check "gopkg.in/check.v1"
48 )
49
// live holds the value of the -live-ec2-cfg flag. When it is
// non-empty, GetInstanceSet loads that config file and the tests use
// the resulting instance set as-is; when it is empty, the tests run
// against the in-memory ec2stub instead.
var live = flag.String("live-ec2-cfg", "", "Test with real EC2 API, provide config file")
51
// Test is the gocheck boilerplate: it hands control from the standard
// "go test" runner to the suites registered with check.Suite in this
// package.
func Test(t *testing.T) {
	check.TestingT(t)
}
56
// sliceOrStringSuite holds the tests for decoding configuration
// values of type sliceOrSingleString.
type sliceOrStringSuite struct{}

// Register the suite with gocheck.
var _ = check.Suite(&sliceOrStringSuite{})
60
61 func (s *sliceOrStringSuite) TestUnmarshal(c *check.C) {
62         var conf ec2InstanceSetConfig
63         for _, trial := range []struct {
64                 input  string
65                 output sliceOrSingleString
66         }{
67                 {``, nil},
68                 {`""`, nil},
69                 {`[]`, nil},
70                 {`"foo"`, sliceOrSingleString{"foo"}},
71                 {`["foo"]`, sliceOrSingleString{"foo"}},
72                 {`[foo]`, sliceOrSingleString{"foo"}},
73                 {`["foo", "bar"]`, sliceOrSingleString{"foo", "bar"}},
74                 {`[foo-bar, baz]`, sliceOrSingleString{"foo-bar", "baz"}},
75         } {
76                 c.Logf("trial: %+v", trial)
77                 err := yaml.Unmarshal([]byte("SubnetID: "+trial.input+"\n"), &conf)
78                 if !c.Check(err, check.IsNil) {
79                         continue
80                 }
81                 c.Check(conf.SubnetID, check.DeepEquals, trial.output)
82         }
83 }
84
// EC2InstanceSetSuite holds the tests that exercise ec2InstanceSet,
// either against the ec2stub or (with -live-ec2-cfg) a real account.
type EC2InstanceSetSuite struct{}

// Register the suite with gocheck.
var _ = check.Suite(&EC2InstanceSetSuite{})
88
// testConfig is the layout of the file named by -live-ec2-cfg (see
// the example at the top of this file). GetInstanceSet returns
// ImageIDForTestSuite as the image ID for the tests and passes
// DriverParameters, undecoded, to newEC2InstanceSet.
type testConfig struct {
	ImageIDForTestSuite string
	DriverParameters    json.RawMessage
}
93
// ec2stub is an in-memory stand-in for the EC2 API client, installed
// by GetInstanceSet when no live config is given. It returns canned
// responses and records the inputs of selected calls so tests can
// inspect them afterwards.
type ec2stub struct {
	c                     *check.C
	// reftime is the reference time from which the stubbed spot
	// price timestamps are offset.
	reftime               time.Time
	// Inputs received by the corresponding stub methods, in call
	// order.
	importKeyPairCalls    []*ec2.ImportKeyPairInput
	describeKeyPairsCalls []*ec2.DescribeKeyPairsInput
	runInstancesCalls     []*ec2.RunInstancesInput
	// {subnetID => error}: RunInstances returns error if subnetID
	// matches.
	subnetErrorOnRunInstances map[string]error
}
104
// ImportKeyPair records the request in importKeyPairCalls and reports
// success. Note that the returned output is nil, not an empty struct.
func (e *ec2stub) ImportKeyPair(ctx context.Context, input *ec2.ImportKeyPairInput, _ ...func(*ec2.Options)) (*ec2.ImportKeyPairOutput, error) {
	e.importKeyPairCalls = append(e.importKeyPairCalls, input)
	return nil, nil
}
109
// DescribeKeyPairs records the request in describeKeyPairsCalls and
// returns an empty result, i.e. no key pairs exist.
func (e *ec2stub) DescribeKeyPairs(ctx context.Context, input *ec2.DescribeKeyPairsInput, _ ...func(*ec2.Options)) (*ec2.DescribeKeyPairsOutput, error) {
	e.describeKeyPairsCalls = append(e.describeKeyPairsCalls, input)
	return &ec2.DescribeKeyPairsOutput{}, nil
}
114
115 func (e *ec2stub) RunInstances(ctx context.Context, input *ec2.RunInstancesInput, _ ...func(*ec2.Options)) (*ec2.RunInstancesOutput, error) {
116         e.runInstancesCalls = append(e.runInstancesCalls, input)
117         if len(input.NetworkInterfaces) > 0 && input.NetworkInterfaces[0].SubnetId != nil {
118                 err := e.subnetErrorOnRunInstances[*input.NetworkInterfaces[0].SubnetId]
119                 if err != nil {
120                         return nil, err
121                 }
122         }
123         return &ec2.RunInstancesOutput{Instances: []types.Instance{{
124                 InstanceId:   aws.String("i-123"),
125                 InstanceType: types.InstanceTypeT2Micro,
126                 Tags:         input.TagSpecifications[0].Tags,
127         }}}, nil
128 }
129
130 func (e *ec2stub) DescribeInstances(ctx context.Context, input *ec2.DescribeInstancesInput, _ ...func(*ec2.Options)) (*ec2.DescribeInstancesOutput, error) {
131         return &ec2.DescribeInstancesOutput{
132                 Reservations: []types.Reservation{{
133                         Instances: []types.Instance{{
134                                 InstanceId:        aws.String("i-123"),
135                                 InstanceLifecycle: types.InstanceLifecycleTypeSpot,
136                                 InstanceType:      types.InstanceTypeT2Micro,
137                                 PrivateIpAddress:  aws.String("10.1.2.3"),
138                                 State:             &types.InstanceState{Name: types.InstanceStateNameRunning, Code: aws.Int32(16)},
139                         }, {
140                                 InstanceId:        aws.String("i-124"),
141                                 InstanceLifecycle: types.InstanceLifecycleTypeSpot,
142                                 InstanceType:      types.InstanceTypeT2Micro,
143                                 PrivateIpAddress:  aws.String("10.1.2.4"),
144                                 State:             &types.InstanceState{Name: types.InstanceStateNameRunning, Code: aws.Int32(16)},
145                         }},
146                 }},
147         }, nil
148 }
149
150 func (e *ec2stub) DescribeInstanceStatus(ctx context.Context, input *ec2.DescribeInstanceStatusInput, _ ...func(*ec2.Options)) (*ec2.DescribeInstanceStatusOutput, error) {
151         return &ec2.DescribeInstanceStatusOutput{
152                 InstanceStatuses: []types.InstanceStatus{{
153                         InstanceId:       aws.String("i-123"),
154                         AvailabilityZone: aws.String("aa-east-1a"),
155                 }, {
156                         InstanceId:       aws.String("i-124"),
157                         AvailabilityZone: aws.String("aa-east-1a"),
158                 }},
159         }, nil
160 }
161
162 func (e *ec2stub) DescribeSpotPriceHistory(ctx context.Context, input *ec2.DescribeSpotPriceHistoryInput, _ ...func(*ec2.Options)) (*ec2.DescribeSpotPriceHistoryOutput, error) {
163         if input.NextToken == nil {
164                 return &ec2.DescribeSpotPriceHistoryOutput{
165                         SpotPriceHistory: []types.SpotPrice{
166                                 types.SpotPrice{
167                                         InstanceType:     types.InstanceTypeT2Micro,
168                                         AvailabilityZone: aws.String("aa-east-1a"),
169                                         SpotPrice:        aws.String("0.005"),
170                                         Timestamp:        aws.Time(e.reftime.Add(-9 * time.Minute)),
171                                 },
172                                 types.SpotPrice{
173                                         InstanceType:     types.InstanceTypeT2Micro,
174                                         AvailabilityZone: aws.String("aa-east-1a"),
175                                         SpotPrice:        aws.String("0.015"),
176                                         Timestamp:        aws.Time(e.reftime.Add(-5 * time.Minute)),
177                                 },
178                         },
179                         NextToken: aws.String("stubnexttoken"),
180                 }, nil
181         } else {
182                 return &ec2.DescribeSpotPriceHistoryOutput{
183                         SpotPriceHistory: []types.SpotPrice{
184                                 types.SpotPrice{
185                                         InstanceType:     types.InstanceTypeT2Micro,
186                                         AvailabilityZone: aws.String("aa-east-1a"),
187                                         SpotPrice:        aws.String("0.01"),
188                                         Timestamp:        aws.Time(e.reftime.Add(-2 * time.Minute)),
189                                 },
190                         },
191                 }, nil
192         }
193 }
194
// CreateTags is a no-op: it ignores its input and reports success
// with a nil output.
func (e *ec2stub) CreateTags(ctx context.Context, input *ec2.CreateTagsInput, _ ...func(*ec2.Options)) (*ec2.CreateTagsOutput, error) {
	return nil, nil
}
198
// TerminateInstances is a no-op: it ignores its input and reports
// success with a nil output.
func (e *ec2stub) TerminateInstances(ctx context.Context, input *ec2.TerminateInstancesInput, _ ...func(*ec2.Options)) (*ec2.TerminateInstancesOutput, error) {
	return nil, nil
}
202
203 type ec2stubError = smithy.GenericAPIError
204
205 // Ensure ec2stubError satisfies the smithy.APIError interface
206 var _ = smithy.APIError(&ec2stubError{})
207
208 func GetInstanceSet(c *check.C, conf string) (*ec2InstanceSet, cloud.ImageID, arvados.Cluster, *prometheus.Registry) {
209         reg := prometheus.NewRegistry()
210         cluster := arvados.Cluster{
211                 InstanceTypes: arvados.InstanceTypeMap(map[string]arvados.InstanceType{
212                         "tiny": {
213                                 Name:         "tiny",
214                                 ProviderType: "t2.micro",
215                                 VCPUs:        1,
216                                 RAM:          4000000000,
217                                 Scratch:      10000000000,
218                                 Price:        .02,
219                                 Preemptible:  false,
220                         },
221                         "tiny-with-extra-scratch": {
222                                 Name:         "tiny-with-extra-scratch",
223                                 ProviderType: "t2.micro",
224                                 VCPUs:        1,
225                                 RAM:          4000000000,
226                                 Price:        .02,
227                                 Preemptible:  false,
228                                 AddedScratch: 20000000000,
229                         },
230                         "tiny-preemptible": {
231                                 Name:         "tiny-preemptible",
232                                 ProviderType: "t2.micro",
233                                 VCPUs:        1,
234                                 RAM:          4000000000,
235                                 Scratch:      10000000000,
236                                 Price:        .02,
237                                 Preemptible:  true,
238                         },
239                 })}
240         if *live != "" {
241                 var exampleCfg testConfig
242                 err := config.LoadFile(&exampleCfg, *live)
243                 c.Assert(err, check.IsNil)
244
245                 is, err := newEC2InstanceSet(exampleCfg.DriverParameters, "test123", nil, logrus.StandardLogger(), reg)
246                 c.Assert(err, check.IsNil)
247                 return is.(*ec2InstanceSet), cloud.ImageID(exampleCfg.ImageIDForTestSuite), cluster, reg
248         } else {
249                 is, err := newEC2InstanceSet(json.RawMessage(conf), "test123", nil, ctxlog.TestLogger(c), reg)
250                 c.Assert(err, check.IsNil)
251                 is.(*ec2InstanceSet).client = &ec2stub{c: c, reftime: time.Now().UTC()}
252                 return is.(*ec2InstanceSet), cloud.ImageID("blob"), cluster, reg
253         }
254 }
255
256 func (*EC2InstanceSetSuite) TestCreate(c *check.C) {
257         ap, img, cluster, _ := GetInstanceSet(c, "{}")
258         pk, _ := test.LoadTestKey(c, "../../dispatchcloud/test/sshkey_dispatch")
259
260         inst, err := ap.Create(cluster.InstanceTypes["tiny"],
261                 img, map[string]string{
262                         "TestTagName": "test tag value",
263                 }, "umask 0600; echo -n test-file-data >/var/run/test-file", pk)
264         c.Assert(err, check.IsNil)
265
266         tags := inst.Tags()
267         c.Check(tags["TestTagName"], check.Equals, "test tag value")
268         c.Logf("inst.String()=%v Address()=%v Tags()=%v", inst.String(), inst.Address(), tags)
269
270         if *live == "" {
271                 c.Check(ap.client.(*ec2stub).describeKeyPairsCalls, check.HasLen, 1)
272                 c.Check(ap.client.(*ec2stub).importKeyPairCalls, check.HasLen, 1)
273
274                 runcalls := ap.client.(*ec2stub).runInstancesCalls
275                 if c.Check(runcalls, check.HasLen, 1) {
276                         c.Check(runcalls[0].MetadataOptions.HttpEndpoint, check.DeepEquals, types.InstanceMetadataEndpointStateEnabled)
277                         c.Check(runcalls[0].MetadataOptions.HttpTokens, check.DeepEquals, types.HttpTokensStateRequired)
278                 }
279         }
280 }
281
282 func (*EC2InstanceSetSuite) TestCreateWithExtraScratch(c *check.C) {
283         ap, img, cluster, _ := GetInstanceSet(c, "{}")
284         inst, err := ap.Create(cluster.InstanceTypes["tiny-with-extra-scratch"],
285                 img, map[string]string{
286                         "TestTagName": "test tag value",
287                 }, "umask 0600; echo -n test-file-data >/var/run/test-file", nil)
288
289         c.Assert(err, check.IsNil)
290
291         tags := inst.Tags()
292         c.Check(tags["TestTagName"], check.Equals, "test tag value")
293         c.Logf("inst.String()=%v Address()=%v Tags()=%v", inst.String(), inst.Address(), tags)
294
295         if *live == "" {
296                 // Should not have called key pair APIs, because
297                 // publickey arg was nil
298                 c.Check(ap.client.(*ec2stub).describeKeyPairsCalls, check.HasLen, 0)
299                 c.Check(ap.client.(*ec2stub).importKeyPairCalls, check.HasLen, 0)
300         }
301 }
302
303 func (*EC2InstanceSetSuite) TestCreatePreemptible(c *check.C) {
304         ap, img, cluster, _ := GetInstanceSet(c, "{}")
305         pk, _ := test.LoadTestKey(c, "../../dispatchcloud/test/sshkey_dispatch")
306
307         inst, err := ap.Create(cluster.InstanceTypes["tiny-preemptible"],
308                 img, map[string]string{
309                         "TestTagName": "test tag value",
310                 }, "umask 0600; echo -n test-file-data >/var/run/test-file", pk)
311
312         c.Assert(err, check.IsNil)
313
314         tags := inst.Tags()
315         c.Check(tags["TestTagName"], check.Equals, "test tag value")
316         c.Logf("inst.String()=%v Address()=%v Tags()=%v", inst.String(), inst.Address(), tags)
317
318 }
319
320 func (*EC2InstanceSetSuite) TestCreateFailoverSecondSubnet(c *check.C) {
321         if *live != "" {
322                 c.Skip("not applicable in live mode")
323                 return
324         }
325
326         ap, img, cluster, reg := GetInstanceSet(c, `{"SubnetID":["subnet-full","subnet-good"]}`)
327         ap.client.(*ec2stub).subnetErrorOnRunInstances = map[string]error{
328                 "subnet-full": &ec2stubError{
329                         Code:    "InsufficientFreeAddressesInSubnet",
330                         Message: "subnet is full",
331                 },
332         }
333         inst, err := ap.Create(cluster.InstanceTypes["tiny"], img, nil, "", nil)
334         c.Check(err, check.IsNil)
335         c.Check(inst, check.NotNil)
336         c.Check(ap.client.(*ec2stub).runInstancesCalls, check.HasLen, 2)
337         metrics := arvadostest.GatherMetricsAsString(reg)
338         c.Check(metrics, check.Matches, `(?ms).*`+
339                 `arvados_dispatchcloud_ec2_instance_starts_total{subnet_id="subnet-full",success="0"} 1\n`+
340                 `arvados_dispatchcloud_ec2_instance_starts_total{subnet_id="subnet-full",success="1"} 0\n`+
341                 `arvados_dispatchcloud_ec2_instance_starts_total{subnet_id="subnet-good",success="0"} 0\n`+
342                 `arvados_dispatchcloud_ec2_instance_starts_total{subnet_id="subnet-good",success="1"} 1\n`+
343                 `.*`)
344
345         // Next RunInstances call should try the working subnet first
346         inst, err = ap.Create(cluster.InstanceTypes["tiny"], img, nil, "", nil)
347         c.Check(err, check.IsNil)
348         c.Check(inst, check.NotNil)
349         c.Check(ap.client.(*ec2stub).runInstancesCalls, check.HasLen, 3)
350         metrics = arvadostest.GatherMetricsAsString(reg)
351         c.Check(metrics, check.Matches, `(?ms).*`+
352                 `arvados_dispatchcloud_ec2_instance_starts_total{subnet_id="subnet-full",success="0"} 1\n`+
353                 `arvados_dispatchcloud_ec2_instance_starts_total{subnet_id="subnet-full",success="1"} 0\n`+
354                 `arvados_dispatchcloud_ec2_instance_starts_total{subnet_id="subnet-good",success="0"} 0\n`+
355                 `arvados_dispatchcloud_ec2_instance_starts_total{subnet_id="subnet-good",success="1"} 2\n`+
356                 `.*`)
357 }
358
359 func (*EC2InstanceSetSuite) TestIsErrorSubnetSpecific(c *check.C) {
360         c.Check(isErrorSubnetSpecific(nil), check.Equals, false)
361         c.Check(isErrorSubnetSpecific(errors.New("misc error")), check.Equals, false)
362
363         c.Check(isErrorSubnetSpecific(&ec2stubError{
364                 Code: "InsufficientInstanceCapacity",
365         }), check.Equals, true)
366
367         c.Check(isErrorSubnetSpecific(&ec2stubError{
368                 Code: "InsufficientVolumeCapacity",
369         }), check.Equals, true)
370
371         c.Check(isErrorSubnetSpecific(&ec2stubError{
372                 Code:    "InsufficientFreeAddressesInSubnet",
373                 Message: "Not enough free addresses in subnet subnet-abcdefg\n\tstatus code: 400, request id: abcdef01-2345-6789-abcd-ef0123456789",
374         }), check.Equals, true)
375
376         // #21603: (Sometimes?) EC2 returns code InvalidParameterValue
377         // even though the code "InsufficientFreeAddressesInSubnet"
378         // seems like it must be meant for exactly this error.
379         c.Check(isErrorSubnetSpecific(&ec2stubError{
380                 Code:    "InvalidParameterValue",
381                 Message: "Not enough free addresses in subnet subnet-abcdefg\n\tstatus code: 400, request id: abcdef01-2345-6789-abcd-ef0123456789",
382         }), check.Equals, true)
383
384         // Similarly, AWS docs
385         // (https://repost.aws/knowledge-center/vpc-insufficient-ip-errors)
386         // suggest the following code/message combinations also exist.
387         c.Check(isErrorSubnetSpecific(&ec2stubError{
388                 Code:    "Client.InvalidParameterValue",
389                 Message: "There aren't sufficient free Ipv4 addresses or prefixes",
390         }), check.Equals, true)
391         c.Check(isErrorSubnetSpecific(&ec2stubError{
392                 Code:    "InvalidParameterValue",
393                 Message: "There aren't sufficient free Ipv4 addresses or prefixes",
394         }), check.Equals, true)
395         // Meanwhile, other AWS docs
396         // (https://docs.aws.amazon.com/AWSEC2/latest/APIReference/errors-overview.html)
397         // suggest Client.InvalidParameterValue is not a real code but
398         // ClientInvalidParameterValue is.
399         c.Check(isErrorSubnetSpecific(&ec2stubError{
400                 Code:    "ClientInvalidParameterValue",
401                 Message: "There aren't sufficient free Ipv4 addresses or prefixes",
402         }), check.Equals, true)
403
404         c.Check(isErrorSubnetSpecific(&ec2stubError{
405                 Code:    "InvalidParameterValue",
406                 Message: "Some other invalid parameter error",
407         }), check.Equals, false)
408 }
409
410 func (*EC2InstanceSetSuite) TestCreateAllSubnetsFailing(c *check.C) {
411         if *live != "" {
412                 c.Skip("not applicable in live mode")
413                 return
414         }
415
416         ap, img, cluster, reg := GetInstanceSet(c, `{"SubnetID":["subnet-full","subnet-broken"]}`)
417         ap.client.(*ec2stub).subnetErrorOnRunInstances = map[string]error{
418                 "subnet-full": &ec2stubError{
419                         Code:    "InsufficientFreeAddressesInSubnet",
420                         Message: "subnet is full",
421                 },
422                 "subnet-broken": &ec2stubError{
423                         Code:    "InvalidSubnetId.NotFound",
424                         Message: "bogus subnet id",
425                 },
426         }
427         _, err := ap.Create(cluster.InstanceTypes["tiny"], img, nil, "", nil)
428         c.Check(err, check.NotNil)
429         c.Check(err, check.ErrorMatches, `.*InvalidSubnetId\.NotFound.*`)
430         c.Check(ap.client.(*ec2stub).runInstancesCalls, check.HasLen, 2)
431         metrics := arvadostest.GatherMetricsAsString(reg)
432         c.Check(metrics, check.Matches, `(?ms).*`+
433                 `arvados_dispatchcloud_ec2_instance_starts_total{subnet_id="subnet-broken",success="0"} 1\n`+
434                 `arvados_dispatchcloud_ec2_instance_starts_total{subnet_id="subnet-broken",success="1"} 0\n`+
435                 `arvados_dispatchcloud_ec2_instance_starts_total{subnet_id="subnet-full",success="0"} 1\n`+
436                 `arvados_dispatchcloud_ec2_instance_starts_total{subnet_id="subnet-full",success="1"} 0\n`+
437                 `.*`)
438
439         _, err = ap.Create(cluster.InstanceTypes["tiny"], img, nil, "", nil)
440         c.Check(err, check.NotNil)
441         c.Check(err, check.ErrorMatches, `.*InsufficientFreeAddressesInSubnet.*`)
442         c.Check(ap.client.(*ec2stub).runInstancesCalls, check.HasLen, 4)
443         metrics = arvadostest.GatherMetricsAsString(reg)
444         c.Check(metrics, check.Matches, `(?ms).*`+
445                 `arvados_dispatchcloud_ec2_instance_starts_total{subnet_id="subnet-broken",success="0"} 2\n`+
446                 `arvados_dispatchcloud_ec2_instance_starts_total{subnet_id="subnet-broken",success="1"} 0\n`+
447                 `arvados_dispatchcloud_ec2_instance_starts_total{subnet_id="subnet-full",success="0"} 2\n`+
448                 `arvados_dispatchcloud_ec2_instance_starts_total{subnet_id="subnet-full",success="1"} 0\n`+
449                 `.*`)
450 }
451
452 func (*EC2InstanceSetSuite) TestCreateOneSubnetFailingCapacity(c *check.C) {
453         if *live != "" {
454                 c.Skip("not applicable in live mode")
455                 return
456         }
457         ap, img, cluster, reg := GetInstanceSet(c, `{"SubnetID":["subnet-full","subnet-broken"]}`)
458         ap.client.(*ec2stub).subnetErrorOnRunInstances = map[string]error{
459                 "subnet-full": &ec2stubError{
460                         Code:    "InsufficientFreeAddressesInSubnet",
461                         Message: "subnet is full",
462                 },
463                 "subnet-broken": &ec2stubError{
464                         Code:    "InsufficientInstanceCapacity",
465                         Message: "insufficient capacity",
466                 },
467         }
468         for i := 0; i < 3; i++ {
469                 _, err := ap.Create(cluster.InstanceTypes["tiny"], img, nil, "", nil)
470                 c.Check(err, check.NotNil)
471                 c.Check(err, check.ErrorMatches, `.*InsufficientInstanceCapacity.*`)
472         }
473         c.Check(ap.client.(*ec2stub).runInstancesCalls, check.HasLen, 6)
474         metrics := arvadostest.GatherMetricsAsString(reg)
475         c.Check(metrics, check.Matches, `(?ms).*`+
476                 `arvados_dispatchcloud_ec2_instance_starts_total{subnet_id="subnet-broken",success="0"} 3\n`+
477                 `arvados_dispatchcloud_ec2_instance_starts_total{subnet_id="subnet-broken",success="1"} 0\n`+
478                 `arvados_dispatchcloud_ec2_instance_starts_total{subnet_id="subnet-full",success="0"} 3\n`+
479                 `arvados_dispatchcloud_ec2_instance_starts_total{subnet_id="subnet-full",success="1"} 0\n`+
480                 `.*`)
481 }
482
483 func (*EC2InstanceSetSuite) TestTagInstances(c *check.C) {
484         ap, _, _, _ := GetInstanceSet(c, "{}")
485         l, err := ap.Instances(nil)
486         c.Assert(err, check.IsNil)
487
488         for _, i := range l {
489                 tg := i.Tags()
490                 tg["TestTag2"] = "123 test tag 2"
491                 c.Check(i.SetTags(tg), check.IsNil)
492         }
493 }
494
495 func (*EC2InstanceSetSuite) TestListInstances(c *check.C) {
496         ap, _, _, reg := GetInstanceSet(c, "{}")
497         l, err := ap.Instances(nil)
498         c.Assert(err, check.IsNil)
499
500         for _, i := range l {
501                 tg := i.Tags()
502                 c.Logf("%v %v %v", i.String(), i.Address(), tg)
503         }
504
505         metrics := arvadostest.GatherMetricsAsString(reg)
506         c.Check(metrics, check.Matches, `(?ms).*`+
507                 `arvados_dispatchcloud_ec2_instances{subnet_id="[^"]*"} \d+\n`+
508                 `.*`)
509 }
510
511 func (*EC2InstanceSetSuite) TestDestroyInstances(c *check.C) {
512         ap, _, _, _ := GetInstanceSet(c, "{}")
513         l, err := ap.Instances(nil)
514         c.Assert(err, check.IsNil)
515
516         for _, i := range l {
517                 c.Check(i.Destroy(), check.IsNil)
518         }
519 }
520
521 func (*EC2InstanceSetSuite) TestInstancePriceHistory(c *check.C) {
522         ap, img, cluster, _ := GetInstanceSet(c, "{}")
523         pk, _ := test.LoadTestKey(c, "../../dispatchcloud/test/sshkey_dispatch")
524         tags := cloud.InstanceTags{"arvados-ec2-driver": "test"}
525
526         defer func() {
527                 instances, err := ap.Instances(tags)
528                 c.Assert(err, check.IsNil)
529                 for _, inst := range instances {
530                         c.Logf("cleanup: destroy instance %s", inst)
531                         c.Check(inst.Destroy(), check.IsNil)
532                 }
533         }()
534
535         ap.ec2config.SpotPriceUpdateInterval = arvados.Duration(time.Hour)
536         ap.ec2config.EBSPrice = 0.1 // $/GiB/month
537         inst1, err := ap.Create(cluster.InstanceTypes["tiny-preemptible"], img, tags, "true", pk)
538         c.Assert(err, check.IsNil)
539         defer inst1.Destroy()
540         inst2, err := ap.Create(cluster.InstanceTypes["tiny-preemptible"], img, tags, "true", pk)
541         c.Assert(err, check.IsNil)
542         defer inst2.Destroy()
543
544         // in live mode, we need to wait for the instances to reach
545         // running state before we can discover their availability
546         // zones and look up the appropriate prices.
547         var instances []cloud.Instance
548         for deadline := time.Now().Add(5 * time.Minute); ; {
549                 if deadline.Before(time.Now()) {
550                         c.Fatal("timed out")
551                 }
552                 instances, err = ap.Instances(tags)
553                 running := 0
554                 for _, inst := range instances {
555                         ec2i := inst.(*ec2Instance).instance
556                         if ec2i.InstanceLifecycle == types.InstanceLifecycleTypeSpot && *ec2i.State.Code&16 != 0 {
557                                 running++
558                         }
559                 }
560                 if running >= 2 {
561                         c.Logf("instances are running, and identifiable as spot instances")
562                         break
563                 }
564                 c.Logf("waiting for instances to reach running state so their availability zone becomes visible...")
565                 time.Sleep(10 * time.Second)
566         }
567
568         for _, inst := range instances {
569                 hist := inst.PriceHistory(arvados.InstanceType{})
570                 c.Logf("%s price history: %v", inst.ID(), hist)
571                 c.Check(len(hist) > 0, check.Equals, true)
572
573                 histWithScratch := inst.PriceHistory(arvados.InstanceType{AddedScratch: 640 << 30})
574                 c.Logf("%s price history with 640 GiB scratch: %v", inst.ID(), histWithScratch)
575
576                 for i, ip := range hist {
577                         c.Check(ip.Price, check.Not(check.Equals), 0.0)
578                         if i > 0 {
579                                 c.Check(ip.StartTime.Before(hist[i-1].StartTime), check.Equals, true)
580                         }
581                         c.Check(ip.Price < histWithScratch[i].Price, check.Equals, true)
582                 }
583         }
584 }
585
586 func (*EC2InstanceSetSuite) TestWrapError(c *check.C) {
587         retryError := &ec2stubError{Code: "Throttling"}
588         wrapped := wrapError(retryError, &atomic.Value{})
589         _, ok := wrapped.(cloud.RateLimitError)
590         c.Check(ok, check.Equals, true)
591
592         quotaError := &ec2stubError{Code: "InstanceLimitExceeded"}
593         wrapped = wrapError(quotaError, nil)
594         _, ok = wrapped.(cloud.QuotaError)
595         c.Check(ok, check.Equals, true)
596
597         for _, trial := range []struct {
598                 code string
599                 msg  string
600         }{
601                 {"InsufficientInstanceCapacity", ""},
602                 {"Unsupported", "Your requested instance type (t3.micro) is not supported in your requested Availability Zone (us-east-1e). Please retry your request by not specifying an Availability Zone or choosing us-east-1a, us-east-1b, us-east-1c, us-east-1d, us-east-1f."},
603         } {
604                 capacityError := &ec2stubError{Code: trial.code, Message: trial.msg}
605                 wrapped = wrapError(capacityError, nil)
606                 caperr, ok := wrapped.(cloud.CapacityError)
607                 c.Check(ok, check.Equals, true)
608                 c.Check(caperr.IsCapacityError(), check.Equals, true)
609                 c.Check(caperr.IsInstanceTypeSpecific(), check.Equals, true)
610         }
611 }