6f769bfaaa766bf91af23ea54c1ce63425369832
[arvados.git] / lib / dispatchcloud / azure.go
1 // Copyright (C) The Arvados Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: AGPL-3.0
4
5 package dispatchcloud
6
7 import (
8         "context"
9         "fmt"
10         "log"
11         "net/http"
12         "sync"
13         "time"
14
15         "git.curoverse.com/arvados.git/sdk/go/arvados"
16         "github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2018-06-01/compute"
17         "github.com/Azure/azure-sdk-for-go/services/network/mgmt/2018-06-01/network"
18         storageacct "github.com/Azure/azure-sdk-for-go/services/storage/mgmt/2018-02-01/storage"
19         "github.com/Azure/azure-sdk-for-go/storage"
20         "github.com/Azure/go-autorest/autorest/azure"
21         "github.com/Azure/go-autorest/autorest/azure/auth"
22         "github.com/Azure/go-autorest/autorest/to"
23         "github.com/jmcvetta/randutil"
24 )
25
// AzureProviderConfig carries the settings AzureProvider needs to
// authenticate against Azure and to create and clean up compute
// resources. The struct tags name the JSON key each field is decoded
// from.
type AzureProviderConfig struct {
	// Service principal credentials and the cloud environment name
	// (looked up with azure.EnvironmentFromName during setup).
	SubscriptionID string `json:"subscription_id"`
	ClientID       string `json:"key"`
	ClientSecret   string `json:"secret"`
	TenantID       string `json:"tenant_id"`
	CloudEnv       string `json:"cloud_environment"`

	// Where resources are created: resource group, region, and the
	// virtual network / subnet new network interfaces are attached to.
	ResourceGroup string `json:"resource_group"`
	Location      string `json:"region"`
	Network       string `json:"network"`
	Subnet        string `json:"subnet"`

	// Storage account and blob container that hold the OS disk VHDs.
	StorageAccount string `json:"storage_account"`
	BlobContainer  string `json:"blob_container"`
	Image          string `json:"image"`

	// SSH public key written to the "arvados" user's authorized_keys
	// on new VMs.
	AuthorizedKey string `json:"authorized_key"`

	// Age, in seconds, beyond which dangling resources are considered
	// for cleanup.
	DeleteDanglingResourcesAfter float64 `json:"delete_dangling_resources_after"`
}
42
43 type VirtualMachinesClientWrapper interface {
44         CreateOrUpdate(ctx context.Context,
45                 resourceGroupName string,
46                 VMName string,
47                 parameters compute.VirtualMachine) (result compute.VirtualMachine, err error)
48         Delete(ctx context.Context, resourceGroupName string, VMName string) (result *http.Response, err error)
49         ListComplete(ctx context.Context, resourceGroupName string) (result compute.VirtualMachineListResultIterator, err error)
50 }
51
52 type VirtualMachinesClientImpl struct {
53         inner compute.VirtualMachinesClient
54 }
55
56 func (cl *VirtualMachinesClientImpl) CreateOrUpdate(ctx context.Context,
57         resourceGroupName string,
58         VMName string,
59         parameters compute.VirtualMachine) (result compute.VirtualMachine, err error) {
60
61         future, err := cl.inner.CreateOrUpdate(ctx, resourceGroupName, VMName, parameters)
62         if err != nil {
63                 return compute.VirtualMachine{}, err
64         }
65         future.WaitForCompletionRef(ctx, cl.inner.Client)
66         return future.Result(cl.inner)
67 }
68
69 func (cl *VirtualMachinesClientImpl) Delete(ctx context.Context, resourceGroupName string, VMName string) (result *http.Response, err error) {
70         future, err := cl.inner.Delete(ctx, resourceGroupName, VMName)
71         if err != nil {
72                 return nil, err
73         }
74         err = future.WaitForCompletionRef(ctx, cl.inner.Client)
75         return future.Response(), err
76 }
77
78 func (cl *VirtualMachinesClientImpl) ListComplete(ctx context.Context, resourceGroupName string) (result compute.VirtualMachineListResultIterator, err error) {
79         return cl.inner.ListComplete(ctx, resourceGroupName)
80 }
81
82 type InterfacesClientWrapper interface {
83         CreateOrUpdate(ctx context.Context,
84                 resourceGroupName string,
85                 networkInterfaceName string,
86                 parameters network.Interface) (result network.Interface, err error)
87         Delete(ctx context.Context, resourceGroupName string, networkInterfaceName string) (result *http.Response, err error)
88         ListComplete(ctx context.Context, resourceGroupName string) (result network.InterfaceListResultIterator, err error)
89 }
90
91 type InterfacesClientImpl struct {
92         inner network.InterfacesClient
93 }
94
95 func (cl *InterfacesClientImpl) Delete(ctx context.Context, resourceGroupName string, VMName string) (result *http.Response, err error) {
96         future, err := cl.inner.Delete(ctx, resourceGroupName, VMName)
97         if err != nil {
98                 return nil, err
99         }
100         err = future.WaitForCompletionRef(ctx, cl.inner.Client)
101         return future.Response(), err
102 }
103
104 func (cl *InterfacesClientImpl) CreateOrUpdate(ctx context.Context,
105         resourceGroupName string,
106         networkInterfaceName string,
107         parameters network.Interface) (result network.Interface, err error) {
108
109         future, err := cl.inner.CreateOrUpdate(ctx, resourceGroupName, networkInterfaceName, parameters)
110         if err != nil {
111                 return network.Interface{}, err
112         }
113         future.WaitForCompletionRef(ctx, cl.inner.Client)
114         return future.Result(cl.inner)
115 }
116
117 func (cl *InterfacesClientImpl) ListComplete(ctx context.Context, resourceGroupName string) (result network.InterfaceListResultIterator, err error) {
118         return cl.inner.ListComplete(ctx, resourceGroupName)
119 }
120
121 type AzureProvider struct {
122         azconfig          AzureProviderConfig
123         arvconfig         arvados.Cluster
124         vmClient          VirtualMachinesClientWrapper
125         netClient         InterfacesClientWrapper
126         storageAcctClient storageacct.AccountsClient
127         azureEnv          azure.Environment
128 }
129
130 func NewAzureProvider(azcfg AzureProviderConfig, arvcfg arvados.Cluster) (prv Provider, err error) {
131         ap := AzureProvider{}
132         err = ap.setup(azcfg, arvcfg)
133         if err != nil {
134                 return nil, err
135         }
136         return &ap, nil
137 }
138
139 func (az *AzureProvider) setup(azcfg AzureProviderConfig, arvcfg arvados.Cluster) (err error) {
140         az.azconfig = azcfg
141         az.arvconfig = arvcfg
142         vmClient := compute.NewVirtualMachinesClient(az.azconfig.SubscriptionID)
143         netClient := network.NewInterfacesClient(az.azconfig.SubscriptionID)
144         storageAcctClient := storageacct.NewAccountsClient(az.azconfig.SubscriptionID)
145
146         az.azureEnv, err = azure.EnvironmentFromName(az.azconfig.CloudEnv)
147         if err != nil {
148                 return err
149         }
150
151         authorizer, err := auth.ClientCredentialsConfig{
152                 ClientID:     az.azconfig.ClientID,
153                 ClientSecret: az.azconfig.ClientSecret,
154                 TenantID:     az.azconfig.TenantID,
155                 Resource:     az.azureEnv.ResourceManagerEndpoint,
156                 AADEndpoint:  az.azureEnv.ActiveDirectoryEndpoint,
157         }.Authorizer()
158         if err != nil {
159                 return err
160         }
161
162         vmClient.Authorizer = authorizer
163         netClient.Authorizer = authorizer
164         storageAcctClient.Authorizer = authorizer
165
166         az.vmClient = &VirtualMachinesClientImpl{vmClient}
167         az.netClient = &InterfacesClientImpl{netClient}
168         az.storageAcctClient = storageAcctClient
169
170         return nil
171 }
172
173 func (az *AzureProvider) Create(ctx context.Context,
174         instanceType arvados.InstanceType,
175         imageId ImageID,
176         instanceTag []InstanceTag) (Instance, error) {
177
178         name, err := randutil.String(15, "abcdefghijklmnopqrstuvwxyz0123456789")
179         if err != nil {
180                 return nil, err
181         }
182
183         name = "compute-" + name
184         log.Printf("name is %v", name)
185
186         timestamp := time.Now().Format(time.RFC3339Nano)
187
188         nicParameters := network.Interface{
189                 Location: &az.azconfig.Location,
190                 Tags: map[string]*string{
191                         "arvados-class":   to.StringPtr("crunch-dynamic-compute"),
192                         "arvados-cluster": &az.arvconfig.ClusterID,
193                         "created-at":      &timestamp,
194                 },
195                 InterfacePropertiesFormat: &network.InterfacePropertiesFormat{
196                         IPConfigurations: &[]network.InterfaceIPConfiguration{
197                                 network.InterfaceIPConfiguration{
198                                         Name: to.StringPtr("ip1"),
199                                         InterfaceIPConfigurationPropertiesFormat: &network.InterfaceIPConfigurationPropertiesFormat{
200                                                 Subnet: &network.Subnet{
201                                                         ID: to.StringPtr(fmt.Sprintf("/subscriptions/%s/resourceGroups/%s/providers"+
202                                                                 "/Microsoft.Network/virtualnetworks/%s/subnets/%s",
203                                                                 az.azconfig.SubscriptionID,
204                                                                 az.azconfig.ResourceGroup,
205                                                                 az.azconfig.Network,
206                                                                 az.azconfig.Subnet)),
207                                                 },
208                                                 PrivateIPAllocationMethod: network.Dynamic,
209                                         },
210                                 },
211                         },
212                 },
213         }
214         nic, err := az.netClient.CreateOrUpdate(ctx, az.azconfig.ResourceGroup, name+"-nic", nicParameters)
215         if err != nil {
216                 return nil, err
217         }
218
219         log.Printf("Created NIC %v", *nic.ID)
220
221         instance_vhd := fmt.Sprintf("https://%s.blob.%s/%s/%s-os.vhd",
222                 az.azconfig.StorageAccount,
223                 az.azureEnv.StorageEndpointSuffix,
224                 az.azconfig.BlobContainer,
225                 name)
226
227         log.Printf("URI instance vhd %v", instance_vhd)
228
229         vmParameters := compute.VirtualMachine{
230                 Location: &az.azconfig.Location,
231                 Tags: map[string]*string{
232                         "arvados-class":   to.StringPtr("crunch-dynamic-compute"),
233                         "arvados-cluster": &az.arvconfig.ClusterID,
234                         "created-at":      &timestamp,
235                 },
236                 VirtualMachineProperties: &compute.VirtualMachineProperties{
237                         HardwareProfile: &compute.HardwareProfile{
238                                 VMSize: compute.VirtualMachineSizeTypes(instanceType.ProviderType),
239                         },
240                         StorageProfile: &compute.StorageProfile{
241                                 OsDisk: &compute.OSDisk{
242                                         OsType:       compute.Linux,
243                                         Name:         to.StringPtr(fmt.Sprintf("%v-os", name)),
244                                         CreateOption: compute.FromImage,
245                                         Image: &compute.VirtualHardDisk{
246                                                 URI: to.StringPtr(string(imageId)),
247                                         },
248                                         Vhd: &compute.VirtualHardDisk{
249                                                 URI: &instance_vhd,
250                                         },
251                                 },
252                         },
253                         NetworkProfile: &compute.NetworkProfile{
254                                 NetworkInterfaces: &[]compute.NetworkInterfaceReference{
255                                         compute.NetworkInterfaceReference{
256                                                 ID: nic.ID,
257                                                 NetworkInterfaceReferenceProperties: &compute.NetworkInterfaceReferenceProperties{
258                                                         Primary: to.BoolPtr(true),
259                                                 },
260                                         },
261                                 },
262                         },
263                         OsProfile: &compute.OSProfile{
264                                 ComputerName:  &name,
265                                 AdminUsername: to.StringPtr("arvados"),
266                                 LinuxConfiguration: &compute.LinuxConfiguration{
267                                         DisablePasswordAuthentication: to.BoolPtr(true),
268                                         SSH: &compute.SSHConfiguration{
269                                                 PublicKeys: &[]compute.SSHPublicKey{
270                                                         compute.SSHPublicKey{
271                                                                 Path:    to.StringPtr("/home/arvados/.ssh/authorized_keys"),
272                                                                 KeyData: to.StringPtr(az.azconfig.AuthorizedKey),
273                                                         },
274                                                 },
275                                         },
276                                 },
277                                 //CustomData: to.StringPtr(""),
278                         },
279                 },
280         }
281
282         vm, err := az.vmClient.CreateOrUpdate(ctx, az.azconfig.ResourceGroup, name+"-compute", vmParameters)
283         if err != nil {
284                 return nil, err
285         }
286
287         return &AzureInstance{
288                 instanceType: instanceType,
289                 provider:     az,
290                 nic:          nic,
291                 vm:           vm,
292         }, nil
293 }
294
295 func (az *AzureProvider) Instances(ctx context.Context) ([]Instance, error) {
296         result, err := az.vmClient.ListComplete(ctx, az.azconfig.ResourceGroup)
297         if err != nil {
298                 return nil, err
299         }
300
301         instances := make([]Instance, 0)
302
303         for ; result.NotDone(); err = result.Next() {
304                 if err != nil {
305                         return nil, err
306                 }
307                 log.Printf("%v", *result.Value().Name)
308                 if result.Value().Tags["arvados-class"] != nil &&
309                         (*result.Value().Tags["arvados-class"]) == "crunch-dynamic-compute" {
310                         instances = append(instances, &AzureInstance{
311                                 provider: az,
312                                 vm:       result.Value()})
313                 }
314         }
315         return instances, nil
316 }
317
318 func (az *AzureProvider) DeleteDanglingNics(ctx context.Context) {
319         result, err := az.netClient.ListComplete(ctx, az.azconfig.ResourceGroup)
320         if err != nil {
321                 return
322         }
323
324         timestamp := time.Now()
325         wg := sync.WaitGroup{}
326         defer wg.Wait()
327         for ; result.NotDone(); err = result.Next() {
328                 if err != nil {
329                         log.Printf("Error listing nics: %v", err)
330                         return
331                 }
332                 if !result.NotDone() {
333                         return
334                 }
335                 if result.Value().Tags["arvados-class"] != nil &&
336                         (*result.Value().Tags["arvados-class"]) == "crunch-dynamic-compute" &&
337                         result.Value().VirtualMachine == nil {
338
339                         if result.Value().Tags["created-at"] != nil {
340                                 created_at, err := time.Parse(time.RFC3339Nano, *result.Value().Tags["created-at"])
341                                 if err == nil {
342                                         log.Printf("found dangling NIC %v created %v seconds ago", *result.Value().Name, timestamp.Sub(created_at).Seconds())
343                                         if timestamp.Sub(created_at).Seconds() > az.azconfig.DeleteDanglingResourcesAfter {
344                                                 log.Printf("Will delete %v because it is older than %v s", *result.Value().Name, az.azconfig.DeleteDanglingResourcesAfter)
345                                                 wg.Add(1)
346                                                 go func(nicname string) {
347                                                         r, delerr := az.netClient.Delete(context.Background(), az.azconfig.ResourceGroup, nicname)
348                                                         log.Printf("%v %v", r, delerr)
349                                                         wg.Done()
350                                                 }(*result.Value().Name)
351                                         }
352                                 }
353                         }
354                 }
355         }
356
357 }
358
359 func (az *AzureProvider) DeleteDanglingBlobs(ctx context.Context) {
360         result, err := az.storageAcctClient.ListKeys(ctx, az.azconfig.ResourceGroup, az.azconfig.StorageAccount)
361         if err != nil {
362                 log.Printf("Couldn't get account keys %v", err)
363                 return
364         }
365
366         key1 := *(*result.Keys)[0].Value
367         client, err := storage.NewBasicClientOnSovereignCloud(az.azconfig.StorageAccount, key1, az.azureEnv)
368         if err != nil {
369                 log.Printf("Couldn't make client %v", err)
370                 return
371         }
372
373         blobsvc := client.GetBlobService()
374         blobcont := blobsvc.GetContainerReference(az.azconfig.BlobContainer)
375
376         timestamp := time.Now()
377         page := storage.ListBlobsParameters{Prefix: "compute-"}
378
379         for {
380                 response, err := blobcont.ListBlobs(page)
381                 if err != nil {
382                         log.Printf("Error listing blobs %v", err)
383                         return
384                 }
385                 for _, b := range response.Blobs {
386                         age := timestamp.Sub(time.Time(b.Properties.LastModified))
387                         if b.Properties.BlobType == storage.BlobTypePage &&
388                                 b.Properties.LeaseState == "available" &&
389                                 b.Properties.LeaseStatus == "unlocked" &&
390                                 age.Seconds() > az.azconfig.DeleteDanglingResourcesAfter {
391                                 log.Printf("Blob %v is unlocked and not modified for %v seconds, will delete", b.Name, age.Seconds())
392                         }
393                 }
394                 if response.NextMarker != "" {
395                         page.Marker = response.NextMarker
396                 } else {
397                         break
398                 }
399         }
400 }
401
402 type AzureInstance struct {
403         instanceType arvados.InstanceType
404         provider     *AzureProvider
405         nic          network.Interface
406         vm           compute.VirtualMachine
407 }
408
409 func (ai *AzureInstance) String() string {
410         return *ai.vm.ID
411 }
412
413 func (ai *AzureInstance) ProviderType() string {
414         return string(ai.vm.VirtualMachineProperties.HardwareProfile.VMSize)
415 }
416
417 func (ai *AzureInstance) InstanceType() arvados.InstanceType {
418         return ai.instanceType
419 }
420
421 func (ai *AzureInstance) SetTags([]InstanceTag) error {
422         return nil
423 }
424
425 func (ai *AzureInstance) GetTags() ([]InstanceTag, error) {
426         return nil, nil
427 }
428
429 func (ai *AzureInstance) Destroy(ctx context.Context) error {
430         _, err := ai.provider.vmClient.Delete(ctx, ai.provider.azconfig.ResourceGroup, *ai.vm.Name)
431         // check response code
432         return err
433 }
434
435 func (ai *AzureInstance) Address() string {
436         return *(*ai.nic.IPConfigurations)[0].PrivateIPAddress
437 }