1 // Copyright (C) The Arvados Authors. All rights reserved.
3 // SPDX-License-Identifier: AGPL-3.0
15 "git.curoverse.com/arvados.git/sdk/go/arvados"
16 "github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2018-06-01/compute"
17 "github.com/Azure/azure-sdk-for-go/services/network/mgmt/2018-06-01/network"
18 storageacct "github.com/Azure/azure-sdk-for-go/services/storage/mgmt/2018-02-01/storage"
19 "github.com/Azure/azure-sdk-for-go/storage"
20 "github.com/Azure/go-autorest/autorest/azure"
21 "github.com/Azure/go-autorest/autorest/azure/auth"
22 "github.com/Azure/go-autorest/autorest/to"
23 "github.com/jmcvetta/randutil"
// AzureProviderConfig carries the driver settings for Azure. The JSON
// keys are the names used in the configuration document; note that the
// client ID and client secret are stored under "key" and "secret", and
// the location under "region".
type AzureProviderConfig struct {
	// Credentials used to build the client-credentials authorizer.
	SubscriptionID string `json:"subscription_id"`
	ClientID       string `json:"key"`
	ClientSecret   string `json:"secret"`
	TenantID       string `json:"tenant_id"`

	// CloudEnv is the environment name given to
	// azure.EnvironmentFromName.
	CloudEnv string `json:"cloud_environment"`

	// ResourceGroup is the resource group passed to every VM, NIC and
	// storage-account call; Location is set on created resources.
	ResourceGroup string `json:"resource_group"`
	Location      string `json:"region"`

	// Network and Subnet identify the subnet new NICs are attached to.
	Network string `json:"network"`
	Subnet  string `json:"subnet"`

	// StorageAccount and BlobContainer locate the OS disk VHD blobs.
	StorageAccount string `json:"storage_account"`
	BlobContainer  string `json:"blob_container"`

	Image string `json:"image"`

	// AuthorizedKey is installed as the SSH public key of the admin
	// user on new VMs.
	AuthorizedKey string `json:"authorized_key"`

	// DeleteDanglingResourcesAfter is an age threshold in seconds;
	// dangling resources older than this are eligible for cleanup.
	DeleteDanglingResourcesAfter float64 `json:"delete_dangling_resources_after"`
}
43 type VirtualMachinesClientWrapper interface {
44 CreateOrUpdate(ctx context.Context,
45 resourceGroupName string,
47 parameters compute.VirtualMachine) (result compute.VirtualMachine, err error)
48 Delete(ctx context.Context, resourceGroupName string, VMName string) (result *http.Response, err error)
49 ListComplete(ctx context.Context, resourceGroupName string) (result compute.VirtualMachineListResultIterator, err error)
52 type VirtualMachinesClientImpl struct {
53 inner compute.VirtualMachinesClient
56 func (cl *VirtualMachinesClientImpl) CreateOrUpdate(ctx context.Context,
57 resourceGroupName string,
59 parameters compute.VirtualMachine) (result compute.VirtualMachine, err error) {
61 future, err := cl.inner.CreateOrUpdate(ctx, resourceGroupName, VMName, parameters)
63 return compute.VirtualMachine{}, err
65 future.WaitForCompletionRef(ctx, cl.inner.Client)
66 return future.Result(cl.inner)
69 func (cl *VirtualMachinesClientImpl) Delete(ctx context.Context, resourceGroupName string, VMName string) (result *http.Response, err error) {
70 future, err := cl.inner.Delete(ctx, resourceGroupName, VMName)
74 err = future.WaitForCompletionRef(ctx, cl.inner.Client)
75 return future.Response(), err
78 func (cl *VirtualMachinesClientImpl) ListComplete(ctx context.Context, resourceGroupName string) (result compute.VirtualMachineListResultIterator, err error) {
79 return cl.inner.ListComplete(ctx, resourceGroupName)
82 type InterfacesClientWrapper interface {
83 CreateOrUpdate(ctx context.Context,
84 resourceGroupName string,
85 networkInterfaceName string,
86 parameters network.Interface) (result network.Interface, err error)
87 Delete(ctx context.Context, resourceGroupName string, networkInterfaceName string) (result *http.Response, err error)
88 ListComplete(ctx context.Context, resourceGroupName string) (result network.InterfaceListResultIterator, err error)
91 type InterfacesClientImpl struct {
92 inner network.InterfacesClient
95 func (cl *InterfacesClientImpl) Delete(ctx context.Context, resourceGroupName string, VMName string) (result *http.Response, err error) {
96 future, err := cl.inner.Delete(ctx, resourceGroupName, VMName)
100 err = future.WaitForCompletionRef(ctx, cl.inner.Client)
101 return future.Response(), err
104 func (cl *InterfacesClientImpl) CreateOrUpdate(ctx context.Context,
105 resourceGroupName string,
106 networkInterfaceName string,
107 parameters network.Interface) (result network.Interface, err error) {
109 future, err := cl.inner.CreateOrUpdate(ctx, resourceGroupName, networkInterfaceName, parameters)
111 return network.Interface{}, err
113 future.WaitForCompletionRef(ctx, cl.inner.Client)
114 return future.Result(cl.inner)
117 func (cl *InterfacesClientImpl) ListComplete(ctx context.Context, resourceGroupName string) (result network.InterfaceListResultIterator, err error) {
118 return cl.inner.ListComplete(ctx, resourceGroupName)
121 type AzureProvider struct {
122 azconfig AzureProviderConfig
123 arvconfig arvados.Cluster
124 vmClient VirtualMachinesClientWrapper
125 netClient InterfacesClientWrapper
126 storageAcctClient storageacct.AccountsClient
127 azureEnv azure.Environment
130 func NewAzureProvider(azcfg AzureProviderConfig, arvcfg arvados.Cluster) (prv Provider, err error) {
131 ap := AzureProvider{}
132 err = ap.setup(azcfg, arvcfg)
139 func (az *AzureProvider) setup(azcfg AzureProviderConfig, arvcfg arvados.Cluster) (err error) {
141 az.arvconfig = arvcfg
142 vmClient := compute.NewVirtualMachinesClient(az.azconfig.SubscriptionID)
143 netClient := network.NewInterfacesClient(az.azconfig.SubscriptionID)
144 storageAcctClient := storageacct.NewAccountsClient(az.azconfig.SubscriptionID)
146 az.azureEnv, err = azure.EnvironmentFromName(az.azconfig.CloudEnv)
151 authorizer, err := auth.ClientCredentialsConfig{
152 ClientID: az.azconfig.ClientID,
153 ClientSecret: az.azconfig.ClientSecret,
154 TenantID: az.azconfig.TenantID,
155 Resource: az.azureEnv.ResourceManagerEndpoint,
156 AADEndpoint: az.azureEnv.ActiveDirectoryEndpoint,
162 vmClient.Authorizer = authorizer
163 netClient.Authorizer = authorizer
164 storageAcctClient.Authorizer = authorizer
166 az.vmClient = &VirtualMachinesClientImpl{vmClient}
167 az.netClient = &InterfacesClientImpl{netClient}
168 az.storageAcctClient = storageAcctClient
173 func (az *AzureProvider) Create(ctx context.Context,
174 instanceType arvados.InstanceType,
176 instanceTag []InstanceTag) (Instance, error) {
178 name, err := randutil.String(15, "abcdefghijklmnopqrstuvwxyz0123456789")
183 name = "compute-" + name
184 log.Printf("name is %v", name)
186 timestamp := time.Now().Format(time.RFC3339Nano)
188 nicParameters := network.Interface{
189 Location: &az.azconfig.Location,
190 Tags: map[string]*string{
191 "arvados-class": to.StringPtr("crunch-dynamic-compute"),
192 "arvados-cluster": &az.arvconfig.ClusterID,
193 "created-at": ×tamp,
195 InterfacePropertiesFormat: &network.InterfacePropertiesFormat{
196 IPConfigurations: &[]network.InterfaceIPConfiguration{
197 network.InterfaceIPConfiguration{
198 Name: to.StringPtr("ip1"),
199 InterfaceIPConfigurationPropertiesFormat: &network.InterfaceIPConfigurationPropertiesFormat{
200 Subnet: &network.Subnet{
201 ID: to.StringPtr(fmt.Sprintf("/subscriptions/%s/resourceGroups/%s/providers"+
202 "/Microsoft.Network/virtualnetworks/%s/subnets/%s",
203 az.azconfig.SubscriptionID,
204 az.azconfig.ResourceGroup,
206 az.azconfig.Subnet)),
208 PrivateIPAllocationMethod: network.Dynamic,
214 nic, err := az.netClient.CreateOrUpdate(ctx, az.azconfig.ResourceGroup, name+"-nic", nicParameters)
219 log.Printf("Created NIC %v", *nic.ID)
221 instance_vhd := fmt.Sprintf("https://%s.blob.%s/%s/%s-os.vhd",
222 az.azconfig.StorageAccount,
223 az.azureEnv.StorageEndpointSuffix,
224 az.azconfig.BlobContainer,
227 log.Printf("URI instance vhd %v", instance_vhd)
229 vmParameters := compute.VirtualMachine{
230 Location: &az.azconfig.Location,
231 Tags: map[string]*string{
232 "arvados-class": to.StringPtr("crunch-dynamic-compute"),
233 "arvados-cluster": &az.arvconfig.ClusterID,
234 "created-at": ×tamp,
236 VirtualMachineProperties: &compute.VirtualMachineProperties{
237 HardwareProfile: &compute.HardwareProfile{
238 VMSize: compute.VirtualMachineSizeTypes(instanceType.ProviderType),
240 StorageProfile: &compute.StorageProfile{
241 OsDisk: &compute.OSDisk{
242 OsType: compute.Linux,
243 Name: to.StringPtr(fmt.Sprintf("%v-os", name)),
244 CreateOption: compute.FromImage,
245 Image: &compute.VirtualHardDisk{
246 URI: to.StringPtr(string(imageId)),
248 Vhd: &compute.VirtualHardDisk{
253 NetworkProfile: &compute.NetworkProfile{
254 NetworkInterfaces: &[]compute.NetworkInterfaceReference{
255 compute.NetworkInterfaceReference{
257 NetworkInterfaceReferenceProperties: &compute.NetworkInterfaceReferenceProperties{
258 Primary: to.BoolPtr(true),
263 OsProfile: &compute.OSProfile{
265 AdminUsername: to.StringPtr("arvados"),
266 LinuxConfiguration: &compute.LinuxConfiguration{
267 DisablePasswordAuthentication: to.BoolPtr(true),
268 SSH: &compute.SSHConfiguration{
269 PublicKeys: &[]compute.SSHPublicKey{
270 compute.SSHPublicKey{
271 Path: to.StringPtr("/home/arvados/.ssh/authorized_keys"),
272 KeyData: to.StringPtr(az.azconfig.AuthorizedKey),
277 //CustomData: to.StringPtr(""),
282 vm, err := az.vmClient.CreateOrUpdate(ctx, az.azconfig.ResourceGroup, name+"-compute", vmParameters)
287 return &AzureInstance{
288 instanceType: instanceType,
295 func (az *AzureProvider) Instances(ctx context.Context) ([]Instance, error) {
296 result, err := az.vmClient.ListComplete(ctx, az.azconfig.ResourceGroup)
301 instances := make([]Instance, 0)
303 for ; result.NotDone(); err = result.Next() {
307 log.Printf("%v", *result.Value().Name)
308 if result.Value().Tags["arvados-class"] != nil &&
309 (*result.Value().Tags["arvados-class"]) == "crunch-dynamic-compute" {
310 instances = append(instances, &AzureInstance{
315 return instances, nil
318 func (az *AzureProvider) DeleteDanglingNics(ctx context.Context) {
319 result, err := az.netClient.ListComplete(ctx, az.azconfig.ResourceGroup)
324 timestamp := time.Now()
325 wg := sync.WaitGroup{}
327 for ; result.NotDone(); err = result.Next() {
329 log.Printf("Error listing nics: %v", err)
332 if !result.NotDone() {
335 if result.Value().Tags["arvados-class"] != nil &&
336 (*result.Value().Tags["arvados-class"]) == "crunch-dynamic-compute" &&
337 result.Value().VirtualMachine == nil {
339 if result.Value().Tags["created-at"] != nil {
340 created_at, err := time.Parse(time.RFC3339Nano, *result.Value().Tags["created-at"])
342 log.Printf("found dangling NIC %v created %v seconds ago", *result.Value().Name, timestamp.Sub(created_at).Seconds())
343 if timestamp.Sub(created_at).Seconds() > az.azconfig.DeleteDanglingResourcesAfter {
344 log.Printf("Will delete %v because it is older than %v s", *result.Value().Name, az.azconfig.DeleteDanglingResourcesAfter)
346 go func(nicname string) {
347 r, delerr := az.netClient.Delete(context.Background(), az.azconfig.ResourceGroup, nicname)
348 log.Printf("%v %v", r, delerr)
350 }(*result.Value().Name)
359 func (az *AzureProvider) DeleteDanglingBlobs(ctx context.Context) {
360 result, err := az.storageAcctClient.ListKeys(ctx, az.azconfig.ResourceGroup, az.azconfig.StorageAccount)
362 log.Printf("Couldn't get account keys %v", err)
366 key1 := *(*result.Keys)[0].Value
367 client, err := storage.NewBasicClientOnSovereignCloud(az.azconfig.StorageAccount, key1, az.azureEnv)
369 log.Printf("Couldn't make client %v", err)
373 blobsvc := client.GetBlobService()
374 blobcont := blobsvc.GetContainerReference(az.azconfig.BlobContainer)
376 timestamp := time.Now()
377 page := storage.ListBlobsParameters{Prefix: "compute-"}
380 response, err := blobcont.ListBlobs(page)
382 log.Printf("Error listing blobs %v", err)
385 for _, b := range response.Blobs {
386 age := timestamp.Sub(time.Time(b.Properties.LastModified))
387 if b.Properties.BlobType == storage.BlobTypePage &&
388 b.Properties.LeaseState == "available" &&
389 b.Properties.LeaseStatus == "unlocked" &&
390 age.Seconds() > az.azconfig.DeleteDanglingResourcesAfter {
391 log.Printf("Blob %v is unlocked and not modified for %v seconds, will delete", b.Name, age.Seconds())
394 if response.NextMarker != "" {
395 page.Marker = response.NextMarker
402 type AzureInstance struct {
403 instanceType arvados.InstanceType
404 provider *AzureProvider
405 nic network.Interface
406 vm compute.VirtualMachine
409 func (ai *AzureInstance) String() string {
413 func (ai *AzureInstance) ProviderType() string {
414 return string(ai.vm.VirtualMachineProperties.HardwareProfile.VMSize)
417 func (ai *AzureInstance) InstanceType() arvados.InstanceType {
418 return ai.instanceType
421 func (ai *AzureInstance) SetTags([]InstanceTag) error {
425 func (ai *AzureInstance) GetTags() ([]InstanceTag, error) {
429 func (ai *AzureInstance) Destroy(ctx context.Context) error {
430 _, err := ai.provider.vmClient.Delete(ctx, ai.provider.azconfig.ResourceGroup, *ai.vm.Name)
431 // check response code
435 func (ai *AzureInstance) Address() string {
436 return *(*ai.nic.IPConfigurations)[0].PrivateIPAddress