16625: a-d-c: add support for disk images in an Azure Shared Image
authorWard Vandewege <ward@curii.com>
Thu, 20 Aug 2020 17:36:22 +0000 (13:36 -0400)
committerWard Vandewege <ward@curii.com>
Thu, 20 Aug 2020 17:41:16 +0000 (13:41 -0400)
       Gallery. Also make sure to clean up nics in every scenario where
       Create errors out.

Arvados-DCO-1.1-Signed-off-by: Ward Vandewege <ward@curii.com>

doc/install/crunch2-cloud/install-dispatch-cloud.html.textile.liquid
lib/cloud/azure/azure.go
lib/config/config.default.yml
lib/config/generated_config.go

index 4b3387fcb7f7600041a077499b41e9512b83e6ce..68417784701ce387e7437bb0f0b8e62a2335e5ff 100644 (file)
@@ -124,6 +124,44 @@ Using managed disks:
 </code></pre>
 </notextile>
 
+Azure recommends using managed images. If you plan to start more than 20 VMs simultaneously, Azure recommends using a shared image gallery instead to avoid slowdowns and timeouts during the creation of the VMs.
+
+Using an image from a shared image gallery:
+
+<notextile>
+<pre><code>    Containers:
+      CloudVMs:
+        ImageID: "shared_image_gallery_image_definition_name"
+        Driver: azure
+        DriverParameters:
+          # Credentials.
+          SubscriptionID: XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX
+          ClientID: XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX
+          ClientSecret: XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+          TenantID: XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX
+
+          # Data center where VMs will be allocated
+          Location: centralus
+
+          # The resource group where the VM and virtual NIC will be
+          # created.
+          ResourceGroup: zzzzz
+          NetworkResourceGroup: yyyyy   # only if different from ResourceGroup
+          Network: xxxxx
+          Subnet: xxxxx-subnet-private
+
+          # The resource group where the disk image is stored. Only needs to
+          # be specified if it is different from ResourceGroup.
+          ImageResourceGroup: aaaaa
+
+          # (azure) shared image gallery: the name of the gallery
+          SharedImageGalleryName: "shared_image_gallery_1"
+          # (azure) shared image gallery: the version of the image definition
+          SharedImageGalleryImageVersion: "0.0.1"
+
+</code></pre>
+</notextile>
+
 Using unmanaged disks (deprecated):
 
 <notextile>
index 0accb67b30ea8b0e267c662b78eec50ec0b96dae..c26309aca5a95fc7d35495295d33f79c72c74cdf 100644 (file)
@@ -36,21 +36,23 @@ import (
 var Driver = cloud.DriverFunc(newAzureInstanceSet)
 
 type azureInstanceSetConfig struct {
-       SubscriptionID               string
-       ClientID                     string
-       ClientSecret                 string
-       TenantID                     string
-       CloudEnvironment             string
-       ResourceGroup                string
-       ImageResourceGroup           string
-       Location                     string
-       Network                      string
-       NetworkResourceGroup         string
-       Subnet                       string
-       StorageAccount               string
-       BlobContainer                string
-       DeleteDanglingResourcesAfter arvados.Duration
-       AdminUsername                string
+       SubscriptionID                 string
+       ClientID                       string
+       ClientSecret                   string
+       TenantID                       string
+       CloudEnvironment               string
+       ResourceGroup                  string
+       ImageResourceGroup             string
+       Location                       string
+       Network                        string
+       NetworkResourceGroup           string
+       Subnet                         string
+       StorageAccount                 string
+       BlobContainer                  string
+       SharedImageGalleryName         string // gallery name; Create requires it to be set together with SharedImageGalleryImageVersion, or both left empty
+       SharedImageGalleryImageVersion string // image definition version; Create requires it to be set together with SharedImageGalleryName, or both left empty
+       DeleteDanglingResourcesAfter   arvados.Duration
+       AdminUsername                  string
 }
 
 type containerWrapper interface {
@@ -373,6 +375,13 @@ func (az *azureInstanceSet) setup(azcfg azureInstanceSetConfig, dispatcherID str
        return nil
 }
 
+// cleanupNic deletes the given network interface, identified by *nic.Name,
+// from az.azconfig.ResourceGroup. Create calls it on its error paths so that
+// a failed create does not leave a NIC behind. The delete is best effort: an
+// error is logged as a warning and is not returned to the caller. nic.Name
+// is dereferenced without a nil check, so the NIC passed in must have its
+// Name set.
+func (az *azureInstanceSet) cleanupNic(nic network.Interface) {
+       _, delerr := az.netClient.delete(context.Background(), az.azconfig.ResourceGroup, *nic.Name)
+       if delerr != nil {
+               az.logger.WithError(delerr).Warnf("Error cleaning up NIC after failed create")
+       }
+}
+
 func (az *azureInstanceSet) Create(
        instanceType arvados.InstanceType,
        imageID cloud.ImageID,
@@ -439,6 +448,7 @@ func (az *azureInstanceSet) Create(
        re := regexp.MustCompile(`^http(s?)://`)
        if re.MatchString(string(imageID)) {
                if az.blobcont == nil {
+                       az.cleanupNic(nic)
                        return nil, wrapAzureError(errors.New("Invalid configuration: can't configure unmanaged image URL without StorageAccount and BlobContainer"))
                }
                blobname = fmt.Sprintf("%s-os.vhd", name)
@@ -462,9 +472,16 @@ func (az *azureInstanceSet) Create(
                        },
                }
        } else {
+               id := to.StringPtr("/subscriptions/" + az.azconfig.SubscriptionID + "/resourceGroups/" + az.imageResourceGroup + "/providers/Microsoft.Compute/images/" + string(imageID))
+               if az.azconfig.SharedImageGalleryName != "" && az.azconfig.SharedImageGalleryImageVersion != "" {
+                       id = to.StringPtr("/subscriptions/" + az.azconfig.SubscriptionID + "/resourceGroups/" + az.imageResourceGroup + "/providers/Microsoft.Compute/galleries/" + az.azconfig.SharedImageGalleryName + "/images/" + string(imageID) + "/versions/" + az.azconfig.SharedImageGalleryImageVersion)
+               } else if az.azconfig.SharedImageGalleryName != "" || az.azconfig.SharedImageGalleryImageVersion != "" {
+                       az.cleanupNic(nic)
+                       return nil, wrapAzureError(errors.New("Invalid configuration: SharedImageGalleryName and SharedImageGalleryImageVersion must both be set or both be empty"))
+               }
                storageProfile = &compute.StorageProfile{
                        ImageReference: &compute.ImageReference{
-                               ID: to.StringPtr("/subscriptions/" + az.azconfig.SubscriptionID + "/resourceGroups/" + az.imageResourceGroup + "/providers/Microsoft.Compute/images/" + string(imageID)),
+                               ID: id,
                        },
                        OsDisk: &compute.OSDisk{
                                OsType:       compute.Linux,
@@ -518,18 +535,16 @@ func (az *azureInstanceSet) Create(
                // dispatcher keeps retrying, because the garbage collection in manageBlobs
                // and manageNics is only triggered periodically. This is most important
                // for nics, because those are subject to a quota.
+               az.cleanupNic(nic)
+
                if blobname != "" {
                        _, delerr := az.blobcont.GetBlobReference(blobname).DeleteIfExists(nil)
                        if delerr != nil {
                                az.logger.WithError(delerr).Warnf("Error cleaning up vhd blob after failed create")
                        }
                }
-               // Leave cleaning up of managed disks to the garbage collection in manageDisks()
 
-               _, delerr := az.netClient.delete(context.Background(), az.azconfig.ResourceGroup, *nic.Name)
-               if delerr != nil {
-                       az.logger.WithError(delerr).Warnf("Error cleaning up NIC after failed create")
-               }
+               // Leave cleaning up of managed disks to the garbage collection in manageDisks()
 
                return nil, wrapAzureError(err)
        }
index 89156719b2f34be76ca02ed58e83867514d1d1e2..a1b471bd229e7f27b0cbd90bf79919f0d7123992 100644 (file)
@@ -954,6 +954,8 @@ Clusters:
         # Worker VM image ID.
         # (aws) AMI identifier
         # (azure) managed disks: the name of the managed disk image
+        # (azure) shared image gallery: the name of the image definition. Also
+        # see the SharedImageGalleryName and SharedImageGalleryImageVersion fields.
         # (azure) unmanaged disks (deprecated): the complete URI of the VHD, e.g.
         # https://xxxxx.blob.core.windows.net/system/Microsoft.Compute/Images/images/xxxxx.vhd
         ImageID: ""
@@ -1028,6 +1030,11 @@ Clusters:
           # image can be found (if different from ResourceGroup).
           ImageResourceGroup: ""
 
+          # (azure) shared image gallery: the name of the gallery
+          SharedImageGalleryName: ""
+          # (azure) shared image gallery: the version of the image definition
+          SharedImageGalleryImageVersion: ""
+
           # (azure) unmanaged disks (deprecated): Where to store the VM VHD blobs
           StorageAccount: ""
           BlobContainer: ""
index f0582f948fa6b805d0b7ead117b9673907b96d3d..8e42eb350516d172cec46c99fc0c163dcaa4fb46 100644 (file)
@@ -960,6 +960,8 @@ Clusters:
         # Worker VM image ID.
         # (aws) AMI identifier
         # (azure) managed disks: the name of the managed disk image
+        # (azure) shared image gallery: the name of the image definition. Also
+        # see the SharedImageGalleryName and SharedImageGalleryImageVersion fields.
         # (azure) unmanaged disks (deprecated): the complete URI of the VHD, e.g.
         # https://xxxxx.blob.core.windows.net/system/Microsoft.Compute/Images/images/xxxxx.vhd
         ImageID: ""
@@ -1034,6 +1036,11 @@ Clusters:
           # image can be found (if different from ResourceGroup).
           ImageResourceGroup: ""
 
+          # (azure) shared image gallery: the name of the gallery
+          SharedImageGalleryName: ""
+          # (azure) shared image gallery: the version of the image definition
+          SharedImageGalleryImageVersion: ""
+
           # (azure) unmanaged disks (deprecated): Where to store the VM VHD blobs
           StorageAccount: ""
           BlobContainer: ""