From f7c83a41f84033e4ea9e570dd85e0152f0d81aab Mon Sep 17 00:00:00 2001 From: Ward Vandewege Date: Mon, 21 Feb 2022 20:37:47 -0500 Subject: [PATCH] 18772: address review feedback. Arvados-DCO-1.1-Signed-off-by: Ward Vandewege --- .../install-compute-node.html.textile.liquid | 61 ++++++++++++++++++- lib/cloud/ec2/ec2.go | 6 +- lib/config/config.default.yml | 4 +- .../scripts/create-ebs-volume-nvme.patch | 52 +++++++--------- 4 files changed, 88 insertions(+), 35 deletions(-) diff --git a/doc/install/crunch2-cloud/install-compute-node.html.textile.liquid b/doc/install/crunch2-cloud/install-compute-node.html.textile.liquid index 979bbad25b..e75be0881e 100644 --- a/doc/install/crunch2-cloud/install-compute-node.html.textile.liquid +++ b/doc/install/crunch2-cloud/install-compute-node.html.textile.liquid @@ -175,7 +175,40 @@ For @ClusterID@, fill in your cluster ID. The @VPC@ and @Subnet@ should be confi h3(#aws-ebs-autoscaler). Autoscaling compute node scratch space -If you want to add the AWS EBS autoscaler daemon in your images, add the @--aws-ebs-autoscale@ flag to the "the build script":#building. Doing so will make the compute image scratch space scale automatically as needed. The @Containers/InstanceTypes@ list should be modified so that all @AddedScratch@ lines are removed, and the @IncludedScratch@ value should be set to a (fictional) high number. This way, the scratch space requirements will be met by all the defined instance type. For example: +If you want to add the "AWS EBS autoscaler":https://github.com/awslabs/amazon-ebs-autoscale daemon to your images, add the @--aws-ebs-autoscale@ flag to "the build script":#building. Doing so will make the compute image scratch space scale automatically as needed. + +The AWS EBS autoscaler daemon will be installed with this configuration: + +
{
+    "mountpoint": "/tmp",
+    "filesystem": "lvm.ext4",
+    "lvm": {
+      "volume_group": "autoscale_vg",
+      "logical_volume": "autoscale_lv"
+    },
+    "volume": {
+        "type": "gp3",
+        "iops": 3000,
+        "encrypted": 1
+    },
+    "detection_interval": 2,
+    "limits": {
+        "max_ebs_volume_size": 1500,
+        "max_logical_volume_size": 8000,
+        "max_ebs_volume_count": 16
+    },
+    "logging": {
+        "log_file": "/var/log/ebs-autoscale.log",
+        "log_interval": 300
+    }
+}
+
+ +Changing the configuration is left as an exercise for the reader. + +Using this feature also requires a few Arvados configuration changes in @config.yml@: + +* The @Containers/InstanceTypes@ list should be modified so that all @AddedScratch@ lines are removed, and the @IncludedScratch@ value should be set to a (fictional) high number. This way, the scratch space requirements will be met by all the defined instance types. For example:
    InstanceTypes:
       c5large:
@@ -193,6 +226,32 @@ If you want to add the AWS EBS autoscaler daemon in your images, add the @--aws-
 ...
 
+* You will also need to create an IAM role in AWS with these permissions: + +
{
+    "Version": "2012-10-17",
+    "Statement": [
+        {
+            "Effect": "Allow",
+            "Action": [
+                "ec2:AttachVolume",
+                "ec2:DescribeVolumeStatus",
+                "ec2:DescribeVolumes",
+                "ec2:DescribeTags",
+                "ec2:ModifyInstanceAttribute",
+                "ec2:DescribeVolumeAttribute",
+                "ec2:CreateVolume",
+                "ec2:DeleteVolume",
+                "ec2:CreateTags"
+            ],
+            "Resource": "*"
+        }
+    ]
+}
+
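+ +Then, in @config.yml@ set @Containers/CloudVMs/DriverParameters/IAMInstanceProfile@ to the name of the IAM instance profile that contains this role (when a role for EC2 is created through the AWS console, an instance profile with the same name as the role is created automatically). This will make @arvados-dispatch-cloud@ pass an IAMInstanceProfile to the compute nodes as they start up, giving them sufficient permissions to attach and grow EBS volumes. + h2(#azure). Build an Azure image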
~$ ./build.sh --json-file arvados-images-azure.json \
diff --git a/lib/cloud/ec2/ec2.go b/lib/cloud/ec2/ec2.go
index 2cbe4cf290..52b73f781c 100644
--- a/lib/cloud/ec2/ec2.go
+++ b/lib/cloud/ec2/ec2.go
@@ -47,7 +47,7 @@ type ec2InstanceSetConfig struct {
 	SubnetID           string
 	AdminUsername      string
 	EBSVolumeType      string
-	IamInstanceProfile string
+	IAMInstanceProfile string
 }
 
 type ec2Interface interface {
@@ -231,9 +231,9 @@ func (instanceSet *ec2InstanceSet) Create(
 			}}
 	}
 
-	if instanceSet.ec2config.IamInstanceProfile != "" {
+	if instanceSet.ec2config.IAMInstanceProfile != "" {
 		rii.IamInstanceProfile = &ec2.IamInstanceProfileSpecification{
-			Name: aws.String(instanceSet.ec2config.IamInstanceProfile),
+			Name: aws.String(instanceSet.ec2config.IAMInstanceProfile),
 		}
 	}
 
diff --git a/lib/config/config.default.yml b/lib/config/config.default.yml
index 7f191eb118..9800be7047 100644
--- a/lib/config/config.default.yml
+++ b/lib/config/config.default.yml
@@ -1269,9 +1269,9 @@ Clusters:
           Region: ""
           EBSVolumeType: gp2
           AdminUsername: debian
-          # (ec2) name of the IamInstanceProfile for instances started by
+          # (ec2) name of the IAMInstanceProfile for instances started by
           # the cloud dispatcher. Leave blank when not needed.
-          IamInstanceProfile: ""
+          IAMInstanceProfile: ""
 
           # (azure) Credentials.
           SubscriptionID: ""
diff --git a/tools/compute-images/scripts/create-ebs-volume-nvme.patch b/tools/compute-images/scripts/create-ebs-volume-nvme.patch
index 1448ae1f2e..b6ef81148b 100644
--- a/tools/compute-images/scripts/create-ebs-volume-nvme.patch
+++ b/tools/compute-images/scripts/create-ebs-volume-nvme.patch
@@ -4,9 +4,11 @@
 
 Make the create-ebs-volume script work with nvme devices.
 
---- a/create-ebs-volume	2022-02-18 15:24:19.866607848 -0500
-+++ b/create-ebs-volume	2022-02-18 16:23:17.931870970 -0500
-@@ -149,9 +152,20 @@
+diff --git a/bin/create-ebs-volume b/bin/create-ebs-volume
+index 6857564..efeac35 100755
+--- a/create-ebs-volume
++++ b/create-ebs-volume
+@@ -149,10 +149,11 @@ function get_next_logical_device() {
      for letter in ${alphabet[@]}; do
          # use /dev/xvdb* device names to avoid contention for /dev/sd* and /dev/xvda names
          # only supported by HVM instances
@@ -16,48 +18,40 @@ Make the create-ebs-volume script work with nvme devices.
 +        fi
              echo "/dev/xvdb${letter}"
              break
-+    done
-+}
-+
-+numbers=( {1..255} )
-+function get_next_logical_nvme_device() {
-+    for num in ${numbers[@]}; do
-+        if [ ! -b "/dev/nvme${num}n1" ]; then
-+            echo "/dev/nvme${num}"
-+            break
-         fi
+-        fi
      done
  }
-@@ -243,10 +257,12 @@
-     
-     # check if there are available device names
-     local device=$(get_next_logical_device)
-+    local nvme_device=$(get_next_logical_nvme_device)
-     if [ -z "$device" ]; then
-         error "no device names available for volume"
-     fi
-     logthis "next available device: $device"
-+    logthis "next available nvme device: $nvme_device"
  
-     # create the volume
-     local tmpfile=$(mktemp /tmp/ebs-autoscale.create-volume.XXXXXXXXXX)
-@@ -323,8 +339,8 @@
+@@ -322,13 +323,21 @@ function create_and_attach_volume() {
+     set -e
  
      logthis "waiting for volume $volume_id on filesystem"
++    set +e
      while true; do
 -        if [ -e "$device" ]; then
 -            logthis "volume $volume_id on filesystem as $device"
-+        if [ -e "$nvme_device" ]; then
++        # AWS returns e.g. vol-00338247831716a7b4, the kernel changes that to vol00338247831716a7b
++        valid_volume_id=`echo $volume_id |sed -e 's/[^a-zA-Z0-9]//'`
++        # example lsblk output:
++        # nvme4n1                     259:7    0  150G  0 disk            vol00338247831716a7b
++        LSBLK=`lsblk -o +SERIAL |grep $valid_volume_id`
++        if [[ $? -eq 0 ]]; then
++            nvme_device=`echo $LSBLK|cut -f1 -d' '|xargs -I {} echo "/dev/{}"`
 +            logthis "volume $volume_id on filesystem as $nvme_device (aws device $device)"
              break
          fi
          sleep 1
-@@ -338,7 +354,7 @@
+     done
++    set -e
+ 
+     # set volume delete on termination
+     aws ec2 modify-instance-attribute \
+@@ -338,7 +347,7 @@ function create_and_attach_volume() {
      > /dev/null
      logthis "volume $volume_id DeleteOnTermination ENABLED"
  
 -    echo $device
-+    echo "$nvme_device"n1
++    echo "$nvme_device"
  }
  
  create_and_attach_volume
-- 
2.30.2