From e308ed737cb1645014f75eeb6774ad87d7d502a7 Mon Sep 17 00:00:00 2001 From: Ward Vandewege Date: Fri, 18 Feb 2022 19:27:11 -0500 Subject: [PATCH] 18772: add support for the AWS EBS autoscale script to the compute node image builder. Arvados-DCO-1.1-Signed-off-by: Ward Vandewege --- ...install-dispatch-cloud.html.textile.liquid | 26 ++++++++ lib/config/config.default.yml | 2 + tools/compute-images/arvados-images-aws.json | 11 +++- tools/compute-images/build.sh | 13 +++- tools/compute-images/scripts/base.sh | 26 +++++++- ...-encrypted-partitions-aws-ebs-autoscale.sh | 60 +++++++++++++++++++ 6 files changed, 134 insertions(+), 4 deletions(-) create mode 100644 tools/compute-images/scripts/usr-local-bin-ensure-encrypted-partitions-aws-ebs-autoscale.sh diff --git a/doc/install/crunch2-cloud/install-dispatch-cloud.html.textile.liquid b/doc/install/crunch2-cloud/install-dispatch-cloud.html.textile.liquid index 06a918dd37..ee71d7a3f6 100644 --- a/doc/install/crunch2-cloud/install-dispatch-cloud.html.textile.liquid +++ b/doc/install/crunch2-cloud/install-dispatch-cloud.html.textile.liquid @@ -120,6 +120,32 @@ The ImageID value is the compute node image that +Example policy for the IAM role used by the cloud dispatcher: + + +
+{
+    "Version": "2012-10-17",
+    "Id": "arvados-dispatch-cloud policy",
+    "Statement": [
+        {
+            "Effect": "Allow",
+            "Action": [
+                "iam:PassRole",
+                "ec2:DescribeKeyPairs",
+                "ec2:ImportKeyPair",
+                "ec2:RunInstances",
+                "ec2:DescribeInstances",
+                "ec2:CreateTags",
+                "ec2:TerminateInstances"
+            ],
+            "Resource": "*"
+        }
+    ]
+}
+
+
+ h4. Minimal configuration example for Azure Using managed disks: diff --git a/lib/config/config.default.yml b/lib/config/config.default.yml index 07ff772e21..7f191eb118 100644 --- a/lib/config/config.default.yml +++ b/lib/config/config.default.yml @@ -1269,6 +1269,8 @@ Clusters: Region: "" EBSVolumeType: gp2 AdminUsername: debian + # (ec2) name of the IamInstanceProfile for instances started by + # the cloud dispatcher. Leave blank when not needed. IamInstanceProfile: "" # (azure) Credentials. diff --git a/tools/compute-images/arvados-images-aws.json b/tools/compute-images/arvados-images-aws.json index 23b7832fcb..131aa8a878 100644 --- a/tools/compute-images/arvados-images-aws.json +++ b/tools/compute-images/arvados-images-aws.json @@ -6,6 +6,7 @@ "aws_profile": "", "aws_secret_key": "", "aws_source_ami": "ami-031283ff8a43b021c", + "aws_ebs_autoscale": "", "build_environment": "aws", "public_key_file": "", "mksquashfs_mem": "", @@ -76,6 +77,14 @@ "type": "file", "source": "scripts/usr-local-bin-ensure-encrypted-partitions.sh", "destination": "/tmp/usr-local-bin-ensure-encrypted-partitions.sh" + },{ + "type": "file", + "source": "scripts/usr-local-bin-ensure-encrypted-partitions-aws-ebs-autoscale.sh", + "destination": "/tmp/usr-local-bin-ensure-encrypted-partitions-aws-ebs-autoscale.sh" + },{ + "type": "file", + "source": "scripts/create-ebs-volume-nvme.patch", + "destination": "/tmp/create-ebs-volume-nvme.patch" },{ "type": "file", "source": "{{user `public_key_file`}}", @@ -84,6 +93,6 @@ "type": "shell", "execute_command": "sudo -S env {{ .Vars }} /bin/bash '{{ .Path }}'", "script": "scripts/base.sh", - "environment_vars": ["RESOLVER={{user `resolver`}}","REPOSUFFIX={{user `reposuffix`}}","MKSQUASHFS_MEM={{user `mksquashfs_mem`}}","NVIDIA_GPU_SUPPORT={{user `nvidia_gpu_support`}}","CLOUD=aws"] + "environment_vars": ["RESOLVER={{user `resolver`}}","REPOSUFFIX={{user `reposuffix`}}","MKSQUASHFS_MEM={{user `mksquashfs_mem`}}","NVIDIA_GPU_SUPPORT={{user 
`nvidia_gpu_support`}}","CLOUD=aws","AWS_EBS_AUTOSCALE={{user `aws_ebs_autoscale`}}"] }] } diff --git a/tools/compute-images/build.sh b/tools/compute-images/build.sh index fce8b1918b..ea7676db1d 100755 --- a/tools/compute-images/build.sh +++ b/tools/compute-images/build.sh @@ -33,6 +33,8 @@ Options: VPC id for AWS, otherwise packer will pick the default one --aws-subnet-id Subnet id for AWS otherwise packer will pick the default one for the VPC + --aws-ebs-autoscale (default: false) + Install the AWS EBS autoscaler daemon. --gcp-project-id (default: false, required if building for GCP) GCP project id --gcp-account-file (default: false, required if building for GCP) @@ -62,6 +64,8 @@ Options: --debug (default: false) Output debug information +For more information, see the Arvados documentation at https://doc.arvados.org/install/crunch2-cloud/install-compute-node.html + EOF JSON_FILE= @@ -71,6 +75,7 @@ AWS_SECRETS_FILE= AWS_SOURCE_AMI= AWS_VPC_ID= AWS_SUBNET_ID= +AWS_EBS_AUTOSCALE= GCP_PROJECT_ID= GCP_ACCOUNT_FILE= GCP_ZONE= @@ -86,7 +91,7 @@ MKSQUASHFS_MEM=256M NVIDIA_GPU_SUPPORT= PARSEDOPTS=$(getopt --name "$0" --longoptions \ - help,json-file:,arvados-cluster-id:,aws-source-ami:,aws-profile:,aws-secrets-file:,aws-region:,aws-vpc-id:,aws-subnet-id:,gcp-project-id:,gcp-account-file:,gcp-zone:,azure-secrets-file:,azure-resource-group:,azure-location:,azure-sku:,azure-cloud-environment:,ssh_user:,resolver:,reposuffix:,public-key-file:,mksquashfs-mem:,nvidia-gpu-support,debug \ + help,json-file:,arvados-cluster-id:,aws-source-ami:,aws-profile:,aws-secrets-file:,aws-region:,aws-vpc-id:,aws-subnet-id:,aws-ebs-autoscale,gcp-project-id:,gcp-account-file:,gcp-zone:,azure-secrets-file:,azure-resource-group:,azure-location:,azure-sku:,azure-cloud-environment:,ssh_user:,resolver:,reposuffix:,public-key-file:,mksquashfs-mem:,nvidia-gpu-support,debug \ -- "" "$@") if [ $? 
-ne 0 ]; then exit 1 @@ -124,6 +129,9 @@ while [ $# -gt 0 ]; do --aws-subnet-id) AWS_SUBNET_ID="$2"; shift ;; + --aws-ebs-autoscale) + AWS_EBS_AUTOSCALE=1 + ;; --gcp-project-id) GCP_PROJECT_ID="$2"; shift ;; @@ -235,6 +243,9 @@ fi if [[ "$AWS_DEFAULT_REGION" != "" ]]; then EXTRA2+=" -var aws_default_region=$AWS_DEFAULT_REGION" fi +if [[ "$AWS_EBS_AUTOSCALE" != "" ]]; then + EXTRA2+=" -var aws_ebs_autoscale=$AWS_EBS_AUTOSCALE" +fi if [[ "$GCP_PROJECT_ID" != "" ]]; then EXTRA2+=" -var project_id=$GCP_PROJECT_ID" fi diff --git a/tools/compute-images/scripts/base.sh b/tools/compute-images/scripts/base.sh index 450a8b3c54..f02c94eda0 100644 --- a/tools/compute-images/scripts/base.sh +++ b/tools/compute-images/scripts/base.sh @@ -142,8 +142,30 @@ $SUDO chmod 700 /home/crunch/.ssh/ if [ "x$RESOLVER" != "x" ]; then $SUDO sed -i "s/#prepend domain-name-servers 127.0.0.1;/prepend domain-name-servers ${RESOLVER};/" /etc/dhcp/dhclient.conf fi -# Set up the cloud-init script that will ensure encrypted disks -$SUDO mv /tmp/usr-local-bin-ensure-encrypted-partitions.sh /usr/local/bin/ensure-encrypted-partitions.sh + +if [ "$AWS_EBS_AUTOSCALE" != "1" ]; then + # Set up the cloud-init script that will ensure encrypted disks + $SUDO mv /tmp/usr-local-bin-ensure-encrypted-partitions.sh /usr/local/bin/ensure-encrypted-partitions.sh +else + wait_for_apt_locks && $SUDO DEBIAN_FRONTEND=noninteractive apt-get -qq --yes install jq unzip + + curl -s "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "/tmp/awscliv2.zip" + unzip -q /tmp/awscliv2.zip -d /tmp && $SUDO /tmp/aws/install + # Pinned to v2.4.5 because we apply a patch below + #export EBS_AUTOSCALE_VERSION=$(curl --silent "https://api.github.com/repos/awslabs/amazon-ebs-autoscale/releases/latest" | jq -r .tag_name) + export EBS_AUTOSCALE_VERSION="v2.4.5" + cd /opt && $SUDO git clone https://github.com/awslabs/amazon-ebs-autoscale.git + cd /opt/amazon-ebs-autoscale && $SUDO git checkout $EBS_AUTOSCALE_VERSION + cd bin + 
$SUDO patch -p1 < /tmp/create-ebs-volume-nvme.patch + + # This script really requires bash and the shebang line is wrong + $SUDO sed -i 's|^#!/bin/sh|#!/bin/bash|' /opt/amazon-ebs-autoscale/bin/ebs-autoscale + + # Set up the cloud-init script that makes use of the AWS EBS autoscaler + $SUDO mv /tmp/usr-local-bin-ensure-encrypted-partitions-aws-ebs-autoscale.sh /usr/local/bin/ensure-encrypted-partitions.sh +fi + $SUDO chmod 755 /usr/local/bin/ensure-encrypted-partitions.sh $SUDO chown root:root /usr/local/bin/ensure-encrypted-partitions.sh $SUDO mv /tmp/etc-cloud-cloud.cfg.d-07_compute_arvados_dispatch_cloud.cfg /etc/cloud/cloud.cfg.d/07_compute_arvados_dispatch_cloud.cfg diff --git a/tools/compute-images/scripts/usr-local-bin-ensure-encrypted-partitions-aws-ebs-autoscale.sh b/tools/compute-images/scripts/usr-local-bin-ensure-encrypted-partitions-aws-ebs-autoscale.sh new file mode 100644 index 0000000000..4b73c8bc4f --- /dev/null +++ b/tools/compute-images/scripts/usr-local-bin-ensure-encrypted-partitions-aws-ebs-autoscale.sh @@ -0,0 +1,60 @@ +#!/bin/bash + +# Copyright (C) The Arvados Authors. All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +set -e +set -x + +MOUNTPATH=/tmp + +findmntq() { + findmnt "$@" >/dev/null +} + +ensure_umount() { + if findmntq "$1"; then + umount "$1" + fi +} + +# First make sure docker is not using /tmp, then unmount everything under it. 
+if [ -d /etc/sv/docker.io ]
+then
+  sv stop docker.io || service docker.io stop || true  # best effort: docker may not be running yet
+else
+  service docker stop || true
+fi
+
+ensure_umount "$MOUNTPATH/docker/aufs"
+# Run the EBS autoscale installer for $MOUNTPATH; redirect stdout first so stderr lands in the log too.
+/bin/bash /opt/amazon-ebs-autoscale/install.sh -f lvm.ext4 -m "$MOUNTPATH" > /var/log/ebs-autoscale-install.log 2>&1
+
+# Make sure docker uses the big partition
+cat <<EOF > /etc/docker/daemon.json
+{
+    "data-root": "$MOUNTPATH/docker-data"
+}
+EOF
+
+# restart docker
+if [ -d /etc/sv/docker.io ]
+then
+  ## runit
+  sv up docker.io
+else
+  service docker start
+fi
+
+end=$((SECONDS+60))
+# Poll once per second until docker answers or the 60 second deadline passes.
+while [ $SECONDS -lt $end ]; do
+  if /usr/bin/docker ps -q >/dev/null; then
+    exit 0
+  fi
+  sleep 1
+done
+
+# Docker didn't start within a minute, abort
+exit 1
-- 
2.30.2