20680: Only do a rolling update when there are multiple controllers
tools/salt-install/installer.sh
index 0f1d16ddee94375a97035adddd19c8137ef4fd25..fe04047431b42091d290ee05f0f49262275f1765 100755
@@ -35,6 +35,11 @@ declare DOMAIN
 # This will be populated by loadconfig()
 declare -A NODES
 
+# A bash associative array mapping each role to the comma-separated
+# list of nodes that should be provisioned with that role.
+# This will be populated by loadconfig()
+declare -A ROLE2NODES
+
 # The ssh user we'll use
 # This will be populated by loadconfig()
 declare DEPLOY_USER
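
A minimal sketch of what ROLE2NODES holds once loadconfig() populates it; the
node names here are hypothetical, and the comma-separated controller list is
what the rolling-update check later in this patch keys on:

    declare -A ROLE2NODES=(
        [database]="node1"
        [controller]="node1,node2"
        [balancer]="lb0"
    )
    # Multiple controllers show up as a comma in the list:
    if [[ ${ROLE2NODES['controller']} =~ , ]]; then
        echo "multiple controllers, will do a rolling update"
    fi
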
@@ -47,6 +52,11 @@ declare GITTARGET
 # This will be populated by loadconfig()
 declare USE_SSH_JUMPHOST
 
+# The temp file that will hold the SSH configuration used to disable envvar
+# forwarding, avoiding locale issues on Debian-based distros.
+# This will be populated by loadconfig()
+declare SSH_CONFFILE
+
 checktools() {
     local MISSING=''
     for a in git ip ; do
@@ -60,6 +70,13 @@ checktools() {
     fi
 }
 
+cleanup() {
+    local NODE=$1
+    local SSH=`ssh_cmd "$NODE"`
+    # Delete the old repositories (bare and checkout) left by a previous run
+    $SSH $DEPLOY_USER@$NODE rm -rf ${GITTARGET}.git ${GITTARGET}
+}
+
 sync() {
     local NODE=$1
     local BRANCH=$2
@@ -68,43 +85,39 @@ sync() {
     # each node, pushing our branch, and updating the checkout.
 
     if [[ "$NODE" != localhost ]] ; then
-               SSH=`ssh_cmd "$NODE"`
-               GIT="eval `git_cmd $NODE`"
-               if ! $SSH $DEPLOY_USER@$NODE test -d ${GITTARGET}.git ; then
-
-                       # Initialize the git repository (1st time case).  We're
-                       # actually going to make two repositories here because git
-                       # will complain if you try to push to a repository with a
-                       # checkout. So we're going to create a "bare" repository
-                       # and then clone a regular repository (with a checkout)
-                       # from that.
-
-                       $SSH $DEPLOY_USER@$NODE git init --bare --shared=0600 ${GITTARGET}.git
-                       if ! $GIT remote add $NODE $DEPLOY_USER@$NODE:${GITTARGET}.git ; then
-                               $GIT remote set-url $NODE $DEPLOY_USER@$NODE:${GITTARGET}.git
-                       fi
-                       $GIT push $NODE $BRANCH
-                       $SSH $DEPLOY_USER@$NODE "umask 0077 && git clone ${GITTARGET}.git ${GITTARGET}"
-               fi
+       SSH=`ssh_cmd "$NODE"`
+       GIT="eval `git_cmd $NODE`"
+
+       cleanup $NODE
 
-               # The update case.
-               #
-               # Push to the bare repository on the remote node, then in the
-               # remote node repository with the checkout, pull the branch
-               # from the bare repository.
+       # Add or update the git remote pointing at the node's bare repository.
+       if ! $GIT remote add $NODE $DEPLOY_USER@$NODE:${GITTARGET}.git ; then
+           $GIT remote set-url $NODE $DEPLOY_USER@$NODE:${GITTARGET}.git
+       fi
+
+       # Initialize the git repository.  We're
+       # actually going to make two repositories here because git
+       # will complain if you try to push to a repository with a
+       # checkout. So we're going to create a "bare" repository
+       # and then clone a regular repository (with a checkout)
+       # from that.
 
-               $GIT push $NODE $BRANCH
-               $SSH $DEPLOY_USER@$NODE "git -C ${GITTARGET} checkout ${BRANCH} && git -C ${GITTARGET} pull"
+       $SSH $DEPLOY_USER@$NODE git init --bare --shared=0600 ${GITTARGET}.git
+       $GIT push $NODE $BRANCH
+       $SSH $DEPLOY_USER@$NODE "umask 0077 && git clone -s ${GITTARGET}.git ${GITTARGET} && git -C ${GITTARGET} checkout ${BRANCH}"
     fi
 }
 
 deploynode() {
     local NODE=$1
     local ROLES=$2
+    local BRANCH=$3
 
     # Deploy a node.  This runs the provision script on the node, with
     # the appropriate roles.
 
+    sync $NODE $BRANCH
+
     if [[ -z "$ROLES" ]] ; then
                echo "No roles specified for $NODE, will deploy all roles"
     else
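
The reworked sync() reads more easily as a straight-line sketch. Against a
hypothetical node "node1" with branch "mybranch", it amounts to:

    SSH="ssh $DEPLOY_USER@node1"   # stand-in for what ssh_cmd produces
    # Start clean: remove both repositories left over from a prior run.
    $SSH rm -rf ${GITTARGET}.git ${GITTARGET}
    # Add (or re-point) a local git remote for the node's bare repository.
    git remote add node1 $DEPLOY_USER@node1:${GITTARGET}.git \
        || git remote set-url node1 $DEPLOY_USER@node1:${GITTARGET}.git
    # git refuses pushes to a checked-out branch, so the push target is a
    # bare repository with no working tree...
    $SSH git init --bare --shared=0600 ${GITTARGET}.git
    git push node1 mybranch
    # ...and the working tree is a shared clone (-s), which borrows objects
    # from the bare repository instead of copying them.
    $SSH "umask 0077 && git clone -s ${GITTARGET}.git ${GITTARGET} && git -C ${GITTARGET} checkout mybranch"
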
@@ -112,7 +125,7 @@ deploynode() {
     fi
 
     logfile=deploy-${NODE}-$(date -Iseconds).log
-       SSH=`ssh_cmd "$NODE"`
+    SSH=`ssh_cmd "$NODE"`
 
     if [[ "$NODE" = localhost ]] ; then
            SUDO=''
@@ -120,25 +133,32 @@ deploynode() {
                        SUDO=sudo
                fi
                $SUDO ./provision.sh --config ${CONFIG_FILE} ${ROLES} 2>&1 | tee $logfile
-       else
-               $SSH $DEPLOY_USER@$NODE "cd ${GITTARGET} && sudo ./provision.sh --config ${CONFIG_FILE} ${ROLES}" 2>&1 | tee $logfile
+    else
+           $SSH $DEPLOY_USER@$NODE "cd ${GITTARGET} && git log -n1 HEAD && DISABLED_CONTROLLER=\"$DISABLED_CONTROLLER\" sudo --preserve-env=DISABLED_CONTROLLER ./provision.sh --config ${CONFIG_FILE} ${ROLES}" 2>&1 | tee $logfile
+           cleanup $NODE
     fi
 }
 
 loadconfig() {
-    if [[ ! -s $CONFIG_FILE ]] ; then
+    if ! [[ -s ${CONFIG_FILE} && -s ${CONFIG_FILE}.secrets ]]; then
                echo "Must be run from initialized setup dir, maybe you need to 'initialize' first?"
     fi
-    source ${CONFIG_FILE}
+    source common.sh
     GITTARGET=arvados-deploy-config-${CLUSTER}
+
+       # Set up SSH so that it doesn't forward any environment variables. This
+       # avoids "setlocale" errors on the first run with distros (like Debian)
+       # whose default ssh_config forwards LANG and LC_*.
+       SSH_CONFFILE=$(mktemp)
+       echo "Include config SendEnv -*" > ${SSH_CONFFILE}
 }
 
 ssh_cmd() {
        local NODE=$1
        if [ -z "${USE_SSH_JUMPHOST}" -o "${NODE}" == "${USE_SSH_JUMPHOST}" -o "${NODE}" == "localhost" ]; then
-               echo "ssh"
+               echo "ssh -F ${SSH_CONFFILE}"
        else
-               echo "ssh -J ${DEPLOY_USER}@${USE_SSH_JUMPHOST}"
+               echo "ssh -F ${SSH_CONFFILE} -J ${DEPLOY_USER}@${USE_SSH_JUMPHOST}"
        fi
 }
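
The locale workaround deserves a note: Debian's stock /etc/ssh/ssh_config
ships "SendEnv LANG LC_*", so a plain ssh forwards the installer host's
locale to nodes that may not have it generated, which is what triggers the
setlocale errors. Passing -F makes ssh skip the system-wide config and read
only the temp file. A sketch of the resulting invocations, with placeholder
hostnames:

    SSH_CONFFILE=$(mktemp)
    echo "Include config SendEnv -*" > ${SSH_CONFFILE}
    # Direct connection:
    ssh -F ${SSH_CONFFILE} deploy@node1 true
    # Through a jumphost, when USE_SSH_JUMPHOST is set:
    ssh -F ${SSH_CONFFILE} -J deploy@bastion.example.org deploy@node1 true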
 
@@ -197,22 +217,24 @@ case "$subcmd" in
        cp -r *.sh tests $SETUPDIR
 
        cp local.params.example.$PARAMS $SETUPDIR/${CONFIG_FILE}
+       cp local.params.secrets.example $SETUPDIR/${CONFIG_FILE}.secrets
        cp -r config_examples/$SLS $SETUPDIR/${CONFIG_DIR}
 
        if [[ -n "$TERRAFORM" ]] ; then
            mkdir $SETUPDIR/terraform
            cp -r $TERRAFORM/* $SETUPDIR/terraform/
-               cp $TERRAFORM/.gitignore $SETUPDIR/terraform/
        fi
 
        cd $SETUPDIR
        echo '*.log' > .gitignore
+       echo '**/.terraform' >> .gitignore
+       echo '**/.infracost' >> .gitignore
 
        if [[ -n "$TERRAFORM" ]] ; then
                git add terraform
        fi
 
-       git add *.sh ${CONFIG_FILE} ${CONFIG_DIR} tests .gitignore
+       git add *.sh ${CONFIG_FILE} ${CONFIG_FILE}.secrets ${CONFIG_DIR} tests .gitignore
        git commit -m"initial commit"
 
        echo
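
For orientation, the setup directory committed here ends up roughly like this
(a sketch; CONFIG_FILE and CONFIG_DIR are defined earlier in the script):

    $SETUPDIR/
    |-- *.sh                     # installer, provision and helper scripts
    |-- tests/
    |-- ${CONFIG_FILE}           # from local.params.example.$PARAMS
    |-- ${CONFIG_FILE}.secrets   # from local.params.secrets.example (new)
    |-- ${CONFIG_DIR}/           # from config_examples/$SLS
    |-- terraform/               # only when initialized with terraform
    `-- .gitignore               # *.log, **/.terraform, **/.infracost
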
@@ -223,7 +245,7 @@ case "$subcmd" in
            (cd $SETUPDIR/terraform/services && terraform init)
            echo "Now go to $SETUPDIR, customize 'terraform/vpc/terraform.tfvars' as needed, then run 'installer.sh terraform'"
        else
-           echo "Now go to $SETUPDIR, customize '${CONFIG_FILE}' and '${CONFIG_DIR}' as needed, then run 'installer.sh deploy'"
+               echo "Now go to $SETUPDIR, customize '${CONFIG_FILE}', '${CONFIG_FILE}.secrets' and '${CONFIG_DIR}' as needed, then run 'installer.sh deploy'"
        fi
        ;;
 
@@ -235,6 +257,13 @@ case "$subcmd" in
        (cd terraform/services && echo -n 'letsencrypt_iam_secret_access_key = ' && terraform output letsencrypt_iam_secret_access_key) 2>&1 | tee -a $logfile
        ;;
 
+    terraform-destroy)
+       logfile=terraform-$(date -Iseconds).log
+       (cd terraform/services && terraform destroy) 2>&1 | tee -a $logfile
+       (cd terraform/data-storage && terraform destroy) 2>&1 | tee -a $logfile
+       (cd terraform/vpc && terraform destroy) 2>&1 | tee -a $logfile
+       ;;
+
     generate-tokens)
        for i in BLOB_SIGNING_KEY MANAGEMENT_TOKEN SYSTEM_ROOT_TOKEN ANONYMOUS_USER_TOKEN WORKBENCH_SECRET_KEY DATABASE_PASSWORD; do
            echo ${i}=$(tr -dc A-Za-z0-9 </dev/urandom | head -c 32 ; echo '')
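
Each value is drawn from the kernel RNG and trimmed to 32 alphanumeric
characters, emitted as KEY=value lines ready to paste into
${CONFIG_FILE}.secrets. The same idiom works standalone:

    # 32 random characters from [A-Za-z0-9]; head closes the pipe once it
    # has enough, which in turn terminates tr.
    tr -dc A-Za-z0-9 </dev/urandom | head -c 32; echo
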
@@ -250,58 +279,88 @@ case "$subcmd" in
 
        loadconfig
 
-       if grep -rni 'fixme' ${CONFIG_FILE} ${CONFIG_DIR} ; then
+       if grep -rni 'fixme' ${CONFIG_FILE} ${CONFIG_FILE}.secrets ${CONFIG_DIR} ; then
            echo
            echo "Some parameters still need to be updated.  Please fix them and then re-run deploy."
            exit 1
        fi
 
-       BRANCH=$(git branch --show-current)
+       BRANCH=$(git rev-parse --abbrev-ref HEAD)
 
        set -x
 
        git add -A
-       if ! git diff --cached --exit-code ; then
+       if ! git diff --cached --exit-code --quiet ; then
            git commit -m"prepare for deploy"
        fi
 
+       # Used for rolling updates to disable individual nodes at the
+       # load balancer.
+       export DISABLED_CONTROLLER=""
        if [[ -z "$NODE" ]]; then
            for NODE in "${!NODES[@]}"
            do
-               # First, push the git repo to each node.  This also
-               # confirms that we have git and can log into each
-               # node.
-               sync $NODE $BRANCH
+               # First, just confirm we can ssh to each node.
+               `ssh_cmd "$NODE"` $DEPLOY_USER@$NODE true
            done
 
            for NODE in "${!NODES[@]}"
            do
                # Do 'database' role first,
                if [[ "${NODES[$NODE]}" =~ database ]] ; then
-                   deploynode $NODE "${NODES[$NODE]}"
+                   deploynode $NODE "${NODES[$NODE]}" $BRANCH
                    unset NODES[$NODE]
                fi
            done
 
-           for NODE in "${!NODES[@]}"
-           do
-               # then  'api' or 'controller' roles
-               if [[ "${NODES[$NODE]}" =~ (api|controller) ]] ; then
-                   deploynode $NODE "${NODES[$NODE]}"
-                   unset NODES[$NODE]
-               fi
-           done
+           BALANCER=${ROLE2NODES['balancer']:-}
+
+           # Check if there are multiple controllers; they'll be comma-separated
+           # in ROLE2NODES.
+           if [[ ${ROLE2NODES['controller']} =~ , ]] ;
+           then
+               # If we have multiple controllers then there must be
+               # load balancer. We want to do a rolling update, take
+               # down each node at the load balancer before updating
+               # it.
+
+               for NODE in "${!NODES[@]}"
+               do
+                   if [[ "${NODES[$NODE]}" =~ controller ]] ; then
+                       export DISABLED_CONTROLLER=$NODE
+
+                       # Tell the balancer that this node is disabled
+                       deploynode $BALANCER "${NODES[$BALANCER]}" $BRANCH
+
+                       # Now update the node itself
+                       deploynode $NODE "${NODES[$NODE]}" $BRANCH
+                       unset NODES[$NODE]
+                   fi
+               done
+           else
+               # Only one controller
+               NODE=${ROLE2NODES['controller']}
+               deploynode $NODE "${NODES[$NODE]}" $BRANCH
+               unset NODES[$NODE]
+           fi
+
+           if [[ -n "$BALANCER" ]] ; then
+               # Deploy balancer. In the rolling update case, this
+               # will re-enable all the controllers at the balancer.
+               export DISABLED_CONTROLLER=""
+               deploynode $BALANCER "${NODES[$BALANCER]}" $BRANCH
+               unset NODES[$BALANCER]
+           fi
 
            for NODE in "${!NODES[@]}"
            do
                # Everything else (we removed the nodes that we
                # already deployed from the list)
-               deploynode $NODE "${NODES[$NODE]}"
+               deploynode $NODE "${NODES[$NODE]}" $BRANCH
            done
        else
            # Just deploy the node that was supplied on the command line.
-           sync $NODE $BRANCH
-           deploynode $NODE ""
+           deploynode $NODE "${NODES[$NODE]}" $BRANCH
        fi
 
        set +x
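
Putting the new ordering together for a hypothetical cluster with controllers
node1 and node2 behind balancer lb0, the rolling update the deploy loop
performs reduces to:

    for CTRL in node1 node2; do
        # Re-provision the balancer with $CTRL marked as disabled, so it
        # stops routing traffic to that controller...
        export DISABLED_CONTROLLER=$CTRL
        deploynode lb0 "${NODES[lb0]}" "$BRANCH"
        # ...after which it is safe to upgrade the controller itself.
        deploynode $CTRL "${NODES[$CTRL]}" "$BRANCH"
    done
    # Finally, deploy the balancer once more with nothing disabled to put
    # every controller back in rotation.
    export DISABLED_CONTROLLER=""
    deploynode lb0 "${NODES[lb0]}" "$BRANCH"
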
@@ -330,7 +389,7 @@ case "$subcmd" in
            exit 1
        fi
 
-       export ARVADOS_API_HOST="${CLUSTER}.${DOMAIN}:${CONTROLLER_EXT_SSL_PORT}"
+       export ARVADOS_API_HOST="${DOMAIN}:${CONTROLLER_EXT_SSL_PORT}"
        export ARVADOS_API_TOKEN="$SYSTEM_ROOT_TOKEN"
 
        arvados-client diagnostics $LOCATION
@@ -341,6 +400,7 @@ case "$subcmd" in
        echo ""
        echo "initialize        initialize the setup directory for configuration"
        echo "terraform         create cloud resources using terraform"
+       echo "terraform-destroy destroy cloud resources created by terraform"
        echo "generate-tokens   generate random values for tokens"
        echo "deploy            deploy the configuration from the setup directory"
        echo "diagnostics       check your install using diagnostics"