20680: installer does rolling update of controller
authorPeter Amstutz <peter.amstutz@curii.com>
Tue, 8 Aug 2023 13:36:15 +0000 (09:36 -0400)
committerPeter Amstutz <peter.amstutz@curii.com>
Tue, 8 Aug 2023 13:36:15 +0000 (09:36 -0400)
When there is a load balancer, disable each controller node in turn
before updating it.

Also, don't reduce controller_nr when a controller is down; having
worker_connections go up and down during deploy seems like it could
cause problems.

Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz@curii.com>

tools/salt-install/common.sh
tools/salt-install/config_examples/multi_host/aws/pillars/nginx_balancer_configuration.sls
tools/salt-install/installer.sh
tools/salt-install/provision.sh

index d406f2ff60ab70522fc8f101d4c76f30ea17c15a..0c5dd50e6c6b99b75a3132cd0160db302adde1f2 100644 (file)
@@ -29,7 +29,6 @@ done
 
 # The mapping of roles to nodes. This is used to dynamically adjust
 # salt pillars.
-declare -A ROLE2NODES
 for node in "${!NODES[@]}"; do
   roles="${NODES[$node]}"
 
index b2b4db145c58672694acfa624d32442051bb0f6f..a3a3d405f1de5bd2ac4600989d1f8325ce10ea9d 100644 (file)
@@ -8,9 +8,6 @@
 {%- set balancer_backends = "__CONTROLLER_NODES__".split(",") %}
 {%- set controller_nr = balancer_backends|length %}
 {%- set disabled_controller = "__DISABLED_CONTROLLER__" %}
-{%- if disabled_controller != "" %}
-  {%- set controller_nr = controller_nr - 1 %}
-{%- endif %}
 {%- set max_reqs = ("__CONTROLLER_MAX_QUEUED_REQUESTS__" or 128)|int %}
 
 ### NGINX
index 37007da7b66dc44aae4b998b1e7d65fcee25e360..3c9df7c5df24670b95ac89392dab8086f8ad7df8 100755 (executable)
@@ -35,6 +35,11 @@ declare DOMAIN
 # This will be populated by loadconfig()
 declare -A NODES
 
+# A bash associative array listing each role and mapping to the nodes
+# that should be provisioned with this role.
+# This will be populated by loadconfig()
+declare -A ROLE2NODES
+
 # The ssh user we'll use
 # This will be populated by loadconfig()
 declare DEPLOY_USER
@@ -106,10 +111,13 @@ sync() {
 deploynode() {
     local NODE=$1
     local ROLES=$2
+    local BRANCH=$3
 
     # Deploy a node.  This runs the provision script on the node, with
     # the appropriate roles.
 
+    sync $NODE $BRANCH
+
     if [[ -z "$ROLES" ]] ; then
                echo "No roles specified for $NODE, will deploy all roles"
     else
@@ -126,7 +134,7 @@ deploynode() {
                fi
                $SUDO ./provision.sh --config ${CONFIG_FILE} ${ROLES} 2>&1 | tee $logfile
     else
-           $SSH $DEPLOY_USER@$NODE "cd ${GITTARGET} && git log -n1 HEAD && sudo ./provision.sh --config ${CONFIG_FILE} ${ROLES}" 2>&1 | tee $logfile
+           $SSH $DEPLOY_USER@$NODE "cd ${GITTARGET} && git log -n1 HEAD && DISABLED_CONTROLLER=\"$DISABLED_CONTROLLER\" sudo --preserve-env=DISABLED_CONTROLLER ./provision.sh --config ${CONFIG_FILE} ${ROLES}" 2>&1 | tee $logfile
            cleanup $NODE
     fi
 }
@@ -286,52 +294,66 @@ case "$subcmd" in
            git commit -m"prepare for deploy"
        fi
 
+       # Used for rolling updates to disable individual nodes at the
+       # load balancer.
+       export DISABLED_CONTROLLER=""
        if [[ -z "$NODE" ]]; then
            for NODE in "${!NODES[@]}"
            do
-               # First, push the git repo to each node.  This also
-               # confirms that we have git and can log into each
-               # node.
-               sync $NODE $BRANCH
+               # First, just confirm we can ssh to each node.
+               `ssh_cmd "$NODE"` $DEPLOY_USER@$NODE true
            done
 
            for NODE in "${!NODES[@]}"
            do
                # Do 'database' role first,
                if [[ "${NODES[$NODE]}" =~ database ]] ; then
-                   deploynode $NODE "${NODES[$NODE]}"
-                   unset NODES[$NODE]
-               fi
-           done
-
-           for NODE in "${!NODES[@]}"
-           do
-               # then 'balancer' role
-               if [[ "${NODES[$NODE]}" =~ balancer ]] ; then
-                   deploynode $NODE "${NODES[$NODE]}"
+                   deploynode $NODE "${NODES[$NODE]}" $BRANCH
                    unset NODES[$NODE]
                fi
            done
 
-           for NODE in "${!NODES[@]}"
-           do
-               # then 'controller' role
-               if [[ "${NODES[$NODE]}" =~ controller ]] ; then
-                   deploynode $NODE "${NODES[$NODE]}"
-                   unset NODES[$NODE]
-               fi
-           done
+           if [[ ${ENABLE_BALANCER} == yes ]] ;
+           then
+               # We have a load balancer, so do a rolling update,
+               # take down each node at the load balancer before
+               # updating it.
+               BALANCER=${ROLE2NODES['balancer']}
+
+               for NODE in "${!NODES[@]}"
+               do
+                   if [[ "${NODES[$NODE]}" =~ controller ]] ; then
+                       export DISABLED_CONTROLLER=$NODE
+
+                       # Update balancer that the node is disabled
+                       deploynode $BALANCER "${NODES[$BALANCER]}" $BRANCH
+
+                       # Now update the node itself
+                       deploynode $NODE "${NODES[$NODE]}" $BRANCH
+                       unset NODES[$NODE]
+                   fi
+               done
+
+               # Now make sure all nodes are enabled.
+               export DISABLED_CONTROLLER=""
+               deploynode $BALANCER "${NODES[$BALANCER]}" $BRANCH
+               unset NODES[$BALANCER]
+           else
+               # No balancer, should only be one controller
+               NODE=${ROLE2NODES['controller']}
+               deploynode $NODE "${NODES[$NODE]}" $BRANCH
+               unset NODES[$NODE]
+           fi
 
            for NODE in "${!NODES[@]}"
            do
                # Everything else (we removed the nodes that we
                # already deployed from the list)
-               deploynode $NODE "${NODES[$NODE]}"
+               deploynode $NODE "${NODES[$NODE]}" $BRANCH
            done
        else
            # Just deploy the node that was supplied on the command line.
-           sync $NODE $BRANCH
-           deploynode $NODE "${NODES[$NODE]}"
+           deploynode $NODE "${NODES[$NODE]}" $BRANCH
        fi
 
        set +x
index 203b4b7e7020b682fa64c46418aa97b8c731b14c..3aa94aa58537dc0d7987740513eeb2553ea748e9 100755 (executable)
@@ -309,6 +309,7 @@ arguments ${@}
 
 declare -A NODES
 declare -A ROLES
+declare -A ROLE2NODES
 declare NODELIST
 
 source common.sh