Merge branch '18870-installer' refs #18870
author Peter Amstutz <peter.amstutz@curii.com>
Thu, 30 Jun 2022 18:06:48 +0000 (14:06 -0400)
committer Peter Amstutz <peter.amstutz@curii.com>
Thu, 30 Jun 2022 18:06:48 +0000 (14:06 -0400)
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz@curii.com>

15 files changed:
services/login-sync/bin/arvados-login-sync
tools/salt-install/config_examples/multi_host/aws/pillars/arvados.sls
tools/salt-install/config_examples/multi_host/aws/pillars/postgresql.sls
tools/salt-install/config_examples/multi_host/aws/states/shell_cron_add_login_sync.sls
tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_passenger.sls
tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/postgresql.sls
tools/salt-install/config_examples/single_host/multiple_hostnames/states/host_entries.sls
tools/salt-install/config_examples/single_host/single_hostname/pillars/nginx_passenger.sls
tools/salt-install/config_examples/single_host/single_hostname/pillars/postgresql.sls
tools/salt-install/config_examples/single_host/single_hostname/states/host_entries.sls
tools/salt-install/installer.sh [new file with mode: 0755]
tools/salt-install/local.params.example.multiple_hosts
tools/salt-install/local.params.example.single_host_multiple_hostnames
tools/salt-install/local.params.example.single_host_single_hostname
tools/salt-install/provision.sh

diff --git a/services/login-sync/bin/arvados-login-sync b/services/login-sync/bin/arvados-login-sync
index da8a21efa37c8a8db91b925bc56040f7bff494b8..5c6691ab95279920498875a7e49295c8a2b4a5a4 100755 (executable)
@@ -10,6 +10,7 @@ require 'etc'
 require 'fileutils'
 require 'yaml'
 require 'optparse'
+require 'open3'
 
 req_envs = %w(ARVADOS_API_HOST ARVADOS_API_TOKEN ARVADOS_VIRTUAL_MACHINE_UUID)
 req_envs.each do |k|
@@ -124,11 +125,12 @@ begin
     unless pwnam[l[:username]]
       STDERR.puts "Creating account #{l[:username]}"
       # Create new user
-      unless system("useradd", "-m",
+      out, st = Open3.capture2e("useradd", "-m",
                 "-c", username,
                 "-s", "/bin/bash",
                 username)
-        STDERR.puts "Account creation failed for #{l[:username]}: #{$?}"
+      if st.exitstatus != 0
+        STDERR.puts "Account creation failed for #{l[:username]}:\n#{out}"
         next
       end
       begin
@@ -150,7 +152,10 @@ begin
       if existing_groups.index(addgroup).nil?
         # User should be in group, but isn't, so add them.
         STDERR.puts "Add user #{username} to #{addgroup} group"
-        system("usermod", "-aG", addgroup, username)
+        out, st = Open3.capture2e("usermod", "-aG", addgroup, username)
+        if st.exitstatus != 0
+          STDERR.puts "Failed to add #{username} to #{addgroup} group:\n#{out}"
+        end
       end
     end
 
@@ -158,7 +163,10 @@ begin
       if groups.index(removegroup).nil?
         # User is in a group, but shouldn't be, so remove them.
         STDERR.puts "Remove user #{username} from #{removegroup} group"
-        system("gpasswd", "-d", username, removegroup)
+        out, st = Open3.capture2e("gpasswd", "-d", username, removegroup)
+        if st.exitstatus != 0
+          STDERR.puts "Failed to remove user #{username} from #{removegroup} group:\n#{out}"
+        end
       end
     end
 
diff --git a/tools/salt-install/config_examples/multi_host/aws/pillars/arvados.sls b/tools/salt-install/config_examples/multi_host/aws/pillars/arvados.sls
index f41b6ac5b36d45fc1e4ff0503c2072890c22a81a..02653082f30cafb75c03d28764d7b9115aab9214 100644 (file)
@@ -93,7 +93,7 @@ arvados:
     resources:
       virtual_machines:
         shell:
-          name: shell
+          name: shell.__CLUSTER__.__DOMAIN__
           backend: __SHELL_INT_IP__
           port: 4200
 
diff --git a/tools/salt-install/config_examples/multi_host/aws/pillars/postgresql.sls b/tools/salt-install/config_examples/multi_host/aws/pillars/postgresql.sls
index e06ddd041c9acb4d01a1bab8a3deb8de6253f287..d6320da24651612e760178fa598bdd0fb6353b83 100644 (file)
@@ -19,7 +19,7 @@ postgres:
   users:
     __CLUSTER___arvados:
       ensure: present
-      password: __DATABASE_PASSWORD__
+      password: "__DATABASE_PASSWORD__"
 
   # tablespaces:
   #   arvados_tablespace:
diff --git a/tools/salt-install/config_examples/multi_host/aws/states/shell_cron_add_login_sync.sls b/tools/salt-install/config_examples/multi_host/aws/states/shell_cron_add_login_sync.sls
index 86c591e97ed3679eb8687fddb19939839e63ad92..9028b9b1001f2f297d170f4d882574d42b875548 100644 (file)
@@ -75,6 +75,13 @@ extra_shell_cron_add_login_sync_add_{{ vm }}_arvados_virtual_machine_uuid_cron_e
     - onlyif:
       - /bin/grep -qE "[a-z0-9]{5}-2x53u-[a-z0-9]{15}" /tmp/vm_uuid_{{ vm }}
 
+extra_shell_cron_add_login_sync_add_{{ vm }}_sbin_to_path_cron_env_present:
+  cron.env_present:
+    - name: PATH
+    - value: "/bin:/usr/bin:/usr/sbin"
+    - onlyif:
+      - /bin/grep -qE "[a-z0-9]{5}-2x53u-[a-z0-9]{15}" /tmp/vm_uuid_{{ vm }}
+
 extra_shell_cron_add_login_sync_add_{{ vm }}_arvados_login_sync_cron_present:
   cron.present:
     - name: /usr/local/bin/arvados-login-sync
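The new cron.env_present entry matters because arvados-login-sync shells out to useradd, usermod, and gpasswd, which live in /usr/sbin and are not on cron's default PATH. A hypothetical spot-check from the deploy host (the hostname is a placeholder, and this assumes the job is installed in root's crontab, as the state's defaults suggest):

  # The managed crontab should now carry the PATH override next to the
  # arvados-login-sync entry.
  ssh shell.ClusterID.example.com 'sudo crontab -l | grep -E "^PATH=|arvados-login-sync"'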
diff --git a/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_passenger.sls b/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_passenger.sls
index dfddf3b62361ae66305f178bd3c0a84436a50082..cf087797159077c42334f4c93fe3df54a238906a 100644 (file)
@@ -55,7 +55,7 @@ nginx:
       - add_header: 'Strict-Transport-Security "max-age=63072000" always'
 
       # OCSP stapling
-      # FIXME! Stapling does not work with self-signed certificates, so disabling for tests
+      # NOTE! Stapling does not work with self-signed certificates, so disabling for tests
       # - ssl_stapling: 'on'
       # - ssl_stapling_verify: 'on'
 
diff --git a/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/postgresql.sls b/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/postgresql.sls
index f3bc09f65036c7349e8f9f9fa1cd21746c25cdec..edb961ebaaeccca0899d0c2633ca7c0957369805 100644 (file)
@@ -38,7 +38,7 @@ postgres:
   users:
     __CLUSTER___arvados:
       ensure: present
-      password: __DATABASE_PASSWORD__
+      password: "__DATABASE_PASSWORD__"
 
   # tablespaces:
   #   arvados_tablespace:
diff --git a/tools/salt-install/config_examples/single_host/multiple_hostnames/states/host_entries.sls b/tools/salt-install/config_examples/single_host/multiple_hostnames/states/host_entries.sls
index 379f4765cb0aa88689f31d99bf5c03ea84d5e560..c2d34ea28c1dd2c0551473eac8943973d5183804 100644 (file)
@@ -12,7 +12,7 @@ arvados_test_salt_states_examples_single_host_etc_hosts_host_present:
     - ip: 127.0.1.1
     - names:
       - {{ arvados.cluster.name }}.{{ arvados.cluster.domain }}
-      # FIXME! This just works for our testings.
+      # NOTE! This just works for our testing.
       # Won't work if the cluster name != host name
       {%- for entry in [
           'api',
diff --git a/tools/salt-install/config_examples/single_host/single_hostname/pillars/nginx_passenger.sls b/tools/salt-install/config_examples/single_host/single_hostname/pillars/nginx_passenger.sls
index 21c1510de8aa36a153d76d2c7bd8ee8ae44d4cd2..26e2baf0446b861fccadfd466da74414c9c77856 100644 (file)
@@ -55,7 +55,7 @@ nginx:
       - add_header: 'Strict-Transport-Security "max-age=63072000" always'
 
       # OCSP stapling
-      # FIXME! Stapling does not work with self-signed certificates, so disabling for tests
+      # NOTE! Stapling does not work with self-signed certificates, so disabling for tests
       # - ssl_stapling: 'on'
       # - ssl_stapling_verify: 'on'
 
diff --git a/tools/salt-install/config_examples/single_host/single_hostname/pillars/postgresql.sls b/tools/salt-install/config_examples/single_host/single_hostname/pillars/postgresql.sls
index a69b88cb173aa4f72e17343997c65d072456b9b9..14452a990541bf47fee379a33345895f6652cbd8 100644 (file)
@@ -40,7 +40,7 @@ postgres:
   users:
     __CLUSTER___arvados:
       ensure: present
-      password: __DATABASE_PASSWORD__
+      password: "__DATABASE_PASSWORD__"
 
   # tablespaces:
   #   arvados_tablespace:
diff --git a/tools/salt-install/config_examples/single_host/single_hostname/states/host_entries.sls b/tools/salt-install/config_examples/single_host/single_hostname/states/host_entries.sls
index a688f4f8c11535fdcaaac7b33eaaccf5cddd16c9..51308fffa2c75445df9cd41287675a4a8d4aaedd 100644 (file)
@@ -21,7 +21,7 @@ arvados_test_salt_states_examples_single_host_etc_hosts_host_present:
     - ip: 127.0.1.1
     - names:
       - {{ arvados.cluster.name }}.{{ arvados.cluster.domain }}
-      # FIXME! This just works for our testing.
+      # NOTE! This just works for our testing.
       # Won't work if the cluster name != host name
       {%- for entry in [
           'api',
diff --git a/tools/salt-install/installer.sh b/tools/salt-install/installer.sh
new file mode 100755 (executable)
index 0000000..e5ff7be
--- /dev/null
@@ -0,0 +1,257 @@
+#!/bin/bash
+
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: CC-BY-SA-3.0
+
+#
+# installer.sh
+#
+# Helps manage the configuration in a git repository, and then deploy
+# nodes by pushing a copy of the git repository to each node and
+# running the provision script to do the actual installation and
+# configuration.
+#
+
+set -eu
+
+# The parameter file
+declare CONFIG_FILE=local.params
+
+# The salt template directory
+declare CONFIG_DIR=local_config_dir
+
+# The 5-character Arvados cluster id
+# This will be populated by loadconfig()
+declare CLUSTER
+
+# The parent domain (not including the cluster id)
+# This will be populated by loadconfig()
+declare DOMAIN
+
+# A bash associative array listing each node and mapping to the roles
+# that should be provisioned on those nodes.
+# This will be populated by loadconfig()
+declare -A NODES
+
+# The ssh user we'll use
+# This will be populated by loadconfig()
+declare DEPLOY_USER
+
+# The git repository that we'll push to on all the nodes
+# This will be populated by loadconfig()
+declare GITTARGET
+
+sync() {
+    local NODE=$1
+    local BRANCH=$2
+
+    # Synchronizes the configuration by creating a git repository on
+    # each node, pushing our branch, and updating the checkout.
+
+    if [[ "$NODE" != localhost ]] ; then
+       if ! ssh $NODE test -d ${GITTARGET}.git ; then
+
+           # Initialize the git repository (1st time case).  We're
+           # actually going to make two repositories here because git
+           # will complain if you try to push to a repository with a
+           # checkout. So we're going to create a "bare" repository
+           # and then clone a regular repository (with a checkout)
+           # from that.
+
+           ssh $NODE git init --bare ${GITTARGET}.git
+           if ! git remote add $NODE $DEPLOY_USER@$NODE:${GITTARGET}.git ; then
+               git remote set-url $NODE $DEPLOY_USER@$NODE:${GITTARGET}.git
+           fi
+           git push $NODE $BRANCH
+           ssh $NODE git clone ${GITTARGET}.git ${GITTARGET}
+       fi
+
+       # The update case.
+       #
+       # Push to the bare repository on the remote node, then in the
+       # remote node repository with the checkout, pull the branch
+       # from the bare repository.
+
+       git push $NODE $BRANCH
+       ssh $NODE "git -C ${GITTARGET} checkout ${BRANCH} && git -C ${GITTARGET} pull"
+    fi
+}
+
+deploynode() {
+    local NODE=$1
+    local ROLES=$2
+
+    # Deploy a node.  This runs the provision script on the node, with
+    # the appropriate roles.
+
+    if [[ -z "$ROLES" ]] ; then
+       echo "No roles declared for '$NODE' in ${CONFIG_FILE}"
+       exit 1
+    fi
+
+    if [[ "$NODE" = localhost ]] ; then
+       sudo ./provision.sh --config ${CONFIG_FILE} --roles ${ROLES}
+    else
+       ssh $DEPLOY_USER@$NODE "cd ${GITTARGET} && sudo ./provision.sh --config ${CONFIG_FILE} --roles ${ROLES}"
+    fi
+}
+
+loadconfig() {
+    if [[ ! -s $CONFIG_FILE ]] ; then
+       echo "Must be run from initialized setup dir, maybe you need to 'initialize' first?"
+    fi
+    source ${CONFIG_FILE}
+    GITTARGET=arvados-deploy-config-${CLUSTER}
+}
+
+subcmd="${1:-}"
+if [[ -n "$subcmd" ]] ; then
+    shift
+fi
+case "$subcmd" in
+    initialize)
+       if [[ ! -f provision.sh ]] ; then
+           echo "Must be run from arvados/tools/salt-install"
+           exit 1
+       fi
+
+       set +u
+       SETUPDIR=$1
+       PARAMS=$2
+       SLS=$3
+       set -u
+
+       err=
+       if [[ -z "$PARAMS" || ! -f local.params.example.$PARAMS ]] ; then
+           echo "Not found: local.params.example.$PARAMS"
+           echo "Expected one of multiple_hosts, single_host_multiple_hostnames, single_host_single_hostname"
+           err=1
+       fi
+
+       if [[ -z "$SLS" || ! -d config_examples/$SLS ]] ; then
+           echo "Not found: config_examples/$SLS"
+           echo "Expected one of multi_host/aws, single_host/multiple_hostnames, single_host/single_hostname"
+           err=1
+       fi
+
+       if [[ -z "$SETUPDIR" || -z "$PARAMS" || -z "$SLS" ]]; then
+           echo "installer.sh <setup dir to initialize> <params template> <config template>"
+           err=1
+       fi
+
+       if [[ -n "$err" ]] ; then
+           exit 1
+       fi
+
+       echo "Initializing $SETUPDIR"
+       git init $SETUPDIR
+       cp -r *.sh tests $SETUPDIR
+
+       cp local.params.example.$PARAMS $SETUPDIR/${CONFIG_FILE}
+       cp -r config_examples/$SLS $SETUPDIR/${CONFIG_DIR}
+
+       cd $SETUPDIR
+       git add *.sh ${CONFIG_FILE} ${CONFIG_DIR} tests
+       git commit -m"initial commit"
+
+       echo "setup directory initialized, now go to $SETUPDIR, edit '${CONFIG_FILE}' and '${CONFIG_DIR}' as needed, then run 'installer.sh deploy'"
+       ;;
+    deploy)
+       set +u
+       NODE=$1
+       set -u
+
+       loadconfig
+
+       if grep -rni 'fixme' ${CONFIG_FILE} ${CONFIG_DIR} ; then
+           echo
+           echo "Some parameters still need to be updated.  Please fix them and then re-run deploy."
+           exit 1
+       fi
+
+       BRANCH=$(git branch --show-current)
+
+       set -x
+
+       git add -A
+       if ! git diff --cached --exit-code ; then
+           git commit -m"prepare for deploy"
+       fi
+
+       if [[ -z "$NODE" ]]; then
+           for NODE in "${!NODES[@]}"
+           do
+               # First, push the git repo to each node.  This also
+               # confirms that we have git and can log into each
+               # node.
+               sync $NODE $BRANCH
+           done
+
+           for NODE in "${!NODES[@]}"
+           do
+               # Do 'database' role first,
+               if [[ "${NODES[$NODE]}" =~ database ]] ; then
+                   deploynode $NODE ${NODES[$NODE]}
+                   unset NODES[$NODE]
+               fi
+           done
+
+           for NODE in "${!NODES[@]}"
+           do
+               # then  'api' or 'controller' roles
+               if [[ "${NODES[$NODE]}" =~ (api|controller) ]] ; then
+                   deploynode $NODE ${NODES[$NODE]}
+                   unset NODES[$NODE]
+               fi
+           done
+
+           for NODE in "${!NODES[@]}"
+           do
+               # Everything else (we removed the nodes that we
+               # already deployed from the list)
+               deploynode $NODE ${NODES[$NODE]}
+           done
+       else
+           # Just deploy the node that was supplied on the command line.
+           sync $NODE $BRANCH
+           deploynode $NODE "${NODES[$NODE]}"
+       fi
+
+       echo
+       echo "Completed deploy, run 'installer.sh diagnostics' to verify the install"
+
+       ;;
+    diagnostics)
+       loadconfig
+
+       set +u
+       declare LOCATION=$1
+       set -u
+
+       if ! which arvados-client ; then
+           echo "arvados-client not found, install 'arvados-client' package with 'apt-get' or 'yum'"
+           exit 1
+       fi
+
+       if [[ -z "$LOCATION" ]] ; then
+           echo "Need to provide '-internal-client' or '-external-client'"
+           echo
+           echo "-internal-client    You are running this on the same private network as the Arvados cluster (e.g. on one of the Arvados nodes)"
+           echo "-external-client    You are running this outside the private network of the Arvados cluster (e.g. your workstation)"
+           exit 1
+       fi
+
+       export ARVADOS_API_HOST="${CLUSTER}.${DOMAIN}"
+       export ARVADOS_API_TOKEN="$SYSTEM_ROOT_TOKEN"
+
+       arvados-client diagnostics $LOCATION
+       ;;
+    *)
+       echo "Arvados installer"
+       echo ""
+       echo "initialize   initialize the setup directory for configuration"
+       echo "deploy       deploy the configuration from the setup directory"
+       echo "diagnostics  check your install using diagnostics"
+       ;;
+esac
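For context, installer.sh is meant to be run first from a checkout of arvados/tools/salt-install and then from the setup directory it creates. A hypothetical end-to-end session (the cluster id, domain, and paths below are placeholders):

  # 1. Initialize a setup directory from the multi-host AWS templates.
  ./installer.sh initialize ~/setup-arvados-xarv1 multiple_hosts multi_host/aws

  # 2. Edit the copied configuration, replacing every FIXME placeholder.
  cd ~/setup-arvados-xarv1
  vi local.params local_config_dir/pillars/arvados.sls

  # 3. Push the config to every node listed in NODES and provision them:
  #    'database' first, then 'api'/'controller', then everything else.
  ./installer.sh deploy

  #    ...or re-deploy a single node after a change (its roles come from NODES).
  ./installer.sh deploy controller.xarv1.example.com

  # 4. Check the result; use -external-client instead when running from outside
  #    the cluster's private network.
  ./installer.sh diagnostics -internal-client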
diff --git a/tools/salt-install/local.params.example.multiple_hosts b/tools/salt-install/local.params.example.multiple_hosts
index 31a69e9840cdfabbee21609b662816b7df60c362..ade1ad46715fd0440b703aa63b5379ff4cf73ce1 100644 (file)
@@ -8,9 +8,26 @@
 # The Arvados cluster ID, needs to be 5 lowercase alphanumeric characters.
 CLUSTER="cluster_fixme_or_this_wont_work"
 
-# The domainname you want tou give to your cluster's hosts
+# The domain name you want to give to your cluster's hosts
+# the end result hostnames will be $SERVICE.$CLUSTER.$DOMAIN
 DOMAIN="domain_fixme_or_this_wont_work"
 
+# For multi-node installs, the ssh login for each node
+# must be root or able to sudo
+DEPLOY_USER=root
+
+# The mapping of nodes to roles
+# installer.sh will log in to each of these nodes and then provision
+# it for the specified roles.
+NODES=(
+  [controller.${CLUSTER}.${DOMAIN}]=api,controller,websocket,dispatcher,keepbalance
+  [keep0.${CLUSTER}.${DOMAIN}]=keepstore
+  [keep1.${CLUSTER}.${DOMAIN}]=keepstore
+  [keep.${CLUSTER}.${DOMAIN}]=keepproxy,keepweb
+  [workbench.${CLUSTER}.${DOMAIN}]=workbench,workbench2,webshell
+  [shell.${CLUSTER}.${DOMAIN}]=shell
+)
+
 # Host SSL port where you want to point your browser to access Arvados
 # Defaults to 443 for regular runs, and to 8443 when called in Vagrant.
 # You can point it to another port if desired
diff --git a/tools/salt-install/local.params.example.single_host_multiple_hostnames b/tools/salt-install/local.params.example.single_host_multiple_hostnames
index 2ce1556511bc7d57ddc5a58f53b5840de7353abf..20f334166e419ee806b608ac37fd3a27b10dca82 100644 (file)
@@ -11,6 +11,17 @@ CLUSTER="cluster_fixme_or_this_wont_work"
 # The domainname you want tou give to your cluster's hosts
 DOMAIN="domain_fixme_or_this_wont_work"
 
+# For multi-node installs, the ssh login for each node
+# must be root or able to sudo
+DEPLOY_USER=root
+
+# The mapping of nodes to roles
+# installer.sh will log in to each of these nodes and then provision
+# it for the specified roles.
+NODES=(
+  [localhost]=api,controller,websocket,dispatcher,keepbalance,keepstore,keepproxy,keepweb,workbench,workbench2,webshell
+)
+
 # External ports used by the Arvados services
 CONTROLLER_EXT_SSL_PORT=443
 KEEP_EXT_SSL_PORT=25101
diff --git a/tools/salt-install/local.params.example.single_host_single_hostname b/tools/salt-install/local.params.example.single_host_single_hostname
index 7add9868d9223f90c53d3ef209aa57d875a7328c..a68450094161accb43ef472def621e15b20b2d79 100644 (file)
@@ -11,6 +11,17 @@ CLUSTER="cluster_fixme_or_this_wont_work"
 # The domainname for your cluster's hosts
 DOMAIN="domain_fixme_or_this_wont_work"
 
+# For multi-node installs, the ssh login for each node
+# must be root or able to sudo
+DEPLOY_USER=root
+
+# The mapping of nodes to roles
+# installer.sh will log in to each of these nodes and then provision
+# it for the specified roles.
+NODES=(
+  [localhost]=api,controller,websocket,dispatcher,keepbalance,keepstore,keepproxy,keepweb,workbench,workbench2,webshell
+)
+
 # Set this value when installing a cluster in a single host with a single
 # hostname to access all the instances. HOSTNAME_EXT should be set to the
 # external hostname for the instance.
diff --git a/tools/salt-install/provision.sh b/tools/salt-install/provision.sh
index 3c5fb41e0ffc4cf02469e8ffb6d597aca419ea45..f4660be370990302cadbb9b3e11d8f2a44f5de10 100755 (executable)
@@ -237,6 +237,8 @@ T_DIR="/tmp/cluster_tests"
 
 arguments ${@}
 
+declare -A NODES
+
 if [ -s ${CONFIG_FILE} ]; then
   source ${CONFIG_FILE}
 else
@@ -255,7 +257,7 @@ if [ ! -d ${CONFIG_DIR} ]; then
   exit 1
 fi
 
-if grep -q 'fixme_or_this_wont_work' ${CONFIG_FILE} ; then
+if grep -rni 'fixme' ${CONFIG_FILE} ${CONFIG_DIR} ; then
   echo >&2 "The config file ${CONFIG_FILE} has some parameters that need to be modified."
   echo >&2 "Please, fix them and re-run the provision script."
   exit 1
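Two details of the provision.sh change are easy to miss: NODES must be pre-declared as an associative array because local.params now uses hostnames as array keys, and a plain indexed array would try to evaluate those keys arithmetically when the file is sourced; and the placeholder check is now a recursive, case-insensitive grep, so any leftover FIXME in local.params or the copied config templates blocks the run, not just the two cluster/domain sentinels. A short sketch of how the NODES mapping is consumed once sourced:

  # local.params maps each hostname (or localhost) to a comma-separated role list.
  declare -A NODES
  source local.params
  for host in "${!NODES[@]}"; do
      echo "$host -> ${NODES[$host]}"
  done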