X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/4ca7d6b73acea26410b1d7fdc6f299a09468316e..2efd88cf64130bb0ebb0549d30053b85baaae2f9:/tools/salt-install/provision.sh diff --git a/tools/salt-install/provision.sh b/tools/salt-install/provision.sh index fd88d97a94..f4660be370 100755 --- a/tools/salt-install/provision.sh +++ b/tools/salt-install/provision.sh @@ -12,6 +12,17 @@ set -o pipefail +_exit_handler() { + local rc="$?" + trap - EXIT + if [ "$rc" -ne 0 ]; then + echo "Error occurred ($rc) while running $0 at line $1 : $BASH_COMMAND" + fi + exit "$rc" +} + +trap '_exit_handler $LINENO' EXIT ERR + # capture the directory that the script is running from SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" @@ -29,6 +40,7 @@ usage() { echo >&2 " controller" echo >&2 " dispatcher" echo >&2 " keepproxy" + echo >&2 " keepbalance" echo >&2 " keepstore" echo >&2 " keepweb" echo >&2 " shell" @@ -40,13 +52,13 @@ usage() { echo >&2 " -h, --help Display this help and exit" echo >&2 " --dump-config Dumps the pillars and states to a directory" echo >&2 " This parameter does not perform any installation at all. It's" - echo >&2 " intended to give you a parsed sot of configuration files so" + echo >&2 " intended to give you a parsed set of configuration files so" echo >&2 " you can inspect them or use them in you Saltstack infrastructure." echo >&2 " It" echo >&2 " - parses the pillar and states templates," echo >&2 " - downloads the helper formulas with their desired versions," echo >&2 " - prepares the 'top.sls' files both for pillars and states" - echo >&2 " for the selected role/s" + echo >&2 " for the selected role(s)" echo >&2 " - writes the resulting files into " echo >&2 " -v, --vagrant Run in vagrant and use the /vagrant shared dir" echo >&2 " --development Run in dev mode, using snakeoil certs" @@ -107,7 +119,7 @@ arguments() { for i in ${2//,/ } do # Verify the role exists - if [[ ! "database,api,controller,keepstore,websocket,keepweb,workbench2,webshell,keepproxy,shell,workbench,dispatcher" == *"$i"* ]]; then + if [[ ! "database,api,controller,keepstore,websocket,keepweb,workbench2,webshell,keepbalance,keepproxy,shell,workbench,dispatcher" == *"$i"* ]]; then echo "The role '${i}' is not a valid role" usage exit 1 @@ -164,6 +176,8 @@ LOG_LEVEL="info" CONTROLLER_EXT_SSL_PORT=443 TESTS_DIR="tests" +NGINX_INSTALL_SOURCE="install_from_repo" + CLUSTER="" DOMAIN="" @@ -186,7 +200,7 @@ WORKBENCH2_EXT_SSL_PORT=3001 SSL_MODE="self-signed" USE_LETSENCRYPT_ROUTE53="no" -CUSTOM_CERTS_DIR="${SCRIPT_DIR}/certs" +CUSTOM_CERTS_DIR="${SCRIPT_DIR}/local_config_dir/certs" ## These are ARVADOS-related parameters # For a stable release, change RELEASE "production" and VERSION to the @@ -204,9 +218,9 @@ VERSION="latest" # BRANCH="main" # Other formula versions we depend on -POSTGRES_TAG="v0.43.0" -NGINX_TAG="v2.8.0" -DOCKER_TAG="v2.0.7" +POSTGRES_TAG="v0.44.0" +NGINX_TAG="v2.8.1" +DOCKER_TAG="v2.4.2" LOCALE_TAG="v0.3.4" LETSENCRYPT_TAG="v2.1.0" @@ -223,6 +237,8 @@ T_DIR="/tmp/cluster_tests" arguments ${@} +declare -A NODES + if [ -s ${CONFIG_FILE} ]; then source ${CONFIG_FILE} else @@ -241,14 +257,14 @@ if [ ! -d ${CONFIG_DIR} ]; then exit 1 fi -if grep -q 'fixme_or_this_wont_work' ${CONFIG_FILE} ; then +if grep -rni 'fixme' ${CONFIG_FILE} ${CONFIG_DIR} ; then echo >&2 "The config file ${CONFIG_FILE} has some parameters that need to be modified." echo >&2 "Please, fix them and re-run the provision script." exit 1 fi if ! grep -qE '^[[:alnum:]]{5}$' <<<${CLUSTER} ; then - echo >&2 "ERROR: must be exactly 5 alphanumeric characters long" + echo >&2 "ERROR: must be exactly 5 lowercase alphanumeric characters long" echo >&2 "Fix the cluster name in the 'local.params' file and re-run the provision script" exit 1 fi @@ -257,6 +273,14 @@ fi if [ ! -z "${HOSTNAME_EXT}" ] ; then # We need to add some extra control vars to manage a single certificate vs. multiple USE_SINGLE_HOSTNAME="yes" + # Make sure that the value configured as IP_INT is a real IP on the system. + # If we don't error out early here when there is a mismatch, the formula will + # fail with hard to interpret nginx errors later on. + ip addr list |grep "${IP_INT}/" >/dev/null + if [[ $? -ne 0 ]]; then + echo "Unable to find the IP_INT address '${IP_INT}' on the system, please correct the value in local.params. Exiting..." + exit 1 + fi else USE_SINGLE_HOSTNAME="no" # We set this variable, anyway, so sed lines do not fail and we don't need to add more @@ -275,12 +299,15 @@ else case ${OS_ID} in "centos") echo "WARNING! Disabling SELinux, see https://dev.arvados.org/issues/18019" - sed -i 's/SELINUX=enforcing/SELINUX=permissive' /etc/sysconfig/selinux + sed -i 's/SELINUX=enforcing/SELINUX=permissive/g' /etc/sysconfig/selinux setenforce permissive yum install -y curl git jq ;; "debian"|"ubuntu") - DEBIAN_FRONTEND=noninteractive apt update + # Wait 2 minutes for any apt locks to clear + # This option is supported from apt 1.9.1 and ignored in older apt versions. + # Cf. https://blog.sinjakli.co.uk/2021/10/25/waiting-for-apt-locks-without-the-hacky-bash-scripts/ + DEBIAN_FRONTEND=noninteractive apt -o DPkg::Lock::Timeout=120 update DEBIAN_FRONTEND=noninteractive apt install -y curl git jq ;; esac @@ -339,9 +366,9 @@ echo "...arvados" git clone --quiet https://git.arvados.org/arvados-formula.git ${F_DIR}/arvados # If we want to try a specific branch of the formula -if [ "x${BRANCH}" != "x" ]; then +if [ "x${BRANCH}" != "x" -a $(git rev-parse --abbrev-ref HEAD) != "${BRANCH}" ]; then ( cd ${F_DIR}/arvados && git checkout --quiet -t origin/"${BRANCH}" -b "${BRANCH}" ) -elif [ "x${ARVADOS_TAG}" != "x" ]; then +elif [ "x${ARVADOS_TAG}" != "x" -a $(git rev-parse --abbrev-ref HEAD) != "${ARVADOS_TAG}" ]; then ( cd ${F_DIR}/arvados && git checkout --quiet tags/"${ARVADOS_TAG}" -b "${ARVADOS_TAG}" ) fi @@ -406,30 +433,34 @@ for f in $(ls "${SOURCE_PILLARS_DIR}"/*); do "${f}" > "${P_DIR}"/$(basename "${f}") done -if [ "x${TEST}" = "xyes" ] && [ ! -d "${SOURCE_TESTS_DIR}" ]; then - echo "You requested to run tests, but ${SOURCE_TESTS_DIR} does not exist or is not a directory. Exiting." - exit 1 -fi -mkdir -p ${T_DIR} -# Replace cluster and domain name in the test files -for f in $(ls "${SOURCE_TESTS_DIR}"/*); do - FILTERS="s#__CLUSTER__#${CLUSTER}#g; - s#__CONTROLLER_EXT_SSL_PORT__#${CONTROLLER_EXT_SSL_PORT}#g; - s#__DOMAIN__#${DOMAIN}#g; - s#__IP_INT__#${IP_INT}#g; - s#__INITIAL_USER_EMAIL__#${INITIAL_USER_EMAIL}#g; - s#__INITIAL_USER_PASSWORD__#${INITIAL_USER_PASSWORD}#g - s#__INITIAL_USER__#${INITIAL_USER}#g; - s#__DATABASE_PASSWORD__#${DATABASE_PASSWORD}#g; - s#__SYSTEM_ROOT_TOKEN__#${SYSTEM_ROOT_TOKEN}#g" - if [ "$USE_SINGLE_HOSTNAME" = "yes" ]; then - FILTERS="s#__CLUSTER__.__DOMAIN__#${HOSTNAME_EXT}#g; - $FILTERS" +if [ ! -d "${SOURCE_TESTS_DIR}" ]; then + echo "WARNING: The tests directory was not copied to \"${SOURCE_TESTS_DIR}\"." + if [ "x${TEST}" = "xyes" ]; then + echo "WARNING: Disabling tests for this installation." fi - sed "$FILTERS" \ - "${f}" > ${T_DIR}/$(basename "${f}") -done -chmod 755 ${T_DIR}/run-test.sh + TEST="no" +else + mkdir -p ${T_DIR} + # Replace cluster and domain name in the test files + for f in $(ls "${SOURCE_TESTS_DIR}"/*); do + FILTERS="s#__CLUSTER__#${CLUSTER}#g; + s#__CONTROLLER_EXT_SSL_PORT__#${CONTROLLER_EXT_SSL_PORT}#g; + s#__DOMAIN__#${DOMAIN}#g; + s#__IP_INT__#${IP_INT}#g; + s#__INITIAL_USER_EMAIL__#${INITIAL_USER_EMAIL}#g; + s#__INITIAL_USER_PASSWORD__#${INITIAL_USER_PASSWORD}#g + s#__INITIAL_USER__#${INITIAL_USER}#g; + s#__DATABASE_PASSWORD__#${DATABASE_PASSWORD}#g; + s#__SYSTEM_ROOT_TOKEN__#${SYSTEM_ROOT_TOKEN}#g" + if [ "$USE_SINGLE_HOSTNAME" = "yes" ]; then + FILTERS="s#__CLUSTER__.__DOMAIN__#${HOSTNAME_EXT}#g; + $FILTERS" + fi + sed "$FILTERS" \ + "${f}" > ${T_DIR}/$(basename "${f}") + done + chmod 755 ${T_DIR}/run-test.sh +fi # Replace helper state files that differ from the formula's examples if [ -d "${SOURCE_STATES_DIR}" ]; then @@ -465,6 +496,7 @@ if [ -d "${SOURCE_STATES_DIR}" ]; then s#__WORKBENCH2_INT_IP__#${WORKBENCH2_INT_IP}#g; s#__DATABASE_INT_IP__#${DATABASE_INT_IP}#g; s#__WEBSHELL_EXT_SSL_PORT__#${WEBSHELL_EXT_SSL_PORT}#g; + s#__SHELL_INT_IP__#${SHELL_INT_IP}#g; s#__WEBSOCKET_EXT_SSL_PORT__#${WEBSOCKET_EXT_SSL_PORT}#g; s#__WORKBENCH1_EXT_SSL_PORT__#${WORKBENCH1_EXT_SSL_PORT}#g; s#__WORKBENCH2_EXT_SSL_PORT__#${WORKBENCH2_EXT_SSL_PORT}#g; @@ -502,7 +534,7 @@ if [ -d "${F_DIR}"/extra/extra ]; then # Same when using self-signed certificates. SKIP_SNAKE_OIL="dont_add_snakeoil_certs" fi - for f in $(ls "${F_DIR}"/extra/extra/*.sls | grep -v ${SKIP_SNAKE_OIL}); do + for f in $(ls "${F_DIR}"/extra/extra/*.sls | egrep -v "${SKIP_SNAKE_OIL}|shell_"); do echo " - extra.$(basename ${f} | sed 's/.sls$//g')" >> ${S_DIR}/top.sls done # Use byo or self-signed certificates @@ -522,19 +554,23 @@ if [ -z "${ROLES}" ]; then fi grep -q "letsencrypt" ${S_DIR}/top.sls || echo " - letsencrypt" >> ${S_DIR}/top.sls else - # Use custom certs, as both bring-your-own and self-signed are copied using this state - # Copy certs to formula extra/files - # In dev mode, the files will be created and put in the destination directory by the - # snakeoil_certs.sls state file mkdir -p /srv/salt/certs - cp -rv ${CUSTOM_CERTS_DIR}/* /srv/salt/certs/ - # We add the custom_certs state - grep -q "custom_certs" ${S_DIR}/top.sls || echo " - extra.custom_certs" >> ${S_DIR}/top.sls + if [ "${SSL_MODE}" = "bring-your-own" ]; then + # Copy certs to formula extra/files + cp -rv ${CUSTOM_CERTS_DIR}/* /srv/salt/certs/ + # We add the custom_certs state + grep -q "custom_certs" ${S_DIR}/top.sls || echo " - extra.custom_certs" >> ${S_DIR}/top.sls + fi + # In self-signed mode, the certificate files will be created and put in the + # destination directory by the snakeoil_certs.sls state file fi echo " - postgres" >> ${S_DIR}/top.sls echo " - docker.software" >> ${S_DIR}/top.sls echo " - arvados" >> ${S_DIR}/top.sls + echo " - extra.shell_sudo_passwordless" >> ${S_DIR}/top.sls + echo " - extra.shell_cron_add_login_sync" >> ${S_DIR}/top.sls + echo " - extra.passenger_rvm" >> ${S_DIR}/top.sls # Pillars echo " - docker" >> ${P_DIR}/top.sls @@ -549,22 +585,34 @@ if [ -z "${ROLES}" ]; then echo " - nginx_workbench_configuration" >> ${P_DIR}/top.sls echo " - postgresql" >> ${P_DIR}/top.sls + # We need to tweak the Nginx's pillar depending whether we want plan nginx or nginx+passenger + NGINX_INSTALL_SOURCE="install_from_phusionpassenger" + sed -i "s/__NGINX_INSTALL_SOURCE__/${NGINX_INSTALL_SOURCE}/g" ${P_DIR}/nginx_passenger.sls + if [ "${SSL_MODE}" = "lets-encrypt" ]; then if [ "${USE_LETSENCRYPT_ROUTE53}" = "yes" ]; then grep -q "aws_credentials" ${P_DIR}/top.sls || echo " - aws_credentials" >> ${P_DIR}/top.sls fi grep -q "letsencrypt" ${P_DIR}/top.sls || echo " - letsencrypt" >> ${P_DIR}/top.sls - # As the pillar differ whether we use LE or custom certs, we need to do a final edition on them - for c in controller websocket workbench workbench2 webshell keepweb keepproxy; do + hosts=("controller" "websocket" "workbench" "workbench2" "webshell" "keepproxy") + if [ ${USE_SINGLE_HOSTNAME} = "no" ]; then + hosts+=("download" "collections") + else + hosts+=("keepweb") + fi + + for c in "${hosts[@]}"; do + # Are we in a single-host-single-hostname env? if [ "${USE_SINGLE_HOSTNAME}" = "yes" ]; then # Are we in a single-host-single-hostname env? CERT_NAME=${HOSTNAME_EXT} else - # We are in a single-host-multiple-hostnames env + # We are in a multiple-hostnames env CERT_NAME=${c}.${CLUSTER}.${DOMAIN} fi + # As the pillar differs whether we use LE or custom certs, we need to do a final edition on them sed -i "s/__CERT_REQUIRES__/cmd: create-initial-cert-${CERT_NAME}*/g; s#__CERT_PEM__#/etc/letsencrypt/live/${CERT_NAME}/fullchain.pem#g; s#__CERT_KEY__#/etc/letsencrypt/live/${CERT_NAME}/privkey.pem#g" \ @@ -587,7 +635,7 @@ if [ -z "${ROLES}" ]; then CERT_NAME=${c} fi - if [[ "${SSL_MODE}" = "bring-your-own" || "${SSL_MODE}" == "self-signed" ]]; then + if [[ "$SSL_MODE" == "bring-your-own" ]]; then copy_custom_cert ${CUSTOM_CERTS_DIR} ${CERT_NAME} fi @@ -626,10 +674,14 @@ else # States # FIXME: https://dev.arvados.org/issues/17352 grep -q "postgres.client" ${S_DIR}/top.sls || echo " - postgres.client" >> ${S_DIR}/top.sls - grep -q "nginx.passenger" ${S_DIR}/top.sls || echo " - nginx.passenger" >> ${S_DIR}/top.sls + if grep -q " - nginx.*$" ${S_DIR}/top.sls; then + sed -i s/"^ - nginx.*$"/" - nginx.passenger"/g ${S_DIR}/top.sls + else + echo " - nginx.passenger" >> ${S_DIR}/top.sls + fi + echo " - extra.passenger_rvm" >> ${S_DIR}/top.sls ### If we don't install and run LE before arvados-api-server, it fails and breaks everything ### after it. So we add this here as we are, after all, sharing the host for api and controller - # Currently, only available on config_examples/multi_host/aws if [ "${SSL_MODE}" = "lets-encrypt" ]; then if [ "${USE_LETSENCRYPT_ROUTE53}" = "yes" ]; then grep -q "aws_credentials" ${S_DIR}/top.sls || echo " - aws_credentials" >> ${S_DIR}/top.sls @@ -648,11 +700,24 @@ else grep -q "postgresql" ${P_DIR}/top.sls || echo " - postgresql" >> ${P_DIR}/top.sls grep -q "nginx_passenger" ${P_DIR}/top.sls || echo " - nginx_passenger" >> ${P_DIR}/top.sls grep -q "nginx_${R}_configuration" ${P_DIR}/top.sls || echo " - nginx_${R}_configuration" >> ${P_DIR}/top.sls + + # We need to tweak the Nginx's pillar depending whether we want plain nginx or nginx+passenger + NGINX_INSTALL_SOURCE="install_from_phusionpassenger" + sed -i "s/__NGINX_INSTALL_SOURCE__/${NGINX_INSTALL_SOURCE}/g" ${P_DIR}/nginx_passenger.sls ;; "controller" | "websocket" | "workbench" | "workbench2" | "webshell" | "keepweb" | "keepproxy") + NGINX_INSTALL_SOURCE="install_from_repo" # States - grep -q "nginx.passenger" ${S_DIR}/top.sls || echo " - nginx.passenger" >> ${S_DIR}/top.sls - # Currently, only available on config_examples/multi_host/aws + if [ "${R}" = "workbench" ]; then + NGINX_INSTALL_SOURCE="install_from_phusionpassenger" + if grep -q " - nginx$" ${S_DIR}/top.sls; then + sed -i s/"^ - nginx.*$"/" - nginx.passenger"/g ${S_DIR}/top.sls + else + echo " - nginx.passenger" >> ${S_DIR}/top.sls + fi + else + grep -q "nginx" ${S_DIR}/top.sls || echo " - nginx" >> ${S_DIR}/top.sls + fi if [ "${SSL_MODE}" = "lets-encrypt" ]; then if [ "x${USE_LETSENCRYPT_ROUTE53}" = "xyes" ]; then grep -q "aws_credentials" ${S_DIR}/top.sls || echo " - aws_credentials" >> ${S_DIR}/top.sls @@ -684,7 +749,6 @@ else grep -q "nginx_collections_configuration" ${P_DIR}/top.sls || echo " - nginx_collections_configuration" >> ${P_DIR}/top.sls fi - # Currently, only available on config_examples/multi_host/aws if [ "${SSL_MODE}" = "lets-encrypt" ]; then if [ "${USE_LETSENCRYPT_ROUTE53}" = "yes" ]; then grep -q "aws_credentials" ${P_DIR}/top.sls || echo " - aws_credentials" >> ${P_DIR}/top.sls @@ -723,24 +787,22 @@ else s#__CERT_PEM__#/etc/nginx/ssl/arvados-${R}.pem#g; s#__CERT_KEY__#/etc/nginx/ssl/arvados-${R}.key#g" \ ${P_DIR}/nginx_${R}_configuration.sls - grep -q ${R} ${P_DIR}/extra_custom_certs.sls || echo " - ${R}" >> ${P_DIR}/extra_custom_certs.sls + grep -q ${R}$ ${P_DIR}/extra_custom_certs.sls || echo " - ${R}" >> ${P_DIR}/extra_custom_certs.sls fi fi + # We need to tweak the Nginx's pillar depending whether we want plain nginx or nginx+passenger + sed -i "s/__NGINX_INSTALL_SOURCE__/${NGINX_INSTALL_SOURCE}/g" ${P_DIR}/nginx_passenger.sls ;; "shell") # States + echo " - extra.shell_sudo_passwordless" >> ${S_DIR}/top.sls + echo " - extra.shell_cron_add_login_sync" >> ${S_DIR}/top.sls grep -q "docker" ${S_DIR}/top.sls || echo " - docker.software" >> ${S_DIR}/top.sls grep -q "arvados.${R}" ${S_DIR}/top.sls || echo " - arvados.${R}" >> ${S_DIR}/top.sls # Pillars grep -q "docker" ${P_DIR}/top.sls || echo " - docker" >> ${P_DIR}/top.sls ;; - "dispatcher") - # States - grep -q "arvados.${R}" ${S_DIR}/top.sls || echo " - arvados.${R}" >> ${S_DIR}/top.sls - # Pillars - # ATM, no specific pillar needed - ;; - "keepstore") + "dispatcher" | "keepbalance" | "keepstore") # States grep -q "arvados.${R}" ${S_DIR}/top.sls || echo " - arvados.${R}" >> ${S_DIR}/top.sls # Pillars @@ -759,45 +821,28 @@ if [ "${DUMP_CONFIG}" = "yes" ]; then exit 0 fi -# FIXME! #16992 Temporary fix for psql call in arvados-api-server -if [ -e /root/.psqlrc ]; then - if ! ( grep 'pset pager off' /root/.psqlrc ); then - RESTORE_PSQL="yes" - cp /root/.psqlrc /root/.psqlrc.provision.backup - fi -else - DELETE_PSQL="yes" -fi - -echo '\pset pager off' >> /root/.psqlrc -# END FIXME! #16992 Temporary fix for psql call in arvados-api-server - # Now run the install salt-call --local state.apply -l ${LOG_LEVEL} -# FIXME! #16992 Temporary fix for psql call in arvados-api-server -if [ "x${DELETE_PSQL}" = "xyes" ]; then - echo "Removing .psql file" - rm /root/.psqlrc -fi - -if [ "x${RESTORE_PSQL}" = "xyes" ]; then - echo "Restoring .psql file" - mv -v /root/.psqlrc.provision.backup /root/.psqlrc +# Finally, make sure that /etc/hosts is not overwritten on reboot +if [ -d /etc/cloud/cloud.cfg.d ]; then + # TODO: will this work on CentOS? + sed -i 's/^manage_etc_hosts: true/#manage_etc_hosts: true/g' /etc/cloud/cloud.cfg.d/* fi -# END FIXME! #16992 Temporary fix for psql call in arvados-api-server # Leave a copy of the Arvados CA so the user can copy it where it's required if [ "$DEV_MODE" = "yes" ]; then - echo "Copying the Arvados CA certificate to the installer dir, so you can import it" + ARVADOS_SNAKEOIL_CA_DEST_FILE="${SCRIPT_DIR}/${CLUSTER}.${DOMAIN}-arvados-snakeoil-ca.pem" + # If running in a vagrant VM, also add default user to docker group if [ "x${VAGRANT}" = "xyes" ]; then - cp /etc/ssl/certs/arvados-snakeoil-ca.pem /vagrant/${CLUSTER}.${DOMAIN}-arvados-snakeoil-ca.pem - echo "Adding the vagrant user to the docker group" usermod -a -G docker vagrant - else - cp /etc/ssl/certs/arvados-snakeoil-ca.pem ${SCRIPT_DIR}/${CLUSTER}.${DOMAIN}-arvados-snakeoil-ca.pem + ARVADOS_SNAKEOIL_CA_DEST_FILE="/vagrant/${CLUSTER}.${DOMAIN}-arvados-snakeoil-ca.pem" + fi + if [ -f /etc/ssl/certs/arvados-snakeoil-ca.pem ]; then + echo "Copying the Arvados CA certificate to the installer dir, so you can import it" + cp /etc/ssl/certs/arvados-snakeoil-ca.pem ${ARVADOS_SNAKEOIL_CA_DEST_FILE} fi fi