# Copyright (C) The Arvados Authors. All rights reserved.
#
# SPDX-License-Identifier: CC-BY-SA-3.0

# Helps manage the configuration in a git repository, and then deploy
# nodes by pushing a copy of the git repository to each node and
# running the provision script to do the actual installation and
# configuration.
# The parameter file
declare CONFIG_FILE=local.params

# The salt template directory
declare CONFIG_DIR=local_config_dir
# The 5-character Arvados cluster id
# This will be populated by loadconfig()
declare CLUSTER
# The parent domain (not including the cluster id)
# This will be populated by loadconfig()
declare DOMAIN
# A bash associative array listing each node and mapping it to the
# roles that should be provisioned on that node.
# This will be populated by loadconfig()
declare -A NODES
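# For illustration, a hypothetical (not default) three-node layout
# might end up looking like:
#   NODES=(
#     [controller.xarv1.example.com]="database,controller,websocket"
#     [workbench.xarv1.example.com]="workbench,workbench2,webshell,keepproxy,keepweb,monitoring"
#     [shell.xarv1.example.com]="shell"
#   )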
# A bash associative array listing each role and mapping it to the
# nodes that should be provisioned with that role.
# This will be populated by loadconfig()
declare -A ROLE2NODES
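# The inverse of the hypothetical NODES example above, e.g.:
#   ROLE2NODES=(
#     [database]="controller.xarv1.example.com"
#     [controller]="controller.xarv1.example.com"
#     [shell]="shell.xarv1.example.com"
#   )
# When several nodes share a role they are joined with commas, as in
#   [controller]="controller1.xarv1.example.com,controller2.xarv1.example.com"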
# The ssh user we'll use
# This will be populated by loadconfig()
declare DEPLOY_USER
# The git repository that we'll push to on all the nodes
# This will be populated by loadconfig()
declare GITTARGET
# The public host used as an SSH jump host
# This will be populated by loadconfig()
declare USE_SSH_JUMPHOST
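# e.g. USE_SSH_JUMPHOST="jump.xarv1.example.com" (hypothetical) makes
# ssh_cmd() below add "-J ${DEPLOY_USER}@${USE_SSH_JUMPHOST}" when
# connecting to any node other than the jump host itself.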
# The temporary ssh config file used to disable environment variable
# forwarding, to avoid locale issues on Debian distros.
# This will be populated by loadconfig()
declare SSH_CONFFILE
  if [[ -n "$MISSING" ]]; then
    echo "Some tools are missing, please make sure you have the 'git' and 'iproute2' packages installed"
    exit 1
  fi
  local SSH=$(ssh_cmd "$NODE")
  # Delete the old repository
  $SSH $DEPLOY_USER@$NODE rm -rf ${GITTARGET}.git ${GITTARGET}
# Synchronizes the configuration by creating a git repository on
# each node, pushing our branch, and updating the checkout.
sync() {
  if [[ "$NODE" != localhost ]]; then
    SSH=$(ssh_cmd "$NODE")
    GIT="eval $(git_cmd $NODE)"
    # Add the node as a git remote, updating the URL if the remote
    # already exists.
    if ! $GIT remote add $NODE $DEPLOY_USER@$NODE:${GITTARGET}.git; then
      $GIT remote set-url $NODE $DEPLOY_USER@$NODE:${GITTARGET}.git
    fi
    # Initialize the git repository. We're actually going to make
    # two repositories here because git will complain if you try to
    # push to a repository with a checkout. So we're going to create
    # a "bare" repository and then clone a regular repository (with
    # a checkout) from it.
    $SSH $DEPLOY_USER@$NODE git init --bare --shared=0600 ${GITTARGET}.git
    if [[ "$BRANCH" == "HEAD" ]]; then
      # We're deploying from an individual commit instead of a branch;
      # this can happen when deploying from a Jenkins pipeline.
      $GIT push $NODE HEAD:refs/heads/HEAD
      $SSH $DEPLOY_USER@$NODE "umask 0077 && git clone -s ${GITTARGET}.git ${GITTARGET} && git -C ${GITTARGET} checkout remotes/origin/HEAD"
    else
      $GIT push $NODE $BRANCH
      $SSH $DEPLOY_USER@$NODE "umask 0077 && git clone -s ${GITTARGET}.git ${GITTARGET} && git -C ${GITTARGET} checkout ${BRANCH}"
    fi
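    # Net effect (sketch): the node ends up with a bare repository
    # (${GITTARGET}.git) that we can push to, plus a working checkout
    # (${GITTARGET}, created with umask 0077 so it stays private) of
    # the branch or commit being deployed.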
# Deploy a node. This runs the provision script on the node, with
# the appropriate roles.
deploynode() {
  local NODE=$1
  local ROLES=$2

  if [[ -z "$ROLES" ]]; then
    echo "No roles specified for $NODE, will deploy all roles"
  else
    ROLES="--roles ${ROLES}"
  fi
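  # e.g. a NODES entry of "workbench,webshell" (hypothetical) becomes
  # ROLES="--roles workbench,webshell" for the provision.sh calls below.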
  logfile=deploy-${NODE}-$(date -Iseconds).log
  SSH=$(ssh_cmd "$NODE")

  if [[ "$NODE" = localhost ]]; then
    SUDO=''
    if [[ $(whoami) != 'root' ]]; then
      SUDO=sudo
    fi
    $SUDO ./provision.sh --config ${CONFIG_FILE} ${ROLES} 2>&1 | tee $logfile
  else
    $SSH $DEPLOY_USER@$NODE "cd ${GITTARGET} && git log -n1 HEAD && DISABLED_CONTROLLER=\"$DISABLED_CONTROLLER\" sudo --preserve-env=DISABLED_CONTROLLER ./provision.sh --config ${CONFIG_FILE} ${ROLES}" 2>&1 | tee $logfile
  fi
}
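# A hypothetical invocation, as made by the deploy subcommand below:
#   deploynode workbench.xarv1.example.com "workbench,webshell" main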
checkcert() {
  local CERTNAME=$1
  local CERTPATH="${CONFIG_DIR}/certs/${CERTNAME}"
  if [[ ! -f "${CERTPATH}.crt" || ! -e "${CERTPATH}.key" ]]; then
    echo "Missing ${CERTPATH}.crt or ${CERTPATH}.key files"
    exit 1
  fi
}
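# e.g. "checkcert collections" (used for the keepweb role below) expects
# local_config_dir/certs/collections.crt and
# local_config_dir/certs/collections.key to exist.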
loadconfig() {
  if ! [[ -s ${CONFIG_FILE} && -s ${CONFIG_FILE}.secrets ]]; then
    echo "Must be run from initialized setup dir, maybe you need to 'initialize' first?"
    exit 1
  fi
  GITTARGET=arvados-deploy-config-${CLUSTER}
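  # e.g. with a (hypothetical) CLUSTER=xarv1, the deploy repository on
  # every node is named arvados-deploy-config-xarv1.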

  # Set up ssh so that it doesn't forward any environment variables.
  # This is to avoid getting "setlocale" errors on the first run,
  # depending on the distro being used to run the installer (like
  # Debian).
  SSH_CONFFILE=$(mktemp)
  echo "Include config SendEnv -*" >${SSH_CONFFILE}
}
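# Note: the main effect comes from "ssh -F": pointing ssh at this file
# makes it skip the system-wide /etc/ssh/ssh_config (where Debian sets
# "SendEnv LANG LC_*"), while "Include config" pulls the user's own
# ~/.ssh/config back in.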
ssh_cmd() {
  local NODE=$1
  if [[ -z "${USE_SSH_JUMPHOST}" || "${NODE}" == "${USE_SSH_JUMPHOST}" || "${NODE}" == "localhost" ]]; then
    echo "ssh -F ${SSH_CONFFILE}"
  else
    echo "ssh -F ${SSH_CONFFILE} -J ${DEPLOY_USER}@${USE_SSH_JUMPHOST}"
  fi
}
git_cmd() {
  local NODE=$1
  echo "GIT_SSH_COMMAND=\"$(ssh_cmd ${NODE})\" git"
}
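# Used via eval, e.g. (hypothetical node name; the mktemp path varies):
#   GIT="eval $(git_cmd node1.xarv1.example.com)"
#   $GIT push node1.xarv1.example.com main
# which effectively runs:
#   GIT_SSH_COMMAND="ssh -F /tmp/tmp.XXXXXX" git push node1.xarv1.example.com main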
declare subcmd="$1"
if [[ -n "$subcmd" ]]; then
  shift
fi
case "$subcmd" in
  initialize)
    if [[ ! -f provision.sh ]]; then
      echo "Must be run from arvados/tools/salt-install"
      exit 1
    fi
    if [[ -z "$PARAMS" || ! -f local.params.example.$PARAMS ]]; then
      echo "Not found: local.params.example.$PARAMS"
      echo "Expected one of multiple_hosts, single_host_multiple_hostnames, single_host_single_hostname"
      exit 1
    fi
    if [[ -z "$SLS" || ! -d config_examples/$SLS ]]; then
      echo "Not found: config_examples/$SLS"
      echo "Expected one of multi_host/aws, single_host/multiple_hostnames, single_host/single_hostname"
      exit 1
    fi
    if [[ -z "$SETUPDIR" || -z "$PARAMS" || -z "$SLS" ]]; then
      echo "installer.sh <setup dir to initialize> <params template> <config template>"
      exit 1
    fi

    if [[ -n "$err" ]]; then
      exit 1
    fi
    echo "Initializing $SETUPDIR"
    git init --shared=0600 $SETUPDIR
    cp -r *.sh tests $SETUPDIR

    cp local.params.example.$PARAMS $SETUPDIR/${CONFIG_FILE}
    cp local.params.secrets.example $SETUPDIR/${CONFIG_FILE}.secrets
    cp -r config_examples/$SLS $SETUPDIR/${CONFIG_DIR}
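    # At this point the setup dir contains (roughly): installer.sh and
    # provision.sh, the tests/ directory, local.params,
    # local.params.secrets, and local_config_dir/.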
    if [[ -n "$TERRAFORM" ]]; then
      mkdir $SETUPDIR/terraform
      cp -r $TERRAFORM/* $SETUPDIR/terraform/
    fi

    cd $SETUPDIR
    echo '*.log' >.gitignore
    echo '**/.terraform' >>.gitignore
    echo '**/.infracost' >>.gitignore
    if [[ -n "$TERRAFORM" ]]; then
      git add terraform
    fi

    git add *.sh ${CONFIG_FILE} ${CONFIG_FILE}.secrets ${CONFIG_DIR} tests .gitignore
    git commit -m "initial commit"
    echo "Setup directory $SETUPDIR initialized."
    if [[ -n "$TERRAFORM" ]]; then
      (cd $SETUPDIR/terraform/vpc && terraform init)
      (cd $SETUPDIR/terraform/data-storage && terraform init)
      (cd $SETUPDIR/terraform/services && terraform init)
      echo "Now go to $SETUPDIR, customize 'terraform/vpc/terraform.tfvars' as needed, then run 'installer.sh terraform'"
    else
      echo "Now go to $SETUPDIR, customize '${CONFIG_FILE}', '${CONFIG_FILE}.secrets' and '${CONFIG_DIR}' as needed, then run 'installer.sh deploy'"
    fi
    ;;

  terraform)
    logfile=terraform-$(date -Iseconds).log
    (cd terraform/vpc && terraform apply -auto-approve) 2>&1 | tee -a $logfile
    (cd terraform/data-storage && terraform apply -auto-approve) 2>&1 | tee -a $logfile
    (cd terraform/services && \
      terraform apply -auto-approve) 2>&1 | \
      grep -v letsencrypt_iam_secret_access_key | \
      grep -v database_password | \
      tee -a $logfile
    (cd terraform/services && \
      echo -n 'letsencrypt_iam_secret_access_key = ' && \
      terraform output letsencrypt_iam_secret_access_key && \
      echo -n 'database_password = ' && \
      terraform output database_password 2>/dev/null || echo '<not set>' && \
      echo -n 'loki_iam_secret_access_key = ' && \
      terraform output loki_iam_secret_access_key
    ) 2>&1 | tee -a $logfile
    ;;

  terraform-destroy)
    logfile=terraform-$(date -Iseconds).log
    (cd terraform/services && terraform destroy) 2>&1 | tee -a $logfile
    (cd terraform/data-storage && terraform destroy) 2>&1 | tee -a $logfile
    (cd terraform/vpc && terraform destroy) 2>&1 | tee -a $logfile
    ;;

  generate-tokens)
    for i in BLOB_SIGNING_KEY MANAGEMENT_TOKEN SYSTEM_ROOT_TOKEN ANONYMOUS_USER_TOKEN DATABASE_PASSWORD; do
      echo -n "${i}="
      tr -dc A-Za-z0-9 </dev/urandom | head -c 32
      echo
    done
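    # Sample output (these values are made up):
    #   BLOB_SIGNING_KEY=Vzuqb0a3lKTsTiERVLYeMJBBkUyEHUoq
    #   MANAGEMENT_TOKEN=qqyWgdjlbvnmZGGFhcSBxxlTGUi29tNK
    # and so on, one random 32-character value per variable.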
    if grep -rni 'fixme' ${CONFIG_FILE} ${CONFIG_FILE}.secrets ${CONFIG_DIR}; then
      echo
      echo "Some parameters still need to be updated. Please fix them and then re-run deploy."
      exit 1
    fi
    if [[ -z "${DATABASE_POSTGRESQL_VERSION:-}" ]]; then
      echo
      echo "Please configure DATABASE_POSTGRESQL_VERSION in local.params: it should match the version of the PostgreSQL service you're going to use."
      exit 1
    fi
    if [[ ${SSL_MODE} == "bring-your-own" ]]; then
      if [[ -n "${ROLE2NODES['balancer']:-}" ]]; then
        checkcert balancer
      fi
      if [[ -n "${ROLE2NODES['controller']:-}" ]]; then
        checkcert controller
      fi
      if [[ -n "${ROLE2NODES['keepproxy']:-}" ]]; then
        checkcert keepproxy
      fi
      if [[ -n "${ROLE2NODES['keepweb']:-}" ]]; then
        checkcert collections
        checkcert download
      fi
      if [[ -n "${ROLE2NODES['monitoring']:-}" ]]; then
        checkcert grafana
        checkcert prometheus
      fi
      if [[ -n "${ROLE2NODES['webshell']:-}" ]]; then
        checkcert webshell
      fi
      if [[ -n "${ROLE2NODES['websocket']:-}" ]]; then
        checkcert websocket
      fi
      if [[ -n "${ROLE2NODES['workbench']:-}" ]]; then
        checkcert workbench
      fi
      if [[ -n "${ROLE2NODES['workbench2']:-}" ]]; then
        checkcert workbench2
      fi
    fi
    BRANCH=$(git rev-parse --abbrev-ref HEAD)

    if ! git diff --cached --exit-code --quiet; then
      git commit -m "prepare for deploy"
    fi
    # Used for rolling updates to disable individual nodes at the
    # load balancer.
    export DISABLED_CONTROLLER=""
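    # e.g. DISABLED_CONTROLLER="controller2.xarv1.example.com"
    # (hypothetical) while that controller is being updated behind the
    # balancer.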
    if [[ -z "$NODE" ]]; then
      for NODE in "${!NODES[@]}"; do
        # First, just confirm we can ssh to each node.
        $(ssh_cmd "$NODE") $DEPLOY_USER@$NODE true
      done
      for NODE in "${!NODES[@]}"; do
        # Do the 'database' role first, since other services depend
        # on it.
        if [[ "${NODES[$NODE]}" =~ database ]]; then
          deploynode $NODE "${NODES[$NODE]}" $BRANCH
          unset NODES[$NODE]
        fi
      done
      BALANCER=${ROLE2NODES['balancer']:-}

      # Check if there are multiple controllers; they'll be
      # comma-separated.
      if [[ ${ROLE2NODES['controller']} =~ , ]]; then
        # If we have multiple controllers then there must be a load
        # balancer. We want to do a rolling update: take down each
        # node at the load balancer before updating it.
        for NODE in "${!NODES[@]}"; do
          if [[ "${NODES[$NODE]}" =~ controller ]]; then
            export DISABLED_CONTROLLER=$NODE

            # Tell the balancer that the node is disabled
            deploynode $BALANCER "${NODES[$BALANCER]}" $BRANCH

            # Now update the node itself
            deploynode $NODE "${NODES[$NODE]}" $BRANCH
            unset NODES[$NODE]
          fi
        done
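        # Sketch of the resulting order for (hypothetical) controllers
        # c1 and c2: disable c1 at balancer -> deploy c1 -> disable c2
        # at balancer -> deploy c2; the balancer deploy further below
        # then re-enables everything.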
      else
        # Only one controller; deploy it now if it wasn't already
        # handled above (e.g. when it shares a node with the database
        # role).
        NODE=${ROLE2NODES['controller']}
        if [[ -n "${NODES[$NODE]:-}" ]]; then
          deploynode $NODE "${NODES[$NODE]}" $BRANCH
          unset NODES[$NODE]
        fi
      fi
      if [[ -n "$BALANCER" ]]; then
        # Deploy the balancer. In the rolling update case, this
        # re-enables all the controllers at the balancer.
        export DISABLED_CONTROLLER=""
        deploynode $BALANCER "${NODES[$BALANCER]}" $BRANCH
        unset NODES[$BALANCER]
      fi
      for NODE in "${!NODES[@]}"; do
        # Everything else (the nodes we already deployed have been
        # removed from the list).
        deploynode $NODE "${NODES[$NODE]}" $BRANCH
      done
    else
      # Just deploy the node that was supplied on the command line.
      deploynode $NODE "${NODES[$NODE]}" $BRANCH
    fi
    echo "Completed deploy, run 'installer.sh diagnostics' to verify the install"
    if ! which arvados-client; then
      echo "arvados-client not found; install the 'arvados-client' package with 'apt-get' or 'yum'"
      exit 1
    fi
    if [[ -z "$LOCATION" ]]; then
      echo "Need to provide '-internal-client' or '-external-client'"
      echo
      echo "-internal-client    You are running this on the same private network as the Arvados cluster (e.g. on one of the Arvados nodes)"
      echo "-external-client    You are running this outside the private network of the Arvados cluster (e.g. your workstation)"
      exit 1
    fi
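    # i.e. run "./installer.sh diagnostics -external-client" from your
    # workstation, or "./installer.sh diagnostics -internal-client" from
    # a host inside the cluster's private network.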
    export ARVADOS_API_HOST="${DOMAIN}:${CONTROLLER_EXT_SSL_PORT}"
    export ARVADOS_API_TOKEN="$SYSTEM_ROOT_TOKEN"

    arvados-client diagnostics $LOCATION
    ;;
  diagnostics-internal)
    loadconfig
    if [[ -z "${ROLE2NODES['shell']:-}" ]]; then
      echo "No node with 'shell' role was found; cannot run diagnostics-internal"
      exit 1
    fi
    # Pick the first shell node to run the diagnostics on
    declare TESTNODE=$(echo ${ROLE2NODES['shell']} | cut -d, -f1)
    declare SSH=$(ssh_cmd "$TESTNODE")

    echo "Running diagnostics on $TESTNODE..."
    $SSH $DEPLOY_USER@$TESTNODE bash <<EOF
export ARVADOS_API_HOST="${DOMAIN}:${CONTROLLER_EXT_SSL_PORT}"
export ARVADOS_API_TOKEN="$SYSTEM_ROOT_TOKEN"
sudo --preserve-env=ARVADOS_API_HOST,ARVADOS_API_TOKEN arvados-client diagnostics -internal-client
EOF
    ;;

  *)
    echo "Arvados installer"
    echo ""
    echo "initialize            initialize the setup directory for configuration"
    echo "terraform             create cloud resources using terraform"
    echo "terraform-destroy     destroy cloud resources created by terraform"
    echo "generate-tokens       generate random values for tokens"
    echo "deploy                deploy the configuration from the setup directory"
    echo "diagnostics           check your install by running diagnostics locally"
    echo "diagnostics-internal  check your install by running diagnostics on a shell node"
    ;;
esac