#!/bin/bash
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+set -e
+
+DEBUG=0
+UNMANAGED=0
+SSH_PORT=22
+PUPPET_CONCURRENCY=5
+
+read -d] -r SCOPES <<EOF
+--scopes
+'["GET /arvados/v1/virtual_machines",\n
+"GET /arvados/v1/keep_services",\n
+"GET /arvados/v1/keep_services/",\n
+"GET /arvados/v1/groups",\n
+"GET /arvados/v1/groups/",\n
+"GET /arvados/v1/links",\n
+"GET /arvados/v1/collections",\n
+"POST /arvados/v1/collections",\n
+"POST /arvados/v1/links",\n
+"GET /arvados/v1/users/current",\n
+"POST /arvados/v1/users/current",\n
+"GET /arvados/v1/jobs",\n
+"POST /arvados/v1/jobs",\n
+"GET /arvados/v1/pipeline_instances",\n
+"POST /arvados/v1/pipeline_instances",\n
+"PUT /arvados/v1/pipeline_instances/",\n
+"GET /arvados/v1/collections/",\n
+"POST /arvados/v1/collections/",\n
+"GET /arvados/v1/logs"]'
+EOF
+
+# Print the command line help and the token scopes this script needs.
+# Outputs: writes everything to stderr and nothing to stdout.
+function usage {
+  # The whole group is redirected once instead of redirecting every line.
+  {
+    printf '%s\n' \
+      "" \
+      "usage: $0 [options] <identifier>" \
+      "" \
+      " <identifier> Arvados cluster name" \
+      "" \
+      "$0 options:" \
+      " -n, --node <node> Single machine to deploy, use fqdn, optional" \
+      " -p, --port <ssh port> SSH port to use (default 22)" \
+      " -c, --concurrency <max> Maximum concurrency for puppet runs (default 5)" \
+      " -u, --unmanaged Deploy to unmanaged node/cluster" \
+      " -d, --debug Enable debug output" \
+      " -h, --help Display this help and exit" \
+      "" \
+      "Note: this script requires an arvados token created with these permissions:" \
+      ' arv api_client_authorization create_system_auth \'
+    # SCOPES stays unquoted on purpose: word splitting folds its real line
+    # breaks into spaces, and echo -e then expands the literal \n markers.
+    echo -e $SCOPES"]'"
+    echo
+  } >&2
+}
+
+
+# NOTE: This requires GNU getopt (part of the util-linux package on Debian-based distros).
+# getopt runs as the condition of an if statement: with set -e active, a
+# failing plain assignment would end the script before the hint below could
+# be printed.
+if ! TEMP=$(getopt -o hudp:c:n: \
+    --long help,unmanaged,debug,port:,concurrency:,node: \
+    -n "$0" -- "$@"); then
+  echo "Use -h for help"
+  exit 1
+fi
+
+# Note the quotes around `$TEMP': they are essential!
+eval set -- "$TEMP"
+
+# Consume the normalized options; the -- marker ends option parsing and
+# leaves the positional arguments in place.
+while [ $# -ge 1 ]; do
+  case "$1" in
+    -n | --node)
+      NODE="$2"
+      shift 2
+      ;;
+    -p | --port)
+      SSH_PORT="$2"
+      shift 2
+      ;;
+    -c | --concurrency)
+      PUPPET_CONCURRENCY="$2"
+      shift 2
+      ;;
+    -u | --unmanaged)
+      UNMANAGED=1
+      shift
+      ;;
+    -d | --debug)
+      DEBUG=1
+      set -x
+      shift
+      ;;
+    --)
+      shift
+      break
+      ;;
+    *)
+      # Also reached for -h and --help, which have no branch of their own.
+      usage
+      exit 1
+      ;;
+  esac
+done
+
+# The first remaining argument is the cluster identifier and is mandatory.
IDENTIFIER=$1
-DEPLOY_REPO=$2
if [[ "$IDENTIFIER" == '' ]]; then
- echo "Syntax: $0 <identifier> <deploy_repo_name>"
- exit 1
-fi
-
-if [[ "$DEPLOY_REPO" == '' ]]; then
- echo "Syntax: $0 <identifier> <deploy_repo_name>"
+ usage
exit 1
fi
COLUMNS=80
+PUPPET_AGENT='
+if [[ -e "/usr/local/rvm/scripts/rvm" ]]; then
+ source /usr/local/rvm/scripts/rvm
+ __rvm_unload
+fi
+now() { date +%s; }
+let endtime="$(now) + 600"
+while [ "$endtime" -gt "$(now)" ]; do
+ puppet agent --test --detailed-exitcodes
+ agent_exitcode=$?
+ if [ 0 = "$agent_exitcode" ] || [ 2 = "$agent_exitcode" ]; then
+ break
+ else
+ sleep 10s
+ fi
+done
+exit ${agent_exitcode:-99}
+'
+
+APT_AGENT='
+now() { date +%s; }
+let endtime="$(now) + 600"
+while [ "$endtime" -gt "$(now)" ]; do
+ apt-get update
+ DEBIAN_FRONTEND=noninteractive apt-get -y upgrade
+ apt_exitcode=$?
+ if [ 0 = "$apt_exitcode" ]; then
+ break
+ else
+ sleep 10s
+ fi
+done
+exit ${apt_exitcode:-99}
+'
+
+# Print one log line: the current date and time followed by the message in $1.
title () {
- printf "\n%*s\n\n" $(((${#title}+$COLUMNS)/2)) "********** $1 **********"
+  # The message is passed as an argument, never as the format string, so a
+  # percent sign or a backslash inside it is printed as is.
+  printf '%s %s\n' "$(date +'%Y-%m-%d %H:%M:%S')" "$1"
}
-# We only install capistrano in dev mode
-export RAILS_ENV=development
-
-source /etc/profile.d/rvm.sh
-echo $WORKSPACE
-
-# Weirdly, jenkins/rvm ties itself in a knot.
-rvm use default
-
-# Just say what version of ruby we're running
-ruby --version
-
-function ensure_symlink() {
- if [[ ! -L $WORKSPACE/$1 ]]; then
- ln -s $WORKSPACE/$DEPLOY_REPO/$1 $WORKSPACE/$1
+# Update one node: run_apt when UNMANAGED is non-zero, run_puppet otherwise.
+# All arguments are forwarded unchanged; both helpers read the node name
+# from the first one. "$@" is quoted so that the arguments are not split or
+# glob-expanded a second time.
+function update_node() {
+  if [[ $UNMANAGED -ne 0 ]]; then
+    run_apt "$@"
+  else
+    run_puppet "$@"
fi
}
-# Check out/update the $DEPLOY_REPO repository
-if [[ ! -d $DEPLOY_REPO ]]; then
- mkdir $DEPLOY_REPO
- git clone git@git.curoverse.com:$DEPLOY_REPO.git
-else
- cd $DEPLOY_REPO
- git pull
-fi
-
-# Make sure the necessary symlinks are in place
-cd "$WORKSPACE"
-ensure_symlink "apps/workbench/Capfile.workbench.$IDENTIFIER"
-ensure_symlink "apps/workbench/config/deploy.common.rb"
-ensure_symlink "apps/workbench/config/deploy.curoverse.rb"
-ensure_symlink "apps/workbench/config/deploy.workbench.$IDENTIFIER.rb"
-
-ensure_symlink "services/api/Capfile.$IDENTIFIER"
-ensure_symlink "services/api/config/deploy.common.rb"
-ensure_symlink "services/api/config/deploy.$IDENTIFIER.rb"
-
-# Deploy API server
-title "Deploying API server"
-cd "$WORKSPACE"
-cd services/api
-
-bundle install --deployment
-
-# make sure we do not print the output of config:check
-sed -i'' -e "s/RAILS_ENV=production #{rake} config:check/RAILS_ENV=production QUIET=true #{rake} config:check/" $WORKSPACE/$DEPLOY_REPO/services/api/config/deploy.common.rb
-
-bundle exec cap deploy -f Capfile.$IDENTIFIER
-
-ECODE=$?
-
-# restore unaltered deploy.common.rb
-cd $WORKSPACE/$DEPLOY_REPO
-git checkout services/api/config/deploy.common.rb
-
-if [[ "$ECODE" != "0" ]]; then
- title "!!!!!! DEPLOYING API SERVER FAILED !!!!!!"
- EXITCODE=$(($EXITCODE + $ECODE))
- exit $EXITCODE
-fi
-
-title "Deploying API server complete"
-
-# Install updated debian packages
-title "Deploying updated arvados debian packages"
-
-ssh -p2222 root@$IDENTIFIER.arvadosapi.com -C "apt-get update && apt-get -qqy install arvados-src python-arvados-fuse python-arvados-python-client"
-
-if [[ "$ECODE" != "0" ]]; then
- title "!!!!!! DEPLOYING DEBIAN PACKAGES FAILED !!!!!!"
- EXITCODE=$(($EXITCODE + $ECODE))
- exit $EXITCODE
-fi
-
-title "Deploying updated arvados debian packages complete"
-
-# Install updated arvados gems
-title "Deploying updated arvados gems"
-
-ssh -p2222 root@$IDENTIFIER.arvadosapi.com -C "/usr/local/rvm/bin/rvm default do gem install arvados arvados-cli && /usr/local/rvm/bin/rvm default do gem clean arvados arvados-cli"
-
-if [[ "$ECODE" != "0" ]]; then
- title "!!!!!! DEPLOYING ARVADOS GEMS FAILED !!!!!!"
- EXITCODE=$(($EXITCODE + $ECODE))
- exit $EXITCODE
-fi
-
-title "Deploying updated arvados gems complete"
+# Upgrade the packages of one unmanaged node by running APT_AGENT over ssh.
+# Arguments: $1 - host name to connect to as root
+# Globals:   reads DEBUG, SSH_PORT and APT_AGENT; sets node, TMP_FILE, RESULT
+#            and ECODE. ECODE ends up 0 on success or when ssh exits 255,
+#            and holds the ssh exit code otherwise.
+# Outputs:   progress lines on stdout; the prefixed remote output is shown
+#            live when DEBUG is not 0, and only after a failure otherwise.
+function run_apt() {
+  node=$1
-# Deploy Workbench
-title "Deploying workbench"
-cd "$WORKSPACE"
-cd apps/workbench
-bundle install --deployment
-
-# make sure we do not print the output of config:check
-sed -i'' -e "s/RAILS_ENV=production #{rake} config:check/RAILS_ENV=production QUIET=true #{rake} config:check/" $WORKSPACE/$DEPLOY_REPO/apps/workbench/config/deploy.common.rb
-
-bundle exec cap deploy -f Capfile.workbench.$IDENTIFIER
+  title "Running apt on $node"
+  # Random delay between 0.0 and 5.32 seconds, presumably to stagger the
+  # connections of parallel runs.
+  sleep $(( RANDOM / 6000 )).$(( RANDOM / 1000 ))
+  TMP_FILE=$(mktemp)
+  if [[ "$DEBUG" != "0" ]]; then
+    ssh -t -p$SSH_PORT -o "StrictHostKeyChecking no" -o "ConnectTimeout 5" root@$node -C bash -c "'$APT_AGENT'" 2>&1 | sed 's/^/['"${node}"'] /' | tee "$TMP_FILE"
+  else
+    ssh -t -p$SSH_PORT -o "StrictHostKeyChecking no" -o "ConnectTimeout 5" root@$node -C bash -c "'$APT_AGENT'" 2>&1 | sed 's/^/['"${node}"'] /' > "$TMP_FILE" 2>&1
+  fi
-ECODE=$?
+  # Exit status of ssh itself, the first stage of the pipeline above.
+  ECODE=${PIPESTATUS[0]}
+  RESULT=$(cat "$TMP_FILE")
-# restore unaltered deploy.common.rb
-cd $WORKSPACE/$DEPLOY_REPO
-git checkout apps/workbench/config/deploy.common.rb
+  if [[ "$ECODE" != "255" && "$ECODE" != "0" ]]; then
+    # Ssh exits 255 if the connection timed out. Just ignore that.
+    echo "ERROR running apt on $node: exit code $ECODE"
+    if [[ "$DEBUG" == "0" ]]; then
+      title "Command output follows:"
+      # Quoted so the captured output keeps its line breaks and is not
+      # glob-expanded.
+      echo "$RESULT"
+    fi
+  fi
+  if [[ "$ECODE" == "255" ]]; then
+    title "Connection timed out"
+    ECODE=0
+  fi
-if [[ "$ECODE" != "0" ]]; then
- title "!!!!!! DEPLOYING WORKBENCH FAILED !!!!!!"
- EXITCODE=$(($EXITCODE + $ECODE))
- exit $EXITCODE
-fi
+  if [[ "$ECODE" == "0" ]]; then
+    rm -f "$TMP_FILE"
+    title "$node successfully updated"
+  else
+    # The log file is kept so the failure can be inspected.
+    title "$node exit code: $ECODE see $TMP_FILE for details"
+  fi
+}
-title "Deploying workbench complete"
+# Run the puppet agent on one managed node by running PUPPET_AGENT over ssh.
+# Arguments: $1 - host name to connect to as root
+# Globals:   reads DEBUG, SSH_PORT and PUPPET_AGENT; sets node, TMP_FILE,
+#            RESULT and ECODE. ECODE ends up 0 when ssh exits 0, 2 or 255,
+#            and holds the ssh exit code otherwise.
+# Outputs:   progress lines on stdout; the prefixed remote output is shown
+#            live when DEBUG is not 0, and only after a failure otherwise.
+function run_puppet() {
+  node=$1
-# Update compute node(s)
-title "Update compute node(s)"
+  title "Running puppet on $node"
+  # Random delay between 0.0 and 5.32 seconds, presumably to stagger the
+  # connections of parallel runs.
+  sleep $(( RANDOM / 6000 )).$(( RANDOM / 1000 ))
+  TMP_FILE=$(mktemp)
+  if [[ "$DEBUG" != "0" ]]; then
+    ssh -t -p$SSH_PORT -o "StrictHostKeyChecking no" -o "ConnectTimeout 5" root@$node -C bash -c "'$PUPPET_AGENT'" 2>&1 | sed 's/^/['"${node}"'] /' | tee "$TMP_FILE"
+  else
+    ssh -t -p$SSH_PORT -o "StrictHostKeyChecking no" -o "ConnectTimeout 5" root@$node -C bash -c "'$PUPPET_AGENT'" 2>&1 | sed 's/^/['"${node}"'] /' > "$TMP_FILE" 2>&1
+  fi
-# Get list of nodes that are up
-COMPRESSED_NODE_LIST=`ssh -p2222 root@$IDENTIFIER -C "sinfo --long -p crypto -r -o "%N" -h"`
+  # Exit status of ssh itself, the first stage of the pipeline above.
+  ECODE=${PIPESTATUS[0]}
+  RESULT=$(cat "$TMP_FILE")
+
+  if [[ "$ECODE" != "255" && ! ("$RESULT" =~ 'already in progress') && "$ECODE" != "2" && "$ECODE" != "0" ]]; then
+    # Ssh exits 255 if the connection timed out. Just ignore that.
+    # Puppet exits 2 if there are changes. For real!
+    # Puppet prints 'Notice: Run of Puppet configuration client already in progress' if another puppet process
+    # was already running
+    echo "ERROR running puppet on $node: exit code $ECODE"
+    if [[ "$DEBUG" == "0" ]]; then
+      title "Command output follows:"
+      # Quoted so the captured output keeps its line breaks and is not
+      # glob-expanded.
+      echo "$RESULT"
+    fi
+  fi
+  if [[ "$ECODE" == "255" ]]; then
+    title "Connection timed out"
+    ECODE=0
+  fi
+  if [[ "$ECODE" == "2" ]]; then
+    ECODE=0
+  fi
-if [[ "$COMPRESSED_NODE_LIST" != '' ]]; then
- COMPUTE_NODES=`ssh -p2222 root@$IDENTIFIER -C "scontrol show hostname $COMPRESSED_NODE_LIST"`
+  if [[ "$ECODE" == "0" ]]; then
+    rm -f "$TMP_FILE"
+    echo "$node successfully updated"
+  else
+    # The log file is kept so the failure can be inspected.
+    echo "$node exit code: $ECODE see $TMP_FILE for details"
+  fi
+}
+# Update the API server first, unless --node selected a different machine.
+if [[ "$NODE" == "" ]] || [[ "$NODE" == "$IDENTIFIER.arvadosapi.com" ]]; then
+ title "Updating API server"
SUM_ECODE=0
- for node in $COMPUTE_NODES; do
- echo "Updating $node.$IDENTIFIER"
- ssh -p2222 -o "StrictHostKeyChecking no" -o "ConnectTimeout 5" root@$node.$IDENTIFIER -C "/usr/bin/puppet agent -t"
- ECODE=$?
- if [[ "$ECODE" != "255" && "$ECODE" != "2" && "$ECODE" != "0" ]]; then
- # 255 -> connection timed out. Just ignore that, it's possible the compute node was being shut down.
- # Puppet exits '2' if there are changes. For real!
- SUM_ECODE=$(($SUM_ECODE + $ECODE))
- echo "ERROR updating $node.$IDENTIFIER: exit code $ECODE"
- fi
- done
+ # run_apt and run_puppet leave their result in the global ECODE. The second
+ # argument passed here is forwarded to them, but they only read the first.
+ update_node $IDENTIFIER.arvadosapi.com ECODE
+ SUM_ECODE=$(($SUM_ECODE + $ECODE))
if [[ "$SUM_ECODE" != "0" ]]; then
- title "!!!!!! Update compute node(s) FAILED !!!!!!"
+ title "ERROR: Updating API server FAILED"
EXITCODE=$(($EXITCODE + $SUM_ECODE))
+ # A failed API server update stops the whole deployment.
+ exit $EXITCODE
fi
fi
-title "Update compute node(s) complete"
-
-title "Update shell"
+# When only the API server was requested there is nothing left to do.
+if [[ "$NODE" == "$IDENTIFIER.arvadosapi.com" ]]; then
+ # we are done
+ exit 0
+fi
-ssh -p2222 root@shell.$IDENTIFIER -C "/usr/bin/puppet agent -t"
+# Take the API host and token from the per-cluster configuration file when
+# it exists; otherwise whatever is already set in the environment is used.
+title "Loading ARVADOS_API_HOST and ARVADOS_API_TOKEN"
+if [[ -f "$HOME/.config/arvados/$IDENTIFIER.arvadosapi.com.conf" ]]; then
+ . $HOME/.config/arvados/$IDENTIFIER.arvadosapi.com.conf
+else
+ title "WARNING: $HOME/.config/arvados/$IDENTIFIER.arvadosapi.com.conf not found."
+fi
+# Both values are required by the arv calls below.
+if [[ "$ARVADOS_API_HOST" == "" ]] || [[ "$ARVADOS_API_TOKEN" == "" ]]; then
+ title "ERROR: ARVADOS_API_HOST and/or ARVADOS_API_TOKEN environment variables are not set."
+ exit 1
+fi
-ECODE=$?
+# start_nodes collects the machines to update: workbench always, manage and
+# switchyard for every cluster except ce8i5 and tordo, then the host names
+# reported by the virtual_machine and keep_service listings.
+title "Gathering list of nodes"
+start_nodes="workbench"
+if [[ "$IDENTIFIER" != "ce8i5" ]] && [[ "$IDENTIFIER" != "tordo" ]]; then
+ start_nodes="$start_nodes manage switchyard"
+fi
+# The variables are passed explicitly on each command line, so the calls work
+# even if the sourced file did not export them.
+SHELL_NODES=`ARVADOS_API_HOST=$ARVADOS_API_HOST ARVADOS_API_TOKEN=$ARVADOS_API_TOKEN arv virtual_machine list |jq .items[].hostname -r`
+KEEP_NODES=`ARVADOS_API_HOST=$ARVADOS_API_HOST ARVADOS_API_TOKEN=$ARVADOS_API_TOKEN arv keep_service list |jq .items[].service_host -r`
+# Host used further down to query the package version and run arv-keepdocker.
+SHELL_NODE_FOR_ARV_KEEPDOCKER="shell.$IDENTIFIER"
+start_nodes="$start_nodes $SHELL_NODES $KEEP_NODES"
-if [[ "$ECODE" == "2" ]]; then
- # Puppet exits '2' if there are changes. For real!
+# Build the list of nodes to update, honouring --node when it was given.
+nodes=""
+for n in $start_nodes; do
ECODE=0
+  if [[ $n =~ $ARVADOS_API_HOST$ ]]; then
+    # e.g. keep.qr1hi.arvadosapi.com
+    node=$n
+  else
+    # e.g. shell
+    node=$n.$ARVADOS_API_HOST
+  fi
+  if [[ "$NODE" == "" ]] || [[ "$NODE" == "$node" ]]; then
+    # e.g. keep.qr1hi
+    nodes="$nodes ${node%.arvadosapi.com}"
+  fi
+done
+
+if [[ "$nodes" != "" ]]; then
+  ## at this point nodes should be a space separated list containing
+  ## manage.qr1hi, keep.qr1hi, etc
+  ## that should be defined in the .ssh/config file
+  title "Updating in parallel:$nodes"
+  # xargs starts a fresh bash for every node, so each function and variable
+  # used by the update functions has to be exported. DEBUG belongs to that
+  # set: without it the child shells see an empty DEBUG, which the update
+  # functions treat as debug mode.
+  export -f update_node
+  export -f run_puppet
+  export -f run_apt
+  export -f title
+  export SSH_PORT
+  export PUPPET_AGENT
+  export APT_AGENT
+  export UNMANAGED
+  export DEBUG
+  echo $nodes|xargs -d " " -n 1 -P $PUPPET_CONCURRENCY -I {} bash -c "update_node {}"
fi
-if [[ "$ECODE" != "0" ]]; then
- title "!!!!!! Update shell FAILED !!!!!!"
- EXITCODE=$(($EXITCODE + $ECODE))
-fi
+# Make sure the arvados/jobs Docker image that matches the candidate version
+# of python3-arvados-cwl-runner is listed by arv-keepdocker, installing it
+# with arv-keepdocker --pull otherwise. Skipped when --node was given.
+#
+# Every command whose failure must be reported runs as an if condition: with
+# set -e active a plain failing command would end the script before its
+# error message could be printed.
+if [[ "$NODE" == "" ]]; then
+  title "Locating Arvados Standard Docker images project"
-title "Update shell complete"
+  JSON_FILTER="[[\"name\", \"=\", \"Arvados Standard Docker Images\"], [\"owner_uuid\", \"=\", \"$IDENTIFIER-tpzed-000000000000000\"]]"
+  DOCKER_IMAGES_PROJECT=`ARVADOS_API_HOST=$ARVADOS_API_HOST ARVADOS_API_TOKEN=$ARVADOS_API_TOKEN arv --format=uuid group list --filters="$JSON_FILTER"`
-title "Update keep0"
+  if [[ "$DOCKER_IMAGES_PROJECT" == "" ]]; then
+    title "Warning: Arvados Standard Docker Images project not found. Creating it."
-ssh -p2222 root@keep0.$IDENTIFIER -C "/usr/bin/puppet agent -t"
+    # Create the project, then the link that refers to it; the second
+    # command only runs when the first one succeeded.
+    if ! { DOCKER_IMAGES_PROJECT=`ARVADOS_API_HOST=$ARVADOS_API_HOST ARVADOS_API_TOKEN=$ARVADOS_API_TOKEN arv --format=uuid group create --group "{\"owner_uuid\":\"$IDENTIFIER-tpzed-000000000000000\", \"name\":\"Arvados Standard Docker Images\", \"group_class\":\"project\"}"` &&
+           ARVADOS_API_HOST=$ARVADOS_API_HOST ARVADOS_API_TOKEN=$ARVADOS_API_TOKEN arv link create --link "{\"tail_uuid\":\"$IDENTIFIER-j7d0g-fffffffffffffff\", \"head_uuid\":\"$DOCKER_IMAGES_PROJECT\", \"link_class\":\"permission\", \"name\":\"can_read\" }"; }; then
+      title "ERROR: could not create standard Docker images project Please create it, cf. http://doc.arvados.org/install/create-standard-objects.html"
+      exit 1
+    fi
+  fi
-ECODE=$?
+  title "Found Arvados Standard Docker Images project with uuid $DOCKER_IMAGES_PROJECT"
-if [[ "$ECODE" == "2" ]]; then
- # Puppet exits '2' if there are changes. For real!
- ECODE=0
-fi
+  if [[ "$SHELL_NODE_FOR_ARV_KEEPDOCKER" == "" ]]; then
+    # No shell node configured: query the version over ssh but run
+    # arv-keepdocker locally.
+    # grep exits non-zero when no Candidate line is found; the fallback
+    # leaves VERSION empty so that the check below reports the problem.
+    VERSION=`ssh -t -p$SSH_PORT -o "StrictHostKeyChecking no" -o "ConnectTimeout 125" -o "LogLevel QUIET" $IDENTIFIER apt-cache policy python3-arvados-cwl-runner|grep Candidate` || true
+    VERSION=`echo $VERSION|cut -f2 -d' '|cut -f1 -d-`
-if [[ "$ECODE" != "0" ]]; then
- title "!!!!!! Update keep0 FAILED !!!!!!"
- EXITCODE=$(($EXITCODE + $ECODE))
-fi
+    if [[ "$VERSION" == "" ]]; then
+      title "ERROR: unable to get python3-arvados-cwl-runner version"
+      exit 1
+    else
+      title "Found version for python3-arvados-cwl-runner: $VERSION"
+    fi
-title "Update keep0 complete"
+    CLEAN_VERSION=`echo $VERSION | sed s/~dev/.dev/g | sed s/~rc/rc/g`
+    if ARVADOS_API_HOST=$ARVADOS_API_HOST ARVADOS_API_TOKEN=$ARVADOS_API_TOKEN arv-keepdocker |grep -qP "arvados/jobs +$CLEAN_VERSION "; then
+      title "Found arvados/jobs Docker image version $CLEAN_VERSION, nothing to upload"
+    else
+      title "Installing arvados/jobs Docker image version $CLEAN_VERSION"
+      if ! ARVADOS_API_HOST=$ARVADOS_API_HOST ARVADOS_API_TOKEN=$ARVADOS_API_TOKEN arv-keepdocker --pull --project-uuid=$DOCKER_IMAGES_PROJECT arvados/jobs $CLEAN_VERSION; then
+        title "'arv-keepdocker' failed..."
+        exit 1
+      fi
+    fi
+  else
+    # Same steps, with arv-keepdocker executed on the shell node over ssh.
+    VERSION=`ssh -t -p$SSH_PORT -o "StrictHostKeyChecking no" -o "ConnectTimeout 125" -o "LogLevel QUIET" $SHELL_NODE_FOR_ARV_KEEPDOCKER apt-cache policy python3-arvados-cwl-runner|grep Candidate` || true
+    VERSION=`echo $VERSION|cut -f2 -d' '|cut -f1 -d-`
+
+    if [[ "$VERSION" == "" ]]; then
+      title "ERROR: unable to get python3-arvados-cwl-runner version"
+      exit 1
+    else
+      title "Found version for python3-arvados-cwl-runner: $VERSION"
+    fi
-exit $EXITCODE
+    CLEAN_VERSION=`echo $VERSION | sed s/~dev/.dev/g | sed s/~rc/rc/g`
+    if ssh -t -p$SSH_PORT -o "StrictHostKeyChecking no" -o "ConnectTimeout 125" -o "LogLevel QUIET" $SHELL_NODE_FOR_ARV_KEEPDOCKER "ARVADOS_API_HOST=$ARVADOS_API_HOST ARVADOS_API_TOKEN=$ARVADOS_API_TOKEN arv-keepdocker" |grep -qP "arvados/jobs +$CLEAN_VERSION "; then
+      title "Found arvados/jobs Docker image version $CLEAN_VERSION, nothing to upload"
+    else
+      title "Installing arvados/jobs Docker image version $CLEAN_VERSION"
+      if ! ssh -t -p$SSH_PORT -o "StrictHostKeyChecking no" -o "ConnectTimeout 125" -o "LogLevel QUIET" $SHELL_NODE_FOR_ARV_KEEPDOCKER "ARVADOS_API_HOST=$ARVADOS_API_HOST ARVADOS_API_TOKEN=$ARVADOS_API_TOKEN arv-keepdocker --pull --project-uuid=$DOCKER_IMAGES_PROJECT arvados/jobs $CLEAN_VERSION"; then
+        title "'arv-keepdocker' failed..."
+        exit 1
+      fi
+    fi
+  fi
+fi