X-Git-Url: https://git.arvados.org/arvados-dev.git/blobdiff_plain/ad3df9216ff20898eb96353e5d6c52b271a87375..a1d78727dda75af922ad07e8ef180f855b2f41ee:/jenkins/run-deploy.sh diff --git a/jenkins/run-deploy.sh b/jenkins/run-deploy.sh index a7ffac7..72cc744 100755 --- a/jenkins/run-deploy.sh +++ b/jenkins/run-deploy.sh @@ -1,15 +1,96 @@ #!/bin/bash +# Copyright (C) The Arvados Authors. All rights reserved. +# +# SPDX-License-Identifier: AGPL-3.0 + +DEBUG=0 +SSH_PORT=22 +PUPPET_CONCURRENCY=5 + +read -d] -r SCOPES <&2 + echo >&2 "usage: $0 [options] " + echo >&2 + echo >&2 " Arvados cluster name" + echo >&2 + echo >&2 "$0 options:" + echo >&2 " -n, --node Single machine to deploy, use fqdn, optional" + echo >&2 " -p, --port SSH port to use (default 22)" + echo >&2 " -c, --concurrency Maximum concurrency for puppet runs (default 5)" + echo >&2 " -d, --debug Enable debug output" + echo >&2 " -h, --help Display this help and exit" + echo >&2 + echo >&2 "Note: this script requires an arvados token created with these permissions:" + echo >&2 ' arv api_client_authorization create_system_auth \' + echo -e $SCOPES"]'" >&2 + echo >&2 +} + + +# NOTE: This requires GNU getopt (part of the util-linux package on Debian-based distros). +TEMP=`getopt -o hdp:c:n: \ + --long help,debug,port:,concurrency:,node: \ + -n "$0" -- "$@"` + +if [ $? != 0 ] ; then echo "Use -h for help"; exit 1 ; fi +# Note the quotes around `$TEMP': they are essential! +eval set -- "$TEMP" + +while [ $# -ge 1 ] +do + case $1 in + -n | --node) + NODE="$2"; shift 2 + ;; + -p | --port) + SSH_PORT="$2"; shift 2 + ;; + -c | --concurrency) + PUPPET_CONCURRENCY="$2"; shift 2 + ;; + -d | --debug) + DEBUG=1 + shift + ;; + --) + shift + break + ;; + *) + usage + exit 1 + ;; + esac +done + IDENTIFIER=$1 -DEPLOY_REPO=$2 if [[ "$IDENTIFIER" == '' ]]; then - echo "Syntax: $0 " - exit 1 -fi - -if [[ "$DEPLOY_REPO" == '' ]]; then - echo "Syntax: $0 " + usage exit 1 fi @@ -17,178 +98,211 @@ EXITCODE=0 COLUMNS=80 +PUPPET_AGENT=' +now() { date +%s; } +let endtime="$(now) + 600" +while [ "$endtime" -gt "$(now)" ]; do + puppet agent --test --detailed-exitcodes + agent_exitcode=$? + if [ 0 = "$agent_exitcode" ] || [ 2 = "$agent_exitcode" ]; then + break + else + sleep 10s + fi +done +exit ${agent_exitcode:-99} +' + title () { - printf "\n%*s\n\n" $(((${#title}+$COLUMNS)/2)) "********** $1 **********" + date=`date +'%Y-%m-%d %H:%M:%S'` + printf "$date $1\n" } -# We only install capistrano in dev mode -export RAILS_ENV=development +function run_puppet() { + node=$1 -source /etc/profile.d/rvm.sh -echo $WORKSPACE - -# Weirdly, jenkins/rvm ties itself in a knot. -rvm use default + title "Running puppet on $node" + sleep $[ $RANDOM / 6000 ].$[ $RANDOM / 1000 ] + TMP_FILE=`mktemp` + if [[ "$DEBUG" != "0" ]]; then + ssh -t -p$SSH_PORT -o "StrictHostKeyChecking no" -o "ConnectTimeout 5" root@$node -C bash -c "'$PUPPET_AGENT'" | tee $TMP_FILE + else + ssh -t -p$SSH_PORT -o "StrictHostKeyChecking no" -o "ConnectTimeout 5" root@$node -C bash -c "'$PUPPET_AGENT'" > $TMP_FILE 2>&1 + fi -# Just say what version of ruby we're running -ruby --version + ECODE=${PIPESTATUS[0]} + RESULT=$(cat $TMP_FILE) + + if [[ "$ECODE" != "255" && ! ("$RESULT" =~ 'already in progress') && "$ECODE" != "2" && "$ECODE" != "0" ]]; then + # Ssh exits 255 if the connection timed out. Just ignore that. + # Puppet exits 2 if there are changes. For real! + # Puppet prints 'Notice: Run of Puppet configuration client already in progress' if another puppet process + # was already running + echo "ERROR running puppet on $node: exit code $ECODE" + if [[ "$DEBUG" == "0" ]]; then + title "Command output follows:" + echo $RESULT + fi + fi + if [[ "$ECODE" == "255" ]]; then + title "Connection timed out" + ECODE=0 + fi + if [[ "$ECODE" == "2" ]]; then + ECODE=0 + fi -function ensure_symlink() { - if [[ ! -L $WORKSPACE/$1 ]]; then - ln -s $WORKSPACE/$DEPLOY_REPO/$1 $WORKSPACE/$1 + if [[ "$ECODE" == "0" ]]; then + rm -f $TMP_FILE + echo $node successfully updates + else + echo $node exit code: $ECODE see $TMP_FILE for details fi } -# Check out/update the $DEPLOY_REPO repository -if [[ ! -d $DEPLOY_REPO ]]; then - mkdir $DEPLOY_REPO - git clone git@git.curoverse.com:$DEPLOY_REPO.git -else - cd $DEPLOY_REPO - git pull -fi - -# Make sure the necessary symlinks are in place -cd "$WORKSPACE" -ensure_symlink "apps/workbench/Capfile.workbench.$IDENTIFIER" -ensure_symlink "apps/workbench/config/deploy.common.rb" -ensure_symlink "apps/workbench/config/deploy.curoverse.rb" -ensure_symlink "apps/workbench/config/deploy.workbench.$IDENTIFIER.rb" - -ensure_symlink "services/api/Capfile.$IDENTIFIER" -ensure_symlink "services/api/config/deploy.common.rb" -ensure_symlink "services/api/config/deploy.$IDENTIFIER.rb" - -# Deploy API server -title "Deploying API server" -cd "$WORKSPACE" -cd services/api - -bundle install --deployment - -# make sure we do not print the output of config:check -sed -i'' -e "s/RAILS_ENV=production #{rake} config:check/RAILS_ENV=production QUIET=true #{rake} config:check/" $WORKSPACE/$DEPLOY_REPO/services/api/config/deploy.common.rb - -bundle exec cap deploy -f Capfile.$IDENTIFIER - -ECODE=$? - -# restore unaltered deploy.common.rb -cd $WORKSPACE/$DEPLOY_REPO -git checkout services/api/config/deploy.common.rb - -if [[ "$ECODE" != "0" ]]; then - title "!!!!!! DEPLOYING API SERVER FAILED !!!!!!" - EXITCODE=$(($EXITCODE + $ECODE)) - exit $EXITCODE -fi - -title "Deploying API server complete" - -# Install updated debian packages -title "Deploying updated arvados debian packages" - -ssh -p2222 root@$IDENTIFIER.arvadosapi.com -C "apt-get update && apt-get -qqy install arvados-src python-arvados-fuse python-arvados-python-client" - -if [[ "$ECODE" != "0" ]]; then - title "!!!!!! DEPLOYING DEBIAN PACKAGES FAILED !!!!!!" - EXITCODE=$(($EXITCODE + $ECODE)) - exit $EXITCODE -fi - -title "Deploying updated arvados debian packages complete" +function run_command() { + node=$1 + return_var=$2 + command=$3 + + title "Running '$command' on $node" + TMP_FILE=`mktemp` + if [[ "$DEBUG" != "0" ]]; then + ssh -t -p$SSH_PORT -o "StrictHostKeyChecking no" -o "ConnectTimeout 125" root@$node -C "$command" | tee $TMP_FILE + else + ssh -t -p$SSH_PORT -o "StrictHostKeyChecking no" -o "ConnectTimeout 125" root@$node -C "$command" > $TMP_FILE 2>&1 + fi -# Install updated arvados gems -title "Deploying updated arvados gems" + ECODE=$? + RESULT=$(cat $TMP_FILE) + + if [[ "$ECODE" != "255" && "$ECODE" != "0" ]]; then + # Ssh exists 255 if the connection timed out. Just ignore that, it's possible that this node is + # a shell node that is down. + title "ERROR running command on $node: exit code $ECODE" + if [[ "$DEBUG" == "0" ]]; then + title "Command output follows:" + echo $RESULT + fi + fi + if [[ "$ECODE" == "255" ]]; then + title "Connection timed out" + ECODE=0 + fi + rm -f $TMP_FILE + eval "$return_var=$ECODE" +} -ssh -p2222 root@$IDENTIFIER.arvadosapi.com -C "/usr/local/rvm/bin/rvm default do gem install arvados arvados-cli && /usr/local/rvm/bin/rvm default do gem clean arvados arvados-cli" +if [[ "$NODE" == "" ]] || [[ "$NODE" == "$IDENTIFIER.arvadosapi.com" ]]; then + title "Updating API server" + SUM_ECODE=0 + run_puppet $IDENTIFIER.arvadosapi.com ECODE + SUM_ECODE=$(($SUM_ECODE + $ECODE)) -if [[ "$ECODE" != "0" ]]; then - title "!!!!!! DEPLOYING ARVADOS GEMS FAILED !!!!!!" - EXITCODE=$(($EXITCODE + $ECODE)) - exit $EXITCODE + if [[ "$SUM_ECODE" != "0" ]]; then + title "ERROR: Updating API server FAILED" + EXITCODE=$(($EXITCODE + $SUM_ECODE)) + exit $EXITCODE + fi fi -title "Deploying updated arvados gems complete" - -# Deploy Workbench -title "Deploying workbench" -cd "$WORKSPACE" -cd apps/workbench -bundle install --deployment - -# make sure we do not print the output of config:check -sed -i'' -e "s/RAILS_ENV=production #{rake} config:check/RAILS_ENV=production QUIET=true #{rake} config:check/" $WORKSPACE/$DEPLOY_REPO/apps/workbench/config/deploy.common.rb - -bundle exec cap deploy -f Capfile.workbench.$IDENTIFIER - -ECODE=$? - -# restore unaltered deploy.common.rb -cd $WORKSPACE/$DEPLOY_REPO -git checkout apps/workbench/config/deploy.common.rb - -if [[ "$ECODE" != "0" ]]; then - title "!!!!!! DEPLOYING WORKBENCH FAILED !!!!!!" - EXITCODE=$(($EXITCODE + $ECODE)) - exit $EXITCODE +if [[ "$NODE" == "$IDENTIFIER.arvadosapi.com" ]]; then + # we are done + exit 0 fi -title "Deploying workbench complete" - -# Update compute0 -title "Update compute0" - -ssh -p2222 root@compute0.$IDENTIFIER -C "/usr/bin/puppet agent -t" - -ECODE=$? - -if [[ "$ECODE" == "2" ]]; then - # Puppet exits '2' if there are changes. For real! - ECODE=0 +title "Loading ARVADOS_API_HOST and ARVADOS_API_TOKEN" +if [[ -f "$HOME/.config/arvados/$IDENTIFIER.arvadosapi.com.conf" ]]; then + . $HOME/.config/arvados/$IDENTIFIER.arvadosapi.com.conf +else + title "WARNING: $HOME/.config/arvados/$IDENTIFIER.arvadosapi.com.conf not found." fi - -if [[ "$ECODE" != "0" ]]; then - title "!!!!!! Update compute0 FAILED !!!!!!" - EXITCODE=$(($EXITCODE + $ECODE)) +if [[ "$ARVADOS_API_HOST" == "" ]] || [[ "$ARVADOS_API_TOKEN" == "" ]]; then + title "ERROR: ARVADOS_API_HOST and/or ARVADOS_API_TOKEN environment variables are not set." + exit 1 fi -title "Update compute0 complete" - -title "Update shell" +title "Gathering list of shell and Keep nodes" +SHELL_NODES=`ARVADOS_API_HOST=$ARVADOS_API_HOST ARVADOS_API_TOKEN=$ARVADOS_API_TOKEN rvm-exec default arv virtual_machine list |jq .items[].hostname -r` +KEEP_NODES=`ARVADOS_API_HOST=$ARVADOS_API_HOST ARVADOS_API_TOKEN=$ARVADOS_API_TOKEN rvm-exec default arv keep_service list |jq .items[].service_host -r` -ssh -p2222 root@shell.$IDENTIFIER -C "/usr/bin/puppet agent -t" - -ECODE=$? - -if [[ "$ECODE" == "2" ]]; then - # Puppet exits '2' if there are changes. For real! +nodes="" +for n in workbench manage switchyard $SHELL_NODES $KEEP_NODES; do ECODE=0 + if [[ $n =~ $ARVADOS_API_HOST$ ]]; then + # e.g. keep.qr1hi.arvadosapi.com + node=$n + else + # e.g. shell + node=$n.$ARVADOS_API_HOST + fi + if [[ "$NODE" == "" ]] || [[ "$NODE" == "$node" ]]; then + # e.g. keep.qr1hi + nodes="$nodes ${node%.arvadosapi.com}" + fi +done + +if [[ "$nodes" != "" ]]; then + ## at this point nodes should be an array containing + ## manage.qr1hi, keep.qr1hi, etc + ## that should be defined in the .ssh/config file + title "Updating in parallel: $nodes" + export -f run_puppet + export -f title + export SSH_PORT + export PUPPET_AGENT + echo $nodes|xargs -d " " -n 1 -P $PUPPET_CONCURRENCY -I {} bash -c "run_puppet {}" fi -if [[ "$ECODE" != "0" ]]; then - title "!!!!!! Update shell FAILED !!!!!!" - EXITCODE=$(($EXITCODE + $ECODE)) -fi +if [[ "$NODE" == "" ]]; then + title "Locating Arvados Standard Docker images project" -title "Update shell complete" + JSON_FILTER="[[\"name\", \"=\", \"Arvados Standard Docker Images\"], [\"owner_uuid\", \"=\", \"$IDENTIFIER-tpzed-000000000000000\"]]" + DOCKER_IMAGES_PROJECT=`ARVADOS_API_HOST=$ARVADOS_API_HOST ARVADOS_API_TOKEN=$ARVADOS_API_TOKEN arv --format=uuid group list --filters="$JSON_FILTER"` -title "Update keep0" + if [[ "$DOCKER_IMAGES_PROJECT" == "" ]]; then + title "Warning: Arvados Standard Docker Images project not found. Creating it." -ssh -p2222 root@keep0.$IDENTIFIER -C "/usr/bin/puppet agent -t" + DOCKER_IMAGES_PROJECT=`ARVADOS_API_HOST=$ARVADOS_API_HOST ARVADOS_API_TOKEN=$ARVADOS_API_TOKEN arv --format=uuid group create --group "{\"owner_uuid\":\"$IDENTIFIER-tpzed-000000000000000\", \"name\":\"Arvados Standard Docker Images\", \"group_class\":\"project\"}"` + ARVADOS_API_HOST=$ARVADOS_API_HOST ARVADOS_API_TOKEN=$ARVADOS_API_TOKEN arv link create --link "{\"tail_uuid\":\"$IDENTIFIER-j7d0g-fffffffffffffff\", \"head_uuid\":\"$DOCKER_IMAGES_PROJECT\", \"link_class\":\"permission\", \"name\":\"can_read\" }" + if [[ "$?" != "0" ]]; then + title "ERROR: could not create standard Docker images project Please create it, cf. http://doc.arvados.org/install/create-standard-objects.html" + exit 1 + fi + fi -ECODE=$? + title "Found Arvados Standard Docker Images project with uuid $DOCKER_IMAGES_PROJECT" + GIT_COMMIT=`ssh -o "StrictHostKeyChecking no" shell.$IDENTIFIER "python -c 'import arvados_cwl ; print arvados_cwl.__version__'" 2>&1 |grep -v INFO:rdflib:RDFLib` -if [[ "$ECODE" == "2" ]]; then - # Puppet exits '2' if there are changes. For real! - ECODE=0 -fi + if [[ "$?" != "0" ]] || [[ "$GIT_COMMIT" == "" ]]; then + title "ERROR: unable to get arvados/jobs Docker image git revision" + exit 1 + else + title "Found git commit for arvados/jobs Docker image: $GIT_COMMIT" + fi -if [[ "$ECODE" != "0" ]]; then - title "!!!!!! Update keep0 FAILED !!!!!!" - EXITCODE=$(($EXITCODE + $ECODE)) + run_command shell.$IDENTIFIER ECODE "ARVADOS_API_HOST=$ARVADOS_API_HOST ARVADOS_API_TOKEN=$ARVADOS_API_TOKEN /usr/local/rvm/bin/rvm-exec default arv keep docker" |grep -q $GIT_COMMIT + + if [[ "$?" == "0" ]]; then + title "Found latest arvados/jobs Docker image, nothing to upload" + # Just in case it isn't yet, tag the image as latest + ssh -o "StrictHostKeyChecking no" shell.$IDENTIFIER "ARVADOS_API_HOST=$ARVADOS_API_HOST ARVADOS_API_TOKEN=$ARVADOS_API_TOKEN arv-keepdocker arvados/jobs latest" + else + title "Installing latest arvados/jobs Docker image" + ssh -o "StrictHostKeyChecking no" shell.$IDENTIFIER "ARVADOS_API_HOST=$ARVADOS_API_HOST ARVADOS_API_TOKEN=$ARVADOS_API_TOKEN /usr/local/rvm/bin/rvm-exec default arv keep docker --pull --project-uuid=$DOCKER_IMAGES_PROJECT arvados/jobs $GIT_COMMIT" + ssh -o "StrictHostKeyChecking no" shell.$IDENTIFIER docker tag --force >/dev/null 2>&1 + # docker 1.13 no longer supports --force. Sigh. + if [[ "$?" == "125" ]]; then + FORCE_TAG="" + else + FORCE_TAG="--force" + fi + ## adding latest tag too refs 9254 + ssh -o "StrictHostKeyChecking no" shell.$IDENTIFIER docker tag $FORCE_TAG arvados/jobs:$GIT_COMMIT arvados/jobs:latest + ssh -o "StrictHostKeyChecking no" shell.$IDENTIFIER "ARVADOS_API_HOST=$ARVADOS_API_HOST ARVADOS_API_TOKEN=$ARVADOS_API_TOKEN arv-keepdocker --project-uuid=$DOCKER_IMAGES_PROJECT arvados/jobs latest" + if [[ "$?" -ne 0 ]]; then + title "'git pull' failed exiting..." + exit 1 + fi + fi fi - -title "Update keep0 complete" - -exit $EXITCODE