3 # Copyright (C) The Arvados Authors. All rights reserved.
5 # SPDX-License-Identifier: AGPL-3.0
12 read -d] -r SCOPES <<EOF
14 '["GET /arvados/v1/virtual_machines",\n
15 "GET /arvados/v1/keep_services",\n
16 "GET /arvados/v1/keep_services/",\n
17 "GET /arvados/v1/groups",\n
18 "GET /arvados/v1/groups/",\n
19 "GET /arvados/v1/links",\n
20 "GET /arvados/v1/collections",\n
21 "POST /arvados/v1/collections",\n
22 "POST /arvados/v1/links",\n
23 "GET /arvados/v1/users/current",\n
24 "POST /arvados/v1/users/current",\n
25 "GET /arvados/v1/jobs",\n
26 "POST /arvados/v1/jobs",\n
27 "GET /arvados/v1/pipeline_instances",\n
28 "POST /arvados/v1/pipeline_instances",\n
29 "PUT /arvados/v1/pipeline_instances/",\n
30 "GET /arvados/v1/collections/",\n
31 "POST /arvados/v1/collections/",\n
32 "GET /arvados/v1/logs"]'
37 echo >&2 "usage: $0 [options] <identifier>"
39 echo >&2 " <identifier> Arvados cluster name"
41 echo >&2 "$0 options:"
42 echo >&2 " -n, --node <node> Single machine to deploy, use fqdn, optional"
43 echo >&2 " -p, --port <ssh port> SSH port to use (default 22)"
44 echo >&2 " -c, --concurrency <max> Maximum concurrency for puppet runs (default 5)"
45 echo >&2 " -u, --unmanaged Deploy to unmanaged node/cluster"
46 echo >&2 " -d, --debug Enable debug output"
47 echo >&2 " -h, --help Display this help and exit"
49 echo >&2 "Note: this script requires an arvados token created with these permissions:"
50 echo >&2 ' arv api_client_authorization create_system_auth \'
51 echo -e $SCOPES"]'" >&2
# Command-line parsing. The short flags h, u and d take no value; p, c and n
# (long forms: port, concurrency, node) each require one.
56 # NOTE: This requires GNU getopt (part of the util-linux package on Debian-based distros).
57 TEMP=`getopt -o hudp:c:n: \
58 --long help,unmanaged,debug,port:,concurrency:,node: \
61 if [ $? != 0 ] ; then echo "Use -h for help"; exit 1 ; fi
62 # Note the quotes around `$TEMP': they are essential!
# For options that carry a value, the value is read from $2 and both the flag
# and its value are shifted away.
72 SSH_PORT="$2"; shift 2
75 PUPPET_CONCURRENCY="$2"; shift 2
# An empty IDENTIFIER is detected here.
# NOTE(review): the handling of that case is not visible in this listing.
99 if [[ "$IDENTIFIER" == '' ]]; then
111 let endtime="$(now) + 600"
112 while [ "$endtime" -gt "$(now)" ]; do
113 puppet agent --test --detailed-exitcodes
115 if [ 0 = "$agent_exitcode" ] || [ 2 = "$agent_exitcode" ]; then
121 exit ${agent_exitcode:-99}
126 let endtime="$(now) + 600"
127 while [ "$endtime" -gt "$(now)" ]; do
131 if [ 0 = "$apt_exitcode" ]; then
137 exit ${apt_exitcode:-99}
141 date=`date +'%Y-%m-%d %H:%M:%S'`
# update_node: run the script held in $APT_AGENT as root on $node over ssh
# and report the outcome.
# Globals read: UNMANAGED, DEBUG, SSH_PORT, APT_AGENT, TMP_FILE.
# Globals written: ECODE (ssh exit status), RESULT (captured output).
# NOTE(review): the lines that set $node and $TMP_FILE are not visible in
# this listing; presumably $node comes from the first argument. Confirm.
145 function update_node() {
146 if [[ $UNMANAGED -ne 0 ]]; then
156 title "Running apt on $node"
# Sleep for a random fractional number of seconds. RANDOM is 0..32767, so the
# integer part is 0..5 and the digits after the dot are 0..32.
# NOTE(review): presumably this staggers the parallel runs. Confirm.
157 sleep $[ $RANDOM / 6000 ].$[ $RANDOM / 1000 ]
# With DEBUG other than 0 the remote stdout is shown live and copied to
# $TMP_FILE through tee; the other ssh call sends stdout and stderr to
# $TMP_FILE only.
159 if [[ "$DEBUG" != "0" ]]; then
160 ssh -t -p$SSH_PORT -o "StrictHostKeyChecking no" -o "ConnectTimeout 5" root@$node -C bash -c "'$APT_AGENT'" | tee $TMP_FILE
162 ssh -t -p$SSH_PORT -o "StrictHostKeyChecking no" -o "ConnectTimeout 5" root@$node -C bash -c "'$APT_AGENT'" > $TMP_FILE 2>&1
# PIPESTATUS[0] is the exit status of the first command of the most recent
# pipeline, i.e. ssh rather than tee.
165 ECODE=${PIPESTATUS[0]}
166 RESULT=$(cat $TMP_FILE)
# Only exit codes other than 0 and 255 are reported as errors.
168 if [[ "$ECODE" != "255" && "$ECODE" != "0" ]]; then
169 # Ssh exits 255 if the connection timed out. Just ignore that.
170 echo "ERROR running apt on $node: exit code $ECODE"
171 if [[ "$DEBUG" == "0" ]]; then
172 title "Command output follows:"
176 if [[ "$ECODE" == "255" ]]; then
177 title "Connection timed out"
# Final one-line summary for this node.
181 if [[ "$ECODE" == "0" ]]; then
183 echo $node successfully updated
185 echo $node exit code: $ECODE see $TMP_FILE for details
# run_puppet: run the script held in $PUPPET_AGENT as root on $node over ssh
# and report the outcome.
# Globals read: DEBUG, SSH_PORT, PUPPET_AGENT, TMP_FILE.
# Globals written: ECODE (ssh exit status), RESULT (captured output).
# NOTE(review): the lines that set $node and $TMP_FILE are not visible in
# this listing; presumably $node comes from the first argument. Confirm.
189 function run_puppet() {
192 title "Running puppet on $node"
# Sleep for a random fractional number of seconds. RANDOM is 0..32767, so the
# integer part is 0..5 and the digits after the dot are 0..32.
193 sleep $[ $RANDOM / 6000 ].$[ $RANDOM / 1000 ]
# With DEBUG other than 0 the remote stdout is shown live and copied to
# $TMP_FILE through tee; the other ssh call sends stdout and stderr to
# $TMP_FILE only.
195 if [[ "$DEBUG" != "0" ]]; then
196 ssh -t -p$SSH_PORT -o "StrictHostKeyChecking no" -o "ConnectTimeout 5" root@$node -C bash -c "'$PUPPET_AGENT'" | tee $TMP_FILE
198 ssh -t -p$SSH_PORT -o "StrictHostKeyChecking no" -o "ConnectTimeout 5" root@$node -C bash -c "'$PUPPET_AGENT'" > $TMP_FILE 2>&1
# PIPESTATUS[0] is the exit status of the first command of the most recent
# pipeline, i.e. ssh rather than tee.
201 ECODE=${PIPESTATUS[0]}
202 RESULT=$(cat $TMP_FILE)
# An error is reported only when the exit code is none of 0, 2 and 255 and
# the captured output does not contain the text: already in progress
204 if [[ "$ECODE" != "255" && ! ("$RESULT" =~ 'already in progress') && "$ECODE" != "2" && "$ECODE" != "0" ]]; then
205 # Ssh exits 255 if the connection timed out. Just ignore that.
206 # Puppet exits 2 if there are changes. For real!
207 # Puppet prints 'Notice: Run of Puppet configuration client already in progress' if another puppet process
208 # was already running
209 echo "ERROR running puppet on $node: exit code $ECODE"
210 if [[ "$DEBUG" == "0" ]]; then
211 title "Command output follows:"
215 if [[ "$ECODE" == "255" ]]; then
216 title "Connection timed out"
# NOTE(review): the body of the exit-code-2 branch is not visible here.
219 if [[ "$ECODE" == "2" ]]; then
# Final one-line summary for this node.
223 if [[ "$ECODE" == "0" ]]; then
225 echo $node successfully updated
227 echo $node exit code: $ECODE see $TMP_FILE for details
# run_command: run $command as root on $node over ssh (ConnectTimeout 125)
# and hand the exit code back in the variable whose name is in $return_var.
# Globals read: DEBUG, SSH_PORT, TMP_FILE. Global written: RESULT.
# NOTE(review): the lines that set $node, $return_var, $command and ECODE are
# not visible in this listing. Judging by the call site the arguments are
# node, name of the result variable, command. Confirm.
231 function run_command() {
236 title "Running '$command' on $node"
# With DEBUG other than 0 the remote stdout is shown live and copied to
# $TMP_FILE through tee; the other ssh call sends stdout and stderr to
# $TMP_FILE only.
238 if [[ "$DEBUG" != "0" ]]; then
239 ssh -t -p$SSH_PORT -o "StrictHostKeyChecking no" -o "ConnectTimeout 125" root@$node -C "$command" | tee $TMP_FILE
241 ssh -t -p$SSH_PORT -o "StrictHostKeyChecking no" -o "ConnectTimeout 125" root@$node -C "$command" > $TMP_FILE 2>&1
245 RESULT=$(cat $TMP_FILE)
# Only exit codes other than 0 and 255 are reported as errors.
247 if [[ "$ECODE" != "255" && "$ECODE" != "0" ]]; then
248 # Ssh exits 255 if the connection timed out. Just ignore that, it's possible that this node is
249 # a shell node that is down.
250 title "ERROR running command on $node: exit code $ECODE"
251 if [[ "$DEBUG" == "0" ]]; then
252 title "Command output follows:"
256 if [[ "$ECODE" == "255" ]]; then
257 title "Connection timed out"
# Assign the exit code to the variable named by $return_var. The eval is
# needed because the target variable name is itself held in a variable.
262 eval "$return_var=$ECODE"
# The API server is updated when no single node was requested (NODE empty)
# or when the requested node is the API server itself.
265 if [[ "$NODE" == "" ]] || [[ "$NODE" == "$IDENTIFIER.arvadosapi.com" ]]; then
266 title "Updating API server"
# NOTE(review): ECODE is passed as a second argument and then read back as a
# global; confirm that update_node leaves the exit code in ECODE.
268 update_node $IDENTIFIER.arvadosapi.com ECODE
269 SUM_ECODE=$(($SUM_ECODE + $ECODE))
# A non-zero sum is reported and added to the overall EXITCODE.
271 if [[ "$SUM_ECODE" != "0" ]]; then
272 title "ERROR: Updating API server FAILED"
273 EXITCODE=$(($EXITCODE + $SUM_ECODE))
# Special case: only the API server was requested.
# NOTE(review): the body of this branch is not visible in this listing.
278 if [[ "$NODE" == "$IDENTIFIER.arvadosapi.com" ]]; then
# Load the API credentials by sourcing the per-cluster file
# $HOME/.config/arvados/$IDENTIFIER.arvadosapi.com.conf when it exists.
283 title "Loading ARVADOS_API_HOST and ARVADOS_API_TOKEN"
284 if [[ -f "$HOME/.config/arvados/$IDENTIFIER.arvadosapi.com.conf" ]]; then
285 . $HOME/.config/arvados/$IDENTIFIER.arvadosapi.com.conf
287 title "WARNING: $HOME/.config/arvados/$IDENTIFIER.arvadosapi.com.conf not found."
# Both variables must be non-empty from here on.
289 if [[ "$ARVADOS_API_HOST" == "" ]] || [[ "$ARVADOS_API_TOKEN" == "" ]]; then
290 title "ERROR: ARVADOS_API_HOST and/or ARVADOS_API_TOKEN environment variables are not set."
# Build the space-separated list of candidate nodes. It starts with
# workbench; every cluster except ce8i5 also gets manage and switchyard.
294 title "Gathering list of nodes"
295 start_nodes="workbench"
296 if [[ "$IDENTIFIER" != "ce8i5" ]]; then
297 start_nodes="$start_nodes manage switchyard"
# Ask the API for the hostnames of all virtual machines and the service hosts
# of all keep services; jq -r prints them as raw strings, one per line.
299 SHELL_NODES=`ARVADOS_API_HOST=$ARVADOS_API_HOST ARVADOS_API_TOKEN=$ARVADOS_API_TOKEN arv virtual_machine list |jq .items[].hostname -r`
300 KEEP_NODES=`ARVADOS_API_HOST=$ARVADOS_API_HOST ARVADOS_API_TOKEN=$ARVADOS_API_TOKEN arv keep_service list |jq .items[].service_host -r`
301 SHELL_NODE_FOR_ARV_KEEPDOCKER="shell.$IDENTIFIER"
302 start_nodes="$start_nodes $SHELL_NODES $KEEP_NODES $ARVADOS_API_HOST"
# Turn each candidate into a node name. A name that already ends with
# $ARVADOS_API_HOST is matched by the regex test below.
# NOTE(review): the branch bodies are only partly visible; presumably such
# names are used as they are and all others get .$ARVADOS_API_HOST appended.
305 for n in $start_nodes; do
307 if [[ $n =~ $ARVADOS_API_HOST$ ]]; then
308 # e.g. keep.qr1hi.arvadosapi.com
312 node=$n.$ARVADOS_API_HOST
# Keep the node when no single node was requested or when it is the requested
# one. The .arvadosapi.com suffix is stripped before the name is appended to
# the space-separated string $nodes.
314 if [[ "$NODE" == "" ]] || [[ "$NODE" == "$node" ]]; then
316 nodes="$nodes ${node%.arvadosapi.com}"
320 if [[ "$nodes" != "" ]]; then
321 ## at this point nodes should be a space-separated string containing
322 ## manage.qr1hi, keep.qr1hi, etc
323 ## that should be defined in the .ssh/config file
324 title "Updating in parallel: $nodes"
# Export the function so the bash processes started by xargs can call it.
325 export -f update_node
# xargs splits the list on spaces and runs one update_node per node, with at
# most $PUPPET_CONCURRENCY of them running at the same time.
333 echo $nodes|xargs -d " " -n 1 -P $PUPPET_CONCURRENCY -I {} bash -c "update_node {}"
# This part only runs when no single node was requested (NODE empty).
336 if [[ "$NODE" == "" ]]; then
337 title "Locating Arvados Standard Docker images project"
# Look up the uuid of the group named Arvados Standard Docker Images that is
# owned by $IDENTIFIER-tpzed-000000000000000.
339 JSON_FILTER="[[\"name\", \"=\", \"Arvados Standard Docker Images\"], [\"owner_uuid\", \"=\", \"$IDENTIFIER-tpzed-000000000000000\"]]"
340 DOCKER_IMAGES_PROJECT=`ARVADOS_API_HOST=$ARVADOS_API_HOST ARVADOS_API_TOKEN=$ARVADOS_API_TOKEN arv --format=uuid group list --filters="$JSON_FILTER"`
# An empty result means the project does not exist yet: create it, then add a
# can_read permission link from $IDENTIFIER-j7d0g-fffffffffffffff to it.
342 if [[ "$DOCKER_IMAGES_PROJECT" == "" ]]; then
343 title "Warning: Arvados Standard Docker Images project not found. Creating it."
345 DOCKER_IMAGES_PROJECT=`ARVADOS_API_HOST=$ARVADOS_API_HOST ARVADOS_API_TOKEN=$ARVADOS_API_TOKEN arv --format=uuid group create --group "{\"owner_uuid\":\"$IDENTIFIER-tpzed-000000000000000\", \"name\":\"Arvados Standard Docker Images\", \"group_class\":\"project\"}"`
346 ARVADOS_API_HOST=$ARVADOS_API_HOST ARVADOS_API_TOKEN=$ARVADOS_API_TOKEN arv link create --link "{\"tail_uuid\":\"$IDENTIFIER-j7d0g-fffffffffffffff\", \"head_uuid\":\"$DOCKER_IMAGES_PROJECT\", \"link_class\":\"permission\", \"name\":\"can_read\" }"
# NOTE(review): the status tested here is that of the link creation directly
# above; a failure of the group creation itself is not checked separately.
347 if [[ "$?" != "0" ]]; then
348 title "ERROR: could not create standard Docker images project Please create it, cf. http://doc.arvados.org/install/create-standard-objects.html"
353 title "Found Arvados Standard Docker Images project with uuid $DOCKER_IMAGES_PROJECT"
# Derive the image version from the Candidate line that apt-cache policy
# prints for python-arvados-cwl-runner on the host $IDENTIFIER: take the
# second space-separated field and keep the part before the first dash.
355 VERSION=`ssh -t -p$SSH_PORT -o "StrictHostKeyChecking no" -o "ConnectTimeout 125" $IDENTIFIER apt-cache policy python-arvados-cwl-runner|grep Candidate`
356 VERSION=`echo $VERSION|cut -f2 -d' '|cut -f1 -d-`
# NOTE(review): unless a line not shown here intervenes, the status tested
# below is that of the cut pipeline above, not of the ssh call; in practice
# the empty-VERSION test is what catches a failed lookup.
358 if [[ "$?" != "0" ]] || [[ "$VERSION" == "" ]]; then
359 title "ERROR: unable to get arvados/jobs Docker image version"
362 title "Found version for arvados/jobs Docker image: $VERSION"
# Make sure the arvados/jobs image for $VERSION is stored in the Docker
# images project and tagged latest. With an empty
# SHELL_NODE_FOR_ARV_KEEPDOCKER the commands run locally; the remaining lines
# run the same steps on that node over ssh.
# NOTE(review): the only visible assignment sets this variable to
# shell.$IDENTIFIER, so unless it is cleared in a line not shown here the
# local branch is never taken.
365 if [[ "$SHELL_NODE_FOR_ARV_KEEPDOCKER" == "" ]]; then
# The image counts as present when the arv-keepdocker listing mentions
# $VERSION anywhere.
366 ARVADOS_API_HOST=$ARVADOS_API_HOST ARVADOS_API_TOKEN=$ARVADOS_API_TOKEN arv-keepdocker |grep -q $VERSION
367 if [[ "$?" == "0" ]]; then
368 title "Found latest arvados/jobs Docker image, nothing to upload"
369 # Just in case it isn't yet, tag the image as latest
370 title "Tag arvados/jobs Docker image $VERSION as latest"
371 ARVADOS_API_HOST=$ARVADOS_API_HOST ARVADOS_API_TOKEN=$ARVADOS_API_TOKEN arv-keepdocker --project-uuid=$DOCKER_IMAGES_PROJECT arvados/jobs latest
372 if [[ $? -ne 0 ]]; then
373 title "'arv-keepdocker' failed..."
# Image not found: pull and upload the versioned image first.
377 title "Installing latest arvados/jobs Docker image"
378 ARVADOS_API_HOST=$ARVADOS_API_HOST ARVADOS_API_TOKEN=$ARVADOS_API_TOKEN arv-keepdocker --pull --project-uuid=$DOCKER_IMAGES_PROJECT arvados/jobs $VERSION
379 if [[ $? -ne 0 ]]; then
380 title "'arv-keepdocker' failed..."
383 ## adding latest tag too refs 9254
384 docker tag arvados/jobs:$VERSION arvados/jobs:latest
385 ARVADOS_API_HOST=$ARVADOS_API_HOST ARVADOS_API_TOKEN=$ARVADOS_API_TOKEN arv-keepdocker --project-uuid=$DOCKER_IMAGES_PROJECT arvados/jobs latest
386 if [[ $? -ne 0 ]]; then
387 title "'arv-keepdocker' failed..."
# Remote variant: the same presence check, run on the shell node.
# NOTE(review): the status tested below is that of grep, the last command of
# the pipeline. run_command executes in a subshell here, so the value it
# assigns to ECODE does not reach this shell. Also confirm that run_command
# prints the remote output when DEBUG is 0, since its ssh call then writes
# to $TMP_FILE only.
392 run_command $SHELL_NODE_FOR_ARV_KEEPDOCKER ECODE "ARVADOS_API_HOST=$ARVADOS_API_HOST ARVADOS_API_TOKEN=$ARVADOS_API_TOKEN arv-keepdocker" |grep -q $VERSION
394 if [[ "$?" == "0" ]]; then
395 title "Found latest arvados/jobs Docker image, nothing to upload"
396 # Just in case it isn't yet, tag the image as latest
397 title "Tag arvados/jobs Docker image $VERSION as latest"
398 ssh -t -p$SSH_PORT -o "StrictHostKeyChecking no" -o "ConnectTimeout 125" $SHELL_NODE_FOR_ARV_KEEPDOCKER "ARVADOS_API_HOST=$ARVADOS_API_HOST ARVADOS_API_TOKEN=$ARVADOS_API_TOKEN arv-keepdocker --project-uuid=$DOCKER_IMAGES_PROJECT arvados/jobs latest"
399 if [[ $? -ne 0 ]]; then
400 title "'arv-keepdocker' failed..."
# Image not found on the remote side: pull and upload the versioned image.
404 title "Installing latest arvados/jobs Docker image"
405 ssh -t -p$SSH_PORT -o "StrictHostKeyChecking no" -o "ConnectTimeout 125" $SHELL_NODE_FOR_ARV_KEEPDOCKER "ARVADOS_API_HOST=$ARVADOS_API_HOST ARVADOS_API_TOKEN=$ARVADOS_API_TOKEN arv-keepdocker --pull --project-uuid=$DOCKER_IMAGES_PROJECT arvados/jobs $VERSION"
406 if [[ $? -ne 0 ]]; then
407 title "'arv-keepdocker' failed..."
410 ## adding latest tag too refs 9254
# NOTE(review): the exit status of the remote docker tag is not checked.
411 ssh -t -p$SSH_PORT -o "StrictHostKeyChecking no" -o "ConnectTimeout 125" $SHELL_NODE_FOR_ARV_KEEPDOCKER docker tag arvados/jobs:$VERSION arvados/jobs:latest
412 ssh -t -p$SSH_PORT -o "StrictHostKeyChecking no" -o "ConnectTimeout 125" $SHELL_NODE_FOR_ARV_KEEPDOCKER "ARVADOS_API_HOST=$ARVADOS_API_HOST ARVADOS_API_TOKEN=$ARVADOS_API_TOKEN arv-keepdocker --project-uuid=$DOCKER_IMAGES_PROJECT arvados/jobs latest"
413 if [[ $? -ne 0 ]]; then
414 title "'arv-keepdocker' failed..."