X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/e2cd53d9007d56e1de4816f6aeab4bd769271162..9fdfd5c5b229ea42193710f891e953b452bd90e7:/services/nodemanager/arvnodeman/daemon.py diff --git a/services/nodemanager/arvnodeman/daemon.py b/services/nodemanager/arvnodeman/daemon.py index 7e63c782ed..ca3029d9e1 100644 --- a/services/nodemanager/arvnodeman/daemon.py +++ b/services/nodemanager/arvnodeman/daemon.py @@ -1,4 +1,7 @@ #!/usr/bin/env python +# Copyright (C) The Arvados Authors. All rights reserved. +# +# SPDX-License-Identifier: AGPL-3.0 from __future__ import absolute_import, print_function @@ -75,7 +78,10 @@ class _ArvadosNodeTracker(_BaseNodeTracker): item_key = staticmethod(lambda arvados_node: arvados_node['uuid']) def find_stale_node(self, stale_time): - for record in self.nodes.itervalues(): + # Try to select a stale node record that have an assigned slot first + for record in sorted(self.nodes.itervalues(), + key=lambda r: r.arvados_node['slot_number'], + reverse=True): node = record.arvados_node if (not cnode.timestamp_fresh(cnode.arvados_node_mtime(node), stale_time) and @@ -387,7 +393,7 @@ class NodeManagerDaemonActor(actor_class): arvados_client=self._new_arvados(), arvados_node=arvados_node, cloud_client=self._new_cloud(), - cloud_size=cloud_size).proxy() + cloud_size=self.server_calculator.find_size(cloud_size.id)).proxy() self.booting[new_setup.actor_ref.actor_urn] = new_setup self.sizes_booting[new_setup.actor_ref.actor_urn] = cloud_size @@ -495,8 +501,19 @@ class NodeManagerDaemonActor(actor_class): except pykka.ActorDeadError: return cloud_node_id = cloud_node.id - record = self.cloud_nodes[cloud_node_id] - shutdown_actor.stop() + + try: + shutdown_actor.stop() + except pykka.ActorDeadError: + pass + + try: + record = self.cloud_nodes[cloud_node_id] + except KeyError: + # Cloud node was already removed from the cloud node list + # supposedly while the destroy_node call was finishing its + # job. + return record.shutdown_actor = None if not success: