From a6b15a15c6edb39d17ce79d71ec7b0816d7dcc0f Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Thu, 26 May 2016 09:51:24 -0400 Subject: [PATCH] 9303: Fetch arv_node before trying to shut down node, because monitor actor may go away once the node has been successfully shut down. Also handle case of node_finished_shutdown called after shutdown actor is stopped. --- .../arvnodeman/computenode/dispatch/__init__.py | 2 +- services/nodemanager/arvnodeman/daemon.py | 7 +++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py b/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py index 96b2353ce0..a950210aa8 100644 --- a/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py +++ b/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py @@ -223,6 +223,7 @@ class ComputeNodeShutdownActor(ComputeNodeStateChangeBase): @RetryMixin._retry() def shutdown_node(self): self._logger.info("Starting shutdown") + arv_node = self._arvados_node() if not self._cloud.destroy_node(self.cloud_node): if self._cloud.broken(self.cloud_node): self._later.cancel_shutdown(self.NODE_BROKEN) @@ -231,7 +232,6 @@ class ComputeNodeShutdownActor(ComputeNodeStateChangeBase): # Force a retry. raise cloud_types.LibcloudError("destroy_node failed") self._logger.info("Shutdown success") - arv_node = self._arvados_node() if arv_node is None: self._finished(success_flag=True) else: diff --git a/services/nodemanager/arvnodeman/daemon.py b/services/nodemanager/arvnodeman/daemon.py index 32bc5c3739..a809148cdf 100644 --- a/services/nodemanager/arvnodeman/daemon.py +++ b/services/nodemanager/arvnodeman/daemon.py @@ -436,8 +436,11 @@ class NodeManagerDaemonActor(actor_class): self._begin_node_shutdown(node_actor, cancellable=False) def node_finished_shutdown(self, shutdown_actor): - cloud_node, success, cancel_reason = self._get_actor_attrs( - shutdown_actor, 'cloud_node', 'success', 'cancel_reason') + try: + cloud_node, success, cancel_reason = self._get_actor_attrs( + shutdown_actor, 'cloud_node', 'success', 'cancel_reason') + except pykka.ActorDeadError: + return cloud_node_id = cloud_node.id record = self.cloud_nodes[cloud_node_id] shutdown_actor.stop() -- 2.30.2