12216: Merge branch 'master' into 12216-webdav-list
[arvados.git] / services / nodemanager / arvnodeman / daemon.py
index e476e5e3e21c07205144deb6226e54c9762f8c58..ca3029d9e1bc3c376b119cca367b3767f3a8bb45 100644 (file)
@@ -78,7 +78,10 @@ class _ArvadosNodeTracker(_BaseNodeTracker):
     item_key = staticmethod(lambda arvados_node: arvados_node['uuid'])
 
     def find_stale_node(self, stale_time):
-        for record in self.nodes.itervalues():
+        # Try to select a stale node record that have an assigned slot first
+        for record in sorted(self.nodes.itervalues(),
+                             key=lambda r: r.arvados_node['slot_number'],
+                             reverse=True):
             node = record.arvados_node
             if (not cnode.timestamp_fresh(cnode.arvados_node_mtime(node),
                                           stale_time) and
@@ -498,8 +501,19 @@ class NodeManagerDaemonActor(actor_class):
         except pykka.ActorDeadError:
             return
         cloud_node_id = cloud_node.id
-        record = self.cloud_nodes[cloud_node_id]
-        shutdown_actor.stop()
+
+        try:
+            shutdown_actor.stop()
+        except pykka.ActorDeadError:
+            pass
+
+        try:
+            record = self.cloud_nodes[cloud_node_id]
+        except KeyError:
+            # Cloud node was already removed from the cloud node list
+            # supposedly while the destroy_node call was finishing its
+            # job.
+            return
         record.shutdown_actor = None
 
         if not success: