Merge branch 'pr/28'
[arvados.git] / services / nodemanager / arvnodeman / daemon.py
index 44f15132643f558e3ed3591935ac1f517de589d5..1d52073ce5ff7a362e0b8e9cc13b12c0f2d3b6a2 100644 (file)
@@ -159,7 +159,9 @@ class NodeManagerDaemonActor(actor_class):
             timer_actor=self._timer,
             arvados_node=None,
             poll_stale_after=self.poll_stale_after,
-            node_stale_after=self.node_stale_after).proxy()
+            node_stale_after=self.node_stale_after,
+            cloud_client=self._cloud_driver,
+            boot_fail_after=self.boot_fail_after).proxy()
         actor.subscribe(self._later.node_can_shutdown)
         self._cloud_nodes_actor.subscribe_to(cloud_node.id,
                                              actor.update_cloud_node)
@@ -180,9 +182,14 @@ class NodeManagerDaemonActor(actor_class):
                     self._pair_nodes(record, arv_rec.arvados_node)
                     break
         for key, record in self.cloud_nodes.orphans.iteritems():
+            if key in self.shutdowns:
+                try:
+                    self.shutdowns[key].stop().get()
+                except pykka.ActorDeadError:
+                    pass
+                del self.shutdowns[key]
             record.actor.stop()
             record.cloud_node = None
-            self.shutdowns.pop(key, None)
 
     def update_arvados_nodes(self, nodelist):
         self._update_poll_time('arvados_nodes')
@@ -207,6 +214,13 @@ class NodeManagerDaemonActor(actor_class):
                                  self.cloud_nodes.nodes.itervalues())
                    if busy)
 
+    def _nodes_missing(self):  # Returns an int: tracked cloud nodes whose Arvados record is reported missing.
+        return sum(1 for arv_node in  # one per record passing the final filter; a falsy arv_node is never counted
+                   pykka.get_all(rec.actor.arvados_node for rec in  # resolves each future; presumably rec.actor is a pykka proxy -- confirm
+                                 self.cloud_nodes.nodes.itervalues()  # records whose cloud node id is a key in self.shutdowns are excluded below
+                                 if rec.actor.cloud_node.get().id not in self.shutdowns)
+                   if arv_node and cnode.arvados_node_missing(arv_node, self.node_stale_after))
+
     def _nodes_wanted(self):
         up_count = self._nodes_up()
         under_min = self.min_nodes - up_count
@@ -216,7 +230,7 @@ class NodeManagerDaemonActor(actor_class):
         elif under_min > 0:
             return under_min
         else:
-            up_count -= len(self.shutdowns) + self._nodes_busy()
+            up_count -= len(self.shutdowns) + self._nodes_busy() + self._nodes_missing()
             return len(self.last_wishlist) - up_count
 
     def _nodes_excess(self):