Merge branch '8697-ruby187-compat'
[arvados.git] / services / nodemanager / arvnodeman / daemon.py
index 33b6cd58f6aff2897cef4c89d0c4b60a149b0ee4..11204409c03cd1894ac4128f1bfc84e3a4767652 100644 (file)
@@ -238,8 +238,18 @@ class NodeManagerDaemonActor(actor_class):
                   for c in self.cloud_nodes.nodes.itervalues()
                   if size is None or c.cloud_node.size.id == size.id)
 
+    def _nodes_down(self, size):
+        # Count self.cloud_nodes records of this size (all sizes when size is
+        # None) whose actor reports in_state('down'): compute nodes the cloud
+        # provider still lists but that are considered "down" by Arvados.
+        return sum(1 for down in
+                   pykka.get_all(rec.actor.in_state('down') for rec in
+                                 self.cloud_nodes.nodes.itervalues()
+                                 if size is None or rec.cloud_node.size.id == size.id)
+                   if down)
+
     def _nodes_up(self, size):
-        up = self._nodes_booting(size) + self._nodes_booted(size)
+        up = (self._nodes_booting(size) + self._nodes_booted(size)) - self._nodes_down(size)  # booting + booted, minus nodes counted as "down"
         return up
 
     def _total_price(self):
@@ -292,17 +302,17 @@ class NodeManagerDaemonActor(actor_class):
         booting_count = self._nodes_booting(size) + self._nodes_unpaired(size)
         shutdown_count = self._size_shutdowns(size)
         busy_count = self._nodes_busy(size)
-        up_count = self._nodes_up(size) - (shutdown_count + busy_count + self._nodes_missing(size))
+        idle_count = self._nodes_up(size) - (busy_count + self._nodes_missing(size))
 
         self._logger.info("%s: wishlist %i, up %i (booting %i, idle %i, busy %i), shutting down %i", size.name,
                           self._size_wishlist(size),
-                          up_count + busy_count,
+                          idle_count + busy_count,
                           booting_count,
-                          up_count - booting_count,
+                          idle_count - booting_count,
                           busy_count,
                           shutdown_count)
 
-        wanted = self._size_wishlist(size) - up_count
+        wanted = self._size_wishlist(size) - idle_count
         if wanted > 0 and self.max_total_price and ((total_price + (size.price*wanted)) > self.max_total_price):
             can_boot = int((self.max_total_price - total_price) / size.price)
             if can_boot == 0:
@@ -313,7 +323,7 @@ class NodeManagerDaemonActor(actor_class):
             return wanted
 
     def _nodes_excess(self, size):
-        up_count = self._nodes_up(size) - self._size_shutdowns(size)
+        up_count = (self._nodes_booting(size) + self._nodes_booted(size)) - self._size_shutdowns(size)  # booting + booted directly rather than via _nodes_up()
         if size.id == self.min_cloud_size.id:
             up_count -= self.min_nodes
         return up_count - self._nodes_busy(size) - self._size_wishlist(size)
@@ -361,7 +371,7 @@ class NodeManagerDaemonActor(actor_class):
             arvados_client=self._new_arvados(),
             arvados_node=arvados_node,
             cloud_client=self._new_cloud(),
-            cloud_size=cloud_size).tell_proxy()
+            cloud_size=cloud_size).proxy()
         self.booting[new_setup.actor_ref.actor_urn] = new_setup
         self.sizes_booting_shutdown[new_setup.actor_ref.actor_urn] = cloud_size
 
@@ -411,8 +421,8 @@ class NodeManagerDaemonActor(actor_class):
         shutdown = self._node_shutdown.start(
             timer_actor=self._timer, cloud_client=self._new_cloud(),
             arvados_client=self._new_arvados(),
-            node_monitor=node_actor.actor_ref, cancellable=cancellable).proxy()
-        self.shutdowns[cloud_node_id] = shutdown
+            node_monitor=node_actor.actor_ref, cancellable=cancellable)
+        self.shutdowns[cloud_node_id] = shutdown.proxy()
         self.sizes_booting_shutdown[cloud_node_id] = cloud_node_obj.size
         shutdown.tell_proxy().subscribe(self._later.node_finished_shutdown)