X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/ab3afbb684bc1b32577c2696e13882123bfff7d2..70e5c7a3c6a5860d702d5e5c219dc0f3a3696d35:/services/nodemanager/arvnodeman/daemon.py diff --git a/services/nodemanager/arvnodeman/daemon.py b/services/nodemanager/arvnodeman/daemon.py index 2e71b316df..1edf4dc479 100644 --- a/services/nodemanager/arvnodeman/daemon.py +++ b/services/nodemanager/arvnodeman/daemon.py @@ -112,7 +112,8 @@ class NodeManagerDaemonActor(actor_class): node_setup_class=dispatch.ComputeNodeSetupActor, node_shutdown_class=dispatch.ComputeNodeShutdownActor, node_actor_class=dispatch.ComputeNodeMonitorActor, - max_total_price=0): + max_total_price=0, + consecutive_idle_count=1): super(NodeManagerDaemonActor, self).__init__() self._node_setup = node_setup_class self._node_shutdown = node_shutdown_class @@ -133,6 +134,7 @@ class NodeManagerDaemonActor(actor_class): self.poll_stale_after = poll_stale_after self.boot_fail_after = boot_fail_after self.node_stale_after = node_stale_after + self.consecutive_idle_count = consecutive_idle_count self.last_polls = {} for poll_name in ['server_wishlist', 'arvados_nodes', 'cloud_nodes']: poll_actor = locals()[poll_name + '_actor'] @@ -173,7 +175,8 @@ class NodeManagerDaemonActor(actor_class): poll_stale_after=self.poll_stale_after, node_stale_after=self.node_stale_after, cloud_client=self._cloud_driver, - boot_fail_after=self.boot_fail_after) + boot_fail_after=self.boot_fail_after, + consecutive_idle_count=self.consecutive_idle_count) actorTell = actor.tell_proxy() actorTell.subscribe(self._later.node_can_shutdown) self._cloud_nodes_actor.subscribe_to(cloud_node.id, @@ -318,7 +321,7 @@ class NodeManagerDaemonActor(actor_class): busy_count = counts["busy"] wishlist_count = self._size_wishlist(size) - self._logger.info("%s: wishlist %i, up %i (booting %i, unpaired %i, idle %i, busy %i), down %i, shutdown %i", size.name, + self._logger.info("%s: wishlist %i, up %i (booting %i, unpaired %i, idle %i, busy %i), down %i, shutdown %i", size.id, wishlist_count, up_count, counts["booting"], @@ -338,7 +341,7 @@ class NodeManagerDaemonActor(actor_class): can_boot = int((self.max_total_price - total_price) / size.price) if can_boot == 0: self._logger.info("Not booting %s (price %s) because with it would exceed max_total_price of %s (current total_price is %s)", - size.name, size.price, self.max_total_price, total_price) + size.id, size.price, self.max_total_price, total_price) return can_boot else: return wanted @@ -394,7 +397,7 @@ class NodeManagerDaemonActor(actor_class): if not self.cancel_node_shutdown(cloud_size): arvados_node = self.arvados_nodes.find_stale_node(self.node_stale_after) self._logger.info("Want %i more %s nodes. Booting a node.", - nodes_wanted, cloud_size.name) + nodes_wanted, cloud_size.id) new_setup = self._node_setup.start( timer_actor=self._timer, arvados_client=self._new_arvados(), @@ -475,7 +478,7 @@ class NodeManagerDaemonActor(actor_class): for record in self.cloud_nodes.nodes.itervalues(): try: if (record.shutdown_actor is not None and - record.size.id == size.id and + record.cloud_node.size.id == size.id and record.shutdown_actor.cancel_shutdown("Node size is in wishlist").get(2)): return True except (pykka.ActorDeadError, pykka.Timeout) as e: