projects
/
arvados.git
/ blobdiff
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
Merge branch '13933-dispatch-batch-size'
[arvados.git]
/
services
/
nodemanager
/
arvnodeman
/
daemon.py
diff --git
a/services/nodemanager/arvnodeman/daemon.py
b/services/nodemanager/arvnodeman/daemon.py
index 2e71b316df1459c3255f0235889e554eea5fc728..1edf4dc4792e5b7a9f638a17599c66c76410ab84 100644
(file)
--- a/
services/nodemanager/arvnodeman/daemon.py
+++ b/
services/nodemanager/arvnodeman/daemon.py
@@
-112,7
+112,8
@@
class NodeManagerDaemonActor(actor_class):
node_setup_class=dispatch.ComputeNodeSetupActor,
node_shutdown_class=dispatch.ComputeNodeShutdownActor,
node_actor_class=dispatch.ComputeNodeMonitorActor,
node_setup_class=dispatch.ComputeNodeSetupActor,
node_shutdown_class=dispatch.ComputeNodeShutdownActor,
node_actor_class=dispatch.ComputeNodeMonitorActor,
- max_total_price=0):
+ max_total_price=0,
+ consecutive_idle_count=1):
super(NodeManagerDaemonActor, self).__init__()
self._node_setup = node_setup_class
self._node_shutdown = node_shutdown_class
super(NodeManagerDaemonActor, self).__init__()
self._node_setup = node_setup_class
self._node_shutdown = node_shutdown_class
@@
-133,6
+134,7
@@
class NodeManagerDaemonActor(actor_class):
self.poll_stale_after = poll_stale_after
self.boot_fail_after = boot_fail_after
self.node_stale_after = node_stale_after
self.poll_stale_after = poll_stale_after
self.boot_fail_after = boot_fail_after
self.node_stale_after = node_stale_after
+ self.consecutive_idle_count = consecutive_idle_count
self.last_polls = {}
for poll_name in ['server_wishlist', 'arvados_nodes', 'cloud_nodes']:
poll_actor = locals()[poll_name + '_actor']
self.last_polls = {}
for poll_name in ['server_wishlist', 'arvados_nodes', 'cloud_nodes']:
poll_actor = locals()[poll_name + '_actor']
@@
-173,7
+175,8
@@
class NodeManagerDaemonActor(actor_class):
poll_stale_after=self.poll_stale_after,
node_stale_after=self.node_stale_after,
cloud_client=self._cloud_driver,
poll_stale_after=self.poll_stale_after,
node_stale_after=self.node_stale_after,
cloud_client=self._cloud_driver,
- boot_fail_after=self.boot_fail_after)
+ boot_fail_after=self.boot_fail_after,
+ consecutive_idle_count=self.consecutive_idle_count)
actorTell = actor.tell_proxy()
actorTell.subscribe(self._later.node_can_shutdown)
self._cloud_nodes_actor.subscribe_to(cloud_node.id,
actorTell = actor.tell_proxy()
actorTell.subscribe(self._later.node_can_shutdown)
self._cloud_nodes_actor.subscribe_to(cloud_node.id,
@@
-318,7
+321,7
@@
class NodeManagerDaemonActor(actor_class):
busy_count = counts["busy"]
wishlist_count = self._size_wishlist(size)
busy_count = counts["busy"]
wishlist_count = self._size_wishlist(size)
- self._logger.info("%s: wishlist %i, up %i (booting %i, unpaired %i, idle %i, busy %i), down %i, shutdown %i", size.
name
,
+ self._logger.info("%s: wishlist %i, up %i (booting %i, unpaired %i, idle %i, busy %i), down %i, shutdown %i", size.
id
,
wishlist_count,
up_count,
counts["booting"],
wishlist_count,
up_count,
counts["booting"],
@@
-338,7
+341,7
@@
class NodeManagerDaemonActor(actor_class):
can_boot = int((self.max_total_price - total_price) / size.price)
if can_boot == 0:
self._logger.info("Not booting %s (price %s) because with it would exceed max_total_price of %s (current total_price is %s)",
can_boot = int((self.max_total_price - total_price) / size.price)
if can_boot == 0:
self._logger.info("Not booting %s (price %s) because with it would exceed max_total_price of %s (current total_price is %s)",
- size.
name
, size.price, self.max_total_price, total_price)
+ size.
id
, size.price, self.max_total_price, total_price)
return can_boot
else:
return wanted
return can_boot
else:
return wanted
@@
-394,7
+397,7
@@
class NodeManagerDaemonActor(actor_class):
if not self.cancel_node_shutdown(cloud_size):
arvados_node = self.arvados_nodes.find_stale_node(self.node_stale_after)
self._logger.info("Want %i more %s nodes. Booting a node.",
if not self.cancel_node_shutdown(cloud_size):
arvados_node = self.arvados_nodes.find_stale_node(self.node_stale_after)
self._logger.info("Want %i more %s nodes. Booting a node.",
- nodes_wanted, cloud_size.
name
)
+ nodes_wanted, cloud_size.
id
)
new_setup = self._node_setup.start(
timer_actor=self._timer,
arvados_client=self._new_arvados(),
new_setup = self._node_setup.start(
timer_actor=self._timer,
arvados_client=self._new_arvados(),
@@
-475,7
+478,7
@@
class NodeManagerDaemonActor(actor_class):
for record in self.cloud_nodes.nodes.itervalues():
try:
if (record.shutdown_actor is not None and
for record in self.cloud_nodes.nodes.itervalues():
try:
if (record.shutdown_actor is not None and
- record.size.id == size.id and
+ record.
cloud_node.
size.id == size.id and
record.shutdown_actor.cancel_shutdown("Node size is in wishlist").get(2)):
return True
except (pykka.ActorDeadError, pykka.Timeout) as e:
record.shutdown_actor.cancel_shutdown("Node size is in wishlist").get(2)):
return True
except (pykka.ActorDeadError, pykka.Timeout) as e: