def __init__(self, server_wishlist_actor, arvados_nodes_actor,
cloud_nodes_actor, cloud_update_actor, timer_actor,
arvados_factory, cloud_factory,
- shutdown_windows, max_nodes,
+ shutdown_windows, min_nodes, max_nodes,
poll_stale_after=600, node_stale_after=7200,
node_setup_class=cnode.ComputeNodeSetupActor,
node_shutdown_class=cnode.ComputeNodeShutdownActor,
self._logger = logging.getLogger('arvnodeman.daemon')
self._later = self.actor_ref.proxy()
self.shutdown_windows = shutdown_windows
+ self.min_nodes = min_nodes
self.max_nodes = max_nodes
self.poll_stale_after = poll_stale_after
self.node_stale_after = node_stale_after
self.cloud_nodes = _CloudNodeTracker()
self.arvados_nodes = _ArvadosNodeTracker()
self.booting = {} # Actor IDs to ComputeNodeSetupActors
+ self.booted = {} # Cloud node IDs to _ComputeNodeRecords
self.shutdowns = {} # Cloud node IDs to ComputeNodeShutdownActors
self._logger.debug("Daemon initialized")
self._cloud_nodes_actor.subscribe_to(cloud_node.id,
actor.update_cloud_node)
record = _ComputeNodeRecord(actor, cloud_node)
- self.cloud_nodes.add(record)
return record
# Handle a fresh cloud node listing: record the poll time, register every
# node the tracker reports as new, then clean up after orphaned records.
def update_cloud_nodes(self, nodelist):
self._update_poll_time('cloud_nodes')
for key, node in self.cloud_nodes.update_from(nodelist):
self._logger.info("Registering new cloud node %s", key)
- record = self._new_node(node)
# If a record for this key is already held in self.booted, move it out of
# that dict and reuse it; otherwise build a new record for the node.
+ if key in self.booted:
+ record = self.booted.pop(key)
+ else:
+ record = self._new_node(node)
# The record is added to the tracker here for both branches.
+ self.cloud_nodes.add(record)
# Offer each unpaired Arvados node to the record's actor and pair with the
# first one it accepts (.get() waits for the actor's answer).
for arv_rec in self.arvados_nodes.unpaired():
if record.actor.offer_arvados_pair(arv_rec.arvados_node).get():
self._pair_nodes(record, arv_rec.arvados_node)
break
# Orphans are presumably records whose node is missing from nodelist --
# TODO confirm against _CloudNodeTracker. Their monitor actor is stopped.
for key, record in self.cloud_nodes.orphans.iteritems():
record.actor.stop()
- if key in self.shutdowns:
- self.shutdowns.pop(key).stop()
# After this change the shutdown actor itself is no longer stopped here;
# the entry is only discarded, and the None default tolerates a missing key.
+ self.shutdowns.pop(key, None)
def update_arvados_nodes(self, nodelist):
self._update_poll_time('arvados_nodes')
self._pair_nodes(cloud_rec, arv_node)
break
# _node_count is renamed to _nodes_up. It totals the sizes of
# self.cloud_nodes, self.booted and self.booting, then subtracts the number
# of entries in self.shutdowns. The new version adds self.booted to the sum.
- def _node_count(self):
- up = sum(len(nodelist) for nodelist in [self.cloud_nodes, self.booting])
+ def _nodes_up(self):
+ up = sum(len(nodelist) for nodelist in
+ [self.cloud_nodes, self.booted, self.booting])
return up - len(self.shutdowns)
# Count the tracked cloud nodes whose actor answers exactly False to
# in_state('idle'). pykka.get_all() collects the results of all the futures.
# The `is False` identity test means any other answer (for example None) is
# not counted as busy.
# NOTE(review): presumably in_state returns None when the state is unknown;
# confirm against the monitor actor.
+ def _nodes_busy(self):
+ return sum(1 for idle in
+ pykka.get_all(rec.actor.in_state('idle') for rec in
+ self.cloud_nodes.nodes.itervalues())
+ if idle is False)
+
# Return how many more nodes are wanted: the wishlist length plus the busy
# node count, capped at self.max_nodes, minus the number of nodes already up.
# The result can be zero or negative.
# NOTE(review): self.min_nodes is not consulted here, only in _nodes_excess;
# confirm whether the daemon is expected to boot up to min_nodes.
def _nodes_wanted(self):
- return len(self.last_wishlist) - self._node_count()
+ return min(len(self.last_wishlist) + self._nodes_busy(),
+ self.max_nodes) - self._nodes_up()
# Return how many nodes are up beyond what is needed. After this change it is
# no longer the negation of _nodes_wanted: the needed count is busy nodes plus
# the wishlist length, raised to at least self.min_nodes, and that is
# subtracted from the number of nodes up.
def _nodes_excess(self):
- return -self._nodes_wanted()
+ needed_nodes = self._nodes_busy() + len(self.last_wishlist)
+ return (self._nodes_up() - max(self.min_nodes, needed_nodes))
def update_server_wishlist(self, wishlist):
self._update_poll_time('server_wishlist')
- self.last_wishlist = wishlist[:self.max_nodes]
+ self.last_wishlist = wishlist
nodes_wanted = self._nodes_wanted()
if nodes_wanted > 0:
self._later.start_node()
record = self.cloud_nodes.get(cloud_node.id)
if record is None:
record = self._new_node(cloud_node)
+ self.booted[cloud_node.id] = record
self._pair_nodes(record, arvados_node)
@_check_poll_freshness
cloud_client=self._new_cloud(),
cloud_node=cloud_node).proxy()
self.shutdowns[cloud_node.id] = shutdown
+ shutdown.subscribe(self._later.node_finished_shutdown)
+
# Handle a finished shutdown actor: read the cloud node id from the actor,
# stop the actor, stop and drop any record still held in self.booted for that
# id, and remove the id from self.shutdowns.
# NOTE(review): indentation is lost in this view, so it is unclear whether
# the final `del` sits inside the `if`. If it is unconditional, it raises
# KeyError when the entry is already gone (update_cloud_nodes discards
# entries with self.shutdowns.pop(key, None)) -- confirm.
+ def node_finished_shutdown(self, shutdown_actor):
# .get() waits for the actor's cloud_node attribute before .id is read.
+ cloud_node_id = shutdown_actor.cloud_node.get().id
+ shutdown_actor.stop()
+ if cloud_node_id in self.booted:
+ self.booted.pop(cloud_node_id).actor.stop()
+ del self.shutdowns[cloud_node_id]
def shutdown(self):
self._logger.info("Shutting down after signal.")