From: Peter Amstutz Date: Mon, 16 Jul 2018 20:05:02 +0000 (-0400) Subject: 13804: Node must report as "idle" two consecutive times X-Git-Tag: 1.2.0~72^2~5 X-Git-Url: https://git.arvados.org/arvados.git/commitdiff_plain/265fe64e5b7a931736f156c3cb446fbbbc27f018 13804: Node must report as "idle" two consecutive times Gives idle nodes 1 poll period to be allocated by slurm before becoming candidates for shutdown. Arvados-DCO-1.1-Signed-off-by: Peter Amstutz --- diff --git a/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py b/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py index 9106ea67cc..d9b475b908 100644 --- a/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py +++ b/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py @@ -350,6 +350,7 @@ class ComputeNodeMonitorActor(config.actor_class): self.boot_fail_after = boot_fail_after self.subscribers = set() self.arvados_node = None + self.consecutive_idle = 0 self._later.update_arvados_node(arvados_node) self.last_shutdown_opening = None self._later.consider_shutdown() @@ -451,8 +452,14 @@ class ComputeNodeMonitorActor(config.actor_class): else: boot_grace = "boot exceeded" - # API server side not implemented yet. - idle_grace = 'idle exceeded' + if crunch_worker_state == "idle": + # Must report as "idle" at least two consecutive times + if self.consecutive_idle < 2: + idle_grace = 'idle wait' + else: + idle_grace = 'idle exceeded' + else: + idle_grace = 'not idle' node_state = (crunch_worker_state, window, boot_grace, idle_grace) t = transitions[node_state] @@ -512,4 +519,8 @@ class ComputeNodeMonitorActor(config.actor_class): if arvados_node is not None: self.arvados_node = arvados_node self._update.sync_node(self.cloud_node, self.arvados_node) + if self.arvados_node['crunch_worker_state'] == "idle": + self.consecutive_idle += 1 + else: + self.consecutive_idle = 0 self._later.consider_shutdown()