- if not self._shutdowns.window_open():
- return "shutdown window is not open."
- if self.arvados_node is None:
- # Node is unpaired.
- # If it hasn't pinged Arvados after boot_fail seconds, shut it down
- if timestamp_fresh(self.cloud_node_start_time, self.boot_fail_after):
- return "node is still booting, will be considered a failed boot at %s" % time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(self.cloud_node_start_time + self.boot_fail_after))
- else:
- return True
- missing = arvados_node_missing(self.arvados_node, self.node_stale_after)
- if missing and self._cloud.broken(self.cloud_node):
- # Node is paired, but Arvados says it is missing and the cloud says the node
- # is in an error state, so shut it down.
- return True
- if missing is None and self._cloud.broken(self.cloud_node):
- self._logger.info(
- "Cloud node considered 'broken' but paired node %s last_ping_at is None, " +
- "cannot check node_stale_after (node may be shut down and we just haven't gotten the message yet).",
- self.arvados_node['uuid'])
- if self.in_state('idle'):
- return True
+ Returns a tuple of (boolean, string) where the first value is whether
+ the node is a candidate for shutdown, and the second value is the
+ reason for the decision.
+ """
+
+ # Collect states and then consult the state transition table to decide
+ # whether we should shut down. Possible states are:
+ # crunch_worker_state = ['unpaired', 'busy', 'idle', 'down']
+ # window = ["open", "closed"]
+ # boot_grace = ["boot wait", "boot exceeded"]
+ # idle_grace = ["not idle", "idle wait", "idle exceeded"]
+
+ if self.arvados_node and not timestamp_fresh(arvados_node_mtime(self.arvados_node), self.node_stale_after):
+ return (False, "node state is stale")
+
+ crunch_worker_state = self.get_state()
+
+ window = "open" if self._shutdowns.window_open() else "closed"
+
+ if timestamp_fresh(self.cloud_node_start_time, self.boot_fail_after):
+ boot_grace = "boot wait"
+ else:
+ boot_grace = "boot exceeded"
+
+ # API server side not implemented yet.
+ idle_grace = 'idle exceeded'
+
+ node_state = (crunch_worker_state, window, boot_grace, idle_grace)
+ t = transitions[node_state]
+ if t is not None:
+ # yes, shutdown eligible
+ return (True, "node state is %s" % (node_state,))