class ComputeNodeMonitorActor(SlurmMixin, MonitorActorBase):
def shutdown_eligible(self):
    """Decide whether this compute node may be shut down.

    When an Arvados node record is attached, the node's SLURM state is
    looked up by its hostname.  A node whose state is one of
    ``SLURM_END_STATES`` is immediately eligible, unless that state is
    also one of ``SLURM_DRAIN_STATES``.  Every other case defers to the
    parent class implementation.

    Returns:
        ``True`` for a non-draining end state; otherwise whatever the
        parent class's ``shutdown_eligible()`` returns.
    """
    if self.arvados_node is not None:
        state = self._get_slurm_state(self.arvados_node['hostname'])
        # Automatically eligible for shutdown if it's down or failed, but
        # not drain, to avoid a race condition with resume_node().
        if (state in self.SLURM_END_STATES and
                state not in self.SLURM_DRAIN_STATES):
            return True
    return super(ComputeNodeMonitorActor, self).shutdown_eligible()
def resume_node(self):
try: