projects
/
arvados.git
/ blobdiff
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
7713: Node Manager blackholes broken nodes that can't shut down.
[arvados.git]
/
services
/
nodemanager
/
arvnodeman
/
computenode
/
dispatch
/
slurm.py
diff --git a/services/nodemanager/arvnodeman/computenode/dispatch/slurm.py b/services/nodemanager/arvnodeman/computenode/dispatch/slurm.py
index ec5014e9f9cf1e8848353cf3c755e22875227850..919b57f42c8973bab91de742d1fee48598296f35 100644
(file)
--- a/services/nodemanager/arvnodeman/computenode/dispatch/slurm.py
+++ b/services/nodemanager/arvnodeman/computenode/dispatch/slurm.py
@@ -43,7 +43,7 @@ class ComputeNodeShutdownActor(ShutdownActorBase):
# error are still being investigated.
@ShutdownActorBase._retry((subprocess.CalledProcessError, OSError))
# error are still being investigated.
@ShutdownActorBase._retry((subprocess.CalledProcessError, OSError))
- def cancel_shutdown(self):
+ def cancel_shutdown(self, reason):
if self._nodename:
if self._get_slurm_state() in self.SLURM_DRAIN_STATES:
# Resume from "drng" or "drain"
if self._nodename:
if self._get_slurm_state() in self.SLURM_DRAIN_STATES:
# Resume from "drng" or "drain"
@@ -52,7 +52,7 @@ class ComputeNodeShutdownActor(ShutdownActorBase):
# Node is in a state such as 'idle' or 'alloc' so don't
# try to resume it because that will just raise an error.
pass
# Node is in a state such as 'idle' or 'alloc' so don't
# try to resume it because that will just raise an error.
pass
- return super(ComputeNodeShutdownActor, self).cancel_shutdown()
+ return super(ComputeNodeShutdownActor, self).cancel_shutdown(reason)
@ShutdownActorBase._retry((subprocess.CalledProcessError, OSError))
@ShutdownActorBase._stop_if_window_closed
@ShutdownActorBase._retry((subprocess.CalledProcessError, OSError))
@ShutdownActorBase._stop_if_window_closed