projects
/
arvados.git
/ blobdiff
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
Merge branch 'master' into 9369-arv-cwl-docs
[arvados.git]
/
services
/
nodemanager
/
arvnodeman
/
baseactor.py
diff --git a/services/nodemanager/arvnodeman/baseactor.py b/services/nodemanager/arvnodeman/baseactor.py
index 0cf7c9686bd0fdba77b5951f90660ba18257192b..68ea97ab75aac1b71aca36427165ed54cef97cc2 100644
(file)
--- a/services/nodemanager/arvnodeman/baseactor.py
+++ b/services/nodemanager/arvnodeman/baseactor.py
@@ -98,8 +98,9 @@ class WatchdogActor(pykka.ThreadingActor):
self.actor_ref = TellableActorRef(self)
self._later = self.actor_ref.tell_proxy()
self.actor_ref = TellableActorRef(self)
self._later = self.actor_ref.tell_proxy()
- def kill_self(self, act):
+ def kill_self(self, e, act):
lg = getattr(self, "_logger", logging)
lg = getattr(self, "_logger", logging)
+ lg.critical("Watchdog exception", exc_info=e)
lg.critical("Actor %s watchdog ping time out, killing Node Manager", act)
os.kill(os.getpid(), signal.SIGKILL)
lg.critical("Actor %s watchdog ping time out, killing Node Manager", act)
os.kill(os.getpid(), signal.SIGKILL)
@@ -107,14 +108,11 @@ class WatchdogActor(pykka.ThreadingActor):
self._later.run()
def run(self):
self._later.run()
def run(self):
- for a in self.actors:
- try:
+ a = None
+ try:
+ for a in self.actors:
a.ping().get(self.timeout)
a.ping().get(self.timeout)
- except pykka.ActorDeadError:
- pass
- except pykka.Timeout:
- self.kill_self(a)
- return
-
- time.sleep(20)
- self._later.run()
+ time.sleep(20)
+ self._later.run()
+ except Exception as e:
+ self.kill_self(e, a)