Merge branch 'master' into 9369-arv-cwl-docs
[arvados.git] / services / nodemanager / arvnodeman / baseactor.py
index 0cf7c9686bd0fdba77b5951f90660ba18257192b..68ea97ab75aac1b71aca36427165ed54cef97cc2 100644 (file)
@@ -98,8 +98,9 @@ class WatchdogActor(pykka.ThreadingActor):
          self.actor_ref = TellableActorRef(self)
          self._later = self.actor_ref.tell_proxy()
 
-    def kill_self(self, act):
+    def kill_self(self, e, act):
         lg = getattr(self, "_logger", logging)
+        lg.critical("Watchdog exception", exc_info=e)
         lg.critical("Actor %s watchdog ping time out, killing Node Manager", act)
         os.kill(os.getpid(), signal.SIGKILL)
 
@@ -107,14 +108,11 @@ class WatchdogActor(pykka.ThreadingActor):
         self._later.run()
 
     def run(self):
-        for a in self.actors:
-            try:
+        a = None
+        try:
+            for a in self.actors:
                 a.ping().get(self.timeout)
-            except pykka.ActorDeadError:
-                pass
-            except pykka.Timeout:
-                self.kill_self(a)
-                return
-
-        time.sleep(20)
-        self._later.run()
+            time.sleep(20)
+            self._later.run()
+        except Exception as e:
+            self.kill_self(e, a)