X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/aea5300167770beb3cca6ad90e5ebb04da961416..0eb72b526bf8bbb011551ecf019f604e17a534f1:/services/nodemanager/arvnodeman/baseactor.py diff --git a/services/nodemanager/arvnodeman/baseactor.py b/services/nodemanager/arvnodeman/baseactor.py index 0cf7c9686b..988b83c142 100644 --- a/services/nodemanager/arvnodeman/baseactor.py +++ b/services/nodemanager/arvnodeman/baseactor.py @@ -1,3 +1,7 @@ +# Copyright (C) The Arvados Authors. All rights reserved. +# +# SPDX-License-Identifier: AGPL-3.0 + from __future__ import absolute_import, print_function import errno @@ -98,8 +102,9 @@ class WatchdogActor(pykka.ThreadingActor): self.actor_ref = TellableActorRef(self) self._later = self.actor_ref.tell_proxy() - def kill_self(self, act): + def kill_self(self, e, act): lg = getattr(self, "_logger", logging) + lg.critical("Watchdog exception", exc_info=e) lg.critical("Actor %s watchdog ping time out, killing Node Manager", act) os.kill(os.getpid(), signal.SIGKILL) @@ -107,14 +112,11 @@ class WatchdogActor(pykka.ThreadingActor): self._later.run() def run(self): - for a in self.actors: - try: + a = None + try: + for a in self.actors: a.ping().get(self.timeout) - except pykka.ActorDeadError: - pass - except pykka.Timeout: - self.kill_self(a) - return - - time.sleep(20) - self._later.run() + time.sleep(20) + self._later.run() + except Exception as e: + self.kill_self(e, a)