Try to make logging identify the actor consistently
author    Peter Amstutz <peter.amstutz@curoverse.com>
Wed, 3 Feb 2016 22:51:46 +0000 (17:51 -0500)
committer Peter Amstutz <peter.amstutz@curoverse.com>
Wed, 3 Feb 2016 22:51:46 +0000 (17:51 -0500)
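
Instead of fixed module-level logger names such as 'arvnodeman.nodeup', each
actor now derives its logger name from its class name plus the unique tail of
its Pykka actor URN (and, where it has one, the cloud node id or name), so
every log line identifies the actor instance that emitted it.  The per-message
log_prefix and "Node %s ..." decorations become redundant and are dropped.

A minimal sketch of the naming pattern (illustrative only, not part of this
commit; assumes a plain pykka.ThreadingActor whose actor_urn has the usual
"urn:uuid:<uuid>" form):

    import logging
    import pykka

    class ExampleActor(pykka.ThreadingActor):
        def on_start(self):
            # Build a per-instance logger once the actor is running,
            # mirroring the on_start()/_set_logger() hooks added below.
            self._logger = logging.getLogger(
                "%s.%s" % (self.__class__.__name__, self.actor_urn[9:]))
            self._logger.info("ready")  # logger name identifies this actor
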
services/nodemanager/arvnodeman/clientactor.py
services/nodemanager/arvnodeman/computenode/dispatch/__init__.py
services/nodemanager/arvnodeman/computenode/dispatch/slurm.py
services/nodemanager/arvnodeman/daemon.py
services/nodemanager/arvnodeman/jobqueue.py
services/nodemanager/arvnodeman/nodelist.py

index 6319f4bbfc5cece52832842cfda05d9ae9253005..5e5354a7da427ceb9cfab38b5d65b7f6c120bff5 100644 (file)
@@ -38,10 +38,8 @@ class RemotePollLoopActor(actor_class):
         super(RemotePollLoopActor, self).__init__()
         self._client = client
         self._timer = timer_actor
-        self._logger = logging.getLogger(self.LOGGER_NAME)
         self._later = self.actor_ref.proxy()
         self._polling_started = False
-        self.log_prefix = "{} (at {})".format(self.__class__.__name__, id(self))
         self.min_poll_wait = poll_wait
         self.max_poll_wait = max_poll_wait
         self.poll_wait = self.min_poll_wait
@@ -50,6 +48,9 @@ class RemotePollLoopActor(actor_class):
         if hasattr(self, '_item_key'):
             self.subscribe_to = self._subscribe_to
 
+    def on_start(self):
+        self._logger = logging.getLogger("%s.%s" % (self.__class__.__name__, id(self.actor_urn[9:])))
+
     def _start_polling(self):
         if not self._polling_started:
             self._polling_started = True
@@ -71,8 +72,7 @@ class RemotePollLoopActor(actor_class):
         raise NotImplementedError("subclasses must implement request method")
 
     def _got_response(self, response):
-        self._logger.debug("%s got response with %d items",
-                           self.log_prefix, len(response))
+        self._logger.debug("got response with %d items", len(response))
         self.poll_wait = self.min_poll_wait
         _notify_subscribers(response, self.all_subscribers)
         if hasattr(self, '_item_key'):
@@ -89,7 +89,7 @@ class RemotePollLoopActor(actor_class):
         return False
 
     def poll(self, scheduled_start=None):
-        self._logger.debug("%s sending poll", self.log_prefix)
+        self._logger.debug("sending request")
         start_time = time.time()
         if scheduled_start is None:
             scheduled_start = start_time
index 8c983c1ca4042726881ce0e9019da0e9c2df9b60..45468c31d79c54f34a5c4c5c60c94c1ba52d89b2 100644 (file)
@@ -21,18 +21,24 @@ class ComputeNodeStateChangeBase(config.actor_class, RetryMixin):
     This base class takes care of retrying changes and notifying
     subscribers when the change is finished.
     """
-    def __init__(self, logger_name, cloud_client, arvados_client, timer_actor,
+    def __init__(self, cloud_client, arvados_client, timer_actor,
                  retry_wait, max_retry_wait):
         super(ComputeNodeStateChangeBase, self).__init__()
         RetryMixin.__init__(self,
                             retry_wait,
                             max_retry_wait,
-                            logging.getLogger(logger_name),
+                            None,
                             cloud_client,
                             timer_actor)
         self._later = self.actor_ref.proxy()
         self._arvados = arvados_client
         self.subscribers = set()
+
+    def _set_logger(self):
+        self._logger = logging.getLogger("%s.%s" % (self.__class__.__name__, self.actor_urn[9:]))
+
+    def on_start(self):
+        self._set_logger()
 
     def _finished(self):
         _notify_subscribers(self._later, self.subscribers)
@@ -74,7 +80,7 @@ class ComputeNodeSetupActor(ComputeNodeStateChangeBase):
                  cloud_size, arvados_node=None,
                  retry_wait=1, max_retry_wait=180):
         super(ComputeNodeSetupActor, self).__init__(
-            'arvnodeman.nodeup', cloud_client, arvados_client, timer_actor,
+            cloud_client, arvados_client, timer_actor,
             retry_wait, max_retry_wait)
         self.cloud_size = cloud_size
         self.arvados_node = None
@@ -97,7 +103,7 @@ class ComputeNodeSetupActor(ComputeNodeStateChangeBase):
 
     @RetryMixin._retry()
     def create_cloud_node(self):
-        self._logger.info("Creating cloud node with size %s.",
+        self._logger.info("Creating cloud node of size %s.",
                           self.cloud_size.name)
         self.cloud_node = self._cloud.create_node(self.cloud_size,
                                                   self.arvados_node)
@@ -160,7 +166,7 @@ class ComputeNodeShutdownActor(ComputeNodeStateChangeBase):
         # eligible.  Normal shutdowns based on job demand should be
         # cancellable; shutdowns based on node misbehavior should not.
         super(ComputeNodeShutdownActor, self).__init__(
-            'arvnodeman.nodedown', cloud_client, arvados_client, timer_actor,
+            cloud_client, arvados_client, timer_actor,
             retry_wait, max_retry_wait)
         self._monitor = node_monitor.proxy()
         self.cloud_node = self._monitor.cloud_node.get()
@@ -168,7 +174,11 @@ class ComputeNodeShutdownActor(ComputeNodeStateChangeBase):
         self.cancel_reason = None
         self.success = None
 
+    def _set_logger(self):
+        self._logger = logging.getLogger("%s.%s.%s" % (self.__class__.__name__, self.actor_urn[9:], self.cloud_node.id))
+
     def on_start(self):
+        super(ComputeNodeShutdownActor, self).on_start()
         self._later.shutdown_node()
 
     def _arvados_node(self):
@@ -181,7 +191,7 @@ class ComputeNodeShutdownActor(ComputeNodeStateChangeBase):
 
     def cancel_shutdown(self, reason):
         self.cancel_reason = reason
-        self._logger.info("Cloud node %s shutdown cancelled: %s.",
+        self._logger.info("Shutdown cancelled: %s.",
                           self.cloud_node.id, reason)
         self._finished(success_flag=False)
 
@@ -199,13 +209,14 @@ class ComputeNodeShutdownActor(ComputeNodeStateChangeBase):
     @_stop_if_window_closed
     @RetryMixin._retry()
     def shutdown_node(self):
+        self._logger.info("Starting shutdown", self.cloud_node.id)
         if not self._cloud.destroy_node(self.cloud_node):
             if self._cloud.broken(self.cloud_node):
                 self._later.cancel_shutdown(self.NODE_BROKEN)
             else:
                 # Force a retry.
                 raise cloud_types.LibcloudError("destroy_node failed")
-        self._logger.info("Cloud node %s shut down.", self.cloud_node.id)
+        self._logger.info("Shutdown success", self.cloud_node.id)
         arv_node = self._arvados_node()
         if arv_node is None:
             self._finished(success_flag=True)
@@ -281,7 +292,6 @@ class ComputeNodeMonitorActor(config.actor_class):
     ):
         super(ComputeNodeMonitorActor, self).__init__()
         self._later = self.actor_ref.proxy()
-        self._logger = logging.getLogger('arvnodeman.computenode')
         self._last_log = None
         self._shutdowns = shutdown_timer
         self._cloud_node_fqdn = cloud_fqdn_func
@@ -299,6 +309,12 @@ class ComputeNodeMonitorActor(config.actor_class):
         self.last_shutdown_opening = None
         self._later.consider_shutdown()
 
+    def _set_logger(self):
+        self._logger = logging.getLogger("%s.%s.%s" % (self.__class__.__name__, self.actor_urn[9:], self.cloud_node.name))
+
+    def on_start(self):
+        self._set_logger()
+
     def subscribe(self, subscriber):
         self.subscribers.add(subscriber)
 
@@ -346,13 +362,12 @@
     def consider_shutdown(self):
         next_opening = self._shutdowns.next_opening()
         if self.shutdown_eligible():
-            self._debug("Node %s suggesting shutdown.", self.cloud_node.id)
+            self._debug("Suggesting shutdown.", self.cloud_node.id)
             _notify_subscribers(self._later, self.subscribers)
         elif self._shutdowns.window_open():
-            self._debug("Node %s shutdown window open but node busy.",
+            self._debug("Shutdown window open but node busy.",
                         self.cloud_node.id)
         elif self.last_shutdown_opening != next_opening:
-            self._debug("Node %s shutdown window closed.  Next at %s.",
+            self._debug("Shutdown window closed.  Next at %s.",
                         self.cloud_node.id, time.ctime(next_opening))
             self._timer.schedule(next_opening, self._later.consider_shutdown)
             self.last_shutdown_opening = next_opening
index 43f61c7c2c38d8ca95aa7614a8782a77672a816b..4d70436801564e9a35675e95c18f33fddc125806 100644 (file)
@@ -22,6 +22,7 @@ class ComputeNodeShutdownActor(ShutdownActorBase):
             self._nodename = None
             return super(ComputeNodeShutdownActor, self).on_start()
         else:
+            self._set_logger()
             self._nodename = arv_node['hostname']
             self._logger.info("Draining SLURM node %s", self._nodename)
             self._later.issue_slurm_drain()
index 243d3bfaa4cd13fbf9a540affc20ae6e562979a9..42ee4db1871317088dbf2eeedf5b74b2174dcb2f 100644 (file)
@@ -121,7 +121,6 @@ class NodeManagerDaemonActor(actor_class):
         self._new_arvados = arvados_factory
         self._new_cloud = cloud_factory
         self._cloud_driver = self._new_cloud()
-        self._logger = logging.getLogger('arvnodeman.daemon')
         self._later = self.actor_ref.proxy()
         self.shutdown_windows = shutdown_windows
         self.server_calculator = server_calculator
@@ -145,6 +144,9 @@ class NodeManagerDaemonActor(actor_class):
         self.shutdowns = {}     # Cloud node IDs to ComputeNodeShutdownActors
         self._logger.debug("Daemon initialized")
 
+    def on_start(self):
+        self._logger = logging.getLogger("%s.%s" % (self.__class__.__name__, id(self.actor_urn[9:])))
+
     def _update_poll_time(self, poll_key):
         self.last_polls[poll_key] = time.time()
 
index e0f0a5b1ec26045745fe479f76b8614eb700875d..87cf738311730feed045d23c63af6481c0731e06 100644 (file)
@@ -102,7 +102,6 @@ class JobQueueMonitorActor(clientactor.RemotePollLoopActor):
     """
 
     CLIENT_ERRORS = ARVADOS_ERRORS
-    LOGGER_NAME = 'arvnodeman.jobqueue'
 
     def __init__(self, client, timer_actor, server_calc, *args, **kwargs):
         super(JobQueueMonitorActor, self).__init__(
@@ -114,6 +113,6 @@ class JobQueueMonitorActor(clientactor.RemotePollLoopActor):
 
     def _got_response(self, queue):
         server_list = self._calculator.servers_for_queue(queue)
-        self._logger.debug("Sending server wishlist: %s",
+        self._logger.debug("Calculated wishlist: %s",
                            ', '.join(s.name for s in server_list) or "(empty)")
         return super(JobQueueMonitorActor, self)._got_response(server_list)
index 83dd93f077bfb504a8f41b6f0addd93c717ac7da..26f49a34eca6937260d2721083971c1cb5a7fc84 100644 (file)
@@ -11,7 +11,6 @@ class ArvadosNodeListMonitorActor(clientactor.RemotePollLoopActor):
     This actor regularly polls the list of Arvados node records, and
     sends it to subscribers.
     """
-    LOGGER_NAME = 'arvnodeman.arvados_nodes'
 
     def is_common_error(self, exception):
         return isinstance(exception, config.ARVADOS_ERRORS)
@@ -29,7 +28,6 @@ class CloudNodeListMonitorActor(clientactor.RemotePollLoopActor):
     This actor regularly polls the cloud to get a list of running compute
     nodes, and sends it to subscribers.
     """
-    LOGGER_NAME = 'arvnodeman.cloud_nodes'
 
     def is_common_error(self, exception):
         return self._client.is_cloud_exception(exception)