X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/2ec2c8ed2c5db174f3a83dc257fa4c4b3190f47b..de985c815dd4a81dcb5b03475452e2c93d3dadb4:/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py diff --git a/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py b/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py index a239f1f252..42960efeba 100644 --- a/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py +++ b/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py @@ -46,7 +46,7 @@ class ComputeNodeStateChangeBase(config.actor_class, RetryMixin): def subscribe(self, subscriber): if self.subscribers is None: try: - subscriber(self._later) + subscriber(self.actor_ref.proxy()) except pykka.ActorDeadError: pass else: @@ -256,12 +256,6 @@ class ComputeNodeUpdateActor(config.actor_class): this to perform maintenance tasks on themselves. Having a dedicated actor for this gives us the opportunity to control the flow of requests; e.g., by backing off when errors occur. - - This actor is most like a "traditional" Pykka actor: there's no - subscribing, but instead methods return real driver results. If - you're interested in those results, you should get them from the - Future that the proxy method returns. Be prepared to handle exceptions - from the cloud driver when you do. """ def __init__(self, cloud_factory, max_retry_wait=180): super(ComputeNodeUpdateActor, self).__init__() @@ -270,6 +264,12 @@ class ComputeNodeUpdateActor(config.actor_class): self.error_streak = 0 self.next_request_time = time.time() + def _set_logger(self): + self._logger = logging.getLogger("%s.%s" % (self.__class__.__name__, self.actor_urn[33:])) + + def on_start(self): + self._set_logger() + def _throttle_errors(orig_func): @functools.wraps(orig_func) def throttle_wrapper(self, *args, **kwargs): @@ -283,7 +283,9 @@ class ComputeNodeUpdateActor(config.actor_class): self.error_streak += 1 self.next_request_time += min(2 ** self.error_streak, self.max_retry_wait) - raise + self._logger.error( + "Caught unknown error (no retry): %s", + error, exc_info=error) else: self.error_streak = 0 return result @@ -406,7 +408,7 @@ class ComputeNodeMonitorActor(config.actor_class): first_ping_s = arvados_node.get('first_ping_at') if (self.arvados_node is not None) or (not first_ping_s): return None - elif ((arvados_node['ip_address'] in self.cloud_node.private_ips) and + elif ((arvados_node['info'].get('ec2_instance_id') == self._cloud.node_id(self.cloud_node)) and (arvados_timestamp(first_ping_s) >= self.cloud_node_start_time)): self._later.update_arvados_node(arvados_node) return self.cloud_node.id