X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/7e814093e36dd6fc96353584c2c0797f4654f317..44823f311ea0328c3a2aeefb7208a74031436d52:/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py diff --git a/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py b/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py index 310e7887d5..70817627df 100644 --- a/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py +++ b/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py @@ -253,13 +253,14 @@ class ComputeNodeMonitorActor(config.actor_class): for shutdown. """ def __init__(self, cloud_node, cloud_node_start_time, shutdown_timer, - timer_actor, update_actor, arvados_node=None, + cloud_fqdn_func, timer_actor, update_actor, arvados_node=None, poll_stale_after=600, node_stale_after=3600): super(ComputeNodeMonitorActor, self).__init__() self._later = self.actor_ref.proxy() self._logger = logging.getLogger('arvnodeman.computenode') self._last_log = None self._shutdowns = shutdown_timer + self._cloud_node_fqdn = cloud_fqdn_func self._timer = timer_actor self._update = update_actor self.cloud_node = cloud_node @@ -339,9 +340,17 @@ class ComputeNodeMonitorActor(config.actor_class): self._later.consider_shutdown() def update_arvados_node(self, arvados_node): + # If the cloud node's FQDN doesn't match what's in the Arvados node + # record, make them match. + # This method is a little unusual in the way it just fires off the + # request without checking the result or retrying errors. That's + # because this update happens every time we reload the Arvados node + # list: if a previous sync attempt failed, we'll see that the names + # are out of sync and just try again. ComputeNodeUpdateActor has + # the logic to throttle those effective retries when there's trouble. if arvados_node is not None: self.arvados_node = arvados_node - new_hostname = arvados_node_fqdn(self.arvados_node) - if new_hostname != self.cloud_node.name: + if (self._cloud_node_fqdn(self.cloud_node) != + arvados_node_fqdn(self.arvados_node)): self._update.sync_node(self.cloud_node, self.arvados_node) self._later.consider_shutdown()