item_key = staticmethod(lambda arvados_node: arvados_node['uuid'])
def find_stale_node(self, stale_time):
- for record in self.nodes.itervalues():
+ # Try to select a stale node record that have an assigned slot first
+ for record in sorted(self.nodes.itervalues(),
+ key=lambda r: r.arvados_node['slot_number'],
+ reverse=True):
node = record.arvados_node
if (not cnode.timestamp_fresh(cnode.arvados_node_mtime(node),
stale_time) and
want_sizes=[testutil.MockSize(1)])
self.busywait(lambda: not self.node_setup.start.called)
+ def test_select_stale_node_records_with_slot_numbers_first(self):
+ """
+ Stale node records with slot_number assigned can exist when
+ clean_arvados_node() isn't executed after a node shutdown, for
+ various reasons.
+ NodeManagerDaemonActor should use these stale node records first, so
+ that they don't accumulate unused, reducing the slots available.
+ """
+ size = testutil.MockSize(1)
+ a_long_time_ago = '1970-01-01T01:02:03.04050607Z'
+ arvados_nodes = []
+ for n in range(9):
+ # Add several stale node records without slot_number assigned
+ arvados_nodes.append(
+ testutil.arvados_node_mock(
+ n+1,
+ slot_number=None,
+ modified_at=a_long_time_ago))
+ # Add one record with stale_node assigned, it should be the
+ # first one selected
+ arv_node = testutil.arvados_node_mock(
+ 123,
+ modified_at=a_long_time_ago)
+ arvados_nodes.append(arv_node)
+ cloud_node = testutil.cloud_node_mock(125, size=size)
+ self.make_daemon(cloud_nodes=[cloud_node],
+ arvados_nodes=arvados_nodes)
+ arvados_nodes_tracker = self.daemon.arvados_nodes.get()
+ # Here, find_stale_node() should return the node record with
+ # the slot_number assigned.
+ self.assertEqual(arv_node,
+ arvados_nodes_tracker.find_stale_node(3601))
+
def test_dont_count_missing_as_busy(self):
size = testutil.MockSize(1)
self.make_daemon(cloud_nodes=[testutil.cloud_node_mock(1, size=size),