Merge branch '12213-expression-keepref' closes #12213
authorPeter Amstutz <peter.amstutz@curoverse.com>
Tue, 3 Oct 2017 13:25:10 +0000 (09:25 -0400)
committerPeter Amstutz <peter.amstutz@curoverse.com>
Tue, 3 Oct 2017 13:25:12 +0000 (09:25 -0400)
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz@veritasgenetics.com>

services/nodemanager/arvnodeman/daemon.py
services/nodemanager/tests/test_daemon.py

index d8087a1dc1cf0d7472acc1c19e429dd36c72dacb..ca3029d9e1bc3c376b119cca367b3767f3a8bb45 100644 (file)
@@ -78,7 +78,10 @@ class _ArvadosNodeTracker(_BaseNodeTracker):
     item_key = staticmethod(lambda arvados_node: arvados_node['uuid'])
 
     def find_stale_node(self, stale_time):
-        for record in self.nodes.itervalues():
+        # Try to select a stale node record that have an assigned slot first
+        for record in sorted(self.nodes.itervalues(),
+                             key=lambda r: r.arvados_node['slot_number'],
+                             reverse=True):
             node = record.arvados_node
             if (not cnode.timestamp_fresh(cnode.arvados_node_mtime(node),
                                           stale_time) and
index d6820803a6524c69a1de608aa2b5bcc8b02848b7..ebe7408e705b02e2d55b2d757ef5367953f23242 100644 (file)
@@ -193,6 +193,39 @@ class NodeManagerDaemonActorTestCase(testutil.ActorTestMixin,
                          want_sizes=[testutil.MockSize(1)])
         self.busywait(lambda: not self.node_setup.start.called)
 
+    def test_select_stale_node_records_with_slot_numbers_first(self):
+        """
+        Stale node records with slot_number assigned can exist when
+        clean_arvados_node() isn't executed after a node shutdown, for
+        various reasons.
+        NodeManagerDaemonActor should use these stale node records first, so
+        that they don't accumulate unused, reducing the slots available.
+        """
+        size = testutil.MockSize(1)
+        a_long_time_ago = '1970-01-01T01:02:03.04050607Z'
+        arvados_nodes = []
+        for n in range(9):
+            # Add several stale node records without slot_number assigned
+            arvados_nodes.append(
+                testutil.arvados_node_mock(
+                    n+1,
+                    slot_number=None,
+                    modified_at=a_long_time_ago))
+        # Add one record with stale_node assigned, it should be the
+        # first one selected
+        arv_node = testutil.arvados_node_mock(
+            123,
+            modified_at=a_long_time_ago)
+        arvados_nodes.append(arv_node)
+        cloud_node = testutil.cloud_node_mock(125, size=size)
+        self.make_daemon(cloud_nodes=[cloud_node],
+                         arvados_nodes=arvados_nodes)
+        arvados_nodes_tracker = self.daemon.arvados_nodes.get()
+        # Here, find_stale_node() should return the node record with
+        # the slot_number assigned.
+        self.assertEqual(arv_node,
+                         arvados_nodes_tracker.find_stale_node(3601))
+
     def test_dont_count_missing_as_busy(self):
         size = testutil.MockSize(1)
         self.make_daemon(cloud_nodes=[testutil.cloud_node_mock(1, size=size),