10847: Daemon shutdown now stops most actors, only waits for setup actors.
author Peter Amstutz <peter.amstutz@curoverse.com>
Wed, 31 May 2017 19:37:15 +0000 (15:37 -0400)
committer Peter Amstutz <peter.amstutz@curoverse.com>
Wed, 7 Jun 2017 19:52:50 +0000 (15:52 -0400)
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz@curoverse.com>

services/nodemanager/arvnodeman/computenode/__init__.py
services/nodemanager/arvnodeman/daemon.py
services/nodemanager/arvnodeman/test/fake_driver.py
services/nodemanager/arvnodeman/timedcallback.py
services/nodemanager/tests/integration_test.py

index 54d6a82bcefa1cf38ad06f58cbbf89fafe55ecd1..20b274b1587f6acbf276b193dda89370956a44ec 100644 (file)
@@ -82,8 +82,11 @@ class RetryMixin(object):
                             raise
 
                         self._logger.warning(
-                            "Client error: %s - waiting %s seconds",
-                            error, self.retry_wait, exc_info=error)
+                            "Client error: %s - %s %s seconds",
+                            error,
+                            "scheduling retry in" if self._timer else "sleeping",
+                            self.retry_wait,
+                            exc_info=error)
 
                         if self._timer:
                             start_time = time.time()
index c0413f626c0fe9ad3173af784e4f315baaaa289f..7e63c782ede1fecee931d088505aed549a21c9df 100644 (file)
@@ -517,6 +517,16 @@ class NodeManagerDaemonActor(actor_class):
     def shutdown(self):
         self._logger.info("Shutting down after signal.")
         self.poll_stale_after = -1  # Inhibit starting/stopping nodes
+
+        # Shut down pollers
+        self._server_wishlist_actor.stop()
+        self._arvados_nodes_actor.stop()
+        self._cloud_nodes_actor.stop()
+
+        # Clear cloud node list
+        self.update_cloud_nodes([])
+
+        # Stop setup actors unless they are in the middle of setup.
         setup_stops = {key: node.stop_if_no_cloud_node()
                        for key, node in self.booting.iteritems()}
         self.booting = {key: self.booting[key]
index ee49305664444396ee027f738d9598be33caef4d..1785e0559ee8bd74a9045907088d54a19e55e946 100644 (file)
@@ -84,3 +84,16 @@ class QuotaDriver(FakeDriver):
         if len(all_nodes) == 0:
             quota = 4
         return True
+
+class FailingDriver(FakeDriver):
+    def create_node(self, name=None,
+                    size=None,
+                    image=None,
+                    auth=None,
+                    ex_storage_account=None,
+                    ex_customdata=None,
+                    ex_resource_group=None,
+                    ex_user_name=None,
+                    ex_tags=None,
+                    ex_network=None):
+        raise Exception("nope")
index 12d6280873e8fe23669bbf6f1dce08a952bfcda2..c020a7ef033cf645f77675123f22dfb1378963ab 100644 (file)
@@ -29,9 +29,9 @@ class TimedCallBackActor(actor_class):
 
     def deliver(self):
         if not self.messages:
-            return None
+            return
         til_next = self.messages[0][0] - time.time()
-        if til_next < 0:
+        if til_next <= 0:
             t, receiver, args, kwargs = heapq.heappop(self.messages)
             try:
                 receiver(*args, **kwargs)
index 78233659a59b0aa978dcce35ba4e15cc9189bdf3..d4b7e0214abf97ae824be61ee897b9538f7810cc 100755 (executable)
@@ -304,6 +304,21 @@ def main():
              "34t0i-dz642-h42bg3hq4bdfpf2": "ReqNodeNotAvail",
              "34t0i-dz642-h42bg3hq4bdfpf3": "ReqNodeNotAvail",
              "34t0i-dz642-h42bg3hq4bdfpf4": "ReqNodeNotAvail"
+         }),
+        "test5": (
+            [
+                (r".*Daemon started", set_squeue),
+                (r".*Client error: nope", noop),
+                (r".*Client error: nope", noop),
+                (r".*Client error: nope", noop),
+                (r".*Client error: nope", noop),
+            ],
+            {},
+            "arvnodeman.test.fake_driver.FailingDriver",
+            {"34t0i-dz642-h42bg3hq4bdfpf1": "ReqNodeNotAvail",
+             "34t0i-dz642-h42bg3hq4bdfpf2": "ReqNodeNotAvail",
+             "34t0i-dz642-h42bg3hq4bdfpf3": "ReqNodeNotAvail",
+             "34t0i-dz642-h42bg3hq4bdfpf4": "ReqNodeNotAvail"
          })
     }