11345: Fix unit tests after refactoring error types.
authorPeter Amstutz <peter.amstutz@curoverse.com>
Tue, 6 Jun 2017 20:22:02 +0000 (16:22 -0400)
committerPeter Amstutz <peter.amstutz@curoverse.com>
Thu, 8 Jun 2017 19:34:54 +0000 (15:34 -0400)
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz@curoverse.com>

services/nodemanager/arvnodeman/computenode/__init__.py
services/nodemanager/arvnodeman/computenode/dispatch/__init__.py
services/nodemanager/arvnodeman/computenode/dispatch/slurm.py
services/nodemanager/arvnodeman/computenode/driver/__init__.py
services/nodemanager/arvnodeman/computenode/driver/azure.py
services/nodemanager/arvnodeman/config.py
services/nodemanager/tests/test_computenode_dispatch.py
services/nodemanager/tests/test_computenode_dispatch_slurm.py
services/nodemanager/tests/test_computenode_driver_azure.py
services/nodemanager/tests/test_computenode_driver_ec2.py

index 7cf9d63d5e178b4bbddebce2bda6b6e4d14b5ea0..b11b2de9ef0a6bd1daf6baff2909aec85a2fb7fc 100644 (file)
@@ -8,6 +8,7 @@ import itertools
 import re
 import time
 
+from ..config import CLOUD_ERRORS
 from libcloud.common.exceptions import BaseHTTPError
 
 ARVADOS_TIMEFMT = '%Y-%m-%dT%H:%M:%SZ'
@@ -86,7 +87,7 @@ class RetryMixin(object):
                                     pass
                             if error.code == 429 or error.code >= 500:
                                 should_retry = True
-                        elif isinstance(error, errors):
+                        elif isinstance(error, CLOUD_ERRORS) or isinstance(error, errors) or type(error) is Exception:
                             should_retry = True
 
                         if not should_retry:
index 8a397dcc8556f1ba8550a9cbda37eed0d964b7c8..4463ec6f53ff6adb8fdf82918308d04951f607dd 100644 (file)
@@ -296,6 +296,7 @@ class ComputeNodeUpdateActor(config.actor_class, RetryMixin):
         RetryMixin.__init__(self, 1, max_retry_wait,
                             None, cloud_factory(), timer_actor)
         self._cloud = cloud_factory()
+        self._later = self.actor_ref.tell_proxy()
 
     def _set_logger(self):
         self._logger = logging.getLogger("%s.%s" % (self.__class__.__name__, self.actor_urn[33:]))
index 0c8ddc29eb6ca7b37ebc947addfffbf08dc7ab46..11cc4e5384637a2def7a4235582cb402818dd0cc 100644 (file)
@@ -39,7 +39,7 @@ class ComputeNodeShutdownActor(SlurmMixin, ShutdownActorBase):
             self._logger.info("Draining SLURM node %s", self._nodename)
             self._later.issue_slurm_drain()
 
-    @RetryMixin._retry((subprocess.CalledProcessError,))
+    @RetryMixin._retry((subprocess.CalledProcessError, OSError))
     def cancel_shutdown(self, reason, try_resume=True):
         if self._nodename:
             if try_resume and self._get_slurm_state(self._nodename) in self.SLURM_DRAIN_STATES:
@@ -51,7 +51,7 @@ class ComputeNodeShutdownActor(SlurmMixin, ShutdownActorBase):
                 pass
         return super(ComputeNodeShutdownActor, self).cancel_shutdown(reason)
 
-    @RetryMixin._retry((subprocess.CalledProcessError,))
+    @RetryMixin._retry((subprocess.CalledProcessError, OSError))
     def issue_slurm_drain(self):
         if self.cancel_reason is not None:
             return
@@ -62,7 +62,7 @@ class ComputeNodeShutdownActor(SlurmMixin, ShutdownActorBase):
         else:
             self._later.shutdown_node()
 
-    @RetryMixin._retry((subprocess.CalledProcessError,))
+    @RetryMixin._retry((subprocess.CalledProcessError, OSError))
     def await_slurm_drain(self):
         if self.cancel_reason is not None:
             return
index 6d23c2b5a3711ac0c1f50770d12023573ca5fffa..c8c54dc0bfb9384c3f79545309fff86433409133 100644 (file)
@@ -6,10 +6,9 @@ import logging
 from operator import attrgetter
 
 import libcloud.common.types as cloud_types
-from libcloud.common.exceptions import BaseHTTPError
 from libcloud.compute.base import NodeDriver, NodeAuthSSHKey
 
-from ...config import NETWORK_ERRORS
+from ...config import CLOUD_ERRORS
 from .. import RetryMixin
 
 class BaseComputeNodeDriver(RetryMixin):
@@ -25,7 +24,7 @@ class BaseComputeNodeDriver(RetryMixin):
     Subclasses must implement arvados_create_kwargs, sync_node,
     node_fqdn, and node_start_time.
     """
-    CLOUD_ERRORS = NETWORK_ERRORS + (cloud_types.LibcloudError,)
+
 
     @RetryMixin._retry()
     def _create_driver(self, driver_class, **auth_kwargs):
@@ -169,7 +168,7 @@ class BaseComputeNodeDriver(RetryMixin):
             kwargs.update(self.arvados_create_kwargs(size, arvados_node))
             kwargs['size'] = size
             return self.real.create_node(**kwargs)
-        except self.CLOUD_ERRORS as create_error:
+        except CLOUD_ERRORS as create_error:
             # Workaround for bug #6702: sometimes the create node request
             # succeeds but times out and raises an exception instead of
             # returning a result.  If this happens, we get stuck in a retry
@@ -209,7 +208,7 @@ class BaseComputeNodeDriver(RetryMixin):
     def destroy_node(self, cloud_node):
         try:
             return self.real.destroy_node(cloud_node)
-        except self.CLOUD_ERRORS as destroy_error:
+        except CLOUD_ERRORS as destroy_error:
             # Sometimes the destroy node request succeeds but times out and
             # raises an exception instead of returning success.  If this
             # happens, we get a noisy stack trace.  Check if the node is still
index 6e7392a1eb7706592942b9483563739a613f3963..c707c2a9f7bc2c274b2a4d295a2f59139dc47ef5 100644 (file)
@@ -17,7 +17,6 @@ class ComputeNodeDriver(BaseComputeNodeDriver):
 
     DEFAULT_DRIVER = cloud_provider.get_driver(cloud_types.Provider.AZURE_ARM)
     SEARCH_CACHE = {}
-    CLOUD_ERRORS = BaseComputeNodeDriver.CLOUD_ERRORS
 
     def __init__(self, auth_kwargs, list_kwargs, create_kwargs,
                  driver_class=DEFAULT_DRIVER):
index f884295e37c7556976ce35ba186954936cc22ed4..a16e0a8e62bac7930c3c4e6dfcded4cca21b2c3a 100644 (file)
@@ -14,11 +14,15 @@ from apiclient import errors as apierror
 
 from .baseactor import BaseNodeManagerActor
 
+from libcloud.common.types import LibcloudError
+from libcloud.common.exceptions import BaseHTTPError
+
 # IOError is the base class for socket.error, ssl.SSLError, and friends.
 # It seems like it hits the sweet spot for operations we want to retry:
 # it's low-level, but unlikely to catch code bugs.
 NETWORK_ERRORS = (IOError,)
 ARVADOS_ERRORS = NETWORK_ERRORS + (apierror.Error,)
+CLOUD_ERRORS = NETWORK_ERRORS + (LibcloudError, BaseHTTPError)
 
 actor_class = BaseNodeManagerActor
 
index b950cc1169c23b6990ae8cc4f0dd0aafe69914ba..598b293420b39f2bd1213be106aad4cf0937e7bc 100644 (file)
@@ -28,7 +28,6 @@ class ComputeNodeSetupActorTestCase(testutil.ActorTestMixin, unittest.TestCase):
         self.api_client.nodes().update().execute.side_effect = arvados_effect
         self.cloud_client = mock.MagicMock(name='cloud_client')
         self.cloud_client.create_node.return_value = testutil.cloud_node_mock(1)
-        self.cloud_client.is_cloud_exception = BaseComputeNodeDriver.is_cloud_exception
 
     def make_actor(self, arv_node=None):
         if not hasattr(self, 'timer'):
@@ -277,7 +276,8 @@ class ComputeNodeUpdateActorTestCase(testutil.ActorTestMixin,
 
     def make_actor(self):
         self.driver = mock.MagicMock(name='driver_mock')
-        self.updater = self.ACTOR_CLASS.start(self.driver).proxy()
+        self.timer = mock.MagicMock(name='timer_mock')
+        self.updater = self.ACTOR_CLASS.start(self.driver, self.timer).proxy()
 
     def test_node_sync(self, *args):
         self.make_actor()
index e1def28d8b8e7f5a34b9ff57dfccab9bedfb0cea..73bcb577b7114d8812d2148f8257191e99231e9f 100644 (file)
@@ -84,7 +84,7 @@ class SLURMComputeNodeShutdownActorTestCase(ComputeNodeShutdownActorMixin,
         self.check_success_flag(False, 2)
 
     def test_issue_slurm_drain_retry(self, proc_mock):
-        proc_mock.side_effect = iter([OSError, '', OSError, 'drng\n'])
+        proc_mock.side_effect = iter([OSError, '', OSError, 'drng\n', 'drain\n', 'drain\n'])
         self.check_success_after_reset(proc_mock)
 
     def test_arvados_node_cleaned_after_shutdown(self, proc_mock):
index 702688d88fd51052ff9704755b97d96de0bc50be..c4bc680d2ec87431484a8190a9dd5fda85e765ad 100644 (file)
@@ -69,19 +69,6 @@ class AzureComputeNodeDriverTestCase(testutil.DriverTestMixin, unittest.TestCase
         node.extra = {'tags': {"hostname": name}}
         self.assertEqual(name, azure.ComputeNodeDriver.node_fqdn(node))
 
-    def test_cloud_exceptions(self):
-        for error in [Exception("test exception"),
-                      IOError("test exception"),
-                      ssl.SSLError("test exception"),
-                      cloud_types.LibcloudError("test exception")]:
-            self.assertTrue(azure.ComputeNodeDriver.is_cloud_exception(error),
-                            "{} not flagged as cloud exception".format(error))
-
-    def test_noncloud_exceptions(self):
-        self.assertFalse(
-            azure.ComputeNodeDriver.is_cloud_exception(ValueError("test error")),
-            "ValueError flagged as cloud exception")
-
     def test_sync_node(self):
         arv_node = testutil.arvados_node_mock(1)
         cloud_node = testutil.cloud_node_mock(2)
index a778cd541d690159a916868f6729a49da674d629..14df3602313f8e1c249811395d0809dc57f961ee 100644 (file)
@@ -96,16 +96,3 @@ class EC2ComputeNodeDriverTestCase(testutil.DriverTestMixin, unittest.TestCase):
         node = testutil.cloud_node_mock()
         node.name = name
         self.assertEqual(name, ec2.ComputeNodeDriver.node_fqdn(node))
-
-    def test_cloud_exceptions(self):
-        for error in [Exception("test exception"),
-                      IOError("test exception"),
-                      ssl.SSLError("test exception"),
-                      cloud_types.LibcloudError("test exception")]:
-            self.assertTrue(ec2.ComputeNodeDriver.is_cloud_exception(error),
-                            "{} not flagged as cloud exception".format(error))
-
-    def test_noncloud_exceptions(self):
-        self.assertFalse(
-            ec2.ComputeNodeDriver.is_cloud_exception(ValueError("test error")),
-            "ValueError flagged as cloud exception")