X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/f17a26ca512ae0083ea5ad608ad6cfbb7fd247ee..e4c7e6b368cf6d922db341580a2402a07c6cb079:/services/nodemanager/arvnodeman/computenode/driver/__init__.py

diff --git a/services/nodemanager/arvnodeman/computenode/driver/__init__.py b/services/nodemanager/arvnodeman/computenode/driver/__init__.py
index db799bc16b..29b04845b6 100644
--- a/services/nodemanager/arvnodeman/computenode/driver/__init__.py
+++ b/services/nodemanager/arvnodeman/computenode/driver/__init__.py
@@ -211,12 +211,31 @@ class BaseComputeNodeDriver(RetryMixin):
         # libcloud compute drivers typically raise bare Exceptions to
         # represent API errors. Return True for any exception that is
         # exactly an Exception, or a better-known higher-level exception.
-        if (exception is BaseHTTPError and
-            self.message and self.message.startswith("InvalidInstanceID.NotFound")):
+        if (type(exception) is BaseHTTPError and
+            exception.message and
+            (exception.message.startswith("InvalidInstanceID.NotFound") or
+             exception.message.startswith("InstanceLimitExceeded"))):
             return True
         return (isinstance(exception, cls.CLOUD_ERRORS) or
                 type(exception) is Exception)
 
+    def destroy_node(self, cloud_node):
+        try:
+            return self.real.destroy_node(cloud_node)
+        except self.CLOUD_ERRORS as destroy_error:
+            # Sometimes the destroy node request succeeds but times out and
+            # raises an exception instead of returning success. If this
+            # happens, we get a noisy stack trace. Check if the node is still
+            # on the node list. If it is gone, we can declare victory.
+            try:
+                self.search_for_now(cloud_node.id, 'list_nodes')
+            except ValueError:
+                # If we catch ValueError, that means search_for_now didn't find
+                # it, which means destroy_node actually succeeded.
+                return True
+            # The node is still on the list. Re-raise.
+            raise
+
     # Now that we've defined all our own methods, delegate generic, public
     # attributes of libcloud drivers that we haven't defined ourselves.
     def _delegate_to_real(attr_name):