[ARVADOS] updated: 5a39b44d563d1f0ea01f41864761f9ad2f910743
Git user
git at public.curoverse.com
Tue Jun 6 16:22:10 EDT 2017
Summary of changes:
services/nodemanager/arvnodeman/computenode/__init__.py | 3 ++-
.../nodemanager/arvnodeman/computenode/dispatch/__init__.py | 1 +
.../nodemanager/arvnodeman/computenode/dispatch/slurm.py | 6 +++---
.../nodemanager/arvnodeman/computenode/driver/__init__.py | 9 ++++-----
services/nodemanager/arvnodeman/computenode/driver/azure.py | 1 -
services/nodemanager/arvnodeman/config.py | 4 ++++
services/nodemanager/tests/test_computenode_dispatch.py | 4 ++--
.../nodemanager/tests/test_computenode_dispatch_slurm.py | 2 +-
services/nodemanager/tests/test_computenode_driver_azure.py | 13 -------------
services/nodemanager/tests/test_computenode_driver_ec2.py | 13 -------------
10 files changed, 17 insertions(+), 39 deletions(-)
via 5a39b44d563d1f0ea01f41864761f9ad2f910743 (commit)
from f33920ae83ed990135a00e96f7ce8b6d8c2f06bd (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
commit 5a39b44d563d1f0ea01f41864761f9ad2f910743
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date: Tue Jun 6 16:22:02 2017 -0400
10847: Fix unit tests after refactoring error types.
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curoverse.com>
diff --git a/services/nodemanager/arvnodeman/computenode/__init__.py b/services/nodemanager/arvnodeman/computenode/__init__.py
index 7cf9d63..b11b2de 100644
--- a/services/nodemanager/arvnodeman/computenode/__init__.py
+++ b/services/nodemanager/arvnodeman/computenode/__init__.py
@@ -8,6 +8,7 @@ import itertools
import re
import time
+from ..config import CLOUD_ERRORS
from libcloud.common.exceptions import BaseHTTPError
ARVADOS_TIMEFMT = '%Y-%m-%dT%H:%M:%SZ'
@@ -86,7 +87,7 @@ class RetryMixin(object):
pass
if error.code == 429 or error.code >= 500:
should_retry = True
- elif isinstance(error, errors):
+ elif isinstance(error, CLOUD_ERRORS) or isinstance(error, errors) or type(error) is Exception:
should_retry = True
if not should_retry:
diff --git a/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py b/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py
index 8a397dc..4463ec6 100644
--- a/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py
+++ b/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py
@@ -296,6 +296,7 @@ class ComputeNodeUpdateActor(config.actor_class, RetryMixin):
RetryMixin.__init__(self, 1, max_retry_wait,
None, cloud_factory(), timer_actor)
self._cloud = cloud_factory()
+ self._later = self.actor_ref.tell_proxy()
def _set_logger(self):
self._logger = logging.getLogger("%s.%s" % (self.__class__.__name__, self.actor_urn[33:]))
diff --git a/services/nodemanager/arvnodeman/computenode/dispatch/slurm.py b/services/nodemanager/arvnodeman/computenode/dispatch/slurm.py
index 0c8ddc2..11cc4e5 100644
--- a/services/nodemanager/arvnodeman/computenode/dispatch/slurm.py
+++ b/services/nodemanager/arvnodeman/computenode/dispatch/slurm.py
@@ -39,7 +39,7 @@ class ComputeNodeShutdownActor(SlurmMixin, ShutdownActorBase):
self._logger.info("Draining SLURM node %s", self._nodename)
self._later.issue_slurm_drain()
- @RetryMixin._retry((subprocess.CalledProcessError,))
+ @RetryMixin._retry((subprocess.CalledProcessError, OSError))
def cancel_shutdown(self, reason, try_resume=True):
if self._nodename:
if try_resume and self._get_slurm_state(self._nodename) in self.SLURM_DRAIN_STATES:
@@ -51,7 +51,7 @@ class ComputeNodeShutdownActor(SlurmMixin, ShutdownActorBase):
pass
return super(ComputeNodeShutdownActor, self).cancel_shutdown(reason)
- @RetryMixin._retry((subprocess.CalledProcessError,))
+ @RetryMixin._retry((subprocess.CalledProcessError, OSError))
def issue_slurm_drain(self):
if self.cancel_reason is not None:
return
@@ -62,7 +62,7 @@ class ComputeNodeShutdownActor(SlurmMixin, ShutdownActorBase):
else:
self._later.shutdown_node()
- @RetryMixin._retry((subprocess.CalledProcessError,))
+ @RetryMixin._retry((subprocess.CalledProcessError, OSError))
def await_slurm_drain(self):
if self.cancel_reason is not None:
return
diff --git a/services/nodemanager/arvnodeman/computenode/driver/__init__.py b/services/nodemanager/arvnodeman/computenode/driver/__init__.py
index 6d23c2b..c8c54dc 100644
--- a/services/nodemanager/arvnodeman/computenode/driver/__init__.py
+++ b/services/nodemanager/arvnodeman/computenode/driver/__init__.py
@@ -6,10 +6,9 @@ import logging
from operator import attrgetter
import libcloud.common.types as cloud_types
-from libcloud.common.exceptions import BaseHTTPError
from libcloud.compute.base import NodeDriver, NodeAuthSSHKey
-from ...config import NETWORK_ERRORS
+from ...config import CLOUD_ERRORS
from .. import RetryMixin
class BaseComputeNodeDriver(RetryMixin):
@@ -25,7 +24,7 @@ class BaseComputeNodeDriver(RetryMixin):
Subclasses must implement arvados_create_kwargs, sync_node,
node_fqdn, and node_start_time.
"""
- CLOUD_ERRORS = NETWORK_ERRORS + (cloud_types.LibcloudError,)
+
@RetryMixin._retry()
def _create_driver(self, driver_class, **auth_kwargs):
@@ -169,7 +168,7 @@ class BaseComputeNodeDriver(RetryMixin):
kwargs.update(self.arvados_create_kwargs(size, arvados_node))
kwargs['size'] = size
return self.real.create_node(**kwargs)
- except self.CLOUD_ERRORS as create_error:
+ except CLOUD_ERRORS as create_error:
# Workaround for bug #6702: sometimes the create node request
# succeeds but times out and raises an exception instead of
# returning a result. If this happens, we get stuck in a retry
@@ -209,7 +208,7 @@ class BaseComputeNodeDriver(RetryMixin):
def destroy_node(self, cloud_node):
try:
return self.real.destroy_node(cloud_node)
- except self.CLOUD_ERRORS as destroy_error:
+ except CLOUD_ERRORS as destroy_error:
# Sometimes the destroy node request succeeds but times out and
# raises an exception instead of returning success. If this
# happens, we get a noisy stack trace. Check if the node is still
diff --git a/services/nodemanager/arvnodeman/computenode/driver/azure.py b/services/nodemanager/arvnodeman/computenode/driver/azure.py
index 6e7392a..c707c2a 100644
--- a/services/nodemanager/arvnodeman/computenode/driver/azure.py
+++ b/services/nodemanager/arvnodeman/computenode/driver/azure.py
@@ -17,7 +17,6 @@ class ComputeNodeDriver(BaseComputeNodeDriver):
DEFAULT_DRIVER = cloud_provider.get_driver(cloud_types.Provider.AZURE_ARM)
SEARCH_CACHE = {}
- CLOUD_ERRORS = BaseComputeNodeDriver.CLOUD_ERRORS
def __init__(self, auth_kwargs, list_kwargs, create_kwargs,
driver_class=DEFAULT_DRIVER):
diff --git a/services/nodemanager/arvnodeman/config.py b/services/nodemanager/arvnodeman/config.py
index f884295..a16e0a8 100644
--- a/services/nodemanager/arvnodeman/config.py
+++ b/services/nodemanager/arvnodeman/config.py
@@ -14,11 +14,15 @@ from apiclient import errors as apierror
from .baseactor import BaseNodeManagerActor
+from libcloud.common.types import LibcloudError
+from libcloud.common.exceptions import BaseHTTPError
+
# IOError is the base class for socket.error, ssl.SSLError, and friends.
# It seems like it hits the sweet spot for operations we want to retry:
# it's low-level, but unlikely to catch code bugs.
NETWORK_ERRORS = (IOError,)
ARVADOS_ERRORS = NETWORK_ERRORS + (apierror.Error,)
+CLOUD_ERRORS = NETWORK_ERRORS + (LibcloudError, BaseHTTPError)
actor_class = BaseNodeManagerActor
diff --git a/services/nodemanager/tests/test_computenode_dispatch.py b/services/nodemanager/tests/test_computenode_dispatch.py
index b950cc1..598b293 100644
--- a/services/nodemanager/tests/test_computenode_dispatch.py
+++ b/services/nodemanager/tests/test_computenode_dispatch.py
@@ -28,7 +28,6 @@ class ComputeNodeSetupActorTestCase(testutil.ActorTestMixin, unittest.TestCase):
self.api_client.nodes().update().execute.side_effect = arvados_effect
self.cloud_client = mock.MagicMock(name='cloud_client')
self.cloud_client.create_node.return_value = testutil.cloud_node_mock(1)
- self.cloud_client.is_cloud_exception = BaseComputeNodeDriver.is_cloud_exception
def make_actor(self, arv_node=None):
if not hasattr(self, 'timer'):
@@ -277,7 +276,8 @@ class ComputeNodeUpdateActorTestCase(testutil.ActorTestMixin,
def make_actor(self):
self.driver = mock.MagicMock(name='driver_mock')
- self.updater = self.ACTOR_CLASS.start(self.driver).proxy()
+ self.timer = mock.MagicMock(name='timer_mock')
+ self.updater = self.ACTOR_CLASS.start(self.driver, self.timer).proxy()
def test_node_sync(self, *args):
self.make_actor()
diff --git a/services/nodemanager/tests/test_computenode_dispatch_slurm.py b/services/nodemanager/tests/test_computenode_dispatch_slurm.py
index e1def28..73bcb57 100644
--- a/services/nodemanager/tests/test_computenode_dispatch_slurm.py
+++ b/services/nodemanager/tests/test_computenode_dispatch_slurm.py
@@ -84,7 +84,7 @@ class SLURMComputeNodeShutdownActorTestCase(ComputeNodeShutdownActorMixin,
self.check_success_flag(False, 2)
def test_issue_slurm_drain_retry(self, proc_mock):
- proc_mock.side_effect = iter([OSError, '', OSError, 'drng\n'])
+ proc_mock.side_effect = iter([OSError, '', OSError, 'drng\n', 'drain\n', 'drain\n'])
self.check_success_after_reset(proc_mock)
def test_arvados_node_cleaned_after_shutdown(self, proc_mock):
diff --git a/services/nodemanager/tests/test_computenode_driver_azure.py b/services/nodemanager/tests/test_computenode_driver_azure.py
index 702688d..c4bc680 100644
--- a/services/nodemanager/tests/test_computenode_driver_azure.py
+++ b/services/nodemanager/tests/test_computenode_driver_azure.py
@@ -69,19 +69,6 @@ class AzureComputeNodeDriverTestCase(testutil.DriverTestMixin, unittest.TestCase
node.extra = {'tags': {"hostname": name}}
self.assertEqual(name, azure.ComputeNodeDriver.node_fqdn(node))
- def test_cloud_exceptions(self):
- for error in [Exception("test exception"),
- IOError("test exception"),
- ssl.SSLError("test exception"),
- cloud_types.LibcloudError("test exception")]:
- self.assertTrue(azure.ComputeNodeDriver.is_cloud_exception(error),
- "{} not flagged as cloud exception".format(error))
-
- def test_noncloud_exceptions(self):
- self.assertFalse(
- azure.ComputeNodeDriver.is_cloud_exception(ValueError("test error")),
- "ValueError flagged as cloud exception")
-
def test_sync_node(self):
arv_node = testutil.arvados_node_mock(1)
cloud_node = testutil.cloud_node_mock(2)
diff --git a/services/nodemanager/tests/test_computenode_driver_ec2.py b/services/nodemanager/tests/test_computenode_driver_ec2.py
index a778cd5..14df360 100644
--- a/services/nodemanager/tests/test_computenode_driver_ec2.py
+++ b/services/nodemanager/tests/test_computenode_driver_ec2.py
@@ -96,16 +96,3 @@ class EC2ComputeNodeDriverTestCase(testutil.DriverTestMixin, unittest.TestCase):
node = testutil.cloud_node_mock()
node.name = name
self.assertEqual(name, ec2.ComputeNodeDriver.node_fqdn(node))
-
- def test_cloud_exceptions(self):
- for error in [Exception("test exception"),
- IOError("test exception"),
- ssl.SSLError("test exception"),
- cloud_types.LibcloudError("test exception")]:
- self.assertTrue(ec2.ComputeNodeDriver.is_cloud_exception(error),
- "{} not flagged as cloud exception".format(error))
-
- def test_noncloud_exceptions(self):
- self.assertFalse(
- ec2.ComputeNodeDriver.is_cloud_exception(ValueError("test error")),
- "ValueError flagged as cloud exception")
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list