[ARVADOS] updated: 5a39b44d563d1f0ea01f41864761f9ad2f910743

Git user git at public.curoverse.com
Tue Jun 6 16:22:10 EDT 2017


Summary of changes:
 services/nodemanager/arvnodeman/computenode/__init__.py     |  3 ++-
 .../nodemanager/arvnodeman/computenode/dispatch/__init__.py |  1 +
 .../nodemanager/arvnodeman/computenode/dispatch/slurm.py    |  6 +++---
 .../nodemanager/arvnodeman/computenode/driver/__init__.py   |  9 ++++-----
 services/nodemanager/arvnodeman/computenode/driver/azure.py |  1 -
 services/nodemanager/arvnodeman/config.py                   |  4 ++++
 services/nodemanager/tests/test_computenode_dispatch.py     |  4 ++--
 .../nodemanager/tests/test_computenode_dispatch_slurm.py    |  2 +-
 services/nodemanager/tests/test_computenode_driver_azure.py | 13 -------------
 services/nodemanager/tests/test_computenode_driver_ec2.py   | 13 -------------
 10 files changed, 17 insertions(+), 39 deletions(-)

       via  5a39b44d563d1f0ea01f41864761f9ad2f910743 (commit)
      from  f33920ae83ed990135a00e96f7ce8b6d8c2f06bd (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.


commit 5a39b44d563d1f0ea01f41864761f9ad2f910743
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date:   Tue Jun 6 16:22:02 2017 -0400

    10847: Fix unit tests after refactoring error types.
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curoverse.com>

diff --git a/services/nodemanager/arvnodeman/computenode/__init__.py b/services/nodemanager/arvnodeman/computenode/__init__.py
index 7cf9d63..b11b2de 100644
--- a/services/nodemanager/arvnodeman/computenode/__init__.py
+++ b/services/nodemanager/arvnodeman/computenode/__init__.py
@@ -8,6 +8,7 @@ import itertools
 import re
 import time
 
+from ..config import CLOUD_ERRORS
 from libcloud.common.exceptions import BaseHTTPError
 
 ARVADOS_TIMEFMT = '%Y-%m-%dT%H:%M:%SZ'
@@ -86,7 +87,7 @@ class RetryMixin(object):
                                     pass
                             if error.code == 429 or error.code >= 500:
                                 should_retry = True
-                        elif isinstance(error, errors):
+                        elif isinstance(error, CLOUD_ERRORS) or isinstance(error, errors) or type(error) is Exception:
                             should_retry = True
 
                         if not should_retry:
diff --git a/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py b/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py
index 8a397dc..4463ec6 100644
--- a/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py
+++ b/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py
@@ -296,6 +296,7 @@ class ComputeNodeUpdateActor(config.actor_class, RetryMixin):
         RetryMixin.__init__(self, 1, max_retry_wait,
                             None, cloud_factory(), timer_actor)
         self._cloud = cloud_factory()
+        self._later = self.actor_ref.tell_proxy()
 
     def _set_logger(self):
         self._logger = logging.getLogger("%s.%s" % (self.__class__.__name__, self.actor_urn[33:]))
diff --git a/services/nodemanager/arvnodeman/computenode/dispatch/slurm.py b/services/nodemanager/arvnodeman/computenode/dispatch/slurm.py
index 0c8ddc2..11cc4e5 100644
--- a/services/nodemanager/arvnodeman/computenode/dispatch/slurm.py
+++ b/services/nodemanager/arvnodeman/computenode/dispatch/slurm.py
@@ -39,7 +39,7 @@ class ComputeNodeShutdownActor(SlurmMixin, ShutdownActorBase):
             self._logger.info("Draining SLURM node %s", self._nodename)
             self._later.issue_slurm_drain()
 
-    @RetryMixin._retry((subprocess.CalledProcessError,))
+    @RetryMixin._retry((subprocess.CalledProcessError, OSError))
     def cancel_shutdown(self, reason, try_resume=True):
         if self._nodename:
             if try_resume and self._get_slurm_state(self._nodename) in self.SLURM_DRAIN_STATES:
@@ -51,7 +51,7 @@ class ComputeNodeShutdownActor(SlurmMixin, ShutdownActorBase):
                 pass
         return super(ComputeNodeShutdownActor, self).cancel_shutdown(reason)
 
-    @RetryMixin._retry((subprocess.CalledProcessError,))
+    @RetryMixin._retry((subprocess.CalledProcessError, OSError))
     def issue_slurm_drain(self):
         if self.cancel_reason is not None:
             return
@@ -62,7 +62,7 @@ class ComputeNodeShutdownActor(SlurmMixin, ShutdownActorBase):
         else:
             self._later.shutdown_node()
 
-    @RetryMixin._retry((subprocess.CalledProcessError,))
+    @RetryMixin._retry((subprocess.CalledProcessError, OSError))
     def await_slurm_drain(self):
         if self.cancel_reason is not None:
             return
diff --git a/services/nodemanager/arvnodeman/computenode/driver/__init__.py b/services/nodemanager/arvnodeman/computenode/driver/__init__.py
index 6d23c2b..c8c54dc 100644
--- a/services/nodemanager/arvnodeman/computenode/driver/__init__.py
+++ b/services/nodemanager/arvnodeman/computenode/driver/__init__.py
@@ -6,10 +6,9 @@ import logging
 from operator import attrgetter
 
 import libcloud.common.types as cloud_types
-from libcloud.common.exceptions import BaseHTTPError
 from libcloud.compute.base import NodeDriver, NodeAuthSSHKey
 
-from ...config import NETWORK_ERRORS
+from ...config import CLOUD_ERRORS
 from .. import RetryMixin
 
 class BaseComputeNodeDriver(RetryMixin):
@@ -25,7 +24,7 @@ class BaseComputeNodeDriver(RetryMixin):
     Subclasses must implement arvados_create_kwargs, sync_node,
     node_fqdn, and node_start_time.
     """
-    CLOUD_ERRORS = NETWORK_ERRORS + (cloud_types.LibcloudError,)
+
 
     @RetryMixin._retry()
     def _create_driver(self, driver_class, **auth_kwargs):
@@ -169,7 +168,7 @@ class BaseComputeNodeDriver(RetryMixin):
             kwargs.update(self.arvados_create_kwargs(size, arvados_node))
             kwargs['size'] = size
             return self.real.create_node(**kwargs)
-        except self.CLOUD_ERRORS as create_error:
+        except CLOUD_ERRORS as create_error:
             # Workaround for bug #6702: sometimes the create node request
             # succeeds but times out and raises an exception instead of
             # returning a result.  If this happens, we get stuck in a retry
@@ -209,7 +208,7 @@ class BaseComputeNodeDriver(RetryMixin):
     def destroy_node(self, cloud_node):
         try:
             return self.real.destroy_node(cloud_node)
-        except self.CLOUD_ERRORS as destroy_error:
+        except CLOUD_ERRORS as destroy_error:
             # Sometimes the destroy node request succeeds but times out and
             # raises an exception instead of returning success.  If this
             # happens, we get a noisy stack trace.  Check if the node is still
diff --git a/services/nodemanager/arvnodeman/computenode/driver/azure.py b/services/nodemanager/arvnodeman/computenode/driver/azure.py
index 6e7392a..c707c2a 100644
--- a/services/nodemanager/arvnodeman/computenode/driver/azure.py
+++ b/services/nodemanager/arvnodeman/computenode/driver/azure.py
@@ -17,7 +17,6 @@ class ComputeNodeDriver(BaseComputeNodeDriver):
 
     DEFAULT_DRIVER = cloud_provider.get_driver(cloud_types.Provider.AZURE_ARM)
     SEARCH_CACHE = {}
-    CLOUD_ERRORS = BaseComputeNodeDriver.CLOUD_ERRORS
 
     def __init__(self, auth_kwargs, list_kwargs, create_kwargs,
                  driver_class=DEFAULT_DRIVER):
diff --git a/services/nodemanager/arvnodeman/config.py b/services/nodemanager/arvnodeman/config.py
index f884295..a16e0a8 100644
--- a/services/nodemanager/arvnodeman/config.py
+++ b/services/nodemanager/arvnodeman/config.py
@@ -14,11 +14,15 @@ from apiclient import errors as apierror
 
 from .baseactor import BaseNodeManagerActor
 
+from libcloud.common.types import LibcloudError
+from libcloud.common.exceptions import BaseHTTPError
+
 # IOError is the base class for socket.error, ssl.SSLError, and friends.
 # It seems like it hits the sweet spot for operations we want to retry:
 # it's low-level, but unlikely to catch code bugs.
 NETWORK_ERRORS = (IOError,)
 ARVADOS_ERRORS = NETWORK_ERRORS + (apierror.Error,)
+CLOUD_ERRORS = NETWORK_ERRORS + (LibcloudError, BaseHTTPError)
 
 actor_class = BaseNodeManagerActor
 
diff --git a/services/nodemanager/tests/test_computenode_dispatch.py b/services/nodemanager/tests/test_computenode_dispatch.py
index b950cc1..598b293 100644
--- a/services/nodemanager/tests/test_computenode_dispatch.py
+++ b/services/nodemanager/tests/test_computenode_dispatch.py
@@ -28,7 +28,6 @@ class ComputeNodeSetupActorTestCase(testutil.ActorTestMixin, unittest.TestCase):
         self.api_client.nodes().update().execute.side_effect = arvados_effect
         self.cloud_client = mock.MagicMock(name='cloud_client')
         self.cloud_client.create_node.return_value = testutil.cloud_node_mock(1)
-        self.cloud_client.is_cloud_exception = BaseComputeNodeDriver.is_cloud_exception
 
     def make_actor(self, arv_node=None):
         if not hasattr(self, 'timer'):
@@ -277,7 +276,8 @@ class ComputeNodeUpdateActorTestCase(testutil.ActorTestMixin,
 
     def make_actor(self):
         self.driver = mock.MagicMock(name='driver_mock')
-        self.updater = self.ACTOR_CLASS.start(self.driver).proxy()
+        self.timer = mock.MagicMock(name='timer_mock')
+        self.updater = self.ACTOR_CLASS.start(self.driver, self.timer).proxy()
 
     def test_node_sync(self, *args):
         self.make_actor()
diff --git a/services/nodemanager/tests/test_computenode_dispatch_slurm.py b/services/nodemanager/tests/test_computenode_dispatch_slurm.py
index e1def28..73bcb57 100644
--- a/services/nodemanager/tests/test_computenode_dispatch_slurm.py
+++ b/services/nodemanager/tests/test_computenode_dispatch_slurm.py
@@ -84,7 +84,7 @@ class SLURMComputeNodeShutdownActorTestCase(ComputeNodeShutdownActorMixin,
         self.check_success_flag(False, 2)
 
     def test_issue_slurm_drain_retry(self, proc_mock):
-        proc_mock.side_effect = iter([OSError, '', OSError, 'drng\n'])
+        proc_mock.side_effect = iter([OSError, '', OSError, 'drng\n', 'drain\n', 'drain\n'])
         self.check_success_after_reset(proc_mock)
 
     def test_arvados_node_cleaned_after_shutdown(self, proc_mock):
diff --git a/services/nodemanager/tests/test_computenode_driver_azure.py b/services/nodemanager/tests/test_computenode_driver_azure.py
index 702688d..c4bc680 100644
--- a/services/nodemanager/tests/test_computenode_driver_azure.py
+++ b/services/nodemanager/tests/test_computenode_driver_azure.py
@@ -69,19 +69,6 @@ class AzureComputeNodeDriverTestCase(testutil.DriverTestMixin, unittest.TestCase
         node.extra = {'tags': {"hostname": name}}
         self.assertEqual(name, azure.ComputeNodeDriver.node_fqdn(node))
 
-    def test_cloud_exceptions(self):
-        for error in [Exception("test exception"),
-                      IOError("test exception"),
-                      ssl.SSLError("test exception"),
-                      cloud_types.LibcloudError("test exception")]:
-            self.assertTrue(azure.ComputeNodeDriver.is_cloud_exception(error),
-                            "{} not flagged as cloud exception".format(error))
-
-    def test_noncloud_exceptions(self):
-        self.assertFalse(
-            azure.ComputeNodeDriver.is_cloud_exception(ValueError("test error")),
-            "ValueError flagged as cloud exception")
-
     def test_sync_node(self):
         arv_node = testutil.arvados_node_mock(1)
         cloud_node = testutil.cloud_node_mock(2)
diff --git a/services/nodemanager/tests/test_computenode_driver_ec2.py b/services/nodemanager/tests/test_computenode_driver_ec2.py
index a778cd5..14df360 100644
--- a/services/nodemanager/tests/test_computenode_driver_ec2.py
+++ b/services/nodemanager/tests/test_computenode_driver_ec2.py
@@ -96,16 +96,3 @@ class EC2ComputeNodeDriverTestCase(testutil.DriverTestMixin, unittest.TestCase):
         node = testutil.cloud_node_mock()
         node.name = name
         self.assertEqual(name, ec2.ComputeNodeDriver.node_fqdn(node))
-
-    def test_cloud_exceptions(self):
-        for error in [Exception("test exception"),
-                      IOError("test exception"),
-                      ssl.SSLError("test exception"),
-                      cloud_types.LibcloudError("test exception")]:
-            self.assertTrue(ec2.ComputeNodeDriver.is_cloud_exception(error),
-                            "{} not flagged as cloud exception".format(error))
-
-    def test_noncloud_exceptions(self):
-        self.assertFalse(
-            ec2.ComputeNodeDriver.is_cloud_exception(ValueError("test error")),
-            "ValueError flagged as cloud exception")

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list