[ARVADOS] updated: 12b18d9bbed9d604fb5f2a17ee0744bba1cc79e9

Git user git at public.curoverse.com
Wed Apr 6 14:34:15 EDT 2016


Summary of changes:
 services/nodemanager/arvnodeman/computenode/driver/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

  discards  22e3d6ccec797c58a6132145d91c9e3a40be123a (commit)
       via  12b18d9bbed9d604fb5f2a17ee0744bba1cc79e9 (commit)

This update added new revisions after undoing existing revisions.  That is
to say, the old revision is not a strict subset of the new revision.  This
situation occurs when you --force push a change and generate a repository
containing something like this:

 * -- * -- B -- O -- O -- O (22e3d6ccec797c58a6132145d91c9e3a40be123a)
            \
             N -- N -- N (12b18d9bbed9d604fb5f2a17ee0744bba1cc79e9)

When this happens we assume that you've already had alert emails for all
of the O revisions, and so we here report only the revisions in the N
branch from the common base, B.

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.


commit 12b18d9bbed9d604fb5f2a17ee0744bba1cc79e9
Author: Brett Smith <brett at curoverse.com>
Date:   Wed Apr 6 14:23:11 2016 -0400

    8872: Bugfix Node Manager's node search after node create failure.
    
    search_for raises ValueError if the thing isn't found.  create_node
    seems to be expecting it to return None instead.  Bring create_node in
    line with search_for's documented API.
    
    In order to get the tests to pass, I had to separate out the raw
    search code from the caching, and use that in create_node.  Otherwise,
    the cloud node from the "node found" test would be cached and returned
    in the "node not found" test.

diff --git a/services/nodemanager/arvnodeman/computenode/driver/__init__.py b/services/nodemanager/arvnodeman/computenode/driver/__init__.py
index 0576999..d46b4b6 100644
--- a/services/nodemanager/arvnodeman/computenode/driver/__init__.py
+++ b/services/nodemanager/arvnodeman/computenode/driver/__init__.py
@@ -79,6 +79,20 @@ class BaseComputeNodeDriver(RetryMixin):
             key = NodeAuthSSHKey(ssh_file.read())
         return 'auth', key
 
+    def search_for_now(self, term, list_method, key=attrgetter('id'), **kwargs):
+        """Call a cloud list API method and return one matching item.
+
+        See search_for() for details of arguments and exceptions.
+        This method does not cache results.
+        """
+        items = getattr(self.real, list_method)(**kwargs)
+        results = [item for item in items if key(item) == term]
+        count = len(results)
+        if count != 1:
+            raise ValueError("{} returned {} results for {!r}".format(
+                    list_method, count, term))
+        return results[0]
+
     def search_for(self, term, list_method, key=attrgetter('id'), **kwargs):
         """Return one matching item from a list of cloud objects.
 
@@ -94,14 +108,8 @@ class BaseComputeNodeDriver(RetryMixin):
         """
         cache_key = (list_method, term)
         if cache_key not in self.SEARCH_CACHE:
-            items = getattr(self.real, list_method)(**kwargs)
-            results = [item for item in items
-                       if key(item) == term]
-            count = len(results)
-            if count != 1:
-                raise ValueError("{} returned {} results for '{}'".format(
-                        list_method, count, term))
-            self.SEARCH_CACHE[cache_key] = results[0]
+            self.SEARCH_CACHE[cache_key] = self.search_for_now(
+                term, list_method, key, **kwargs)
         return self.SEARCH_CACHE[cache_key]
 
     def list_nodes(self, **kwargs):
@@ -143,19 +151,17 @@ class BaseComputeNodeDriver(RetryMixin):
             kwargs.update(self.arvados_create_kwargs(size, arvados_node))
             kwargs['size'] = size
             return self.real.create_node(**kwargs)
-        except self.CLOUD_ERRORS:
+        except self.CLOUD_ERRORS as create_error:
             # Workaround for bug #6702: sometimes the create node request
             # succeeds but times out and raises an exception instead of
             # returning a result.  If this happens, we get stuck in a retry
             # loop forever because subsequent create_node attempts will fail
             # due to node name collision.  So check if the node we intended to
             # create shows up in the cloud node list and return it if found.
-            node = self.search_for(kwargs['name'], 'list_nodes', self._name_key)
-            if node:
-                return node
-            else:
-                # something else went wrong, re-raise the exception
-                raise
+            try:
+                return self.search_for_now(kwargs['name'], 'list_nodes', self._name_key)
+            except ValueError:
+                raise create_error
 
     def post_create_node(self, cloud_node):
         # ComputeNodeSetupActor calls this method after the cloud node is
diff --git a/services/nodemanager/tests/test_computenode_driver_azure.py b/services/nodemanager/tests/test_computenode_driver_azure.py
index 8e701b9..0b99e9c 100644
--- a/services/nodemanager/tests/test_computenode_driver_azure.py
+++ b/services/nodemanager/tests/test_computenode_driver_azure.py
@@ -19,6 +19,10 @@ class AzureComputeNodeDriverTestCase(testutil.DriverTestMixin, unittest.TestCase
         list_kwargs.setdefault("ex_resource_group", "TestResourceGroup")
         return super(AzureComputeNodeDriverTestCase, self).new_driver(auth_kwargs, list_kwargs, create_kwargs)
 
+    def cloud_name_from_arv_node(self, arv_node):
+        uuid_parts = arv_node['uuid'].split('-', 2)
+        return 'compute-{parts[2]}-{parts[0]}'.format(parts=uuid_parts)
+
     def test_driver_instantiation(self):
         kwargs = {'key': 'testkey'}
         driver = self.new_driver(auth_kwargs=kwargs)
diff --git a/services/nodemanager/tests/test_computenode_driver_ec2.py b/services/nodemanager/tests/test_computenode_driver_ec2.py
index a778cd5..7f1b38e 100644
--- a/services/nodemanager/tests/test_computenode_driver_ec2.py
+++ b/services/nodemanager/tests/test_computenode_driver_ec2.py
@@ -15,6 +15,9 @@ from . import testutil
 class EC2ComputeNodeDriverTestCase(testutil.DriverTestMixin, unittest.TestCase):
     TEST_CLASS = ec2.ComputeNodeDriver
 
+    def cloud_name_from_arv_node(self, arv_node):
+        return '{n[hostname]}.{n[domain]}'.format(n=arv_node)
+
     def test_driver_instantiation(self):
         kwargs = {'key': 'testkey'}
         driver = self.new_driver(auth_kwargs=kwargs)
diff --git a/services/nodemanager/tests/test_computenode_driver_gce.py b/services/nodemanager/tests/test_computenode_driver_gce.py
index e8b2fa3..15d94a5 100644
--- a/services/nodemanager/tests/test_computenode_driver_gce.py
+++ b/services/nodemanager/tests/test_computenode_driver_gce.py
@@ -28,6 +28,10 @@ class GCEComputeNodeDriverTestCase(testutil.DriverTestMixin, unittest.TestCase):
         return super(GCEComputeNodeDriverTestCase, self).new_driver(
             auth_kwargs, list_kwargs, create_kwargs)
 
+    def cloud_name_from_arv_node(self, arv_node):
+        uuid_parts = arv_node['uuid'].split('-', 2)
+        return 'compute-{parts[2]}-{parts[0]}'.format(parts=uuid_parts)
+
     def test_driver_instantiation(self):
         kwargs = {'user_id': 'foo'}
         driver = self.new_driver(auth_kwargs=kwargs)
diff --git a/services/nodemanager/tests/testutil.py b/services/nodemanager/tests/testutil.py
index b9e7bea..35716ab 100644
--- a/services/nodemanager/tests/testutil.py
+++ b/services/nodemanager/tests/testutil.py
@@ -6,6 +6,7 @@ import datetime
 import threading
 import time
 
+import libcloud.common.types as cloud_types
 import mock
 import pykka
 
@@ -142,6 +143,30 @@ class DriverTestMixin(object):
             self.assertTrue(self.driver_mock.called)
             self.assertIs(driver.real, driver_mock2)
 
+    def test_create_can_find_node_after_timeout(self):
+        arv_node = arvados_node_mock()
+        cloud_node = cloud_node_mock()
+        cloud_node.name = self.cloud_name_from_arv_node(arv_node)
+        driver = self.new_driver()
+        create_method = self.driver_mock().create_node
+        create_method.side_effect = cloud_types.LibcloudError("fake timeout")
+        list_method = self.driver_mock().list_nodes
+        list_method.return_value = [cloud_node]
+        actual = driver.create_node(MockSize(1), arv_node)
+        self.assertIs(cloud_node, actual)
+
+    def test_create_can_raise_exception_after_timeout(self):
+        arv_node = arvados_node_mock()
+        driver = self.new_driver()
+        create_method = self.driver_mock().create_node
+        create_method.side_effect = cloud_types.LibcloudError("fake timeout")
+        list_method = self.driver_mock().list_nodes
+        list_method.return_value = []
+        with self.assertRaises(cloud_types.LibcloudError) as exc_test:
+            driver.create_node(MockSize(1), arv_node)
+        self.assertIs(create_method.side_effect, exc_test.exception)
+
+
 class RemotePollLoopActorTestMixin(ActorTestMixin):
     def build_monitor(self, *args, **kwargs):
         self.timer = mock.MagicMock(name='timer_mock')

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list