[ARVADOS] created: 630eac929f942fa997a4efcb9e7cd2d88414d0c2

git at public.curoverse.com git at public.curoverse.com
Mon Feb 1 14:55:13 EST 2016


        at  630eac929f942fa997a4efcb9e7cd2d88414d0c2 (commit)


commit 630eac929f942fa997a4efcb9e7cd2d88414d0c2
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date:   Mon Feb 1 14:54:28 2016 -0500

    6702: Catch GCE create_node() errors and check if the node was actually
    created.  Added test.

diff --git a/services/nodemanager/arvnodeman/computenode/driver/gce.py b/services/nodemanager/arvnodeman/computenode/driver/gce.py
index be3f3f1..860aa38 100644
--- a/services/nodemanager/arvnodeman/computenode/driver/gce.py
+++ b/services/nodemanager/arvnodeman/computenode/driver/gce.py
@@ -101,6 +101,29 @@ class ComputeNodeDriver(BaseComputeNodeDriver):
                 })
         return result
 
+    def create_node(self, size, arvados_node):
+        """Create a node, recovering if creation succeeded but raised."""
+        try:
+            kwargs = self.create_kwargs.copy()
+            kwargs.update(self.arvados_create_kwargs(size, arvados_node))
+            kwargs['size'] = size
+            return self.real.create_node(**kwargs)
+        except ComputeNodeDriver.CLOUD_ERRORS as create_error:
+            # Workaround for bug #6702: sometimes the create node request
+            # succeeds but times out and raises an exception instead of
+            # returning a result.  If this happens, we get stuck in a retry
+            # loop forever because subsequent create_node attempts will fail
+            # due to node name collision.  So check if the node we intended to
+            # create shows up in the cloud node list and return it if found.
+            try:
+                for node in self.list_nodes():
+                    if node.name == kwargs['name']:
+                        return node
+            except Exception:
+                # Best effort only: a failed lookup must not mask create_error.
+                pass
+            raise create_error
+
     def list_nodes(self):
         # The GCE libcloud driver only supports filtering node lists by zone.
         # Do our own filtering based on tag list.
diff --git a/services/nodemanager/tests/test_computenode_driver_gce.py b/services/nodemanager/tests/test_computenode_driver_gce.py
index 41cb1aa..e8b2fa3 100644
--- a/services/nodemanager/tests/test_computenode_driver_gce.py
+++ b/services/nodemanager/tests/test_computenode_driver_gce.py
@@ -48,6 +48,16 @@ class GCEComputeNodeDriverTestCase(testutil.DriverTestMixin, unittest.TestCase):
         metadata = self.driver_mock().create_node.call_args[1]['ex_metadata']
         self.assertIn('ping_secret=ssshh', metadata.get('arv-ping-url'))
 
+    def test_create_raises_but_actually_succeeded(self):
+        arv_node = testutil.arvados_node_mock(1, hostname=None)
+        driver = self.new_driver()
+        existing = testutil.cloud_node_mock(1)
+        existing.name = 'compute-000000000000001-zzzzz'
+        self.driver_mock().list_nodes.return_value = [existing]
+        self.driver_mock().create_node.side_effect = IOError  # create "fails"
+        created = driver.create_node(testutil.MockSize(1), arv_node)
+        self.assertEqual('compute-000000000000001-zzzzz', created.name)
+
     def test_create_sets_default_hostname(self):
         driver = self.new_driver()
         driver.create_node(testutil.MockSize(1),

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list