[ARVADOS] created: 630eac929f942fa997a4efcb9e7cd2d88414d0c2
git at public.curoverse.com
git at public.curoverse.com
Mon Feb 1 14:55:13 EST 2016
at 630eac929f942fa997a4efcb9e7cd2d88414d0c2 (commit)
commit 630eac929f942fa997a4efcb9e7cd2d88414d0c2
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date: Mon Feb 1 14:54:28 2016 -0500
6702: Catch GCE create_node() errors and check if the node was actually
created. Added test.
diff --git a/services/nodemanager/arvnodeman/computenode/driver/gce.py b/services/nodemanager/arvnodeman/computenode/driver/gce.py
index be3f3f1..860aa38 100644
--- a/services/nodemanager/arvnodeman/computenode/driver/gce.py
+++ b/services/nodemanager/arvnodeman/computenode/driver/gce.py
@@ -101,6 +101,29 @@ class ComputeNodeDriver(BaseComputeNodeDriver):
})
return result
+ def create_node(self, size, arvados_node):
+ try:
+ kwargs = self.create_kwargs.copy()
+ kwargs.update(self.arvados_create_kwargs(size, arvados_node))
+ kwargs['size'] = size
+ return self.real.create_node(**kwargs)
+ except ComputeNodeDriver.CLOUD_ERRORS:
+ # Workaround for bug #6702: sometimes the create node request
+ # succeeds but times out and raises an exception instead of
+ # returning a result. If this happens, we get stuck in a retry
+ # loop forever because subsequent create_node attempts will fail
+ # due to node name collision. So check if the node we intended to
+ # create shows up in the cloud node list and return it if found.
+ try:
+ node = [n for n in self.list_nodes() if n.name == kwargs['name']]
+ if node:
+ return node[0]
+ except:
+ # Ignore possible exception from list_nodes in favor of
+ # re-raising the original create_node exception.
+ pass
+ raise
+
def list_nodes(self):
# The GCE libcloud driver only supports filtering node lists by zone.
# Do our own filtering based on tag list.
diff --git a/services/nodemanager/tests/test_computenode_driver_gce.py b/services/nodemanager/tests/test_computenode_driver_gce.py
index 41cb1aa..e8b2fa3 100644
--- a/services/nodemanager/tests/test_computenode_driver_gce.py
+++ b/services/nodemanager/tests/test_computenode_driver_gce.py
@@ -48,6 +48,16 @@ class GCEComputeNodeDriverTestCase(testutil.DriverTestMixin, unittest.TestCase):
metadata = self.driver_mock().create_node.call_args[1]['ex_metadata']
self.assertIn('ping_secret=ssshh', metadata.get('arv-ping-url'))
+ def test_create_raises_but_actually_succeeded(self):
+ arv_node = testutil.arvados_node_mock(1, hostname=None)
+ driver = self.new_driver()
+ nodelist = [testutil.cloud_node_mock(1)]
+ nodelist[0].name = 'compute-000000000000001-zzzzz'
+ self.driver_mock().list_nodes.return_value = nodelist
+ self.driver_mock().create_node.side_effect = IOError
+ n = driver.create_node(testutil.MockSize(1), arv_node)
+ self.assertEqual('compute-000000000000001-zzzzz', n.name)
+
def test_create_sets_default_hostname(self):
driver = self.new_driver()
driver.create_node(testutil.MockSize(1),
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list