[ARVADOS] updated: aea45ff7ad85ec23a903faf16386d43e0d614f83

git at public.curoverse.com git at public.curoverse.com
Tue Feb 2 12:27:04 EST 2016


Summary of changes:
 .../arvnodeman/computenode/driver/__init__.py      | 32 +++++++++++++++++++---
 .../arvnodeman/computenode/driver/ec2.py           |  3 ++
 .../arvnodeman/computenode/driver/gce.py           | 22 ---------------
 .../tests/test_computenode_driver_azure.py         | 10 +++++++
 4 files changed, 41 insertions(+), 26 deletions(-)

       via  aea45ff7ad85ec23a903faf16386d43e0d614f83 (commit)
      from  6d349aac4810e7b0039e519860d0eda8345c9dfe (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.


commit aea45ff7ad85ec23a903faf16386d43e0d614f83
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date:   Tue Feb 2 12:26:57 2016 -0500

    6702: Refactor create_node into BaseComputeNodeDriver so the logic also
    applies to Azure.  Add a new find_node() method; if it returns None or
    raises an error, the original create_node exception is re-raised.

diff --git a/services/nodemanager/arvnodeman/computenode/driver/__init__.py b/services/nodemanager/arvnodeman/computenode/driver/__init__.py
index b696677..11cbc99 100644
--- a/services/nodemanager/arvnodeman/computenode/driver/__init__.py
+++ b/services/nodemanager/arvnodeman/computenode/driver/__init__.py
@@ -127,11 +127,35 @@ class BaseComputeNodeDriver(RetryMixin):
             self.ping_host, arvados_node['uuid'],
             arvados_node['info']['ping_secret'])
 
+    def find_node(self, name):
+        node = [n for n in self.list_nodes() if n.name == name]
+        if node:
+            return node[0]
+        else:
+            return None
+
     def create_node(self, size, arvados_node):
-        kwargs = self.create_kwargs.copy()
-        kwargs.update(self.arvados_create_kwargs(size, arvados_node))
-        kwargs['size'] = size
-        return self.real.create_node(**kwargs)
+        try:
+            kwargs = self.create_kwargs.copy()
+            kwargs.update(self.arvados_create_kwargs(size, arvados_node))
+            kwargs['size'] = size
+            return self.real.create_node(**kwargs)
+        except self.CLOUD_ERRORS:
+            # Workaround for bug #6702: sometimes the create node request
+            # succeeds but times out and raises an exception instead of
+            # returning a result.  If this happens, we get stuck in a retry
+            # loop forever because subsequent create_node attempts will fail
+            # due to node name collision.  So check if the node we intended to
+            # create shows up in the cloud node list and return it if found.
+            try:
+                node = self.find_node(kwargs['name'])
+                if node:
+                    return node
+            except:
+                # Ignore possible exception from find_node in favor of
+                # re-raising the original create_node exception.
+                pass
+            raise
 
     def post_create_node(self, cloud_node):
         # ComputeNodeSetupActor calls this method after the cloud node is
diff --git a/services/nodemanager/arvnodeman/computenode/driver/ec2.py b/services/nodemanager/arvnodeman/computenode/driver/ec2.py
index 991a298..d89c48e 100644
--- a/services/nodemanager/arvnodeman/computenode/driver/ec2.py
+++ b/services/nodemanager/arvnodeman/computenode/driver/ec2.py
@@ -75,6 +75,9 @@ class ComputeNodeDriver(BaseComputeNodeDriver):
         self.real.ex_create_tags(cloud_node,
                                  {'Name': arvados_node_fqdn(arvados_node)})
 
+    def find_node(self, name):
+        raise NotImplementedError("ec2.ComputeNodeDriver.find_node")
+
     def list_nodes(self):
         # Need to populate Node.size
         nodes = super(ComputeNodeDriver, self).list_nodes()
diff --git a/services/nodemanager/arvnodeman/computenode/driver/gce.py b/services/nodemanager/arvnodeman/computenode/driver/gce.py
index 860aa38..c5bf0b8 100644
--- a/services/nodemanager/arvnodeman/computenode/driver/gce.py
+++ b/services/nodemanager/arvnodeman/computenode/driver/gce.py
@@ -101,28 +101,6 @@ class ComputeNodeDriver(BaseComputeNodeDriver):
                 })
         return result
 
-    def create_node(self, size, arvados_node):
-        try:
-            kwargs = self.create_kwargs.copy()
-            kwargs.update(self.arvados_create_kwargs(size, arvados_node))
-            kwargs['size'] = size
-            return self.real.create_node(**kwargs)
-        except ComputeNodeDriver.CLOUD_ERRORS:
-            # Workaround for bug #6702: sometimes the create node request
-            # succeeds but times out and raises an exception instead of
-            # returning a result.  If this happens, we get stuck in a retry
-            # loop forever because subsequent create_node attempts will fail
-            # due to node name collision.  So check if the node we intended to
-            # create shows up in the cloud node list and return it if found.
-            try:
-                node = [n for n in self.list_nodes() if n.name == kwargs['name']]
-                if node:
-                    return node[0]
-            except:
-                # Ignore possible exception from list_nodes in favor of
-                # re-raising the original create_node exception.
-                pass
-            raise
 
     def list_nodes(self):
         # The GCE libcloud driver only supports filtering node lists by zone.
diff --git a/services/nodemanager/tests/test_computenode_driver_azure.py b/services/nodemanager/tests/test_computenode_driver_azure.py
index 3ef152e..5721abc 100644
--- a/services/nodemanager/tests/test_computenode_driver_azure.py
+++ b/services/nodemanager/tests/test_computenode_driver_azure.py
@@ -100,3 +100,13 @@ echo compute-000000000000063-zzzzz > /var/tmp/arv-node-data/meta-data/instance-i
 echo z1.test > /var/tmp/arv-node-data/meta-data/instance-type
 """,
                          driver.arvados_create_kwargs(testutil.MockSize(1), arv_node)['ex_customdata'])
+
+    def test_create_raises_but_actually_succeeded(self):
+        arv_node = testutil.arvados_node_mock(1, hostname=None)
+        driver = self.new_driver(create_kwargs={"tag_arvados-class": "dynamic-compute"})
+        nodelist = [testutil.cloud_node_mock(1, tags={"arvados-class": "dynamic-compute"})]
+        nodelist[0].name = 'compute-000000000000001-zzzzz'
+        self.driver_mock().list_nodes.return_value = nodelist
+        self.driver_mock().create_node.side_effect = IOError
+        n = driver.create_node(testutil.MockSize(1), arv_node)
+        self.assertEqual('compute-000000000000001-zzzzz', n.name)

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list