[ARVADOS] updated: e1a67ca409408d063f8de92438a52957c2c9644c

git at public.curoverse.com git at public.curoverse.com
Tue Oct 7 16:18:09 EDT 2014


Summary of changes:
 services/nodemanager/arvnodeman/daemon.py | 11 +++++++++--
 services/nodemanager/tests/test_daemon.py | 17 +++++++++++++++++
 2 files changed, 26 insertions(+), 2 deletions(-)

       via  e1a67ca409408d063f8de92438a52957c2c9644c (commit)
      from  70fd9ea4dc6177c1774d90223d4f94edd5332c3a (commit)

Those revisions listed above that are new to this repository have
not appeared in any other notification email, so they are listed
in full below.


commit e1a67ca409408d063f8de92438a52957c2c9644c
Author: Brett Smith <brett at curoverse.com>
Date:   Tue Oct 7 16:20:09 2014 -0400

    2881: Avoid race condition when tracking new cloud nodes.
    
    When we boot a new cloud node, it's possible we'll see it in the cloud
    node list before CloudNodeSetupActor sends a message to
    NodeManagerDaemonActor.  This commit helps ensure we handle that case
    gracefully.

diff --git a/services/nodemanager/arvnodeman/daemon.py b/services/nodemanager/arvnodeman/daemon.py
index 1aed1c5..ff28cc4 100644
--- a/services/nodemanager/arvnodeman/daemon.py
+++ b/services/nodemanager/arvnodeman/daemon.py
@@ -237,9 +237,16 @@ class NodeManagerDaemonActor(actor_class):
 
     def node_up(self, setup_proxy):
         cloud_node, arvados_node = self._actor_nodes(setup_proxy)
-        self._new_node(cloud_node, arvados_node)
+        cloud_key = cloud_node.id
+        arv_key = arvados_node['uuid']
         del self.booting[setup_proxy.actor_ref.actor_urn]
-        self.assigned_arv.pop(arvados_node['uuid'], None)
+        self.assigned_arv.pop(arv_key, None)
+        if cloud_key in self.unpaired_clouds:
+            if self.unpaired_clouds[cloud_key].offer_arvados_pair(
+                  arvados_node).get():
+                self._pair_nodes(cloud_key, arv_key)
+        elif cloud_key not in self.paired_clouds:
+            self._new_node(cloud_node, arvados_node)
         setup_proxy.stop()
 
     @_check_poll_freshness
diff --git a/services/nodemanager/tests/test_daemon.py b/services/nodemanager/tests/test_daemon.py
index 8459fb4..5f5e733 100644
--- a/services/nodemanager/tests/test_daemon.py
+++ b/services/nodemanager/tests/test_daemon.py
@@ -95,6 +95,23 @@ class NodeManagerDaemonActorTestCase(testutil.ActorTestMixin,
         self.daemon.update_server_wishlist(server_wishlist).get(self.TIMEOUT)
         self.assertFalse(self.node_setup.called)
 
+    def test_no_duplication_when_booting_node_listed_fast(self):
+        # Test that we don't start two ComputeNodeActors when we learn about
+        # a booting node through a listing before we get the "node up"
+        # message from CloudNodeSetupActor.
+        cloud_node = testutil.cloud_node_mock(1)
+        self.make_daemon(want_sizes=[testutil.MockSize(1)])
+        self.wait_for_call(self.node_setup.start)
+        setup = mock.MagicMock(name='setup_node_mock')
+        setup.actor_ref = self.node_setup.start().proxy().actor_ref
+        setup.cloud_node.get.return_value = cloud_node
+        setup.arvados_node.get.return_value = testutil.arvados_node_mock(1)
+        self.daemon.update_cloud_nodes([cloud_node])
+        self.wait_for_call(self.node_factory.start)
+        self.node_factory.reset_mock()
+        self.daemon.node_up(setup).get(self.TIMEOUT)
+        self.assertFalse(self.node_factory.start.called)
+
     def test_booting_nodes_shut_down(self):
         self.make_daemon(want_sizes=[testutil.MockSize(1)])
         self.wait_for_call(self.node_setup.start)

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list