[ARVADOS] created: 30175eff28e6add8053dacc7a6a8091c59844835

Git user git at public.curoverse.com
Fri Jun 24 11:04:53 EDT 2016


        at  30175eff28e6add8053dacc7a6a8091c59844835 (commit)


commit 30175eff28e6add8053dacc7a6a8091c59844835
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date:   Fri Jun 24 11:04:48 2016 -0400

    9486: Add test

diff --git a/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py b/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py
index 7918c93..f4bdb43 100644
--- a/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py
+++ b/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py
@@ -96,6 +96,7 @@ class ComputeNodeSetupActor(ComputeNodeStateChangeBase):
         self.cloud_size = cloud_size
         self.arvados_node = None
         self.cloud_node = None
+        self.success = None
         if arvados_node is None:
             self._later.create_arvados_node()
         else:
@@ -153,6 +154,7 @@ class ComputeNodeSetupActor(ComputeNodeStateChangeBase):
 
     @RetryMixin._retry(config.ARVADOS_ERRORS)
     def _finished(self, success_flag=False):
+        self.success = success_flag
         if success_flag is not True and self.arvados_node:
             # Delete arvados node record on setup failure, this is safe to do
             # because this actor has already claimed the node record.  This
diff --git a/services/nodemanager/tests/test_computenode_dispatch.py b/services/nodemanager/tests/test_computenode_dispatch.py
index 227b5e5..b3eee0f 100644
--- a/services/nodemanager/tests/test_computenode_dispatch.py
+++ b/services/nodemanager/tests/test_computenode_dispatch.py
@@ -86,6 +86,14 @@ class ComputeNodeSetupActorTestCase(testutil.ActorTestMixin, unittest.TestCase):
         self.make_actor()
         self.wait_for_assignment(self.setup_actor, 'cloud_node')
 
+    def test_failed_cloud_calls_delete_arvados_node(self):
+        self.make_mocks()
+        self.cloud_client.create_node.side_effect = Exception("test cloud creation error")
+        self.cloud_client.is_cloud_exception.return_value = False
+        self.make_actor()
+        self.wait_for_assignment(self.setup_actor, 'success')
+        self.assertTrue(self.api_client.nodes().delete().execute.called)
+
     def test_failed_post_create_retried(self):
         self.make_mocks()
         self.cloud_client.post_create_node.side_effect = [

commit 1e116f7b2f3823f223494ec2f630f27af563e4b6
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date:   Fri Jun 24 10:18:22 2016 -0400

    9486: Delete arvados node record on cloud node create failure.
    
    This addresses the failure mode where node manager is unable to create a new
    cloud node but does create a new arvados node record; the new node record can't
    be used on the next attempt because it is too new; as a result new records are
    created on every attempt which can result in the node table filling up with
    unused records.

diff --git a/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py b/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py
index a950210..7918c93 100644
--- a/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py
+++ b/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py
@@ -151,11 +151,25 @@ class ComputeNodeSetupActor(ComputeNodeStateChangeBase):
         self._logger.info("%s updated properties.", self.arvados_node['uuid'])
         self._later.post_create()
 
+    @RetryMixin._retry(config.ARVADOS_ERRORS)
+    def _finished(self, success_flag=False):
+        if success_flag is not True and self.arvados_node:
+            # Delete arvados node record on setup failure, this is safe to do
+            # because this actor has already claimed the node record.  This
+            # addresses the failure mode where node manager is unable to create
+            # a new cloud node but does create a new arvados node record; the
+            # new node record can't be used on the next attempt because it is
+            # too new; as a result new records are created on every attempt
+            # which can result in the node table filling up with unused
+            # records.
+            self._arvados.nodes().delete(uuid=self.arvados_node['uuid']).execute()
+        return super(ComputeNodeSetupActor, self)._finished()
+
     @RetryMixin._retry()
     def post_create(self):
         self._cloud.post_create_node(self.cloud_node)
         self._logger.info("%s post-create work done.", self.cloud_node.id)
-        self._finished()
+        self._finished(True)
 
     def stop_if_no_cloud_node(self):
         if self.cloud_node is not None:

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list