[ARVADOS] created: 30175eff28e6add8053dacc7a6a8091c59844835
Git user
git at public.curoverse.com
Fri Jun 24 11:04:53 EDT 2016
at 30175eff28e6add8053dacc7a6a8091c59844835 (commit)
commit 30175eff28e6add8053dacc7a6a8091c59844835
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date: Fri Jun 24 11:04:48 2016 -0400
9486: Add test
diff --git a/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py b/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py
index 7918c93..f4bdb43 100644
--- a/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py
+++ b/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py
@@ -96,6 +96,7 @@ class ComputeNodeSetupActor(ComputeNodeStateChangeBase):
self.cloud_size = cloud_size
self.arvados_node = None
self.cloud_node = None
+ self.success = None
if arvados_node is None:
self._later.create_arvados_node()
else:
@@ -153,6 +154,7 @@ class ComputeNodeSetupActor(ComputeNodeStateChangeBase):
@RetryMixin._retry(config.ARVADOS_ERRORS)
def _finished(self, success_flag=False):
+ self.success = success_flag
if success_flag is not True and self.arvados_node:
# Delete arvados node record on setup failure, this is safe to do
# because this actor has already claimed the node record. This
diff --git a/services/nodemanager/tests/test_computenode_dispatch.py b/services/nodemanager/tests/test_computenode_dispatch.py
index 227b5e5..b3eee0f 100644
--- a/services/nodemanager/tests/test_computenode_dispatch.py
+++ b/services/nodemanager/tests/test_computenode_dispatch.py
@@ -86,6 +86,14 @@ class ComputeNodeSetupActorTestCase(testutil.ActorTestMixin, unittest.TestCase):
self.make_actor()
self.wait_for_assignment(self.setup_actor, 'cloud_node')
+ def test_failed_cloud_calls_delete_arvados_node(self):
+ self.make_mocks()
+ self.cloud_client.create_node.side_effect = Exception("test cloud creation error")
+ self.cloud_client.is_cloud_exception.return_value = False
+ self.make_actor()
+ self.wait_for_assignment(self.setup_actor, 'success')
+ self.assertTrue(self.api_client.nodes().delete().execute.called)
+
def test_failed_post_create_retried(self):
self.make_mocks()
self.cloud_client.post_create_node.side_effect = [
commit 1e116f7b2f3823f223494ec2f630f27af563e4b6
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date: Fri Jun 24 10:18:22 2016 -0400
9486: Delete arvados node record on cloud node create failure.
This addresses the failure mode where node manager is unable to create a new
cloud node but does create a new arvados node record; the new node record can't
be used on the next attempt because it is too new; as a result new records are
created on every attempt which can result in the node table filling up with
unused records.
diff --git a/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py b/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py
index a950210..7918c93 100644
--- a/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py
+++ b/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py
@@ -151,11 +151,25 @@ class ComputeNodeSetupActor(ComputeNodeStateChangeBase):
self._logger.info("%s updated properties.", self.arvados_node['uuid'])
self._later.post_create()
+ @RetryMixin._retry(config.ARVADOS_ERRORS)
+ def _finished(self, success_flag=False):
+ if success_flag is not True and self.arvados_node:
+ # Delete arvados node record on setup failure, this is safe to do
+ # because this actor has already claimed the node record. This
+ # addresses the failure mode where node manager is unable to create
+ # a new cloud node but does create a new arvados node record; the
+ # new node record can't be used on the next attempt because it is
+ # too new; as a result new records are created on every attempt
+ # which can result in the node table filling up with unused
+ # records.
+ self._arvados.nodes().delete(uuid=self.arvados_node['uuid']).execute()
+ return super(ComputeNodeSetupActor, self)._finished()
+
@RetryMixin._retry()
def post_create(self):
self._cloud.post_create_node(self.cloud_node)
self._logger.info("%s post-create work done.", self.cloud_node.id)
- self._finished()
+ self._finished(True)
def stop_if_no_cloud_node(self):
if self.cloud_node is not None:
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list