[ARVADOS] created: acd3d380bb636ca7ef324e0903e54e309b132dc7
Git user
git at public.curoverse.com
Wed Aug 2 12:55:16 EDT 2017
at acd3d380bb636ca7ef324e0903e54e309b132dc7 (commit)
commit acd3d380bb636ca7ef324e0903e54e309b132dc7
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date: Wed Aug 2 12:49:25 2017 -0400
11925: Fix flaky tests in test_computenode_dispatch_slurm.
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curoverse.com>
diff --git a/services/nodemanager/tests/test_computenode_dispatch.py b/services/nodemanager/tests/test_computenode_dispatch.py
index a8aa2e3..afd37ee 100644
--- a/services/nodemanager/tests/test_computenode_dispatch.py
+++ b/services/nodemanager/tests/test_computenode_dispatch.py
@@ -207,7 +207,7 @@ class ComputeNodeShutdownActorMixin(testutil.ActorTestMixin):
self.make_mocks(shutdown_open=True, arvados_node=testutil.arvados_node_mock(crunch_worker_state="busy"))
self.cloud_client.destroy_node.return_value = True
self.make_actor(cancellable=True)
- self.check_success_flag(False)
+ self.check_success_flag(False, 2)
self.assertFalse(self.cloud_client.destroy_node.called)
def test_uncancellable_shutdown(self, *mocks):
@@ -241,6 +241,7 @@ class ComputeNodeShutdownActorMixin(testutil.ActorTestMixin):
self.cloud_client.destroy_node.return_value = False
self.make_actor(cancellable=True)
self.shutdown_actor.cancel_shutdown("test")
+ self.shutdown_actor.ping()
self.check_success_flag(False, 2)
self.assertFalse(self.arvados_client.nodes().update.called)
diff --git a/services/nodemanager/tests/test_computenode_dispatch_slurm.py b/services/nodemanager/tests/test_computenode_dispatch_slurm.py
index c7eb7af..d00425e 100644
--- a/services/nodemanager/tests/test_computenode_dispatch_slurm.py
+++ b/services/nodemanager/tests/test_computenode_dispatch_slurm.py
@@ -32,13 +32,20 @@ class SLURMComputeNodeShutdownActorTestCase(ComputeNodeShutdownActorMixin,
self.timer = testutil.MockTimer(False)
self.make_actor()
self.check_success_flag(None, 0)
+ # At this point, 1st try should have happened.
+
self.timer.deliver()
self.check_success_flag(None, 0)
- self.timer.deliver()
+ # At this point, 2nd try should have happened.
+
# Order is critical here: if the mock gets called when no return value
# or side effect is set, we may invoke a real subprocess.
proc_mock.return_value = end_state
proc_mock.side_effect = None
+
+ # 3rd try
+ self.timer.deliver()
+
self.check_success_flag(True, 3)
self.check_slurm_got_args(proc_mock, 'NodeName=compute63')
@@ -73,8 +80,9 @@ class SLURMComputeNodeShutdownActorTestCase(ComputeNodeShutdownActorMixin,
self.make_mocks(arvados_node=testutil.arvados_node_mock(job_uuid=True))
self.timer = testutil.MockTimer(False)
self.make_actor()
+ self.shutdown_actor.ping()
self.busywait(lambda: proc_mock.call_args is not None)
- self.shutdown_actor.cancel_shutdown("test").get(self.TIMEOUT)
+ self.shutdown_actor.cancel_shutdown("test")
self.check_success_flag(False, 2)
self.assertEqual(proc_mock.call_args_list,
[mock.call(['scontrol', 'update', 'NodeName=compute99', 'State=DRAIN', 'Reason=Node Manager shutdown']),
@@ -88,10 +96,10 @@ class SLURMComputeNodeShutdownActorTestCase(ComputeNodeShutdownActorMixin,
proc_mock.side_effect = iter([OSError, 'drain\n', OSError, 'idle\n', 'idle\n'])
self.make_mocks(arvados_node=testutil.arvados_node_mock(job_uuid=True))
self.make_actor()
- self.check_success_flag(False, 2)
+ self.check_success_flag(False, 5)
def test_issue_slurm_drain_retry(self, proc_mock):
- proc_mock.side_effect = iter([OSError, '', OSError, 'drng\n'])
+ proc_mock.side_effect = iter([OSError, OSError, 'drng\n', 'drain\n'])
self.check_success_after_reset(proc_mock, timer=False)
def test_arvados_node_cleaned_after_shutdown(self, proc_mock):
commit 7a78b6728122c56343d68502192445ae476d24b0
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date: Fri Jul 28 09:51:18 2017 -0400
11925: Make watchdog test more reliable.
Add wait so that it doesn't try to shut down the threads before they have had a
chance to start. Also added services/nodemanager_suite test case which includes
both unit and integration tests for node manager.
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curoverse.com>
diff --git a/build/run-tests.sh b/build/run-tests.sh
index 3952b36..2078081 100755
--- a/build/run-tests.sh
+++ b/build/run-tests.sh
@@ -81,7 +81,7 @@ services/keepstore
services/keep-balance
services/login-sync
services/nodemanager
-services/nodemanager-integration
+services/nodemanager_integration
services/crunch-run
services/crunch-dispatch-local
services/crunch-dispatch-slurm
@@ -545,6 +545,9 @@ do_test() {
apps/workbench_units | apps/workbench_functionals | apps/workbench_integration)
suite=apps/workbench
;;
+ services/nodemanager | services/nodemanager_integration)
+ suite=services/nodemanager_suite
+ ;;
*)
suite="${1}"
;;
@@ -860,11 +863,11 @@ test_login-sync() {
}
do_test services/login-sync login-sync
-test_nodemanager-integration() {
+test_nodemanager_integration() {
cd "$WORKSPACE/services/nodemanager" \
- && tests/integration_test.py ${testargs[services/nodemanager-integration]}
+ && tests/integration_test.py ${testargs[services/nodemanager_integration]}
}
-do_test services/nodemanager-integration nodemanager-integration
+do_test services/nodemanager_integration nodemanager_integration
for p in "${pythonstuff[@]}"
do
diff --git a/services/nodemanager/tests/test_failure.py b/services/nodemanager/tests/test_failure.py
index cfac61b..8704ef9 100644
--- a/services/nodemanager/tests/test_failure.py
+++ b/services/nodemanager/tests/test_failure.py
@@ -29,7 +29,7 @@ class BogusActor(arvnodeman.baseactor.BaseNodeManagerActor):
def ping(self):
# Called by WatchdogActorTest, this delay is longer than the test timeout
# of 1 second, which should cause the watchdog ping to fail.
- time.sleep(4)
+ time.sleep(2)
return True
class ActorUnhandledExceptionTest(testutil.ActorTestMixin, unittest.TestCase):
@@ -53,6 +53,7 @@ class WatchdogActorTest(testutil.ActorTestMixin, unittest.TestCase):
def test_time_timout(self, kill_mock):
act = BogusActor.start(OSError(errno.ENOENT, ""))
watch = arvnodeman.baseactor.WatchdogActor.start(1, act)
+ time.sleep(1)
watch.stop(block=True)
act.stop(block=True)
self.assertTrue(kill_mock.called)
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list