[ARVADOS] created: acd3d380bb636ca7ef324e0903e54e309b132dc7

Git user git at public.curoverse.com
Wed Aug 2 12:55:16 EDT 2017


        at  acd3d380bb636ca7ef324e0903e54e309b132dc7 (commit)


commit acd3d380bb636ca7ef324e0903e54e309b132dc7
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date:   Wed Aug 2 12:49:25 2017 -0400

    11925: Fix flaky tests in test_computenode_dispatch_slurm.
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curoverse.com>

diff --git a/services/nodemanager/tests/test_computenode_dispatch.py b/services/nodemanager/tests/test_computenode_dispatch.py
index a8aa2e3..afd37ee 100644
--- a/services/nodemanager/tests/test_computenode_dispatch.py
+++ b/services/nodemanager/tests/test_computenode_dispatch.py
@@ -207,7 +207,7 @@ class ComputeNodeShutdownActorMixin(testutil.ActorTestMixin):
         self.make_mocks(shutdown_open=True, arvados_node=testutil.arvados_node_mock(crunch_worker_state="busy"))
         self.cloud_client.destroy_node.return_value = True
         self.make_actor(cancellable=True)
-        self.check_success_flag(False)
+        self.check_success_flag(False, 2)
         self.assertFalse(self.cloud_client.destroy_node.called)
 
     def test_uncancellable_shutdown(self, *mocks):
@@ -241,6 +241,7 @@ class ComputeNodeShutdownActorMixin(testutil.ActorTestMixin):
         self.cloud_client.destroy_node.return_value = False
         self.make_actor(cancellable=True)
         self.shutdown_actor.cancel_shutdown("test")
+        self.shutdown_actor.ping()
         self.check_success_flag(False, 2)
         self.assertFalse(self.arvados_client.nodes().update.called)
 
diff --git a/services/nodemanager/tests/test_computenode_dispatch_slurm.py b/services/nodemanager/tests/test_computenode_dispatch_slurm.py
index c7eb7af..d00425e 100644
--- a/services/nodemanager/tests/test_computenode_dispatch_slurm.py
+++ b/services/nodemanager/tests/test_computenode_dispatch_slurm.py
@@ -32,13 +32,20 @@ class SLURMComputeNodeShutdownActorTestCase(ComputeNodeShutdownActorMixin,
             self.timer = testutil.MockTimer(False)
         self.make_actor()
         self.check_success_flag(None, 0)
+        # At this point, 1st try should have happened.
+
         self.timer.deliver()
         self.check_success_flag(None, 0)
-        self.timer.deliver()
+        # At this point, 2nd try should have happened.
+
         # Order is critical here: if the mock gets called when no return value
         # or side effect is set, we may invoke a real subprocess.
         proc_mock.return_value = end_state
         proc_mock.side_effect = None
+
+        # 3rd try
+        self.timer.deliver()
+
         self.check_success_flag(True, 3)
         self.check_slurm_got_args(proc_mock, 'NodeName=compute63')
 
@@ -73,8 +80,9 @@ class SLURMComputeNodeShutdownActorTestCase(ComputeNodeShutdownActorMixin,
             self.make_mocks(arvados_node=testutil.arvados_node_mock(job_uuid=True))
             self.timer = testutil.MockTimer(False)
             self.make_actor()
+            self.shutdown_actor.ping()
             self.busywait(lambda: proc_mock.call_args is not None)
-            self.shutdown_actor.cancel_shutdown("test").get(self.TIMEOUT)
+            self.shutdown_actor.cancel_shutdown("test")
             self.check_success_flag(False, 2)
             self.assertEqual(proc_mock.call_args_list,
                              [mock.call(['scontrol', 'update', 'NodeName=compute99', 'State=DRAIN', 'Reason=Node Manager shutdown']),
@@ -88,10 +96,10 @@ class SLURMComputeNodeShutdownActorTestCase(ComputeNodeShutdownActorMixin,
         proc_mock.side_effect = iter([OSError, 'drain\n', OSError, 'idle\n', 'idle\n'])
         self.make_mocks(arvados_node=testutil.arvados_node_mock(job_uuid=True))
         self.make_actor()
-        self.check_success_flag(False, 2)
+        self.check_success_flag(False, 5)
 
     def test_issue_slurm_drain_retry(self, proc_mock):
-        proc_mock.side_effect = iter([OSError, '', OSError, 'drng\n'])
+        proc_mock.side_effect = iter([OSError, OSError, 'drng\n', 'drain\n'])
         self.check_success_after_reset(proc_mock, timer=False)
 
     def test_arvados_node_cleaned_after_shutdown(self, proc_mock):

commit 7a78b6728122c56343d68502192445ae476d24b0
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date:   Fri Jul 28 09:51:18 2017 -0400

    11925: Make watchdog test more reliable.
    
    Add a wait so that it doesn't try to shut down the threads before they have had a
    chance to start.  Also added services/nodemanager_suite test case which includes
    both unit and integration tests for node manager.
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curoverse.com>

diff --git a/build/run-tests.sh b/build/run-tests.sh
index 3952b36..2078081 100755
--- a/build/run-tests.sh
+++ b/build/run-tests.sh
@@ -81,7 +81,7 @@ services/keepstore
 services/keep-balance
 services/login-sync
 services/nodemanager
-services/nodemanager-integration
+services/nodemanager_integration
 services/crunch-run
 services/crunch-dispatch-local
 services/crunch-dispatch-slurm
@@ -545,6 +545,9 @@ do_test() {
         apps/workbench_units | apps/workbench_functionals | apps/workbench_integration)
             suite=apps/workbench
             ;;
+        services/nodemanager | services/nodemanager_integration)
+            suite=services/nodemanager_suite
+            ;;
         *)
             suite="${1}"
             ;;
@@ -860,11 +863,11 @@ test_login-sync() {
 }
 do_test services/login-sync login-sync
 
-test_nodemanager-integration() {
+test_nodemanager_integration() {
     cd "$WORKSPACE/services/nodemanager" \
-        && tests/integration_test.py ${testargs[services/nodemanager-integration]}
+        && tests/integration_test.py ${testargs[services/nodemanager_integration]}
 }
-do_test services/nodemanager-integration nodemanager-integration
+do_test services/nodemanager_integration nodemanager_integration
 
 for p in "${pythonstuff[@]}"
 do
diff --git a/services/nodemanager/tests/test_failure.py b/services/nodemanager/tests/test_failure.py
index cfac61b..8704ef9 100644
--- a/services/nodemanager/tests/test_failure.py
+++ b/services/nodemanager/tests/test_failure.py
@@ -29,7 +29,7 @@ class BogusActor(arvnodeman.baseactor.BaseNodeManagerActor):
     def ping(self):
         # Called by WatchdogActorTest, this delay is longer than the test timeout
         # of 1 second, which should cause the watchdog ping to fail.
-        time.sleep(4)
+        time.sleep(2)
         return True
 
 class ActorUnhandledExceptionTest(testutil.ActorTestMixin, unittest.TestCase):
@@ -53,6 +53,7 @@ class WatchdogActorTest(testutil.ActorTestMixin, unittest.TestCase):
     def test_time_timout(self, kill_mock):
         act = BogusActor.start(OSError(errno.ENOENT, ""))
         watch = arvnodeman.baseactor.WatchdogActor.start(1, act)
+        time.sleep(1)
         watch.stop(block=True)
         act.stop(block=True)
         self.assertTrue(kill_mock.called)

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list