[ARVADOS] updated: b02fc7b2227e923beb4d47daee7949c94b90da5f

Git user git at public.curoverse.com
Thu Aug 3 13:03:10 EDT 2017


Summary of changes:
 services/nodemanager/arvnodeman/baseactor.py       |  3 +-
 .../arvnodeman/computenode/dispatch/__init__.py    |  6 ++
 .../arvnodeman/computenode/dispatch/slurm.py       |  2 +-
 services/nodemanager/arvnodeman/timedcallback.py   |  8 +-
 services/nodemanager/tests/__init__.py             |  1 +
 .../nodemanager/tests/test_computenode_dispatch.py | 10 ++-
 .../tests/test_computenode_dispatch_slurm.py       | 11 +--
 services/nodemanager/tests/test_daemon.py          | 92 ++++++++++++----------
 services/nodemanager/tests/test_failure.py         |  7 +-
 services/nodemanager/tests/test_timedcallback.py   | 37 ++++-----
 services/nodemanager/tests/testutil.py             |  6 +-
 11 files changed, 105 insertions(+), 78 deletions(-)

       via  b02fc7b2227e923beb4d47daee7949c94b90da5f (commit)
      from  9bcdc4d79859742c44e09e606cbcdd64c7377e5f (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.


commit b02fc7b2227e923beb4d47daee7949c94b90da5f
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date:   Thu Aug 3 13:02:26 2017 -0400

    11925: Make unit tests more reliable.
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curoverse.com>

diff --git a/services/nodemanager/arvnodeman/baseactor.py b/services/nodemanager/arvnodeman/baseactor.py
index 988b83c..0142db2 100644
--- a/services/nodemanager/arvnodeman/baseactor.py
+++ b/services/nodemanager/arvnodeman/baseactor.py
@@ -101,12 +101,13 @@ class WatchdogActor(pykka.ThreadingActor):
          self.actors = [a.proxy() for a in args]
          self.actor_ref = TellableActorRef(self)
          self._later = self.actor_ref.tell_proxy()
+         self._killfunc = kwargs.get("killfunc", os.kill)
 
     def kill_self(self, e, act):
         lg = getattr(self, "_logger", logging)
         lg.critical("Watchdog exception", exc_info=e)
         lg.critical("Actor %s watchdog ping time out, killing Node Manager", act)
-        os.kill(os.getpid(), signal.SIGKILL)
+        self._killfunc(os.getpid(), signal.SIGKILL)
 
     def on_start(self):
         self._later.run()
diff --git a/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py b/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py
index fb9a6bf..c5dd1ad 100644
--- a/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py
+++ b/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py
@@ -240,6 +240,9 @@ class ComputeNodeShutdownActor(ComputeNodeStateChangeBase):
         return super(ComputeNodeShutdownActor, self)._finished()
 
     def cancel_shutdown(self, reason, **kwargs):
+        if self.cancel_reason is not None:
+            # already cancelled
+            return
         self.cancel_reason = reason
         self._logger.info("Shutdown cancelled: %s.", reason)
         self._finished(success_flag=False)
@@ -257,6 +260,9 @@ class ComputeNodeShutdownActor(ComputeNodeStateChangeBase):
 
     @_cancel_on_exception
     def shutdown_node(self):
+        if self.cancel_reason is not None:
+            # already cancelled
+            return
         if self.cancellable:
             self._logger.info("Checking that node is still eligible for shutdown")
             eligible, reason = self._monitor.shutdown_eligible().get()
diff --git a/services/nodemanager/arvnodeman/computenode/dispatch/slurm.py b/services/nodemanager/arvnodeman/computenode/dispatch/slurm.py
index fa56578..c8883c3 100644
--- a/services/nodemanager/arvnodeman/computenode/dispatch/slurm.py
+++ b/services/nodemanager/arvnodeman/computenode/dispatch/slurm.py
@@ -73,7 +73,7 @@ class ComputeNodeShutdownActor(SlurmMixin, ShutdownActorBase):
         if output in ("drng\n", "alloc\n", "drng*\n", "alloc*\n"):
             self._timer.schedule(time.time() + 10,
                                  self._later.await_slurm_drain)
-        elif output in ("idle\n"):
+        elif output in ("idle\n",):
             # Not in "drng" but idle, don't shut down
             self.cancel_shutdown("slurm state is %s" % output.strip(), try_resume=False)
         else:
diff --git a/services/nodemanager/arvnodeman/timedcallback.py b/services/nodemanager/arvnodeman/timedcallback.py
index 4d2a139..e7e3f25 100644
--- a/services/nodemanager/arvnodeman/timedcallback.py
+++ b/services/nodemanager/arvnodeman/timedcallback.py
@@ -19,11 +19,15 @@ class TimedCallBackActor(actor_class):
     message at a later time.  This actor runs the necessary event loop for
     delivery.
     """
-    def __init__(self, max_sleep=1):
+    def __init__(self, max_sleep=1, timefunc=None):
         super(TimedCallBackActor, self).__init__()
         self._proxy = self.actor_ref.tell_proxy()
         self.messages = []
         self.max_sleep = max_sleep
+        if timefunc is None:
+            self._timefunc = time.time
+        else:
+            self._timefunc = timefunc
 
     def schedule(self, delivery_time, receiver, *args, **kwargs):
         if not self.messages:
@@ -33,7 +37,7 @@ class TimedCallBackActor(actor_class):
     def deliver(self):
         if not self.messages:
             return
-        til_next = self.messages[0][0] - time.time()
+        til_next = self.messages[0][0] - self._timefunc()
         if til_next <= 0:
             t, receiver, args, kwargs = heapq.heappop(self.messages)
             try:
diff --git a/services/nodemanager/tests/__init__.py b/services/nodemanager/tests/__init__.py
index 20e02f9..c4ecc30 100644
--- a/services/nodemanager/tests/__init__.py
+++ b/services/nodemanager/tests/__init__.py
@@ -7,6 +7,7 @@ import logging
 import os
 
 # Set the ANMTEST_LOGLEVEL environment variable to enable logging at that level.
+#loglevel = os.environ.get('ANMTEST_LOGLEVEL', 'DEBUG')
 loglevel = os.environ.get('ANMTEST_LOGLEVEL', 'CRITICAL')
 logging.basicConfig(level=getattr(logging, loglevel.upper()))
 
diff --git a/services/nodemanager/tests/test_computenode_dispatch.py b/services/nodemanager/tests/test_computenode_dispatch.py
index 13ddf56..c44305d 100644
--- a/services/nodemanager/tests/test_computenode_dispatch.py
+++ b/services/nodemanager/tests/test_computenode_dispatch.py
@@ -100,6 +100,7 @@ class ComputeNodeSetupActorTestCase(testutil.ActorTestMixin, unittest.TestCase):
             ]
         self.make_actor()
         self.wait_for_assignment(self.setup_actor, 'cloud_node')
+        self.setup_actor.ping().get(self.TIMEOUT)
         self.assertEqual(1, self.cloud_client.post_create_node.call_count)
 
     def test_instance_exceeded_not_retried(self):
@@ -151,6 +152,7 @@ class ComputeNodeSetupActorTestCase(testutil.ActorTestMixin, unittest.TestCase):
         self.api_client.nodes().create().execute.side_effect = retry_resp
         self.api_client.nodes().update().execute.side_effect = retry_resp
         self.wait_for_assignment(self.setup_actor, 'cloud_node')
+        self.setup_actor.ping().get(self.TIMEOUT)
         self.assertEqual(self.setup_actor.actor_ref.actor_urn,
                          subscriber.call_args[0][0].actor_ref.actor_urn)
 
@@ -214,10 +216,12 @@ class ComputeNodeShutdownActorMixin(testutil.ActorTestMixin):
         self.make_mocks(shutdown_open=True, arvados_node=testutil.arvados_node_mock(crunch_worker_state="busy"))
         self.cloud_client.destroy_node.return_value = True
         self.make_actor(cancellable=False)
-        self.check_success_flag(True, 2)
+        self.check_success_flag(True, 4)
         self.assertTrue(self.cloud_client.destroy_node.called)
 
     def test_arvados_node_cleaned_after_shutdown(self, *mocks):
+        if len(mocks) == 1:
+            mocks[0].return_value = "drain\n"
         cloud_node = testutil.cloud_node_mock(62)
         arv_node = testutil.arvados_node_mock(62)
         self.make_mocks(cloud_node, arv_node)
@@ -235,6 +239,8 @@ class ComputeNodeShutdownActorMixin(testutil.ActorTestMixin):
         self.assertTrue(update_mock().execute.called)
 
     def test_arvados_node_not_cleaned_after_shutdown_cancelled(self, *mocks):
+        if len(mocks) == 1:
+            mocks[0].return_value = "idle\n"
         cloud_node = testutil.cloud_node_mock(61)
         arv_node = testutil.arvados_node_mock(61)
         self.make_mocks(cloud_node, arv_node, shutdown_open=False)
@@ -339,13 +345,11 @@ class ComputeNodeMonitorActorTestCase(testutil.ActorTestMixin,
     def test_in_state_when_no_state_available(self):
         self.make_actor(arv_node=testutil.arvados_node_mock(
                 crunch_worker_state=None))
-        print(self.node_actor.get_state().get())
         self.assertTrue(self.node_state('idle'))
 
     def test_in_state_when_no_state_available_old(self):
         self.make_actor(arv_node=testutil.arvados_node_mock(
                 crunch_worker_state=None, age=90000))
-        print(self.node_actor.get_state().get())
         self.assertTrue(self.node_state('down'))
 
     def test_in_idle_state(self):
diff --git a/services/nodemanager/tests/test_computenode_dispatch_slurm.py b/services/nodemanager/tests/test_computenode_dispatch_slurm.py
index 5c929f4..0b6162d 100644
--- a/services/nodemanager/tests/test_computenode_dispatch_slurm.py
+++ b/services/nodemanager/tests/test_computenode_dispatch_slurm.py
@@ -74,21 +74,18 @@ class SLURMComputeNodeShutdownActorTestCase(ComputeNodeShutdownActorMixin,
         self.check_success_flag(True)
         self.assertFalse(proc_mock.called)
 
-    def test_node_undrained_when_shutdown_cancelled(self, proc_mock):
+    def test_node_resumed_when_shutdown_cancelled(self, proc_mock):
         try:
             proc_mock.side_effect = iter(['', 'drng\n', 'drng\n', ''])
             self.make_mocks(arvados_node=testutil.arvados_node_mock(job_uuid=True))
             self.timer = testutil.MockTimer(False)
             self.make_actor()
-            self.shutdown_actor.ping().get(self.TIMEOUT)
             self.busywait(lambda: proc_mock.call_args is not None)
             self.shutdown_actor.cancel_shutdown("test")
             self.check_success_flag(False, 2)
-            self.assertEqual(proc_mock.call_args_list,
-                             [mock.call(['scontrol', 'update', 'NodeName=compute99', 'State=DRAIN', 'Reason=Node Manager shutdown']),
-                              mock.call(['sinfo', '--noheader', '-o', '%t', '-n', 'compute99']),
-                              mock.call(['sinfo', '--noheader', '-o', '%t', '-n', 'compute99']),
-                              mock.call(['scontrol', 'update', 'NodeName=compute99', 'State=RESUME'])])
+            self.assertEqual(proc_mock.call_args_list[0], mock.call(['scontrol', 'update', 'NodeName=compute99', 'State=DRAIN', 'Reason=Node Manager shutdown']))
+            self.assertEqual(proc_mock.call_args_list[-1], mock.call(['scontrol', 'update', 'NodeName=compute99', 'State=RESUME']))
+
         finally:
             self.shutdown_actor.actor_ref.stop()
 
diff --git a/services/nodemanager/tests/test_daemon.py b/services/nodemanager/tests/test_daemon.py
index f714c3c..730b185 100644
--- a/services/nodemanager/tests/test_daemon.py
+++ b/services/nodemanager/tests/test_daemon.py
@@ -21,6 +21,17 @@ import logging
 
 class NodeManagerDaemonActorTestCase(testutil.ActorTestMixin,
                                      unittest.TestCase):
+
+    def busywait(self, f, finalize=None):
+        n = 0
+        while not f() and n < 20:
+            time.sleep(.1)
+            self.daemon.ping().get(self.TIMEOUT)
+            n += 1
+        if finalize is not None:
+            finalize()
+        self.assertTrue(f())
+
     def mock_node_start(self, **kwargs):
         # Make sure that every time the daemon starts a setup actor,
         # it gets a new mock object back.
@@ -126,8 +137,8 @@ class NodeManagerDaemonActorTestCase(testutil.ActorTestMixin,
     def test_easy_node_creation(self):
         size = testutil.MockSize(1)
         self.make_daemon(want_sizes=[size])
-        self.stop_proxy(self.daemon)
-        self.assertTrue(self.node_setup.start.called)
+        self.busywait(lambda: self.node_setup.start.called,
+                      lambda: self.stop_proxy(self.daemon))
 
     def check_monitors_arvados_nodes(self, *arv_nodes):
         self.assertItemsEqual(arv_nodes, self.monitored_arvados_nodes())
@@ -136,7 +147,7 @@ class NodeManagerDaemonActorTestCase(testutil.ActorTestMixin,
         cloud_node = testutil.cloud_node_mock(1)
         arv_node = testutil.arvados_node_mock(1)
         self.make_daemon([cloud_node], [arv_node])
-        self.stop_proxy(self.daemon)
+        self.busywait(lambda: 1 == self.alive_monitor_count())
         self.check_monitors_arvados_nodes(arv_node)
 
     def test_node_pairing_after_arvados_update(self):
@@ -159,7 +170,8 @@ class NodeManagerDaemonActorTestCase(testutil.ActorTestMixin,
         self.daemon.update_cloud_nodes([]).get(self.TIMEOUT)
         self.assertEqual(0, self.alive_monitor_count())
         self.daemon.update_cloud_nodes([testutil.cloud_node_mock(3)])
-        self.stop_proxy(self.daemon)
+        self.busywait(lambda: 1 == self.alive_monitor_count(),
+                      lambda: self.stop_proxy(self.daemon))
         self.check_monitors_arvados_nodes(arv_node)
 
     def test_old_arvados_node_not_double_assigned(self):
@@ -179,8 +191,8 @@ class NodeManagerDaemonActorTestCase(testutil.ActorTestMixin,
     def test_node_count_satisfied(self):
         self.make_daemon(cloud_nodes=[testutil.cloud_node_mock(1)],
                          want_sizes=[testutil.MockSize(1)])
-        self.stop_proxy(self.daemon)
-        self.assertFalse(self.node_setup.start.called)
+        self.busywait(lambda: not self.node_setup.start.called,
+                      lambda: self.stop_proxy(self.daemon))
 
     def test_dont_count_missing_as_busy(self):
         size = testutil.MockSize(1)
@@ -191,8 +203,8 @@ class NodeManagerDaemonActorTestCase(testutil.ActorTestMixin,
                                             2,
                                             last_ping_at='1970-01-01T01:02:03.04050607Z')],
                          want_sizes=[size, size])
-        self.stop_proxy(self.daemon)
-        self.assertTrue(self.node_setup.start.called)
+        self.busywait(lambda: self.node_setup.start.called,
+                      lambda: self.stop_proxy(self.daemon))
 
     def test_missing_counts_towards_max(self):
         size = testutil.MockSize(1)
@@ -202,8 +214,8 @@ class NodeManagerDaemonActorTestCase(testutil.ActorTestMixin,
                                         testutil.arvados_node_mock(2, last_ping_at='1970-01-01T01:02:03.04050607Z')],
                          want_sizes=[size, size],
                          max_nodes=2)
-        self.stop_proxy(self.daemon)
-        self.assertFalse(self.node_setup.start.called)
+        self.busywait(lambda: not self.node_setup.start.called,
+                      lambda: self.stop_proxy(self.daemon))
 
     def test_excess_counts_missing(self):
         size = testutil.MockSize(1)
@@ -236,7 +248,7 @@ class NodeManagerDaemonActorTestCase(testutil.ActorTestMixin,
         self.assertEqual(2, self.alive_monitor_count())
         for mon_ref in self.monitor_list():
             self.daemon.node_can_shutdown(mon_ref.proxy()).get(self.TIMEOUT)
-        self.assertEqual(1, self.node_shutdown.start.call_count)
+        self.busywait(lambda: 1 == self.node_shutdown.start.call_count)
 
     def test_booting_nodes_counted(self):
         cloud_node = testutil.cloud_node_mock(1)
@@ -246,17 +258,16 @@ class NodeManagerDaemonActorTestCase(testutil.ActorTestMixin,
         self.daemon.max_nodes.get(self.TIMEOUT)
         self.assertTrue(self.node_setup.start.called)
         self.daemon.update_server_wishlist(server_wishlist).get(self.TIMEOUT)
-        self.stop_proxy(self.daemon)
-        self.assertEqual(1, self.node_setup.start.call_count)
+        self.busywait(lambda: 1 == self.node_setup.start.call_count,
+                      lambda: self.stop_proxy(self.daemon))
 
     def test_boot_new_node_when_all_nodes_busy(self):
         size = testutil.MockSize(2)
         arv_node = testutil.arvados_node_mock(2, job_uuid=True)
         self.make_daemon([testutil.cloud_node_mock(2, size=size)], [arv_node],
                          [size], avail_sizes=[(size, {"cores":1})])
-        self.busywait(lambda: self.node_setup.start.called)
-        self.stop_proxy(self.daemon)
-        self.assertTrue(self.node_setup.start.called)
+        self.busywait(lambda: self.node_setup.start.called,
+                      lambda: self.stop_proxy(self.daemon))
 
     def test_boot_new_node_below_min_nodes(self):
         min_size = testutil.MockSize(1)
@@ -401,8 +412,8 @@ class NodeManagerDaemonActorTestCase(testutil.ActorTestMixin,
         self.assertFalse(self.node_shutdown.start.called)
         now = time.time()
         self.monitor_list()[0].tell_proxy().consider_shutdown()
-        self.busywait(lambda: self.node_shutdown.start.called)
-        self.stop_proxy(self.daemon)
+        self.busywait(lambda: self.node_shutdown.start.called,
+                      lambda: self.stop_proxy(self.daemon))
         self.assertShutdownCancellable(False)
 
     def test_booted_node_shut_down_when_never_paired(self):
@@ -413,8 +424,8 @@ class NodeManagerDaemonActorTestCase(testutil.ActorTestMixin,
         self.assertEqual(1, self.alive_monitor_count())
         self.daemon.update_cloud_nodes([cloud_node])
         self.monitor_list()[0].tell_proxy().consider_shutdown()
-        self.busywait(lambda: self.node_shutdown.start.called)
-        self.stop_proxy(self.daemon)
+        self.busywait(lambda: self.node_shutdown.start.called,
+                      lambda: self.stop_proxy(self.daemon))
         self.assertShutdownCancellable(False)
 
     def test_booted_node_shut_down_when_never_working(self):
@@ -426,8 +437,8 @@ class NodeManagerDaemonActorTestCase(testutil.ActorTestMixin,
         self.assertEqual(1, self.alive_monitor_count())
         self.monitor_list()[0].proxy().cloud_node_start_time = time.time()-3601
         self.daemon.update_cloud_nodes([cloud_node])
-        self.busywait(lambda: self.node_shutdown.start.called)
-        self.stop_proxy(self.daemon)
+        self.busywait(lambda: self.node_shutdown.start.called,
+                      lambda: self.stop_proxy(self.daemon))
         self.assertShutdownCancellable(False)
 
     def test_node_that_pairs_not_considered_failed_boot(self):
@@ -457,8 +468,8 @@ class NodeManagerDaemonActorTestCase(testutil.ActorTestMixin,
     def test_booting_nodes_shut_down(self):
         self.make_daemon(want_sizes=[testutil.MockSize(1)])
         self.daemon.update_server_wishlist([]).get(self.TIMEOUT)
-        self.stop_proxy(self.daemon)
-        self.assertTrue(self.last_setup.stop_if_no_cloud_node.called)
+        self.busywait(lambda: self.last_setup.stop_if_no_cloud_node.called,
+                      lambda: self.stop_proxy(self.daemon))
 
     def test_all_booting_nodes_tried_to_shut_down(self):
         size = testutil.MockSize(2)
@@ -504,8 +515,8 @@ class NodeManagerDaemonActorTestCase(testutil.ActorTestMixin,
         self.assertEqual(1, self.alive_monitor_count())
         monitor = self.monitor_list()[0].proxy()
         self.daemon.node_can_shutdown(monitor).get(self.TIMEOUT)
-        self.stop_proxy(self.daemon)
-        self.assertTrue(self.node_shutdown.start.called)
+        self.busywait(lambda: self.node_shutdown.start.called,
+                      lambda: self.stop_proxy(self.daemon))
 
     def test_shutdown_declined_when_idle_and_job_queued(self):
         size = testutil.MockSize(1)
@@ -538,7 +549,7 @@ class NodeManagerDaemonActorTestCase(testutil.ActorTestMixin,
         self.last_shutdown.success.get.return_value = True
         self.last_shutdown.stop.side_effect = lambda: monitor.stop()
         self.daemon.node_finished_shutdown(self.last_shutdown).get(self.TIMEOUT)
-        self.assertEqual(0, self.alive_monitor_count())
+        self.busywait(lambda: 0 == self.alive_monitor_count())
 
     def test_nodes_shutting_down_replaced_below_max_nodes(self):
         size = testutil.MockSize(6)
@@ -551,8 +562,8 @@ class NodeManagerDaemonActorTestCase(testutil.ActorTestMixin,
         self.assertTrue(self.node_shutdown.start.called)
         self.daemon.update_server_wishlist(
             [testutil.MockSize(6)]).get(self.TIMEOUT)
-        self.stop_proxy(self.daemon)
-        self.assertTrue(self.node_setup.start.called)
+        self.busywait(lambda: self.node_setup.start.called,
+                      lambda: self.stop_proxy(self.daemon))
 
     def test_nodes_shutting_down_not_replaced_at_max_nodes(self):
         cloud_node = testutil.cloud_node_mock(7)
@@ -564,8 +575,8 @@ class NodeManagerDaemonActorTestCase(testutil.ActorTestMixin,
         self.assertTrue(self.node_shutdown.start.called)
         self.daemon.update_server_wishlist(
             [testutil.MockSize(7)]).get(self.TIMEOUT)
-        self.stop_proxy(self.daemon)
-        self.assertFalse(self.node_setup.start.called)
+        self.busywait(lambda: not self.node_setup.start.called,
+                      lambda: self.stop_proxy(self.daemon))
 
     def test_nodes_shutting_down_count_against_excess(self):
         size = testutil.MockSize(8)
@@ -573,7 +584,7 @@ class NodeManagerDaemonActorTestCase(testutil.ActorTestMixin,
         arv_nodes = [testutil.arvados_node_mock(n, size=size) for n in [8, 9]]
         self.make_daemon(cloud_nodes, arv_nodes, [size],
                          avail_sizes=[(size, {"cores":1})])
-        self.assertEqual(2, self.alive_monitor_count())
+        self.busywait(lambda: 2 == self.alive_monitor_count())
         for mon_ref in self.monitor_list():
             self.daemon.node_can_shutdown(mon_ref.proxy()).get(self.TIMEOUT)
         self.assertEqual(1, self.node_shutdown.start.call_count)
@@ -598,8 +609,8 @@ class NodeManagerDaemonActorTestCase(testutil.ActorTestMixin,
         size = testutil.MockSize(2)
         self.daemon.update_server_wishlist([size] * 2).get(self.TIMEOUT)
         self.timer.deliver()
-        self.stop_proxy(self.daemon)
-        self.assertEqual(1, self.node_setup.start.call_count)
+        self.busywait(lambda: 1 == self.node_setup.start.call_count,
+                      lambda: self.stop_proxy(self.daemon))
 
     def test_shutdown_actor_stopped_when_cloud_node_delisted(self):
         self.make_daemon(cloud_nodes=[testutil.cloud_node_mock()])
@@ -607,9 +618,8 @@ class NodeManagerDaemonActorTestCase(testutil.ActorTestMixin,
         monitor = self.monitor_list()[0].proxy()
         self.daemon.node_can_shutdown(monitor).get(self.TIMEOUT)
         self.daemon.update_cloud_nodes([]).get(self.TIMEOUT)
-        self.stop_proxy(self.daemon)
-        self.assertEqual(
-            1, self.last_shutdown.stop.call_count)
+        self.busywait(lambda: 1 == self.last_shutdown.stop.call_count,
+                      lambda: self.stop_proxy(self.daemon))
 
     def test_shutdown_actor_cleanup_copes_with_dead_actors(self):
         self.make_daemon(cloud_nodes=[testutil.cloud_node_mock()])
@@ -620,8 +630,8 @@ class NodeManagerDaemonActorTestCase(testutil.ActorTestMixin,
         # the ActorDeadError.
         self.last_shutdown.stop.side_effect = pykka.ActorDeadError
         self.daemon.update_cloud_nodes([]).get(self.TIMEOUT)
-        self.stop_proxy(self.daemon)
-        self.assertEqual(1, self.last_shutdown.stop.call_count)
+        self.busywait(lambda: 1 == self.last_shutdown.stop.call_count,
+                      lambda: self.stop_proxy(self.daemon))
 
     def test_node_create_two_sizes(self):
         small = testutil.MockSize(1)
@@ -688,8 +698,8 @@ class NodeManagerDaemonActorTestCase(testutil.ActorTestMixin,
 
         self.stop_proxy(self.daemon)
 
-        self.assertEqual(1, self.node_setup.start.call_count)
-        self.assertEqual(1, self.node_shutdown.start.call_count)
+        self.busywait(lambda: 1 == self.node_setup.start.call_count)
+        self.busywait(lambda: 1 == self.node_shutdown.start.call_count)
 
         # booting a new big node
         sizecounts = {a[0].id: 0 for a in avail_sizes}
diff --git a/services/nodemanager/tests/test_failure.py b/services/nodemanager/tests/test_failure.py
index 8704ef9..03cc5df 100644
--- a/services/nodemanager/tests/test_failure.py
+++ b/services/nodemanager/tests/test_failure.py
@@ -49,10 +49,11 @@ class ActorUnhandledExceptionTest(testutil.ActorTestMixin, unittest.TestCase):
         self.assertFalse(kill_mock.called)
 
 class WatchdogActorTest(testutil.ActorTestMixin, unittest.TestCase):
-    @mock.patch('os.kill')
-    def test_time_timout(self, kill_mock):
+
+    def test_time_timout(self):
+        kill_mock = mock.Mock('os.kill')
         act = BogusActor.start(OSError(errno.ENOENT, ""))
-        watch = arvnodeman.baseactor.WatchdogActor.start(1, act)
+        watch = arvnodeman.baseactor.WatchdogActor.start(1, act, killfunc=kill_mock)
         time.sleep(1)
         watch.stop(block=True)
         act.stop(block=True)
diff --git a/services/nodemanager/tests/test_timedcallback.py b/services/nodemanager/tests/test_timedcallback.py
index cee7fe1..21a9b5a 100644
--- a/services/nodemanager/tests/test_timedcallback.py
+++ b/services/nodemanager/tests/test_timedcallback.py
@@ -26,27 +26,29 @@ class TimedCallBackActorTestCase(testutil.ActorTestMixin, unittest.TestCase):
 
     def test_delayed_turnaround(self):
         receiver = mock.Mock()
-        with mock.patch('time.time', return_value=0) as mock_now:
-            deliverer = timedcallback.TimedCallBackActor.start().proxy()
-            deliverer.schedule(1, receiver, 'delayed')
-            deliverer.schedule(3, receiver, 'failure').get(self.TIMEOUT)
-            self.assertFalse(receiver.called)
-            mock_now.return_value = 2
-            deliverer.schedule(3, receiver, 'failure').get(self.TIMEOUT)
-            self.stop_proxy(deliverer)
+        mock_now = mock.Mock()
+        mock_now.return_value = 0
+        deliverer = timedcallback.TimedCallBackActor.start(timefunc=mock_now).proxy()
+        deliverer.schedule(1, receiver, 'delayed')
+        deliverer.schedule(3, receiver, 'failure').get(self.TIMEOUT)
+        self.assertFalse(receiver.called)
+        mock_now.return_value = 2
+        deliverer.schedule(3, receiver, 'failure').get(self.TIMEOUT)
+        self.stop_proxy(deliverer)
         receiver.assert_called_with('delayed')
 
     def test_out_of_order_scheduling(self):
         receiver = mock.Mock()
-        with mock.patch('time.time', return_value=1.5) as mock_now:
-            deliverer = timedcallback.TimedCallBackActor.start().proxy()
-            deliverer.schedule(2, receiver, 'second')
-            deliverer.schedule(1, receiver, 'first')
-            deliverer.schedule(3, receiver, 'failure').get(self.TIMEOUT)
-            receiver.assert_called_with('first')
-            mock_now.return_value = 2.5
-            deliverer.schedule(3, receiver, 'failure').get(self.TIMEOUT)
-            self.stop_proxy(deliverer)
+        mock_now = mock.Mock()
+        mock_now.return_value = 1.5
+        deliverer = timedcallback.TimedCallBackActor.start(timefunc=mock_now).proxy()
+        deliverer.schedule(2, receiver, 'second')
+        deliverer.schedule(1, receiver, 'first')
+        deliverer.schedule(3, receiver, 'failure').get(self.TIMEOUT)
+        receiver.assert_called_with('first')
+        mock_now.return_value = 2.5
+        deliverer.schedule(3, receiver, 'failure').get(self.TIMEOUT)
+        self.stop_proxy(deliverer)
         receiver.assert_called_with('second')
 
     def test_dead_actors_ignored(self):
@@ -61,4 +63,3 @@ class TimedCallBackActorTestCase(testutil.ActorTestMixin, unittest.TestCase):
 
 if __name__ == '__main__':
     unittest.main()
-
diff --git a/services/nodemanager/tests/testutil.py b/services/nodemanager/tests/testutil.py
index 0a48370..df8c7c7 100644
--- a/services/nodemanager/tests/testutil.py
+++ b/services/nodemanager/tests/testutil.py
@@ -136,11 +136,13 @@ class ActorTestMixin(object):
             if result is not unassigned:
                 return result
 
-    def busywait(self, f):
+    def busywait(self, f, finalize=None):
         n = 0
-        while not f() and n < 10:
+        while not f() and n < 20:
             time.sleep(.1)
             n += 1
+        if finalize is not None:
+            finalize()
         self.assertTrue(f())
 
 

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list