[ARVADOS] created: 6a7d7a2fa8e217e1ff9440769f39a2095d5bb837

Git user <git@public.curoverse.com>
Thu Feb 2 17:37:03 EST 2017


commit 6a7d7a2fa8e217e1ff9440769f39a2095d5bb837
Author: Peter Amstutz <peter.amstutz@curoverse.com>
Date:   Thu Feb 2 09:41:30 2017 -0500

    10846: Revise shutdown behavior.
    
    * Remove node blacklisting.  Because Arvados node records are reused and
      compute node ids are assigned based on the record uuid, it was possible
      to boot a new node with the same id as a previously blacklisted node.
      Previously blacklisted 'broken' nodes are now considered 'down' when
      determining whether it is necessary to bring up new nodes.
    
    * Failure to destroy a node is no longer retried by the shutdown actor.
      A failure now cancels the shutdown; the daemon is expected to schedule
      a new shutdown attempt.
    
    * Restore the concept of a "cancellable" shutdown, which checks whether
      the node is still eligible for shutdown before actually making the
      call to shut it down.
    
    * Adjust mocking behavior to fix tests that were producing suppressed
      errors (visible in debug logging but not failing the test) when node
      sizes were inconsistent among the wishlist, cloud_node objects, and
      ServerCalculator.
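
    The net effect on ComputeNodeShutdownActor.shutdown_node is easiest to
    see as a condensed sketch (simplified from the diff below; logging and
    actor-proxy plumbing are elided):

        def shutdown_node(self):
            if self.cancellable:
                # Re-check eligibility: the node may have picked up work
                # since this shutdown was scheduled.
                eligible, reason = self._monitor.shutdown_eligible().get()
                if not eligible:
                    self.cancel_shutdown(
                        "No longer eligible for shut down because %s" % reason)
                    return
            arv_node = self._arvados_node()
            if self._cloud.destroy_node(self.cloud_node):
                if arv_node:
                    self._later.clean_arvados_node(arv_node)
                else:
                    self._finished(success_flag=True)
            else:
                # No retry loop here anymore: cancelling hands control back
                # to the daemon, which may schedule a fresh shutdown attempt.
                self.cancel_shutdown(self.DESTROY_FAILED)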

diff --git a/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py b/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py
index 25b0779..05e3c9e 100644
--- a/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py
+++ b/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py
@@ -171,7 +171,7 @@ class ComputeNodeShutdownActor(ComputeNodeStateChangeBase):
     """
     # Reasons for a shutdown to be cancelled.
     WINDOW_CLOSED = "shutdown window closed"
-    NODE_BROKEN = "cloud failed to shut down broken node"
+    DESTROY_FAILED = "destroy_node failed"
 
     def __init__(self, timer_actor, cloud_client, arvados_client, node_monitor,
                  cancellable=True, retry_wait=1, max_retry_wait=180):
@@ -215,27 +215,31 @@ class ComputeNodeShutdownActor(ComputeNodeStateChangeBase):
             try:
                 return orig_func(self, *args, **kwargs)
             except Exception as error:
                 self._logger.error("Actor error %s", error)
+                self._logger.debug("", exc_info=True)
                 self._later.cancel_shutdown("Unhandled exception %s" % error)
         return finish_wrapper
 
     @_cancel_on_exception
-    @RetryMixin._retry()
     def shutdown_node(self):
+        if self.cancellable:
+            self._logger.info("Checking that node is still eligible for shutdown")
+            # Check that we still want to shut down the node.
+            eligible, reason = self._monitor.shutdown_eligible().get()
+            if not eligible:
+                self.cancel_shutdown("No longer eligible for shut down because %s" % reason)
+                return
+
         self._logger.info("Starting shutdown")
         arv_node = self._arvados_node()
-        if not self._cloud.destroy_node(self.cloud_node):
-            if self._cloud.broken(self.cloud_node):
-                self._later.cancel_shutdown(self.NODE_BROKEN)
-                return
+        if self._cloud.destroy_node(self.cloud_node):
+            self._logger.info("Shutdown success")
+            if arv_node:
+                self._later.clean_arvados_node(arv_node)
             else:
-                # Force a retry.
-                raise cloud_types.LibcloudError("destroy_node failed")
-        self._logger.info("Shutdown success")
-        if arv_node is None:
-            self._finished(success_flag=True)
+                self._finished(success_flag=True)
         else:
-            self._later.clean_arvados_node(arv_node)
+            self.cancel_shutdown(self.DESTROY_FAILED)
 
     @ComputeNodeStateChangeBase._finish_on_exception
     @RetryMixin._retry(config.ARVADOS_ERRORS)
@@ -342,6 +346,10 @@ class ComputeNodeMonitorActor(config.actor_class):
         if self.arvados_node is None:
             return 'unpaired'
 
+        # This node is indicated as non-functioning by the cloud
+        if self._cloud.broken(self.cloud_node):
+            return 'down'
+
         state = self.arvados_node['crunch_worker_state']
 
         # If state information is not available because it is missing or the
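
With blacklisting gone, ComputeNodeMonitorActor.get_state is the single
place where a broken node now surfaces: it is reported as 'down', so the
daemon counts it when deciding whether new nodes need to be brought up.
A minimal sketch of the resulting decision order (condensed from the hunk
above; the later checks are unchanged):

    def get_state(self):
        if self.arvados_node is None:
            return 'unpaired'
        # A node the cloud reports as non-functioning is simply 'down';
        # it is no longer blacklisted.
        if self._cloud.broken(self.cloud_node):
            return 'down'
        state = self.arvados_node['crunch_worker_state']
        # ... remaining state checks as before ...
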
diff --git a/services/nodemanager/arvnodeman/daemon.py b/services/nodemanager/arvnodeman/daemon.py
index a809148..63c9d19 100644
--- a/services/nodemanager/arvnodeman/daemon.py
+++ b/services/nodemanager/arvnodeman/daemon.py
@@ -25,7 +25,6 @@ class _BaseNodeTracker(object):
     def __init__(self):
         self.nodes = {}
         self.orphans = {}
-        self._blacklist = set()
 
     # Proxy the methods listed below to self.nodes.
     def _proxy_method(name):
@@ -44,9 +43,6 @@ class _BaseNodeTracker(object):
     def add(self, record):
         self.nodes[self.record_key(record)] = record
 
-    def blacklist(self, key):
-        self._blacklist.add(key)
-
     def update_record(self, key, item):
         setattr(self.nodes[key], self.RECORD_ATTR, item)
 
@@ -54,9 +50,7 @@ class _BaseNodeTracker(object):
         unseen = set(self.nodes.iterkeys())
         for item in response:
             key = self.item_key(item)
-            if key in self._blacklist:
-                continue
-            elif key in unseen:
+            if key in unseen:
                 unseen.remove(key)
                 self.update_record(key, item)
             else:
@@ -214,7 +208,14 @@ class NodeManagerDaemonActor(actor_class):
             if hasattr(record.cloud_node, "_nodemanager_recently_booted"):
                 self.cloud_nodes.add(record)
             else:
-                record.actor.stop()
+                # Node disappeared from the cloud node list.  Stop the monitor
+                # actor if necessary and forget about the node.
+                if record.actor:
+                    try:
+                        record.actor.stop()
+                    except pykka.ActorDeadError:
+                        pass
+                    record.actor = None
                 record.cloud_node = None
 
     def _register_arvados_node(self, key, arv_node):
@@ -437,24 +438,29 @@ class NodeManagerDaemonActor(actor_class):
 
     def node_finished_shutdown(self, shutdown_actor):
         try:
-            cloud_node, success, cancel_reason = self._get_actor_attrs(
-                shutdown_actor, 'cloud_node', 'success', 'cancel_reason')
+            cloud_node, success = self._get_actor_attrs(
+                shutdown_actor, 'cloud_node', 'success')
         except pykka.ActorDeadError:
             return
         cloud_node_id = cloud_node.id
         record = self.cloud_nodes[cloud_node_id]
         shutdown_actor.stop()
+        record.shutdown_actor = None
+
         if not success:
-            if cancel_reason == self._node_shutdown.NODE_BROKEN:
-                self.cloud_nodes.blacklist(cloud_node_id)
-            record.shutdown_actor = None
-        else:
-            # If the node went from being booted to being shut down without ever
-            # appearing in the cloud node list, it will have the
-            # _nodemanager_recently_booted tag, so get rid of it so that the node
-            # can be forgotten completely.
-            if hasattr(self.cloud_nodes[cloud_node_id].cloud_node, "_nodemanager_recently_booted"):
-                del self.cloud_nodes[cloud_node_id].cloud_node._nodemanager_recently_booted
+            return
+
+        # Shutdown was successful, so stop the monitor actor, otherwise it
+        # will keep offering the node as a candidate for shutdown.
+        record.actor.stop()
+        record.actor = None
+
+        # If the node went from being booted to being shut down without ever
+        # appearing in the cloud node list, it will have the
+        # _nodemanager_recently_booted tag, so get rid of it so that the node
+        # can be forgotten completely.
+        if hasattr(record.cloud_node, "_nodemanager_recently_booted"):
+            del record.cloud_node._nodemanager_recently_booted
 
     def shutdown(self):
         self._logger.info("Shutting down after signal.")
diff --git a/services/nodemanager/tests/test_computenode_dispatch.py b/services/nodemanager/tests/test_computenode_dispatch.py
index c3774c1..ec15734 100644
--- a/services/nodemanager/tests/test_computenode_dispatch.py
+++ b/services/nodemanager/tests/test_computenode_dispatch.py
@@ -201,14 +201,19 @@ class ComputeNodeShutdownActorMixin(testutil.ActorTestMixin):
         else:
             self.fail("success flag {} is not {}".format(last_flag, expected))
 
+    def test_cancellable_shutdown(self, *mocks):
+        self.make_mocks(shutdown_open=True, arvados_node=testutil.arvados_node_mock(crunch_worker_state="busy"))
+        self.cloud_client.destroy_node.return_value = True
+        self.make_actor(cancellable=True)
+        self.check_success_flag(False)
+        self.assertFalse(self.cloud_client.destroy_node.called)
+
     def test_uncancellable_shutdown(self, *mocks):
-        self.make_mocks(shutdown_open=False)
-        self.cloud_client.destroy_node.return_value = False
-        self.make_actor(cancellable=False)
-        self.check_success_flag(None, 0)
-        self.shutdowns._set_state(True, 600)
+        self.make_mocks(shutdown_open=True, arvados_node=testutil.arvados_node_mock(crunch_worker_state="busy"))
         self.cloud_client.destroy_node.return_value = True
-        self.check_success_flag(True)
+        self.make_actor(cancellable=False)
+        self.check_success_flag(True, 2)
+        self.assertTrue(self.cloud_client.destroy_node.called)
 
     def test_arvados_node_cleaned_after_shutdown(self, *mocks):
         cloud_node = testutil.cloud_node_mock(62)
@@ -247,21 +252,13 @@ class ComputeNodeShutdownActorTestCase(ComputeNodeShutdownActorMixin,
         self.check_success_flag(True)
         self.assertTrue(self.cloud_client.destroy_node.called)
 
-    def test_shutdown_retries_when_cloud_fails(self):
-        self.make_mocks()
-        self.cloud_client.destroy_node.return_value = False
-        self.make_actor(start_time=0)
-        self.assertIsNone(self.shutdown_actor.success.get(self.TIMEOUT))
-        self.cloud_client.destroy_node.return_value = True
-        self.check_success_flag(True)
-
-    def test_shutdown_cancelled_when_cloud_fails_on_broken_node(self):
+    def test_shutdown_cancelled_when_destroy_node_fails(self):
         self.make_mocks(node_broken=True)
         self.cloud_client.destroy_node.return_value = False
         self.make_actor(start_time=0)
         self.check_success_flag(False, 2)
         self.assertEqual(1, self.cloud_client.destroy_node.call_count)
-        self.assertEqual(self.ACTOR_CLASS.NODE_BROKEN,
+        self.assertEqual(self.ACTOR_CLASS.DESTROY_FAILED,
                          self.shutdown_actor.cancel_reason.get(self.TIMEOUT))
 
     def test_late_subscribe(self):
diff --git a/services/nodemanager/tests/test_computenode_dispatch_slurm.py b/services/nodemanager/tests/test_computenode_dispatch_slurm.py
index 85a40ce..536b0ca 100644
--- a/services/nodemanager/tests/test_computenode_dispatch_slurm.py
+++ b/services/nodemanager/tests/test_computenode_dispatch_slurm.py
@@ -91,3 +91,13 @@ class SLURMComputeNodeShutdownActorTestCase(ComputeNodeShutdownActorMixin,
         proc_mock.return_value = 'drain\n'
         super(SLURMComputeNodeShutdownActorTestCase,
               self).test_arvados_node_cleaned_after_shutdown()
+
+    def test_cancellable_shutdown(self, proc_mock):
+        proc_mock.return_value = 'other\n'
+        super(SLURMComputeNodeShutdownActorTestCase,
+              self).test_cancellable_shutdown()
+
+    def test_uncancellable_shutdown(self, proc_mock):
+        proc_mock.return_value = 'other\n'
+        super(SLURMComputeNodeShutdownActorTestCase,
+              self).test_uncancellable_shutdown()
diff --git a/services/nodemanager/tests/test_computenode_driver_azure.py b/services/nodemanager/tests/test_computenode_driver_azure.py
index 59fc503..702688d 100644
--- a/services/nodemanager/tests/test_computenode_driver_azure.py
+++ b/services/nodemanager/tests/test_computenode_driver_azure.py
@@ -130,7 +130,7 @@ echo z1.test > /var/tmp/arv-node-data/meta-data/instance-type
     def test_node_found_after_timeout_has_fixed_size(self):
         size = testutil.MockSize(4)
         node_props = {'hardwareProfile': {'vmSize': size.id}}
-        cloud_node = testutil.cloud_node_mock(
-            size=None, tags={'arvados-class': 'test'}, properties=node_props)
+        cloud_node = testutil.cloud_node_mock(tags={'arvados-class': 'test'}, properties=node_props)
+        cloud_node.size = None
         self.check_node_found_after_timeout_has_fixed_size(
             size, cloud_node, {'tag_arvados-class': 'test'})
diff --git a/services/nodemanager/tests/test_daemon.py b/services/nodemanager/tests/test_daemon.py
index fe7b0fe..f1d168c 100644
--- a/services/nodemanager/tests/test_daemon.py
+++ b/services/nodemanager/tests/test_daemon.py
@@ -48,12 +48,19 @@ class NodeManagerDaemonActorTestCase(testutil.ActorTestMixin,
         return mock_actor
 
     def make_daemon(self, cloud_nodes=[], arvados_nodes=[], want_sizes=[],
-                    avail_sizes=[(testutil.MockSize(1), {"cores": 1})],
+                    avail_sizes=None,
                     min_nodes=0, max_nodes=8,
                     shutdown_windows=[54, 5, 1],
                     max_total_price=None):
         for name in ['cloud_nodes', 'arvados_nodes', 'server_wishlist']:
             setattr(self, name + '_poller', mock.MagicMock(name=name + '_mock'))
+
+        if not avail_sizes:
+            if cloud_nodes or want_sizes:
+                avail_sizes=[(c.size, {"cores": int(c.id)}) for c in cloud_nodes] + [(s, {"cores": 1}) for s in want_sizes]
+            else:
+                avail_sizes=[(testutil.MockSize(1), {"cores": 1})]
+
         self.arv_factory = mock.MagicMock(name='arvados_mock')
         api_client = mock.MagicMock(name='api_client')
         api_client.nodes().create().execute.side_effect = [testutil.arvados_node_mock(1),
@@ -65,6 +72,7 @@ class NodeManagerDaemonActorTestCase(testutil.ActorTestMixin,
         self.cloud_updates = mock.MagicMock(name='updates_mock')
         self.timer = testutil.MockTimer(deliver_immediately=False)
         self.cloud_factory().node_id.side_effect = lambda node: node.id
+        self.cloud_factory().broken.return_value = False
 
         self.node_setup = mock.MagicMock(name='setup_mock')
         self.node_setup.start.side_effect = self.mock_node_start
@@ -138,7 +146,8 @@ class NodeManagerDaemonActorTestCase(testutil.ActorTestMixin,
     def test_arvados_node_un_and_re_paired(self):
         # We need to create the Arvados node mock after spinning up the daemon
         # to make sure it's new enough to pair with the cloud node.
-        self.make_daemon([testutil.cloud_node_mock(3)], arvados_nodes=None)
+        self.make_daemon(cloud_nodes=[testutil.cloud_node_mock(3)],
+                         arvados_nodes=None)
         arv_node = testutil.arvados_node_mock(3)
         self.daemon.update_arvados_nodes([arv_node]).get(self.TIMEOUT)
         self.check_monitors_arvados_nodes(arv_node)
@@ -151,7 +160,8 @@ class NodeManagerDaemonActorTestCase(testutil.ActorTestMixin,
     def test_old_arvados_node_not_double_assigned(self):
         arv_node = testutil.arvados_node_mock(3, age=9000)
         size = testutil.MockSize(3)
-        self.make_daemon(arvados_nodes=[arv_node], avail_sizes=[(size, {"cores":1})])
+        self.make_daemon(arvados_nodes=[arv_node],
+                         avail_sizes=[(size, {"cores":1})])
         self.daemon.update_server_wishlist([size]).get(self.TIMEOUT)
         self.daemon.update_server_wishlist([size, size]).get(self.TIMEOUT)
         self.stop_proxy(self.daemon)
@@ -162,26 +172,27 @@ class NodeManagerDaemonActorTestCase(testutil.ActorTestMixin,
         self.assertIn(None, used_nodes)
 
     def test_node_count_satisfied(self):
-        self.make_daemon([testutil.cloud_node_mock()],
+        self.make_daemon(cloud_nodes=[testutil.cloud_node_mock(1)],
                          want_sizes=[testutil.MockSize(1)])
         self.stop_proxy(self.daemon)
         self.assertFalse(self.node_setup.start.called)
 
     def test_dont_count_missing_as_busy(self):
         size = testutil.MockSize(1)
-        self.make_daemon(cloud_nodes=[testutil.cloud_node_mock(1),
-                                      testutil.cloud_node_mock(2)],
+        self.make_daemon(cloud_nodes=[testutil.cloud_node_mock(1, size=size),
+                                      testutil.cloud_node_mock(2, size=size)],
                          arvados_nodes=[testutil.arvados_node_mock(1),
-                                      testutil.arvados_node_mock(2,
-                                                                 last_ping_at='1970-01-01T01:02:03.04050607Z')],
+                                        testutil.arvados_node_mock(
+                                            2,
+                                            last_ping_at='1970-01-01T01:02:03.04050607Z')],
                          want_sizes=[size, size])
         self.stop_proxy(self.daemon)
         self.assertTrue(self.node_setup.start.called)
 
     def test_missing_counts_towards_max(self):
         size = testutil.MockSize(1)
-        self.make_daemon(cloud_nodes=[testutil.cloud_node_mock(1),
-                                      testutil.cloud_node_mock(2)],
+        self.make_daemon(cloud_nodes=[testutil.cloud_node_mock(1, size=size),
+                                      testutil.cloud_node_mock(2, size=size)],
                          arvados_nodes=[testutil.arvados_node_mock(1),
                                         testutil.arvados_node_mock(2, last_ping_at='1970-01-01T01:02:03.04050607Z')],
                          want_sizes=[size, size],
@@ -191,7 +202,7 @@ class NodeManagerDaemonActorTestCase(testutil.ActorTestMixin,
 
     def test_excess_counts_missing(self):
         size = testutil.MockSize(1)
-        cloud_nodes = [testutil.cloud_node_mock(1), testutil.cloud_node_mock(2)]
+        cloud_nodes = [testutil.cloud_node_mock(1, size=size), testutil.cloud_node_mock(2, size=size)]
         self.make_daemon(cloud_nodes=cloud_nodes,
                          arvados_nodes=[testutil.arvados_node_mock(1),
                                         testutil.arvados_node_mock(2, last_ping_at='1970-01-01T01:02:03.04050607Z')],
@@ -203,7 +214,7 @@ class NodeManagerDaemonActorTestCase(testutil.ActorTestMixin,
 
     def test_missing_shutdown_not_excess(self):
         size = testutil.MockSize(1)
-        cloud_nodes = [testutil.cloud_node_mock(1), testutil.cloud_node_mock(2)]
+        cloud_nodes = [testutil.cloud_node_mock(1, size=size), testutil.cloud_node_mock(2, size=size)]
         self.make_daemon(cloud_nodes=cloud_nodes,
                          arvados_nodes=[testutil.arvados_node_mock(1),
                                         testutil.arvados_node_mock(2, last_ping_at='1970-01-01T01:02:03.04050607Z')],
@@ -257,7 +268,8 @@ class NodeManagerDaemonActorTestCase(testutil.ActorTestMixin,
                           for call in self.node_setup.start.call_args_list])
 
     def test_no_new_node_when_ge_min_nodes_busy(self):
-        cloud_nodes = [testutil.cloud_node_mock(n) for n in range(1, 4)]
+        size = testutil.MockSize(2)
+        cloud_nodes = [testutil.cloud_node_mock(n, size=size) for n in range(1, 4)]
         arv_nodes = [testutil.arvados_node_mock(n, job_uuid=True)
                      for n in range(1, 4)]
         self.make_daemon(cloud_nodes, arv_nodes, [], min_nodes=2)
@@ -265,9 +277,10 @@ class NodeManagerDaemonActorTestCase(testutil.ActorTestMixin,
         self.assertEqual(0, self.node_setup.start.call_count)
 
     def test_no_new_node_when_max_nodes_busy(self):
-        self.make_daemon([testutil.cloud_node_mock(3)],
-                         [testutil.arvados_node_mock(3, job_uuid=True)],
-                         [testutil.MockSize(3)],
+        size = testutil.MockSize(3)
+        self.make_daemon(cloud_nodes=[testutil.cloud_node_mock(3)],
+                         arvados_nodes=[testutil.arvados_node_mock(3, job_uuid=True)],
+                         want_sizes=[size],
                          max_nodes=1)
         self.stop_proxy(self.daemon)
         self.assertFalse(self.node_setup.start.called)
@@ -275,6 +288,7 @@ class NodeManagerDaemonActorTestCase(testutil.ActorTestMixin,
     def start_node_boot(self, cloud_node=None, arv_node=None, id_num=1):
         if cloud_node is None:
             cloud_node = testutil.cloud_node_mock(id_num)
+        id_num = int(cloud_node.id)
         if arv_node is None:
             arv_node = testutil.arvados_node_mock(id_num)
         self.make_daemon(want_sizes=[testutil.MockSize(id_num)],
@@ -295,7 +309,7 @@ class NodeManagerDaemonActorTestCase(testutil.ActorTestMixin,
         self.daemon.update_cloud_nodes([cloud_node])
         self.monitor_list()[0].proxy().cloud_node_start_time = time.time()-1801
         self.daemon.update_server_wishlist(
-            [testutil.MockSize(1)]).get(self.TIMEOUT)
+            [testutil.MockSize(4)]).get(self.TIMEOUT)
         self.stop_proxy(self.daemon)
         self.assertEqual(2, self.node_setup.start.call_count)
 
@@ -483,10 +497,11 @@ class NodeManagerDaemonActorTestCase(testutil.ActorTestMixin,
         self.assertTrue(self.node_shutdown.start.called)
 
     def test_shutdown_declined_when_idle_and_job_queued(self):
-        cloud_nodes = [testutil.cloud_node_mock(n) for n in [3, 4]]
+        size = testutil.MockSize(1)
+        cloud_nodes = [testutil.cloud_node_mock(n, size=size) for n in [3, 4]]
         arv_nodes = [testutil.arvados_node_mock(3, job_uuid=True),
                      testutil.arvados_node_mock(4, job_uuid=None)]
-        self.make_daemon(cloud_nodes, arv_nodes, [testutil.MockSize(1)])
+        self.make_daemon(cloud_nodes, arv_nodes, [size])
         self.assertEqual(2, self.alive_monitor_count())
         for mon_ref in self.monitor_list():
             monitor = mon_ref.proxy()
@@ -514,7 +529,7 @@ class NodeManagerDaemonActorTestCase(testutil.ActorTestMixin,
         self.daemon.node_finished_shutdown(self.last_shutdown).get(self.TIMEOUT)
         self.assertEqual(0, self.alive_monitor_count())
 
-    def test_broken_node_blackholed_after_cancelled_shutdown(self):
+    def test_broken_node_not_counted(self):
         size = testutil.MockSize(8)
         cloud_node = testutil.cloud_node_mock(8, size=size)
         wishlist = [size]
@@ -526,7 +541,7 @@ class NodeManagerDaemonActorTestCase(testutil.ActorTestMixin,
         shutdown_proxy = self.node_shutdown.start().proxy
         shutdown_proxy().cloud_node.get.return_value = cloud_node
         shutdown_proxy().success.get.return_value = False
-        shutdown_proxy().cancel_reason.get.return_value = self.node_shutdown.NODE_BROKEN
+        self.cloud_factory().broken.return_value = True
         self.daemon.update_server_wishlist([]).get(self.TIMEOUT)
         self.daemon.node_can_shutdown(monitor).get(self.TIMEOUT)
         self.daemon.node_finished_shutdown(shutdown_proxy()).get(self.TIMEOUT)
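
The common thread in these test changes is keeping node sizes consistent
among the wishlist, the cloud_node mocks, and the ServerCalculator, so
that size mismatches fail tests instead of producing suppressed errors.
With the new make_daemon default, a test that passes cloud nodes gets a
matching avail_sizes derived automatically; a hypothetical test body:

    size = testutil.MockSize(1)
    cloud_nodes = [testutil.cloud_node_mock(1, size=size),
                   testutil.cloud_node_mock(2, size=size)]
    # When avail_sizes is not given, make_daemon now derives it as
    #   [(c.size, {"cores": int(c.id)}) for c in cloud_nodes]
    #     + [(s, {"cores": 1}) for s in want_sizes]
    self.make_daemon(cloud_nodes=cloud_nodes, want_sizes=[size, size])
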
diff --git a/services/nodemanager/tests/test_failure.py b/services/nodemanager/tests/test_failure.py
index dea7230..285aa03 100644
--- a/services/nodemanager/tests/test_failure.py
+++ b/services/nodemanager/tests/test_failure.py
@@ -29,7 +29,7 @@ class BogusActor(arvnodeman.baseactor.BaseNodeManagerActor):
         time.sleep(2)
         return True
 
-class ActorUnhandledExceptionTest(unittest.TestCase):
+class ActorUnhandledExceptionTest(testutil.ActorTestMixin, unittest.TestCase):
     def test_fatal_error(self):
         for e in (MemoryError(), threading.ThreadError(), OSError(errno.ENOMEM, "")):
             with mock.patch('os.kill') as kill_mock:
@@ -45,7 +45,7 @@ class ActorUnhandledExceptionTest(unittest.TestCase):
         act.actor_ref.stop(block=True)
         self.assertFalse(kill_mock.called)
 
-class WatchdogActorTest(unittest.TestCase):
+class WatchdogActorTest(testutil.ActorTestMixin, unittest.TestCase):
     @mock.patch('os.kill')
     def test_time_timout(self, kill_mock):
         act = BogusActor.start(OSError(errno.ENOENT, ""))
diff --git a/services/nodemanager/tests/testutil.py b/services/nodemanager/tests/testutil.py
index 15337c4..41f4ed1 100644
--- a/services/nodemanager/tests/testutil.py
+++ b/services/nodemanager/tests/testutil.py
@@ -209,7 +209,9 @@ class RemotePollLoopActorTestMixin(ActorTestMixin):
         self.monitor = self.TEST_CLASS.start(
             self.client, self.timer, *args, **kwargs).proxy()
 
-def cloud_node_mock(node_num=99, size=MockSize(1), **extra):
+def cloud_node_mock(node_num=99, size=None, **extra):
+    if size is None:
+        size = MockSize(node_num)
     node = mock.NonCallableMagicMock(
         ['id', 'name', 'state', 'public_ips', 'private_ips', 'driver', 'size',
          'image', 'extra'],
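
Finally, cloud_node_mock now defaults the node size to MockSize(node_num)
rather than a fixed MockSize(1), so mocks created independently within a
test agree on size by construction. A test that genuinely wants a node
with no known size (as in the Azure fixed-size test above) clears the
attribute explicitly after construction:

    node = testutil.cloud_node_mock(3)   # node.size defaults to MockSize(3)

    sizeless = testutil.cloud_node_mock(tags={'arvados-class': 'test'})
    sizeless.size = None                 # size not yet known to the driver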

commit 04a8e2c7d8a8dd3afd891292415cdfaea25fd481
Author: Peter Amstutz <peter.amstutz@curoverse.com>
Date:   Wed Feb 1 12:40:12 2017 -0500

    10846: Remove duplicate log message suppression by ComputeNodeMonitorActor

diff --git a/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py b/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py
index a950210..25b0779 100644
--- a/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py
+++ b/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py
@@ -306,7 +306,6 @@ class ComputeNodeMonitorActor(config.actor_class):
     ):
         super(ComputeNodeMonitorActor, self).__init__()
         self._later = self.actor_ref.tell_proxy()
-        self._last_log = None
         self._shutdowns = shutdown_timer
         self._cloud_node_fqdn = cloud_fqdn_func
         self._timer = timer_actor
@@ -334,9 +333,6 @@ class ComputeNodeMonitorActor(config.actor_class):
         self.subscribers.add(subscriber)
 
     def _debug(self, msg, *args):
-        if msg == self._last_log:
-            return
-        self._last_log = msg
         self._logger.debug(msg, *args)
 
     def get_state(self):

-----------------------------------------------------------------------

