[ARVADOS] created: 3ee90fd536fbfc9d8d91d5fac7c12d0ebe0df5ab

git at public.curoverse.com git at public.curoverse.com
Mon Nov 10 16:28:46 EST 2014


        at  3ee90fd536fbfc9d8d91d5fac7c12d0ebe0df5ab (commit)


commit 3ee90fd536fbfc9d8d91d5fac7c12d0ebe0df5ab
Author: Tim Pierce <twp at curoverse.com>
Date:   Fri Nov 7 09:37:24 2014 -0500

    4294: added min_nodes config parameter
    
    Added min_nodes configuration setting.  The job queue will return a
    wishlist with at least min_nodes elements in it, and the node manager
    daemon will avoid shutting down a node if it would bring the total below
    min_nodes.
    
    * arvnodeman.config sets default min_nodes to 0.
    * NodeManagerDaemonActor:
    ** _nodes_wanted returns at least enough to keep the node count at or
       above min_nodes
    ** _nodes_excess will not allow the node count to drop below min_nodes
    * jobqueue.ServerCalculator.servers_for_queue returns a list with at least
      min_nodes elements
    * added test cases:
    ** NodeManagerDaemonActorTestCase.test_shutdown_declined_below_min_nodes
    ** ServerCalculatorTestCase.test_server_calc_returns_at_least_min_nodes

diff --git a/services/nodemanager/arvnodeman/config.py b/services/nodemanager/arvnodeman/config.py
index 1699584..754b931 100644
--- a/services/nodemanager/arvnodeman/config.py
+++ b/services/nodemanager/arvnodeman/config.py
@@ -37,7 +37,8 @@ class NodeManagerConfig(ConfigParser.SafeConfigParser):
         for sec_name, settings in {
             'Arvados': {'insecure': 'no',
                         'timeout': '15'},
-            'Daemon': {'max_nodes': '1',
+            'Daemon': {'min_nodes': '0',
+                       'max_nodes': '1',
                        'poll_time': '60',
                        'max_poll_time': '300',
                        'poll_stale_after': '600',
diff --git a/services/nodemanager/arvnodeman/daemon.py b/services/nodemanager/arvnodeman/daemon.py
index 83e3ec9..d950e2a 100644
--- a/services/nodemanager/arvnodeman/daemon.py
+++ b/services/nodemanager/arvnodeman/daemon.py
@@ -94,7 +94,7 @@ class NodeManagerDaemonActor(actor_class):
     def __init__(self, server_wishlist_actor, arvados_nodes_actor,
                  cloud_nodes_actor, cloud_update_actor, timer_actor,
                  arvados_factory, cloud_factory,
-                 shutdown_windows, max_nodes,
+                 shutdown_windows, min_nodes, max_nodes,
                  poll_stale_after=600, node_stale_after=7200,
                  node_setup_class=cnode.ComputeNodeSetupActor,
                  node_shutdown_class=cnode.ComputeNodeShutdownActor,
@@ -111,6 +111,7 @@ class NodeManagerDaemonActor(actor_class):
         self._logger = logging.getLogger('arvnodeman.daemon')
         self._later = self.actor_ref.proxy()
         self.shutdown_windows = shutdown_windows
+        self.min_nodes = min_nodes
         self.max_nodes = max_nodes
         self.poll_stale_after = poll_stale_after
         self.node_stale_after = node_stale_after
@@ -203,7 +204,8 @@ class NodeManagerDaemonActor(actor_class):
                    self.max_nodes) - self._nodes_up()
 
     def _nodes_excess(self):
-        return self._nodes_up() - self._nodes_busy() - len(self.last_wishlist)
+        idle_nodes = self._nodes_busy() + len(self.last_wishlist)
+        return (self._nodes_up() - max(self.min_nodes, idle_nodes))
 
     def update_server_wishlist(self, wishlist):
         self._update_poll_time('server_wishlist')
diff --git a/services/nodemanager/arvnodeman/jobqueue.py b/services/nodemanager/arvnodeman/jobqueue.py
index 59659fe..239934f 100644
--- a/services/nodemanager/arvnodeman/jobqueue.py
+++ b/services/nodemanager/arvnodeman/jobqueue.py
@@ -38,10 +38,11 @@ class ServerCalculator(object):
             return True
 
 
-    def __init__(self, server_list, max_nodes=None):
+    def __init__(self, server_list, min_nodes=0, max_nodes=None):
         self.cloud_sizes = [self.CloudSizeWrapper(s, **kws)
                             for s, kws in server_list]
         self.cloud_sizes.sort(key=lambda s: s.price)
+        self.min_nodes = min_nodes
         self.max_nodes = max_nodes or float('inf')
         self.logger = logging.getLogger('arvnodeman.jobqueue')
         self.logged_jobs = set()
@@ -78,6 +79,13 @@ class ServerCalculator(object):
             elif (want_count <= self.max_nodes):
                 servers.extend([cloud_size.real] * max(1, want_count))
         self.logged_jobs.intersection_update(seen_jobs)
+
+        # Make sure the server queue has at least enough entries to
+        # satisfy min_nodes.
+        node_shortfall = self.min_nodes - len(servers)
+        if node_shortfall > 0:
+            basic_node = self.cloud_size_for_constraints({})
+            servers.extend([basic_node.real] * node_shortfall)
         return servers
 
 
diff --git a/services/nodemanager/arvnodeman/launcher.py b/services/nodemanager/arvnodeman/launcher.py
index 87f2dda..f4ad716 100644
--- a/services/nodemanager/arvnodeman/launcher.py
+++ b/services/nodemanager/arvnodeman/launcher.py
@@ -68,7 +68,9 @@ def launch_pollers(config):
         abort("No valid node sizes configured")
 
     server_calculator = ServerCalculator(
-        cloud_size_list, config.getint('Daemon', 'max_nodes'))
+        cloud_size_list,
+        config.getint('Daemon', 'min_nodes'),
+        config.getint('Daemon', 'max_nodes'))
     poll_time = config.getint('Daemon', 'poll_time')
     max_poll_time = config.getint('Daemon', 'max_poll_time')
 
@@ -115,7 +117,9 @@ def main(args=None):
         job_queue_poller, arvados_node_poller, cloud_node_poller,
         cloud_node_updater, timer,
         config.new_arvados_client, config.new_cloud_client,
-        config.shutdown_windows(), config.getint('Daemon', 'max_nodes'),
+        config.shutdown_windows(),
+        config.getint('Daemon', 'min_nodes'),
+        config.getint('Daemon', 'max_nodes'),
         config.getint('Daemon', 'poll_stale_after'),
         config.getint('Daemon', 'node_stale_after')).proxy()
 
diff --git a/services/nodemanager/tests/test_daemon.py b/services/nodemanager/tests/test_daemon.py
index 93e4435..4bffd09 100644
--- a/services/nodemanager/tests/test_daemon.py
+++ b/services/nodemanager/tests/test_daemon.py
@@ -15,7 +15,7 @@ from . import testutil
 class NodeManagerDaemonActorTestCase(testutil.ActorTestMixin,
                                      unittest.TestCase):
     def make_daemon(self, cloud_nodes=[], arvados_nodes=[], want_sizes=[],
-                    max_nodes=8):
+                    min_nodes=0, max_nodes=8):
         for name in ['cloud_nodes', 'arvados_nodes', 'server_wishlist']:
             setattr(self, name + '_poller', mock.MagicMock(name=name + '_mock'))
         self.arv_factory = mock.MagicMock(name='arvados_mock')
@@ -29,7 +29,7 @@ class NodeManagerDaemonActorTestCase(testutil.ActorTestMixin,
             self.server_wishlist_poller, self.arvados_nodes_poller,
             self.cloud_nodes_poller, self.cloud_updates, self.timer,
             self.arv_factory, self.cloud_factory,
-            [54, 5, 1], max_nodes, 600, 3600,
+            [54, 5, 1], min_nodes, max_nodes, 600, 3600,
             self.node_setup, self.node_shutdown).proxy()
         if cloud_nodes is not None:
             self.daemon.update_cloud_nodes(cloud_nodes).get(self.TIMEOUT)
@@ -227,6 +227,15 @@ class NodeManagerDaemonActorTestCase(testutil.ActorTestMixin,
         self.stop_proxy(self.daemon)
         self.assertFalse(self.node_shutdown.start.called)
 
+    def test_shutdown_declined_below_min_nodes(self):
+        cloud_node = testutil.cloud_node_mock(1)
+        self.make_daemon(cloud_nodes=[cloud_node], min_nodes=1)
+        self.assertEqual(1, self.alive_monitor_count())
+        monitor = self.monitor_list()[0].proxy()
+        self.daemon.node_can_shutdown(monitor).get(self.TIMEOUT)
+        self.stop_proxy(self.daemon)
+        self.assertFalse(self.node_shutdown.start.called)
+
     def test_shutdown_accepted_below_capacity(self):
         self.make_daemon(cloud_nodes=[testutil.cloud_node_mock()])
         self.assertEqual(1, self.alive_monitor_count())
diff --git a/services/nodemanager/tests/test_jobqueue.py b/services/nodemanager/tests/test_jobqueue.py
index 158a3fd..6b5f532 100644
--- a/services/nodemanager/tests/test_jobqueue.py
+++ b/services/nodemanager/tests/test_jobqueue.py
@@ -48,6 +48,11 @@ class ServerCalculatorTestCase(unittest.TestCase):
                                   {'min_scratch_mb_per_node': 200})
         self.assertEqual(6, len(servlist))
 
+    def test_server_calc_returns_at_least_min_nodes(self):
+        servcalc = self.make_calculator([1], min_nodes=5, max_nodes=9)
+        servlist = self.calculate(servcalc, {})
+        self.assertEqual(5, len(servlist))
+
     def test_job_requesting_max_nodes_accepted(self):
         servcalc = self.make_calculator([1], max_nodes=4)
         servlist = self.calculate(servcalc, {'min_nodes': 4})

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list