[ARVADOS] created: 96da34b1888638648c291d989d990b4e5738db88

Git user git at public.curoverse.com
Fri Sep 29 23:04:37 EDT 2017


        at  96da34b1888638648c291d989d990b4e5738db88 (commit)


commit 96da34b1888638648c291d989d990b4e5738db88
Author: Lucas Di Pentima <ldipentima at veritasgenetics.com>
Date:   Sat Sep 30 00:02:19 2017 -0300

    12073: Prioritize stale node records that have a slot_number when
    trying to assign one to a cloud node.
    
    Arvados-DCO-1.1-Signed-off-by: Lucas Di Pentima <ldipentima at veritasgenetics.com>

diff --git a/services/nodemanager/arvnodeman/daemon.py b/services/nodemanager/arvnodeman/daemon.py
index d8087a1..ca3029d 100644
--- a/services/nodemanager/arvnodeman/daemon.py
+++ b/services/nodemanager/arvnodeman/daemon.py
@@ -78,7 +78,10 @@ class _ArvadosNodeTracker(_BaseNodeTracker):
     item_key = staticmethod(lambda arvados_node: arvados_node['uuid'])
 
     def find_stale_node(self, stale_time):
-        for record in self.nodes.itervalues():
+        # Try to select a stale node record that have an assigned slot first
+        for record in sorted(self.nodes.itervalues(),
+                             key=lambda r: r.arvados_node['slot_number'],
+                             reverse=True):
             node = record.arvados_node
             if (not cnode.timestamp_fresh(cnode.arvados_node_mtime(node),
                                           stale_time) and
diff --git a/services/nodemanager/tests/test_daemon.py b/services/nodemanager/tests/test_daemon.py
index d682080..ebe7408 100644
--- a/services/nodemanager/tests/test_daemon.py
+++ b/services/nodemanager/tests/test_daemon.py
@@ -193,6 +193,39 @@ class NodeManagerDaemonActorTestCase(testutil.ActorTestMixin,
                          want_sizes=[testutil.MockSize(1)])
         self.busywait(lambda: not self.node_setup.start.called)
 
+    def test_select_stale_node_records_with_slot_numbers_first(self):
+        """
+        Stale node records with slot_number assigned can exist when
+        clean_arvados_node() isn't executed after a node shutdown, for
+        various reasons.
+        NodeManagerDaemonActor should use these stale node records first, so
+        that they don't accumulate unused, reducing the slots available.
+        """
+        size = testutil.MockSize(1)
+        a_long_time_ago = '1970-01-01T01:02:03.04050607Z'
+        arvados_nodes = []
+        for n in range(9):
+            # Add several stale node records without slot_number assigned
+            arvados_nodes.append(
+                testutil.arvados_node_mock(
+                    n+1,
+                    slot_number=None,
+                    modified_at=a_long_time_ago))
+        # Add one record with stale_node assigned, it should be the
+        # first one selected
+        arv_node = testutil.arvados_node_mock(
+            123,
+            modified_at=a_long_time_ago)
+        arvados_nodes.append(arv_node)
+        cloud_node = testutil.cloud_node_mock(125, size=size)
+        self.make_daemon(cloud_nodes=[cloud_node],
+                         arvados_nodes=arvados_nodes)
+        arvados_nodes_tracker = self.daemon.arvados_nodes.get()
+        # Here, find_stale_node() should return the node record with
+        # the slot_number assigned.
+        self.assertEqual(arv_node,
+                         arvados_nodes_tracker.find_stale_node(3601))
+
     def test_dont_count_missing_as_busy(self):
         size = testutil.MockSize(1)
         self.make_daemon(cloud_nodes=[testutil.cloud_node_mock(1, size=size),

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list