[ARVADOS] created: 96da34b1888638648c291d989d990b4e5738db88
Git user
git at public.curoverse.com
Fri Sep 29 23:04:37 EDT 2017
at 96da34b1888638648c291d989d990b4e5738db88 (commit)
commit 96da34b1888638648c291d989d990b4e5738db88
Author: Lucas Di Pentima <ldipentima at veritasgenetics.com>
Date: Sat Sep 30 00:02:19 2017 -0300
12073: Prioritize stale node records that have a slot_number when
trying to assign one to a cloud node.
Arvados-DCO-1.1-Signed-off-by: Lucas Di Pentima <ldipentima at veritasgenetics.com>
diff --git a/services/nodemanager/arvnodeman/daemon.py b/services/nodemanager/arvnodeman/daemon.py
index d8087a1..ca3029d 100644
--- a/services/nodemanager/arvnodeman/daemon.py
+++ b/services/nodemanager/arvnodeman/daemon.py
@@ -78,7 +78,10 @@ class _ArvadosNodeTracker(_BaseNodeTracker):
item_key = staticmethod(lambda arvados_node: arvados_node['uuid'])
def find_stale_node(self, stale_time):
- for record in self.nodes.itervalues():
+ # Try to select a stale node record that has an assigned slot first
+ for record in sorted(self.nodes.itervalues(),
+ key=lambda r: r.arvados_node['slot_number'],
+ reverse=True):
node = record.arvados_node
if (not cnode.timestamp_fresh(cnode.arvados_node_mtime(node),
stale_time) and
diff --git a/services/nodemanager/tests/test_daemon.py b/services/nodemanager/tests/test_daemon.py
index d682080..ebe7408 100644
--- a/services/nodemanager/tests/test_daemon.py
+++ b/services/nodemanager/tests/test_daemon.py
@@ -193,6 +193,39 @@ class NodeManagerDaemonActorTestCase(testutil.ActorTestMixin,
want_sizes=[testutil.MockSize(1)])
self.busywait(lambda: not self.node_setup.start.called)
+ def test_select_stale_node_records_with_slot_numbers_first(self):
+ """
+ Stale node records with slot_number assigned can exist when
+ clean_arvados_node() isn't executed after a node shutdown, for
+ various reasons.
+ NodeManagerDaemonActor should use these stale node records first, so
+ that they don't accumulate unused, reducing the slots available.
+ """
+ size = testutil.MockSize(1)
+ a_long_time_ago = '1970-01-01T01:02:03.04050607Z'
+ arvados_nodes = []
+ for n in range(9):
+ # Add several stale node records without slot_number assigned
+ arvados_nodes.append(
+ testutil.arvados_node_mock(
+ n+1,
+ slot_number=None,
+ modified_at=a_long_time_ago))
+ # Add one record with slot_number assigned; it should be the
+ # first one selected
+ arv_node = testutil.arvados_node_mock(
+ 123,
+ modified_at=a_long_time_ago)
+ arvados_nodes.append(arv_node)
+ cloud_node = testutil.cloud_node_mock(125, size=size)
+ self.make_daemon(cloud_nodes=[cloud_node],
+ arvados_nodes=arvados_nodes)
+ arvados_nodes_tracker = self.daemon.arvados_nodes.get()
+ # Here, find_stale_node() should return the node record with
+ # the slot_number assigned.
+ self.assertEqual(arv_node,
+ arvados_nodes_tracker.find_stale_node(3601))
+
def test_dont_count_missing_as_busy(self):
size = testutil.MockSize(1)
self.make_daemon(cloud_nodes=[testutil.cloud_node_mock(1, size=size),
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list