[ARVADOS] created: 62991a76b32b9c7558dc359058d82a0be1da104d
Git user
git at public.curoverse.com
Tue Apr 12 18:23:17 EDT 2016
at 62991a76b32b9c7558dc359058d82a0be1da104d (commit)
commit 62991a76b32b9c7558dc359058d82a0be1da104d
Author: Brett Smith <brett at curoverse.com>
Date: Tue Apr 12 18:22:59 2016 -0400
8952: Node Manager shutdowns only check SLURM state of paired nodes.
diff --git a/services/nodemanager/arvnodeman/computenode/dispatch/slurm.py b/services/nodemanager/arvnodeman/computenode/dispatch/slurm.py
index 41919db..881a1c1 100644
--- a/services/nodemanager/arvnodeman/computenode/dispatch/slurm.py
+++ b/services/nodemanager/arvnodeman/computenode/dispatch/slurm.py
@@ -72,7 +72,7 @@ class ComputeNodeShutdownActor(SlurmMixin, ShutdownActorBase):
class ComputeNodeMonitorActor(SlurmMixin, MonitorActorBase):
def shutdown_eligible(self):
- if self.arvados_node is not None:
+ if self.arvados_node and self.arvados_node['hostname']:
state = self._get_slurm_state(self.arvados_node['hostname'])
# Automatically eligible for shutdown if it's down or failed, but
# not drain to avoid a race condition with resume_node().
diff --git a/services/nodemanager/tests/test_computenode_dispatch_slurm.py b/services/nodemanager/tests/test_computenode_dispatch_slurm.py
index 135b817..3859c4f 100644
--- a/services/nodemanager/tests/test_computenode_dispatch_slurm.py
+++ b/services/nodemanager/tests/test_computenode_dispatch_slurm.py
@@ -151,3 +151,27 @@ class SLURMComputeNodeMonitorActorTestCase(testutil.ActorTestMixin,
check_output.return_value = "drain\n"
self.make_actor(arv_node=testutil.arvados_node_mock())
self.assertEquals('node is draining', self.node_actor.shutdown_eligible().get(self.TIMEOUT))
+
+ @mock.patch("subprocess.check_output")
+ def test_shutdown_node_that_never_pinged(self, check_output):
+ check_output.side_effect = Exception(
+ "tested code tried to call `sinfo` without a node name")
+ arv_node = testutil.arvados_node_mock()
+ arv_node.update(first_ping_at=None, last_ping_at=None, slot_number=None,
+ hostname=None, ip_address=None)
+ self.make_actor(arv_node=arv_node)
+ self.shutdowns._set_state(True, 300)
+ self.assertIs(True, self.node_actor.shutdown_eligible().get(self.TIMEOUT))
+ check_output.assert_not_called()
+
+ @mock.patch("subprocess.check_output")
+ def test_no_shutdown_node_still_awaiting_ping(self, check_output):
+ check_output.side_effect = Exception(
+ "tested code tried to call `sinfo` without a node name")
+ arv_node = testutil.arvados_node_mock()
+ arv_node.update(first_ping_at=None, last_ping_at=None, slot_number=None,
+ hostname=None, ip_address=None)
+ self.make_actor(arv_node=arv_node)
+ self.assertIn('shutdown window is not open',
+ self.node_actor.shutdown_eligible().get(self.TIMEOUT))
+ check_output.assert_not_called()
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list