[ARVADOS] updated: e13c868ed9216b4ad414adc435a9f9ed5afe2b89
Git user
git at public.curoverse.com
Wed Apr 6 15:52:00 EDT 2016
Summary of changes:
.../nodemanager/arvnodeman/computenode/dispatch/slurm.py | 12 +++++++-----
.../nodemanager/tests/test_computenode_dispatch_slurm.py | 8 ++++++++
2 files changed, 15 insertions(+), 5 deletions(-)
via e13c868ed9216b4ad414adc435a9f9ed5afe2b89 (commit)
from 241ef75ec8b6cf5dd14ce19fa068462adaeb0386 (commit)
Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.
commit e13c868ed9216b4ad414adc435a9f9ed5afe2b89
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date: Wed Apr 6 15:51:56 2016 -0400
8799: Nodes in "drain" state are not automatically eligible for shutdown, to
avoid a race between starting a shutdown and resume_node().
diff --git a/services/nodemanager/arvnodeman/computenode/dispatch/slurm.py b/services/nodemanager/arvnodeman/computenode/dispatch/slurm.py
index 9ef54b3..845379f 100644
--- a/services/nodemanager/arvnodeman/computenode/dispatch/slurm.py
+++ b/services/nodemanager/arvnodeman/computenode/dispatch/slurm.py
@@ -72,11 +72,13 @@ class ComputeNodeShutdownActor(SlurmMixin, ShutdownActorBase):
class ComputeNodeMonitorActor(SlurmMixin, MonitorActorBase):
def shutdown_eligible(self):
- if (self.arvados_node is not None and
- self._get_slurm_state(self.arvados_node['hostname']) in self.SLURM_END_STATES):
- return True
- else:
- return super(ComputeNodeMonitorActor, self).shutdown_eligible()
+ if self.arvados_node is not None:
+ state = self._get_slurm_state(self.arvados_node['hostname'])
+ # Automatically eligible for shutdown if it's down or failed, but
+ # not drain to avoid a race condition with resume_node().
+ if state in self.SLURM_END_STATES and state not in self.SLURM_DRAIN_STATES:
+ return True
+ return super(ComputeNodeMonitorActor, self).shutdown_eligible()
def resume_node(self):
try:
diff --git a/services/nodemanager/tests/test_computenode_dispatch_slurm.py b/services/nodemanager/tests/test_computenode_dispatch_slurm.py
index 212bb3d..6e03a7d 100644
--- a/services/nodemanager/tests/test_computenode_dispatch_slurm.py
+++ b/services/nodemanager/tests/test_computenode_dispatch_slurm.py
@@ -160,3 +160,11 @@ class SLURMComputeNodeMonitorActorTestCase(testutil.ActorTestMixin,
self.assertEquals('shutdown window is not open.', self.node_actor.shutdown_eligible().get(self.TIMEOUT))
self.shutdowns._set_state(True, 600)
self.assertTrue(self.node_actor.shutdown_eligible().get(self.TIMEOUT))
+
+ @mock.patch("subprocess.check_output")
+ def test_no_shutdown_drain_node(self, check_output):
+ check_output.return_value = "drain\n"
+ self.make_actor()
+ self.assertEquals('shutdown window is not open.', self.node_actor.shutdown_eligible().get(self.TIMEOUT))
+ self.shutdowns._set_state(True, 600)
+ self.assertTrue(self.node_actor.shutdown_eligible().get(self.TIMEOUT))
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list