[ARVADOS] updated: 2a94b125b93a3aba204f55c37ecdc2876d81d642
git at public.curoverse.com
git at public.curoverse.com
Wed Oct 7 10:36:48 EDT 2015
Summary of changes:
.../arvnodeman/computenode/dispatch/slurm.py | 19 ++++++-------------
.../tests/test_computenode_dispatch_slurm.py | 13 ++++++++++---
2 files changed, 16 insertions(+), 16 deletions(-)
via 2a94b125b93a3aba204f55c37ecdc2876d81d642 (commit)
from 9c95a4c2dcd650627d524513e1e18596c8533ac0 (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
commit 2a94b125b93a3aba204f55c37ecdc2876d81d642
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date: Wed Oct 7 10:38:46 2015 -0400
6142: Only resume from 'drng' or 'drain'. Add/fix tests.
diff --git a/services/nodemanager/arvnodeman/computenode/dispatch/slurm.py b/services/nodemanager/arvnodeman/computenode/dispatch/slurm.py
index bb397fa..dfb26bc 100644
--- a/services/nodemanager/arvnodeman/computenode/dispatch/slurm.py
+++ b/services/nodemanager/arvnodeman/computenode/dispatch/slurm.py
@@ -37,20 +37,13 @@ class ComputeNodeShutdownActor(ShutdownActorBase):
@ShutdownActorBase._retry((subprocess.CalledProcessError,))
def cancel_shutdown(self):
if self._nodename:
- try:
+ if self._get_slurm_state() in self.SLURM_DRAIN_STATES:
+ # Resume from "drng" or "drain"
self._set_node_state('RESUME')
- except subprocess.CalledProcessError:
- slum_state = self._get_slurm_state()
- if slum_state in self.SLURM_DRAIN_STATES:
- # We expect to be able to resume from "drain" or "drng"
- # So if scontrol exited non-zero, something actually failed, so
- # raise an exception to signal the retry to kick in.
- raise
- else:
- # Assume scontrol exited non-zero because the node is already in
- # 'idle' or 'alloc' (so it never started draining)
- # we don't need to do anything else resume it.
- pass
+ else:
+ # Node is in a state such as 'idle' or 'alloc' so don't
+ # try to resume it because that will just raise an error.
+ pass
return super(ComputeNodeShutdownActor, self).cancel_shutdown()
@ShutdownActorBase._stop_if_window_closed
diff --git a/services/nodemanager/tests/test_computenode_dispatch_slurm.py b/services/nodemanager/tests/test_computenode_dispatch_slurm.py
index c5097a7..2ddf767 100644
--- a/services/nodemanager/tests/test_computenode_dispatch_slurm.py
+++ b/services/nodemanager/tests/test_computenode_dispatch_slurm.py
@@ -60,12 +60,19 @@ class SLURMComputeNodeShutdownActorTestCase(ComputeNodeShutdownActorMixin,
self.assertFalse(proc_mock.called)
def test_node_undrained_when_shutdown_window_closes(self, proc_mock):
- proc_mock.return_value = 'alloc\n'
+ proc_mock.side_effect = iter(['drng\n', 'idle\n'])
self.make_mocks(arvados_node=testutil.arvados_node_mock(job_uuid=True))
self.make_actor()
self.check_success_flag(False, 2)
- self.check_slurm_got_args(proc_mock, 'NodeName=compute99',
- 'State=RESUME')
+ self.check_slurm_got_args(proc_mock, 'NodeName=compute99', 'State=RESUME')
+
+ def test_alloc_node_undrained_when_shutdown_window_closes(self, proc_mock):
+ proc_mock.side_effect = iter(['alloc\n'])
+ self.make_mocks(arvados_node=testutil.arvados_node_mock(job_uuid=True))
+ self.make_actor()
+ self.check_success_flag(False, 2)
+ self.check_slurm_got_args(proc_mock, 'sinfo', '--noheader', '-o', '%t', '-n', 'compute99')
+
def test_arvados_node_cleaned_after_shutdown(self, proc_mock):
proc_mock.return_value = 'drain\n'
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list