[ARVADOS] created: eb11bb17f1b95795104a46ad9b3bfbea1c9deae3

Git user git at public.curoverse.com
Tue Apr 12 21:54:53 EDT 2016


        at  eb11bb17f1b95795104a46ad9b3bfbea1c9deae3 (commit)


commit eb11bb17f1b95795104a46ad9b3bfbea1c9deae3
Author: Brett Smith <brett at curoverse.com>
Date:   Tue Apr 12 21:54:50 2016 -0400

    8953: Drained SLURM nodes can be eligible for shutdown.
    
    Without this, shutdown_eligible() will report "node is draining" as
    ComputeNodeShutdownActor runs its course, cancelling the shutdown.

diff --git a/services/nodemanager/arvnodeman/computenode/dispatch/slurm.py b/services/nodemanager/arvnodeman/computenode/dispatch/slurm.py
index 41919db..6d979b6 100644
--- a/services/nodemanager/arvnodeman/computenode/dispatch/slurm.py
+++ b/services/nodemanager/arvnodeman/computenode/dispatch/slurm.py
@@ -76,11 +76,9 @@ class ComputeNodeMonitorActor(SlurmMixin, MonitorActorBase):
             state = self._get_slurm_state(self.arvados_node['hostname'])
             # Automatically eligible for shutdown if it's down or failed, but
             # not drain to avoid a race condition with resume_node().
-            if state in self.SLURM_END_STATES:
-                if state in self.SLURM_DRAIN_STATES:
-                    return "node is draining"
-                else:
-                    return True
+            if ((state in self.SLURM_END_STATES) and
+                  (state not in self.SLURM_DRAIN_STATES)):
+                return True
         return super(ComputeNodeMonitorActor, self).shutdown_eligible()
 
     def resume_node(self):
diff --git a/services/nodemanager/tests/test_computenode_dispatch_slurm.py b/services/nodemanager/tests/test_computenode_dispatch_slurm.py
index 135b817..0ec9612 100644
--- a/services/nodemanager/tests/test_computenode_dispatch_slurm.py
+++ b/services/nodemanager/tests/test_computenode_dispatch_slurm.py
@@ -147,7 +147,14 @@ class SLURMComputeNodeMonitorActorTestCase(testutil.ActorTestMixin,
         self.assertIs(True, self.node_actor.shutdown_eligible().get(self.TIMEOUT))
 
     @mock.patch("subprocess.check_output")
-    def test_no_shutdown_drain_node(self, check_output):
+    def test_no_shutdown_ineligible_drain_node(self, check_output):
         check_output.return_value = "drain\n"
         self.make_actor(arv_node=testutil.arvados_node_mock())
-        self.assertEquals('node is draining', self.node_actor.shutdown_eligible().get(self.TIMEOUT))
+        self.assertIsNot(True, self.node_actor.shutdown_eligible().get(self.TIMEOUT))
+
+    @mock.patch("subprocess.check_output")
+    def test_shutdown_eligible_drain_node(self, check_output):
+        check_output.return_value = "drain\n"
+        self.make_actor(arv_node=testutil.arvados_node_mock())
+        self.shutdowns._set_state(True, 300)
+        self.assertIs(True, self.node_actor.shutdown_eligible().get(self.TIMEOUT))

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list