[ARVADOS] updated: da6e931b990c7edde55213196fc8a47f4dc4998d

Git user git at public.curoverse.com
Fri Apr 15 11:29:55 EDT 2016


Summary of changes:
 .../arvnodeman/computenode/dispatch/__init__.py    | 33 +++++++++++-----------
 .../nodemanager/tests/test_computenode_dispatch.py | 18 ++++++------
 2 files changed, 25 insertions(+), 26 deletions(-)

       via  da6e931b990c7edde55213196fc8a47f4dc4998d (commit)
      from  e7091ec45f8f634a52db2fba1b385e790d69e6fe (commit)

Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.


commit da6e931b990c7edde55213196fc8a47f4dc4998d
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date:   Fri Apr 15 11:29:50 2016 -0400

    8953: shutdown_eligible() returns a tuple.  Report reason for shutdown decision.

diff --git a/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py b/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py
index 412e6f8..e7ae10f 100644
--- a/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py
+++ b/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py
@@ -358,10 +358,9 @@ class ComputeNodeMonitorActor(config.actor_class):
     def shutdown_eligible(self):
         """Determine if node is candidate for shut down.
 
-        Returns True if the node is candidate for shut down, if not, returns a
-        string explaining why it is not a candidate for shut down.  It is very
-        import to test the return value of this method as
-        "if shutdown_eligible() is True:".
+        Returns a tuple of (boolean, string) where the first value is whether
+        the node is candidate for shut down, and the second value is the
+        reason for the decision.
         """
 
         # Collect states and then consult state transition table whether we
@@ -374,11 +373,11 @@ class ComputeNodeMonitorActor(config.actor_class):
         if self.arvados_node is None:
             crunch_worker_state = 'unpaired'
         elif not timestamp_fresh(arvados_node_mtime(self.arvados_node), self.node_stale_after):
-            return "node state is stale"
+            return (False, "node state is stale")
         elif self.arvados_node['crunch_worker_state']:
             crunch_worker_state = self.arvados_node['crunch_worker_state']
         else:
-            return "node is paired but crunch_worker_state is '%s'" % self.arvados_node['crunch_worker_state']
+            return (False, "node is paired but crunch_worker_state is '%s'" % self.arvados_node['crunch_worker_state'])
 
         window = "open" if self._shutdowns.window_open() else "closed"
 
@@ -392,28 +391,28 @@ class ComputeNodeMonitorActor(config.actor_class):
 
         node_state = (crunch_worker_state, window, boot_grace, idle_grace)
         t = transitions[node_state]
-        self._debug("Node state is %s, transition is %s", node_state , t)
         if t is not None:
             # yes, shutdown eligible
-            return True
+            return (True, "node state is %s" % (node_state,))
         else:
             # no, return a reason
-            return "node state is %s" % (node_state,)
+            return (False, "node state is %s" % (node_state,))
 
     def consider_shutdown(self):
         try:
             eligible = self.shutdown_eligible()
             next_opening = self._shutdowns.next_opening()
-            if eligible is True:
-                self._debug("Suggesting shutdown")
+            if eligible[0]:
+                self._debug("Suggesting shutdown because %s", eligible[1])
                 _notify_subscribers(self.actor_ref.proxy(), self.subscribers)
-            elif self.last_shutdown_opening != next_opening:
-                self._debug("Shutdown window closed.  Next at %s.",
-                            time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(next_opening)))
-                self._timer.schedule(next_opening, self._later.consider_shutdown)
-                self.last_shutdown_opening = next_opening
             else:
-              self._debug("Won't shut down")
+                self._debug("Not eligible for shut down because %s", eligible[1])
+
+                if self.last_shutdown_opening != next_opening:
+                    self._debug("Shutdown window closed.  Next at %s.",
+                                time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(next_opening)))
+                    self._timer.schedule(next_opening, self._later.consider_shutdown)
+                    self.last_shutdown_opening = next_opening
         except Exception:
             self._logger.exception("Unexpected exception")
 
diff --git a/services/nodemanager/tests/test_computenode_dispatch.py b/services/nodemanager/tests/test_computenode_dispatch.py
index 8bb0c50..8def853 100644
--- a/services/nodemanager/tests/test_computenode_dispatch.py
+++ b/services/nodemanager/tests/test_computenode_dispatch.py
@@ -351,12 +351,12 @@ class ComputeNodeMonitorActorTestCase(testutil.ActorTestMixin,
         self.make_actor()
         self.shutdowns._set_state(True, 600)
         self.assertEquals(self.node_actor.shutdown_eligible().get(self.TIMEOUT),
-                          "node state is ('unpaired', 'open', 'boot wait', 'idle exceeded')")
+                          (False, "node state is ('unpaired', 'open', 'boot wait', 'idle exceeded')"))
 
     def test_shutdown_without_arvados_node(self):
         self.make_actor(start_time=0)
         self.shutdowns._set_state(True, 600)
-        self.assertIs(True, self.node_actor.shutdown_eligible().get(self.TIMEOUT))
+        self.assertEquals(self.node_actor.shutdown_eligible().get(self.TIMEOUT), (True, "node state is ('unpaired', 'open', 'boot exceeded', 'idle exceeded')"))
 
     def test_shutdown_missing(self):
         arv_node = testutil.arvados_node_mock(10, job_uuid=None,
@@ -364,7 +364,7 @@ class ComputeNodeMonitorActorTestCase(testutil.ActorTestMixin,
                                               last_ping_at='1970-01-01T01:02:03.04050607Z')
         self.make_actor(10, arv_node)
         self.shutdowns._set_state(True, 600)
-        self.assertIs(self.node_actor.shutdown_eligible().get(self.TIMEOUT), True)
+        self.assertEquals(self.node_actor.shutdown_eligible().get(self.TIMEOUT), (True, "node state is ('down', 'open', 'boot wait', 'idle exceeded')"))
 
     def test_shutdown_running_broken(self):
         arv_node = testutil.arvados_node_mock(12, job_uuid=None,
@@ -372,7 +372,7 @@ class ComputeNodeMonitorActorTestCase(testutil.ActorTestMixin,
         self.make_actor(12, arv_node)
         self.shutdowns._set_state(True, 600)
         self.cloud_client.broken.return_value = True
-        self.assertIs(self.node_actor.shutdown_eligible().get(self.TIMEOUT), True)
+        self.assertEquals(self.node_actor.shutdown_eligible().get(self.TIMEOUT), (True, "node state is ('down', 'open', 'boot wait', 'idle exceeded')"))
 
     def test_shutdown_missing_broken(self):
         arv_node = testutil.arvados_node_mock(11, job_uuid=None,
@@ -381,31 +381,31 @@ class ComputeNodeMonitorActorTestCase(testutil.ActorTestMixin,
         self.make_actor(11, arv_node)
         self.shutdowns._set_state(True, 600)
         self.cloud_client.broken.return_value = True
-        self.assertIs(True, self.node_actor.shutdown_eligible().get(self.TIMEOUT))
+        self.assertEquals(self.node_actor.shutdown_eligible().get(self.TIMEOUT), (True, "node state is ('down', 'open', 'boot wait', 'idle exceeded')"))
 
     def test_no_shutdown_when_window_closed(self):
         self.make_actor(3, testutil.arvados_node_mock(3, job_uuid=None))
         self.assertEquals(self.node_actor.shutdown_eligible().get(self.TIMEOUT),
-                          "node state is ('idle', 'closed', 'boot wait', 'idle exceeded')")
+                          (False, "node state is ('idle', 'closed', 'boot wait', 'idle exceeded')"))
 
     def test_no_shutdown_when_node_running_job(self):
         self.make_actor(4, testutil.arvados_node_mock(4, job_uuid=True))
         self.shutdowns._set_state(True, 600)
         self.assertEquals(self.node_actor.shutdown_eligible().get(self.TIMEOUT),
-                          "node state is ('busy', 'open', 'boot wait', 'idle exceeded')")
+                          (False, "node state is ('busy', 'open', 'boot wait', 'idle exceeded')"))
 
     def test_no_shutdown_when_node_state_unknown(self):
         self.make_actor(5, testutil.arvados_node_mock(
             5, crunch_worker_state=None))
         self.shutdowns._set_state(True, 600)
         self.assertEquals(self.node_actor.shutdown_eligible().get(self.TIMEOUT),
-                          "node is paired but crunch_worker_state is 'None'")
+                          (False, "node is paired but crunch_worker_state is 'None'"))
 
     def test_no_shutdown_when_node_state_stale(self):
         self.make_actor(6, testutil.arvados_node_mock(6, age=90000))
         self.shutdowns._set_state(True, 600)
         self.assertEquals(self.node_actor.shutdown_eligible().get(self.TIMEOUT),
-                          "node state is stale")
+                          (False, "node state is stale"))
 
     def test_arvados_node_match(self):
         self.make_actor(2)

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list