[ARVADOS] updated: 5e75d51dd186ae59eeb595b7d0f6a93aa4aef271

Git user git at public.curoverse.com
Mon May 16 16:36:47 EDT 2016


Summary of changes:
 services/nodemanager/arvnodeman/computenode/dispatch/__init__.py | 9 +++++++--
 services/nodemanager/arvnodeman/daemon.py                        | 2 --
 2 files changed, 7 insertions(+), 4 deletions(-)

       via  5e75d51dd186ae59eeb595b7d0f6a93aa4aef271 (commit)
       via  5c549965a11b6a2ce789c1e0db9e418f695aed84 (commit)
      from  a7a16338702965de3ad0687470ef5beb2f42759b (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.


commit 5e75d51dd186ae59eeb595b7d0f6a93aa4aef271
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date:   Mon May 16 16:36:41 2016 -0400

    9161: Remove spurious prints

diff --git a/services/nodemanager/arvnodeman/daemon.py b/services/nodemanager/arvnodeman/daemon.py
index 589b9a1..266b665 100644
--- a/services/nodemanager/arvnodeman/daemon.py
+++ b/services/nodemanager/arvnodeman/daemon.py
@@ -429,13 +429,11 @@ class NodeManagerDaemonActor(actor_class):
     @_check_poll_freshness
     def node_can_shutdown(self, node_actor):
         if self._nodes_excess(node_actor.cloud_node.get().size) > 0:
-            print("excess")
             self._begin_node_shutdown(node_actor, cancellable=True)
         elif self.cloud_nodes.nodes.get(node_actor.cloud_node.get().id).arvados_node is None:
             # Node is unpaired, which means it probably exceeded its booting
             # grace period without a ping, so shut it down so we can boot a new
             # node in its place.
-            print("unpaired")
             self._begin_node_shutdown(node_actor, cancellable=False)
         elif node_actor.in_state('down').get():
             # Node is down and unlikely to come back.

commit 5c549965a11b6a2ce789c1e0db9e418f695aed84
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date:   Mon May 16 14:30:06 2016 -0400

    9161: Don't automatically consider nodes with job_uuid set to be 'busy'.

diff --git a/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py b/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py
index f9dbd20..96b2353 100644
--- a/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py
+++ b/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py
@@ -371,8 +371,13 @@ class ComputeNodeMonitorActor(config.actor_class):
         if arvados_node_missing(self.arvados_node, self.node_stale_after):
             state = 'down'
 
-        if state == 'idle' and self.arvados_node['job_uuid']:
-            state = 'busy'
+        # Turns out using 'job_uuid' this way is a bad idea.  The node record
+        # is assigned the job_uuid before the job is locked (which removes it
+        # from the queue), which means the job will be double-counted as both
+        # in the wishlist and keeping a node busy.  The end result is excess
+        # nodes being booted.
+        #if state == 'idle' and self.arvados_node['job_uuid']:
+        #    state = 'busy'
 
         return state
 

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list