[ARVADOS] created: b0e5efdd3fddb4779c0c1f51ac4ebca868f33952
Git user
git at public.curoverse.com
Fri Jun 9 16:27:23 EDT 2017
at b0e5efdd3fddb4779c0c1f51ac4ebca868f33952 (commit)
commit b0e5efdd3fddb4779c0c1f51ac4ebca868f33952
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date: Fri Jun 9 16:21:54 2017 -0400
11461: When destroy_on_shutdown is true, only shut down nodes known to have been booted by the current process.
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curoverse.com>
diff --git a/services/nodemanager/arvnodeman/daemon.py b/services/nodemanager/arvnodeman/daemon.py
index f3b9765..7ef628d 100644
--- a/services/nodemanager/arvnodeman/daemon.py
+++ b/services/nodemanager/arvnodeman/daemon.py
@@ -141,6 +141,7 @@ class NodeManagerDaemonActor(actor_class):
self.booting = {} # Actor IDs to ComputeNodeSetupActors
self.sizes_booting = {} # Actor IDs to node size
self.destroy_on_shutdown = destroy_on_shutdown
+ self.booted_by_this_process = []
def on_start(self):
self._logger = logging.getLogger("%s.%s" % (self.__class__.__name__, self.actor_urn[33:]))
@@ -447,6 +448,9 @@ class NodeManagerDaemonActor(actor_class):
self.node_quota = len(self.cloud_nodes)+1
self._logger.warning("After successful boot setting node quota to %s", self.node_quota)
+ if self.destroy_on_shutdown:
+ self.booted_by_this_process.append(cloud_node.id)
+
self.node_quota = min(self.node_quota, self.max_nodes)
del self.booting[setup_proxy.actor_ref.actor_urn]
del self.sizes_booting[setup_proxy.actor_ref.actor_urn]
@@ -550,11 +554,13 @@ class NodeManagerDaemonActor(actor_class):
def await_shutdown(self):
nodes_up = 0
if self.destroy_on_shutdown:
- for node in self.cloud_nodes.nodes.itervalues():
- # Begin shutdown of all nodes.
- if node.actor and not node.shutdown_actor:
- self._begin_node_shutdown(node.actor, cancellable=False)
- nodes_up = sum(1 for node in self.cloud_nodes.nodes.itervalues() if node.actor)
+ for nodeid in self.booted_by_this_process:
+ # Begin shutdown of nodes booted by the current process.
+ node = self.cloud_nodes.nodes[nodeid]
+ if node.actor:
+ nodes_up += 1
+ if not node.shutdown_actor:
+ self._begin_node_shutdown(node.actor, cancellable=False)
if self.booting or nodes_up:
self._timer.schedule(time.time() + 1, self._later.await_shutdown)
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits mailing list