[ARVADOS] updated: fb0f0a8eade18142e28e2691d656f262b0cdc01a
git at public.curoverse.com
git at public.curoverse.com
Mon Oct 12 12:32:53 EDT 2015
Summary of changes:
.../nodemanager/arvnodeman/computenode/dispatch/__init__.py | 13 +++++++++----
1 file changed, 9 insertions(+), 4 deletions(-)
via fb0f0a8eade18142e28e2691d656f262b0cdc01a (commit)
from 97ece5619eb80acce46ec5c7b521c08ecaaf0e86 (commit)
Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.
commit fb0f0a8eade18142e28e2691d656f262b0cdc01a
Author: Tom Clegg <tom at curoverse.com>
Date: Mon Oct 12 12:40:38 2015 -0400
Warn about unhandled case if broken node has no ping time. refs #7286
diff --git a/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py b/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py
index 9ea6c32..1c828c1 100644
--- a/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py
+++ b/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py
@@ -325,16 +325,21 @@ class ComputeNodeMonitorActor(config.actor_class):
def shutdown_eligible(self):
if not self._shutdowns.window_open():
return False
- elif self.arvados_node is None:
+ if self.arvados_node is None:
# Node is unpaired.
# If it hasn't pinged Arvados after boot_fail seconds, shut it down
return not timestamp_fresh(self.cloud_node_start_time, self.boot_fail_after)
- elif arvados_node_missing(self.arvados_node, self.node_stale_after) and self._cloud.broken(self.cloud_node):
+ missing = arvados_node_missing(self.arvados_node, self.node_stale_after)
+ if missing and self._cloud.broken(self.cloud_node):
# Node is paired, but Arvados says it is missing and the cloud says the node
# is in an error state, so shut it down.
return True
- else:
- return self.in_state('idle')
+ if missing is None and self._cloud.broken(self.cloud_node):
+ self._logger.warning(
+ "cloud reports broken node, but paired node %s never pinged "
+ "(bug?) -- skipped check for node_stale_after",
+ self.arvados_node['uuid'])
+ return self.in_state('idle')
def consider_shutdown(self):
next_opening = self._shutdowns.next_opening()
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits mailing list