[ARVADOS] created: e7e61a29596d29198655b072845fecd77465044b
Git user
git at public.curoverse.com
Thu Jun 8 10:28:46 EDT 2017
at e7e61a29596d29198655b072845fecd77465044b (commit)
commit e7e61a29596d29198655b072845fecd77465044b
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date: Thu Jun 8 10:23:55 2017 -0400
11836: status.json includes node sizes and node counts, and includes individual node states
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curoverse.com>
diff --git a/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py b/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py
index 4463ec6..2158d6c 100644
--- a/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py
+++ b/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py
@@ -16,7 +16,7 @@ from .. import \
arvados_node_fqdn, arvados_node_mtime, arvados_timestamp, timestamp_fresh, \
arvados_node_missing, RetryMixin
from ...clientactor import _notify_subscribers
-from ... import config
+from ... import config, status
from .transitions import transitions
QuotaExceeded = "QuotaExceeded"
@@ -346,6 +346,9 @@ class ComputeNodeMonitorActor(config.actor_class):
self._set_logger()
self._timer.schedule(self.cloud_node_start_time + self.boot_fail_after, self._later.consider_shutdown)
+ def on_stop(self):
+ status.tracker.update({"node_"+self.cloud_node.name: None})
+
def subscribe(self, subscriber):
self.subscribers.add(subscriber)
@@ -428,6 +431,20 @@ class ComputeNodeMonitorActor(config.actor_class):
idle_grace = 'idle exceeded'
node_state = (crunch_worker_state, window, boot_grace, idle_grace)
+
+ arvados_node = None
+ if self.arvados_node:
+ arvados_node = self.arvados_node.copy()
+ arvados_node["info"] = arvados_node["info"].copy()
+ if "ping_secret" in arvados_node["info"]:
+ del arvados_node["info"]["ping_secret"]
+ status.tracker.update({"node_"+self.cloud_node.name: {
+ "id": self.cloud_node.id,
+ "state": list(node_state),
+ "size": self.cloud_node.size.name,
+ "arvados": arvados_node
+ }})
+
t = transitions[node_state]
if t is not None:
# yes, shutdown eligible
diff --git a/services/nodemanager/arvnodeman/daemon.py b/services/nodemanager/arvnodeman/daemon.py
index 7e63c78..5b4706b 100644
--- a/services/nodemanager/arvnodeman/daemon.py
+++ b/services/nodemanager/arvnodeman/daemon.py
@@ -5,6 +5,7 @@ from __future__ import absolute_import, print_function
import functools
import logging
import time
+import socket
import pykka
@@ -256,15 +257,18 @@ class NodeManagerDaemonActor(actor_class):
return states + pykka.get_all(proxy_states)
def _update_tracker(self):
- updates = {
- k: 0
- for k in status.tracker.keys()
- if k.startswith('nodes_')
- }
- for s in self._node_states(size=None):
- updates.setdefault('nodes_'+s, 0)
- updates['nodes_'+s] += 1
+ updates = {"nodes_"+k: v for k,v in self._state_counts(None).items()}
+ updates['timestamp'] = time.strftime(cnode.ARVADOS_TIMEFMT, time.gmtime())
+ updates['nodes_wish'] = len(self.last_wishlist)
+ updates['nodes_max'] = self.max_nodes
+ updates['nodes_quota'] = self.node_quota
updates['nodes_wish'] = len(self.last_wishlist)
+ updates['status'] = "OK"
+
+ for size in self.server_calculator.cloud_sizes:
+ updates["size_"+size.name] = {"nodes_"+k: v for k,v in self._state_counts(size).items()}
+ for attr in ['id', 'name', 'ram', 'disk', 'bandwidth', 'price']:
+ updates["size_"+size.name][attr] = getattr(size, attr)
status.tracker.update(updates)
def _state_counts(self, size):
diff --git a/services/nodemanager/arvnodeman/launcher.py b/services/nodemanager/arvnodeman/launcher.py
index 72a285b..bed9fad 100644
--- a/services/nodemanager/arvnodeman/launcher.py
+++ b/services/nodemanager/arvnodeman/launcher.py
@@ -119,6 +119,12 @@ def main(args=None):
signal.signal(sigcode, shutdown_signal)
status.Server(config).start()
+ import socket
+ updates = {}
+ updates['hostname'] = socket.getfqdn()
+ updates['servicetype'] = "arvados_nodemanager"
+ updates['version'] = __version__
+ status.tracker.update(updates)
try:
root_logger = setup_logging(config.get('Logging', 'file'), **config.log_levels())
diff --git a/services/nodemanager/tests/fake.cfg.template b/services/nodemanager/tests/fake.cfg.template
index eacd53f..6a70ad3 100644
--- a/services/nodemanager/tests/fake.cfg.template
+++ b/services/nodemanager/tests/fake.cfg.template
@@ -6,10 +6,10 @@
# a snapshot of internal state.
# Management server listening address (default 127.0.0.1)
-#address = 0.0.0.0
+address = 0.0.0.0
# Management server port number (default -1, server is disabled)
-#port = 8989
+port = 8989
[Daemon]
# The dispatcher can customize the start and stop procedure for
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits mailing list