[ARVADOS] created: e7e61a29596d29198655b072845fecd77465044b

Git user git at public.curoverse.com
Thu Jun 8 10:28:46 EDT 2017


        at  e7e61a29596d29198655b072845fecd77465044b (commit)


commit e7e61a29596d29198655b072845fecd77465044b
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date:   Thu Jun 8 10:23:55 2017 -0400

    11836: status.json includes node sizes and node counts, and includes individual node states
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curoverse.com>

diff --git a/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py b/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py
index 4463ec6..2158d6c 100644
--- a/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py
+++ b/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py
@@ -16,7 +16,7 @@ from .. import \
     arvados_node_fqdn, arvados_node_mtime, arvados_timestamp, timestamp_fresh, \
     arvados_node_missing, RetryMixin
 from ...clientactor import _notify_subscribers
-from ... import config
+from ... import config, status
 from .transitions import transitions
 
 QuotaExceeded = "QuotaExceeded"
@@ -346,6 +346,9 @@ class ComputeNodeMonitorActor(config.actor_class):
         self._set_logger()
         self._timer.schedule(self.cloud_node_start_time + self.boot_fail_after, self._later.consider_shutdown)
 
+    def on_stop(self):
+        status.tracker.update({"node_"+self.cloud_node.name: None})
+
     def subscribe(self, subscriber):
         self.subscribers.add(subscriber)
 
@@ -428,6 +431,20 @@ class ComputeNodeMonitorActor(config.actor_class):
         idle_grace = 'idle exceeded'
 
         node_state = (crunch_worker_state, window, boot_grace, idle_grace)
+
+        arvados_node = None
+        if self.arvados_node:
+            arvados_node = self.arvados_node.copy()
+            arvados_node["info"] = arvados_node["info"].copy()
+            if "ping_secret" in arvados_node["info"]:
+                del arvados_node["info"]["ping_secret"]
+        status.tracker.update({"node_"+self.cloud_node.name: {
+            "id": self.cloud_node.id,
+            "state": list(node_state),
+            "size": self.cloud_node.size.name,
+            "arvados": arvados_node
+        }})
+
         t = transitions[node_state]
         if t is not None:
             # yes, shutdown eligible
diff --git a/services/nodemanager/arvnodeman/daemon.py b/services/nodemanager/arvnodeman/daemon.py
index 7e63c78..5b4706b 100644
--- a/services/nodemanager/arvnodeman/daemon.py
+++ b/services/nodemanager/arvnodeman/daemon.py
@@ -5,6 +5,7 @@ from __future__ import absolute_import, print_function
 import functools
 import logging
 import time
+import socket
 
 import pykka
 
@@ -256,15 +257,18 @@ class NodeManagerDaemonActor(actor_class):
         return states + pykka.get_all(proxy_states)
 
     def _update_tracker(self):
-        updates = {
-            k: 0
-            for k in status.tracker.keys()
-            if k.startswith('nodes_')
-        }
-        for s in self._node_states(size=None):
-            updates.setdefault('nodes_'+s, 0)
-            updates['nodes_'+s] += 1
+        updates = {"nodes_"+k: v for k,v in self._state_counts(None).items()}
+        updates['timestamp'] = time.strftime(cnode.ARVADOS_TIMEFMT, time.gmtime())
+        updates['nodes_wish'] = len(self.last_wishlist)
+        updates['nodes_max'] = self.max_nodes
+        updates['nodes_quota'] = self.node_quota
         updates['nodes_wish'] = len(self.last_wishlist)
+        updates['status'] = "OK"
+
+        for size in self.server_calculator.cloud_sizes:
+            updates["size_"+size.name] = {"nodes_"+k: v for k,v in self._state_counts(size).items()}
+            for attr in ['id', 'name', 'ram', 'disk', 'bandwidth', 'price']:
+                updates["size_"+size.name][attr] = getattr(size, attr)
         status.tracker.update(updates)
 
     def _state_counts(self, size):
diff --git a/services/nodemanager/arvnodeman/launcher.py b/services/nodemanager/arvnodeman/launcher.py
index 72a285b..bed9fad 100644
--- a/services/nodemanager/arvnodeman/launcher.py
+++ b/services/nodemanager/arvnodeman/launcher.py
@@ -119,6 +119,12 @@ def main(args=None):
         signal.signal(sigcode, shutdown_signal)
 
     status.Server(config).start()
+    import socket
+    updates = {}
+    updates['hostname'] = socket.getfqdn()
+    updates['servicetype'] = "arvados_nodemanager"
+    updates['version'] = __version__
+    status.tracker.update(updates)
 
     try:
         root_logger = setup_logging(config.get('Logging', 'file'), **config.log_levels())
diff --git a/services/nodemanager/tests/fake.cfg.template b/services/nodemanager/tests/fake.cfg.template
index eacd53f..6a70ad3 100644
--- a/services/nodemanager/tests/fake.cfg.template
+++ b/services/nodemanager/tests/fake.cfg.template
@@ -6,10 +6,10 @@
 # a snapshot of internal state.
 
 # Management server listening address (default 127.0.0.1)
-#address = 0.0.0.0
+address = 0.0.0.0
 
 # Management server port number (default -1, server is disabled)
-#port = 8989
+port = 8989
 
 [Daemon]
 # The dispatcher can customize the start and stop procedure for

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list