[ARVADOS] created: d146a62dd22c5b301f94282fc1282d81ce777668

Git user git at public.curoverse.com
Mon Feb 6 14:00:59 EST 2017


        at  d146a62dd22c5b301f94282fc1282d81ce777668 (commit)


commit d146a62dd22c5b301f94282fc1282d81ce777668
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date:   Mon Feb 6 14:00:53 2017 -0500

    6520: Call squeue to include slurm jobs in wishlist.  Call sinfo to update
    crunch_worker_state in arvados node records.

diff --git a/services/nodemanager/arvnodeman/jobqueue.py b/services/nodemanager/arvnodeman/jobqueue.py
index 87cf738..5ea2c5c 100644
--- a/services/nodemanager/arvnodeman/jobqueue.py
+++ b/services/nodemanager/arvnodeman/jobqueue.py
@@ -3,6 +3,7 @@
 from __future__ import absolute_import, print_function
 
 import logging
+import subprocess
 
 from . import clientactor
 from .config import ARVADOS_ERRORS
@@ -109,7 +110,24 @@ class JobQueueMonitorActor(clientactor.RemotePollLoopActor):
         self._calculator = server_calc
 
     def _send_request(self):
-        return self._client.jobs().queue().execute()['items']
+        # cpus, memory, temporary disk space, reason, job name
+        squeue_out = subprocess.check_output(["squeue", "--state=PENDING", "--noheader", "--format=%c %m %d %r %j"])
+        queuelist = []
+        for out in squeue_out.splitlines():
+            cpu, ram, disk, reason, jobname = out.split(" ", 4)
+            if reason == "Resources":
+                queuelist.append({
+                    "uuid": jobname,
+                    "runtime_constraints": {
+                        "min_cores_per_node": cpu,
+                        "min_ram_mb_per_node": ram,
+                        "min_scratch_mb_per_node": disk
+                    }
+                })
+
+        queuelist.extend(self._client.jobs().queue().execute()['items'])
+
+        return queuelist
 
     def _got_response(self, queue):
         server_list = self._calculator.servers_for_queue(queue)
diff --git a/services/nodemanager/arvnodeman/nodelist.py b/services/nodemanager/arvnodeman/nodelist.py
index f1a661e..6d436be 100644
--- a/services/nodemanager/arvnodeman/nodelist.py
+++ b/services/nodemanager/arvnodeman/nodelist.py
@@ -2,9 +2,13 @@
 
 from __future__ import absolute_import, print_function
 
+import subprocess
+
 from . import clientactor
 from . import config
 
+import arvados.util
+
 class ArvadosNodeListMonitorActor(clientactor.RemotePollLoopActor):
     """Actor to poll the Arvados node list.
 
@@ -19,8 +23,27 @@ class ArvadosNodeListMonitorActor(clientactor.RemotePollLoopActor):
         return node['uuid']
 
     def _send_request(self):
-        return self._client.nodes().list(limit=10000).execute()['items']
-
+        nodelist = arvados.util.list_all(self._client.nodes)
+
+        # node hostname, state
+        sinfo_out = subprocess.check_output(["sinfo", "--noheader", "--format=%n %t"])
+        nodestates = {}
+        for out in sinfo_out.splitlines():
+            nodename, state = out.split(" ", 2)
+            if state in ('alloc', 'comp'):
+                nodestates[nodename] = 'busy'
+            elif state == 'idle':
+                nodestates[nodename] = 'idle'
+            else:
+                nodestates[nodename] = 'down'
+
+        for n in nodelist:
+            if n["slot_number"] and n["hostname"] and n["hostname"] in nodestates:
+                n["crunch_worker_state"] = nodestates[n["hostname"]]
+            else:
+                n["crunch_worker_state"] = 'down'
+
+        return nodelist
 
 class CloudNodeListMonitorActor(clientactor.RemotePollLoopActor):
     """Actor to poll the cloud node list.

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list