[ARVADOS] created: d146a62dd22c5b301f94282fc1282d81ce777668
Git user
git at public.curoverse.com
Mon Feb 6 14:00:59 EST 2017
at d146a62dd22c5b301f94282fc1282d81ce777668 (commit)
commit d146a62dd22c5b301f94282fc1282d81ce777668
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date: Mon Feb 6 14:00:53 2017 -0500
6520: Call squeue to include slurm jobs in wishlist. Call sinfo to update
crunch_worker_state in arvados node records.
diff --git a/services/nodemanager/arvnodeman/jobqueue.py b/services/nodemanager/arvnodeman/jobqueue.py
index 87cf738..5ea2c5c 100644
--- a/services/nodemanager/arvnodeman/jobqueue.py
+++ b/services/nodemanager/arvnodeman/jobqueue.py
@@ -3,6 +3,7 @@
from __future__ import absolute_import, print_function
import logging
+import subprocess
from . import clientactor
from .config import ARVADOS_ERRORS
@@ -109,7 +110,24 @@ class JobQueueMonitorActor(clientactor.RemotePollLoopActor):
self._calculator = server_calc
def _send_request(self):
- return self._client.jobs().queue().execute()['items']
+ # cpus, memory, temporary disk space, reason, job name
+ squeue_out = subprocess.check_output(["squeue", "--state=PENDING", "--noheader", "--format=%c %m %d %r %j"])
+ queuelist = []
+ for out in squeue_out.splitlines():
+ cpu, ram, disk, reason, jobname = out.split(" ", 4)
+ if reason == "Resources":
+ queuelist.append({
+ "uuid": jobname,
+ "runtime_constraints": {
+ "min_cores_per_node": cpu,
+ "min_ram_mb_per_node": ram,
+ "min_scratch_mb_per_node": disk
+ }
+ })
+
+ queuelist.extend(self._client.jobs().queue().execute()['items'])
+
+ return queuelist
def _got_response(self, queue):
server_list = self._calculator.servers_for_queue(queue)
diff --git a/services/nodemanager/arvnodeman/nodelist.py b/services/nodemanager/arvnodeman/nodelist.py
index f1a661e..6d436be 100644
--- a/services/nodemanager/arvnodeman/nodelist.py
+++ b/services/nodemanager/arvnodeman/nodelist.py
@@ -2,9 +2,13 @@
from __future__ import absolute_import, print_function
+import subprocess
+
from . import clientactor
from . import config
+import arvados.util
+
class ArvadosNodeListMonitorActor(clientactor.RemotePollLoopActor):
"""Actor to poll the Arvados node list.
@@ -19,8 +23,27 @@ class ArvadosNodeListMonitorActor(clientactor.RemotePollLoopActor):
return node['uuid']
def _send_request(self):
- return self._client.nodes().list(limit=10000).execute()['items']
-
+ nodelist = arvados.util.list_all(self._client.nodes)
+
+ # node hostname, state
+ sinfo_out = subprocess.check_output(["sinfo", "--noheader", "--format=%n %t"])
+ nodestates = {}
+ for out in sinfo_out.splitlines():
+ nodename, state = out.split(" ", 2)
+ if state in ('alloc', 'comp'):
+ nodestates[nodename] = 'busy'
+ elif state == 'idle':
+ nodestates[nodename] = 'idle'
+ else:
+ nodestates[nodename] = 'down'
+
+ for n in nodelist:
+ if n["slot_number"] and n["hostname"] and n["hostname"] in nodestates:
+ n["crunch_worker_state"] = nodestates[n["hostname"]]
+ else:
+ n["crunch_worker_state"] = 'down'
+
+ return nodelist
class CloudNodeListMonitorActor(clientactor.RemotePollLoopActor):
"""Actor to poll the cloud node list.
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list