[ARVADOS] created: bf9af31f49954d949317475bdcdc0694d247f82d
Git user
git at public.curoverse.com
Tue Feb 14 16:01:44 EST 2017
at bf9af31f49954d949317475bdcdc0694d247f82d (commit)
commit bf9af31f49954d949317475bdcdc0694d247f82d
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date: Tue Feb 14 16:01:21 2017 -0500
6520: Add testcase getting wishlist from squeue.
diff --git a/services/nodemanager/tests/test_jobqueue.py b/services/nodemanager/tests/test_jobqueue.py
index d4dc42f..b1bf3f6 100644
--- a/services/nodemanager/tests/test_jobqueue.py
+++ b/services/nodemanager/tests/test_jobqueue.py
@@ -3,6 +3,7 @@
from __future__ import absolute_import, print_function
import unittest
+import mock
import arvnodeman.jobqueue as jobqueue
from . import testutil
@@ -121,13 +122,29 @@ class JobQueueMonitorActorTestCase(testutil.RemotePollLoopActorTestMixin,
super(JobQueueMonitorActorTestCase, self).build_monitor(*args, **kwargs)
self.client.jobs().queue().execute.side_effect = side_effect
- def test_subscribers_get_server_lists(self):
+ @mock.patch("subprocess.check_output")
+ def test_subscribers_get_server_lists(self, mock_squeue):
+ mock_squeue.return_value = ""
+
self.build_monitor([{'items': [1, 2]}], self.MockCalculator())
self.monitor.subscribe(self.subscriber).get(self.TIMEOUT)
self.stop_proxy(self.monitor)
self.subscriber.assert_called_with([testutil.MockSize(1),
testutil.MockSize(2)])
+ @mock.patch("subprocess.check_output")
+ def test_squeue_server_list(self, mock_squeue):
+ mock_squeue.return_value = """1 0 0 Resources zzzzz-zzzzz-zzzzzzzzzzzzzzy
+2 0 0 Resources zzzzz-zzzzz-zzzzzzzzzzzzzzz
+"""
+
+ super(JobQueueMonitorActorTestCase, self).build_monitor(jobqueue.ServerCalculator(
+ [(testutil.MockSize(n), {'cores': n, 'ram': n, 'scratch': n}) for n in range(1, 3)]))
+ self.monitor.subscribe(self.subscriber).get(self.TIMEOUT)
+ self.stop_proxy(self.monitor)
+ self.subscriber.assert_called_with([testutil.MockSize(1),
+ testutil.MockSize(2)])
+
if __name__ == '__main__':
unittest.main()
commit 4440f049a94bd3570271f8f0b2461d7f3f3c4582
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date: Mon Feb 6 14:00:53 2017 -0500
6520: Call squeue to include slurm jobs in wishlist. Call sinfo to update
crunch_worker_state in arvados node records.
diff --git a/services/nodemanager/arvnodeman/jobqueue.py b/services/nodemanager/arvnodeman/jobqueue.py
index 87cf738..5ea2c5c 100644
--- a/services/nodemanager/arvnodeman/jobqueue.py
+++ b/services/nodemanager/arvnodeman/jobqueue.py
@@ -3,6 +3,7 @@
from __future__ import absolute_import, print_function
import logging
+import subprocess
from . import clientactor
from .config import ARVADOS_ERRORS
@@ -109,7 +110,24 @@ class JobQueueMonitorActor(clientactor.RemotePollLoopActor):
self._calculator = server_calc
def _send_request(self):
- return self._client.jobs().queue().execute()['items']
+ # cpus, memory, temporary disk space, reason, job name
+ squeue_out = subprocess.check_output(["squeue", "--state=PENDING", "--noheader", "--format=%c %m %d %r %j"])
+ queuelist = []
+ for out in squeue_out.splitlines():
+ cpu, ram, disk, reason, jobname = out.split(" ", 4)
+ if reason == "Resources":
+ queuelist.append({
+ "uuid": jobname,
+ "runtime_constraints": {
+ "min_cores_per_node": cpu,
+ "min_ram_mb_per_node": ram,
+ "min_scratch_mb_per_node": disk
+ }
+ })
+
+ queuelist.extend(self._client.jobs().queue().execute()['items'])
+
+ return queuelist
def _got_response(self, queue):
server_list = self._calculator.servers_for_queue(queue)
diff --git a/services/nodemanager/arvnodeman/nodelist.py b/services/nodemanager/arvnodeman/nodelist.py
index f1a661e..6d436be 100644
--- a/services/nodemanager/arvnodeman/nodelist.py
+++ b/services/nodemanager/arvnodeman/nodelist.py
@@ -2,9 +2,13 @@
from __future__ import absolute_import, print_function
+import subprocess
+
from . import clientactor
from . import config
+import arvados.util
+
class ArvadosNodeListMonitorActor(clientactor.RemotePollLoopActor):
"""Actor to poll the Arvados node list.
@@ -19,8 +23,27 @@ class ArvadosNodeListMonitorActor(clientactor.RemotePollLoopActor):
return node['uuid']
def _send_request(self):
- return self._client.nodes().list(limit=10000).execute()['items']
-
+ nodelist = arvados.util.list_all(self._client.nodes)
+
+ # node hostname, state
+ sinfo_out = subprocess.check_output(["sinfo", "--noheader", "--format=%n %t"])
+ nodestates = {}
+ for out in sinfo_out.splitlines():
+ nodename, state = out.split(" ", 2)
+ if state in ('alloc', 'comp'):
+ nodestates[nodename] = 'busy'
+ elif state == 'idle':
+ nodestates[nodename] = 'idle'
+ else:
+ nodestates[nodename] = 'down'
+
+ for n in nodelist:
+ if n["slot_number"] and n["hostname"] and n["hostname"] in nodestates:
+ n["crunch_worker_state"] = nodestates[n["hostname"]]
+ else:
+ n["crunch_worker_state"] = 'down'
+
+ return nodelist
class CloudNodeListMonitorActor(clientactor.RemotePollLoopActor):
"""Actor to poll the cloud node list.
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list