[ARVADOS] created: bf9af31f49954d949317475bdcdc0694d247f82d

Git user git at public.curoverse.com
Tue Feb 14 16:01:44 EST 2017


        at  bf9af31f49954d949317475bdcdc0694d247f82d (commit)


commit bf9af31f49954d949317475bdcdc0694d247f82d
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date:   Tue Feb 14 16:01:21 2017 -0500

    6520: Add testcase getting wishlist from squeue.

diff --git a/services/nodemanager/tests/test_jobqueue.py b/services/nodemanager/tests/test_jobqueue.py
index d4dc42f..b1bf3f6 100644
--- a/services/nodemanager/tests/test_jobqueue.py
+++ b/services/nodemanager/tests/test_jobqueue.py
@@ -3,6 +3,7 @@
 from __future__ import absolute_import, print_function
 
 import unittest
+import mock
 
 import arvnodeman.jobqueue as jobqueue
 from . import testutil
@@ -121,13 +122,29 @@ class JobQueueMonitorActorTestCase(testutil.RemotePollLoopActorTestMixin,
         super(JobQueueMonitorActorTestCase, self).build_monitor(*args, **kwargs)
         self.client.jobs().queue().execute.side_effect = side_effect
 
-    def test_subscribers_get_server_lists(self):
+    @mock.patch("subprocess.check_output")
+    def test_subscribers_get_server_lists(self, mock_squeue):
+        mock_squeue.return_value = ""
+
         self.build_monitor([{'items': [1, 2]}], self.MockCalculator())
         self.monitor.subscribe(self.subscriber).get(self.TIMEOUT)
         self.stop_proxy(self.monitor)
         self.subscriber.assert_called_with([testutil.MockSize(1),
                                             testutil.MockSize(2)])
 
+    @mock.patch("subprocess.check_output")
+    def test_squeue_server_list(self, mock_squeue):
+        mock_squeue.return_value = """1 0 0 Resources zzzzz-zzzzz-zzzzzzzzzzzzzzy
+2 0 0 Resources zzzzz-zzzzz-zzzzzzzzzzzzzzz
+"""
+
+        super(JobQueueMonitorActorTestCase, self).build_monitor(jobqueue.ServerCalculator(
+            [(testutil.MockSize(n), {'cores': n, 'ram': n, 'scratch': n}) for n in range(1, 3)]))
+        self.monitor.subscribe(self.subscriber).get(self.TIMEOUT)
+        self.stop_proxy(self.monitor)
+        self.subscriber.assert_called_with([testutil.MockSize(1),
+                                            testutil.MockSize(2)])
+
 
 if __name__ == '__main__':
     unittest.main()

commit 4440f049a94bd3570271f8f0b2461d7f3f3c4582
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date:   Mon Feb 6 14:00:53 2017 -0500

    6520: Call squeue to include slurm jobs in wishlist.  Call sinfo to update
    crunch_worker_state in arvados node records.

diff --git a/services/nodemanager/arvnodeman/jobqueue.py b/services/nodemanager/arvnodeman/jobqueue.py
index 87cf738..5ea2c5c 100644
--- a/services/nodemanager/arvnodeman/jobqueue.py
+++ b/services/nodemanager/arvnodeman/jobqueue.py
@@ -3,6 +3,7 @@
 from __future__ import absolute_import, print_function
 
 import logging
+import subprocess
 
 from . import clientactor
 from .config import ARVADOS_ERRORS
@@ -109,7 +110,24 @@ class JobQueueMonitorActor(clientactor.RemotePollLoopActor):
         self._calculator = server_calc
 
     def _send_request(self):
-        return self._client.jobs().queue().execute()['items']
+        # cpus, memory, temporary disk space, reason, job name
+        squeue_out = subprocess.check_output(["squeue", "--state=PENDING", "--noheader", "--format=%c %m %d %r %j"])
+        queuelist = []
+        for out in squeue_out.splitlines():
+            cpu, ram, disk, reason, jobname = out.split(" ", 4)
+            if reason == "Resources":
+                queuelist.append({
+                    "uuid": jobname,
+                    "runtime_constraints": {
+                        "min_cores_per_node": cpu,
+                        "min_ram_mb_per_node": ram,
+                        "min_scratch_mb_per_node": disk
+                    }
+                })
+
+        queuelist.extend(self._client.jobs().queue().execute()['items'])
+
+        return queuelist
 
     def _got_response(self, queue):
         server_list = self._calculator.servers_for_queue(queue)
diff --git a/services/nodemanager/arvnodeman/nodelist.py b/services/nodemanager/arvnodeman/nodelist.py
index f1a661e..6d436be 100644
--- a/services/nodemanager/arvnodeman/nodelist.py
+++ b/services/nodemanager/arvnodeman/nodelist.py
@@ -2,9 +2,13 @@
 
 from __future__ import absolute_import, print_function
 
+import subprocess
+
 from . import clientactor
 from . import config
 
+import arvados.util
+
 class ArvadosNodeListMonitorActor(clientactor.RemotePollLoopActor):
     """Actor to poll the Arvados node list.
 
@@ -19,8 +23,27 @@ class ArvadosNodeListMonitorActor(clientactor.RemotePollLoopActor):
         return node['uuid']
 
     def _send_request(self):
-        return self._client.nodes().list(limit=10000).execute()['items']
-
+        nodelist = arvados.util.list_all(self._client.nodes)
+
+        # node hostname, state
+        sinfo_out = subprocess.check_output(["sinfo", "--noheader", "--format=%n %t"])
+        nodestates = {}
+        for out in sinfo_out.splitlines():
+            nodename, state = out.split(" ", 2)
+            if state in ('alloc', 'comp'):
+                nodestates[nodename] = 'busy'
+            elif state == 'idle':
+                nodestates[nodename] = 'idle'
+            else:
+                nodestates[nodename] = 'down'
+
+        for n in nodelist:
+            if n["slot_number"] and n["hostname"] and n["hostname"] in nodestates:
+                n["crunch_worker_state"] = nodestates[n["hostname"]]
+            else:
+                n["crunch_worker_state"] = 'down'
+
+        return nodelist
 
 class CloudNodeListMonitorActor(clientactor.RemotePollLoopActor):
     """Actor to poll the cloud node list.

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list