[ARVADOS] updated: f77d08dd57a1021525717c8669296eb3e463c5f7

Git user git at public.curoverse.com
Wed Jul 5 10:30:10 EDT 2017


Summary of changes:
 services/nodemanager/arvnodeman/jobqueue.py | 31 +++++++---
 services/nodemanager/tests/test_jobqueue.py | 96 ++++++++++++++++++-----------
 2 files changed, 80 insertions(+), 47 deletions(-)

       via  f77d08dd57a1021525717c8669296eb3e463c5f7 (commit)
      from  3dad67f271492790f63e72ffcbba432cf8e00fa5 (commit)

The revisions listed above that are new to this repository have not
appeared in any other notification email, so we list those revisions
in full below.


commit f77d08dd57a1021525717c8669296eb3e463c5f7
Author: Lucas Di Pentima <lucas at curoverse.com>
Date:   Wed Jul 5 11:28:37 2017 -0300

    7475: Catch exceptions when trying to cancel an unsatisfiable job,
    logging an error message if the log or cancel API call fails.
    Added tests.
    
    Arvados-DCO-1.1-Signed-off-by: Lucas Di Pentima <lucas at curoverse.com>
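
For reference, the guard added by this commit follows a simple pattern: the
log-and-cancel API calls for each unsatisfiable job are wrapped in their own
try/except, so a failure on one job is reported and the remaining jobs are
still processed. The sketch below reproduces that structure against a fake
API client; FakeClient, FakeLogs, FakeJobs and the sample UUID are
illustrative stand-ins, not part of the actual change.

    import logging

    logging.basicConfig(level=logging.DEBUG)
    logger = logging.getLogger("jobqueue-sketch")

    class FakeRequest(object):
        # Mimics the .execute() call shape of the API client methods.
        def __init__(self, fail=False):
            self.fail = fail
        def execute(self):
            if self.fail:
                raise RuntimeError("simulated API error")

    class FakeLogs(object):
        def create(self, body):
            return FakeRequest()

    class FakeJobs(object):
        def cancel(self, uuid):
            return FakeRequest(fail=True)  # force the error path for the demo

    class FakeClient(object):
        def logs(self):
            return FakeLogs()
        def jobs(self):
            return FakeJobs()

    client = FakeClient()
    unsatisfiable_jobs = {'zzzzz-jjjjj-000000000000000': 'example reason'}

    # Same structure as the new loop in JobQueueMonitorActor._got_response():
    # log the reason, try to cancel, and on any exception log an error and
    # move on to the next job instead of crashing the actor.
    for job_uuid, reason in unsatisfiable_jobs.items():
        logger.debug("Cancelling unsatisfiable job '%s'", job_uuid)
        try:
            client.logs().create(body={
                'object_uuid': job_uuid,
                'event_type': 'stderr',
                'properties': {'text': reason},
            }).execute()
            client.jobs().cancel(uuid=job_uuid).execute()
        except Exception as error:
            logger.error("Trying to cancel job '%s': %s", job_uuid, error)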

diff --git a/services/nodemanager/arvnodeman/jobqueue.py b/services/nodemanager/arvnodeman/jobqueue.py
index dbcbd92..895e03d 100644
--- a/services/nodemanager/arvnodeman/jobqueue.py
+++ b/services/nodemanager/arvnodeman/jobqueue.py
@@ -89,13 +89,19 @@ class ServerCalculator(object):
             want_count = max(1, self.coerce_int(constraints.get('min_nodes'), 1))
             cloud_size = self.cloud_size_for_constraints(constraints)
             if cloud_size is None:
-                unsatisfiable_jobs[job['uuid']] = 'Requirements for a single node exceed the available cloud node size'
+                unsatisfiable_jobs[job['uuid']] = (
+                    'Requirements for a single node exceed the available '
+                    'cloud node size')
             elif (want_count > self.max_nodes):
-                unsatisfiable_jobs[job['uuid']] = "Job's min_nodes constraint is greater than the configured max_nodes (%d)" % self.max_nodes
+                unsatisfiable_jobs[job['uuid']] = (
+                    "Job's min_nodes constraint is greater than the configured "
+                    "max_nodes (%d)" % self.max_nodes)
             elif (want_count*cloud_size.price <= self.max_price):
                 servers.extend([cloud_size.real] * want_count)
             else:
-                unsatisfiable_jobs[job['uuid']] = "Job's price (%d) is above system's max_price limit (%d)" % (want_count*cloud_size.price, self.max_price)
+                unsatisfiable_jobs[job['uuid']] = (
+                    "Job's price (%d) is above system's max_price "
+                    "limit (%d)" % (want_count*cloud_size.price, self.max_price))
         return (servers, unsatisfiable_jobs)
 
     def cheapest_size(self):
@@ -170,13 +176,18 @@ class JobQueueMonitorActor(clientactor.RemotePollLoopActor):
         # Cancel any job with unsatisfiable requirements, emitting a log
         # explaining why.
         for job_uuid, reason in unsatisfiable_jobs.iteritems():
-            self._client.logs().create(body={
-                'object_uuid': job_uuid,
-                'event_type': 'stderr',
-                'properties': {'text': reason},
-            }).execute()
-            self._client.jobs().cancel(uuid=job['uuid']).execute()
-            self._logger.debug("Unsatisfiable job '%s' cancelled", job_uuid)
+            self._logger.debug("Cancelling unsatisfiable job '%s'", job_uuid)
+            try:
+                self._client.logs().create(body={
+                    'object_uuid': job_uuid,
+                    'event_type': 'stderr',
+                    'properties': {'text': reason},
+                }).execute()
+                self._client.jobs().cancel(uuid=job_uuid).execute()
+            except Exception as error:
+                self._logger.error("Trying to cancel job '%s': %s",
+                                   job_uuid,
+                                   error)
         self._logger.debug("Calculated wishlist: %s",
                            ', '.join(s.name for s in server_list) or "(empty)")
         return super(JobQueueMonitorActor, self)._got_response(server_list)
diff --git a/services/nodemanager/tests/test_jobqueue.py b/services/nodemanager/tests/test_jobqueue.py
index 6b4f4b6..ab2258d 100644
--- a/services/nodemanager/tests/test_jobqueue.py
+++ b/services/nodemanager/tests/test_jobqueue.py
@@ -17,11 +17,10 @@ class ServerCalculatorTestCase(unittest.TestCase):
             [(testutil.MockSize(n), {'cores': n}) for n in factors], **kwargs)
 
     def calculate(self, servcalc, *constraints):
-        servlist, _ = servcalc.servers_for_queue(
+        return servcalc.servers_for_queue(
             [{'uuid': 'zzzzz-jjjjj-{:015x}'.format(index),
               'runtime_constraints': cdict}
              for index, cdict in enumerate(constraints)])
-        return servlist
 
     def test_empty_queue_needs_no_servers(self):
         servcalc = self.make_calculator([1])
@@ -29,59 +28,65 @@ class ServerCalculatorTestCase(unittest.TestCase):
 
     def test_easy_server_count(self):
         servcalc = self.make_calculator([1])
-        servlist = self.calculate(servcalc, {'min_nodes': 3})
+        servlist, _ = self.calculate(servcalc, {'min_nodes': 3})
         self.assertEqual(3, len(servlist))
 
     def test_default_5pct_ram_value_decrease(self):
         servcalc = self.make_calculator([1])
-        servlist = self.calculate(servcalc, {'min_ram_mb_per_node': 128})
+        servlist, _ = self.calculate(servcalc, {'min_ram_mb_per_node': 128})
         self.assertEqual(0, len(servlist))
-        servlist = self.calculate(servcalc, {'min_ram_mb_per_node': 121})
+        servlist, _ = self.calculate(servcalc, {'min_ram_mb_per_node': 121})
         self.assertEqual(1, len(servlist))
 
     def test_custom_node_mem_scaling_factor(self):
         # Simulate a custom 'node_mem_scaling' config parameter by passing
         # the value to ServerCalculator
         servcalc = self.make_calculator([1], node_mem_scaling=0.5)
-        servlist = self.calculate(servcalc, {'min_ram_mb_per_node': 128})
+        servlist, _ = self.calculate(servcalc, {'min_ram_mb_per_node': 128})
         self.assertEqual(0, len(servlist))
-        servlist = self.calculate(servcalc, {'min_ram_mb_per_node': 64})
+        servlist, _ = self.calculate(servcalc, {'min_ram_mb_per_node': 64})
         self.assertEqual(1, len(servlist))
 
     def test_implicit_server_count(self):
         servcalc = self.make_calculator([1])
-        servlist = self.calculate(servcalc, {}, {'min_nodes': 3})
+        servlist, _ = self.calculate(servcalc, {}, {'min_nodes': 3})
         self.assertEqual(4, len(servlist))
 
     def test_bad_min_nodes_override(self):
         servcalc = self.make_calculator([1])
-        servlist = self.calculate(servcalc,
-                                  {'min_nodes': -2}, {'min_nodes': 'foo'})
+        servlist, _ = self.calculate(servcalc,
+                                     {'min_nodes': -2}, {'min_nodes': 'foo'})
         self.assertEqual(2, len(servlist))
 
-    def test_ignore_unsatisfiable_jobs(self):
+    def test_ignore_and_return_unsatisfiable_jobs(self):
         servcalc = self.make_calculator([1], max_nodes=9)
-        servlist = self.calculate(servcalc,
-                                  {'min_cores_per_node': 2},
-                                  {'min_ram_mb_per_node': 256},
-                                  {'min_nodes': 6},
-                                  {'min_nodes': 12},
-                                  {'min_scratch_mb_per_node': 300000})
+        servlist, u_jobs = self.calculate(servcalc,
+                                          {'min_cores_per_node': 2},
+                                          {'min_ram_mb_per_node': 256},
+                                          {'min_nodes': 6},
+                                          {'min_nodes': 12},
+                                          {'min_scratch_mb_per_node': 300000})
         self.assertEqual(6, len(servlist))
+        # Only unsatisfiable jobs are returned on u_jobs
+        self.assertIn('zzzzz-jjjjj-000000000000000', u_jobs.keys())
+        self.assertIn('zzzzz-jjjjj-000000000000001', u_jobs.keys())
+        self.assertNotIn('zzzzz-jjjjj-000000000000002', u_jobs.keys())
+        self.assertIn('zzzzz-jjjjj-000000000000003', u_jobs.keys())
+        self.assertIn('zzzzz-jjjjj-000000000000004', u_jobs.keys())
 
     def test_ignore_too_expensive_jobs(self):
         servcalc = self.make_calculator([1, 2], max_nodes=12, max_price=6)
-        servlist = self.calculate(servcalc,
-                                  {'min_cores_per_node': 1, 'min_nodes': 6})
+        servlist, _ = self.calculate(servcalc,
+                                     {'min_cores_per_node': 1, 'min_nodes': 6})
         self.assertEqual(6, len(servlist))
 
-        servlist = self.calculate(servcalc,
-                                  {'min_cores_per_node': 2, 'min_nodes': 6})
+        servlist, _ = self.calculate(servcalc,
+                                     {'min_cores_per_node': 2, 'min_nodes': 6})
         self.assertEqual(0, len(servlist))
 
     def test_job_requesting_max_nodes_accepted(self):
         servcalc = self.make_calculator([1], max_nodes=4)
-        servlist = self.calculate(servcalc, {'min_nodes': 4})
+        servlist, _ = self.calculate(servcalc, {'min_nodes': 4})
         self.assertEqual(4, len(servlist))
 
     def test_cheapest_size(self):
@@ -90,37 +95,37 @@ class ServerCalculatorTestCase(unittest.TestCase):
 
     def test_next_biggest(self):
         servcalc = self.make_calculator([1, 2, 4, 8])
-        servlist = self.calculate(servcalc,
-                                  {'min_cores_per_node': 3},
-                                  {'min_cores_per_node': 6})
+        servlist, _ = self.calculate(servcalc,
+                                     {'min_cores_per_node': 3},
+                                     {'min_cores_per_node': 6})
         self.assertEqual([servcalc.cloud_sizes[2].id,
                           servcalc.cloud_sizes[3].id],
                          [s.id for s in servlist])
 
     def test_multiple_sizes(self):
         servcalc = self.make_calculator([1, 2])
-        servlist = self.calculate(servcalc,
-                                  {'min_cores_per_node': 2},
-                                  {'min_cores_per_node': 1},
-                                  {'min_cores_per_node': 1})
+        servlist, _ = self.calculate(servcalc,
+                                     {'min_cores_per_node': 2},
+                                     {'min_cores_per_node': 1},
+                                     {'min_cores_per_node': 1})
         self.assertEqual([servcalc.cloud_sizes[1].id,
                           servcalc.cloud_sizes[0].id,
                           servcalc.cloud_sizes[0].id],
                          [s.id for s in servlist])
 
-        servlist = self.calculate(servcalc,
-                                  {'min_cores_per_node': 1},
-                                  {'min_cores_per_node': 2},
-                                  {'min_cores_per_node': 1})
+        servlist, _ = self.calculate(servcalc,
+                                     {'min_cores_per_node': 1},
+                                     {'min_cores_per_node': 2},
+                                     {'min_cores_per_node': 1})
         self.assertEqual([servcalc.cloud_sizes[0].id,
                           servcalc.cloud_sizes[1].id,
                           servcalc.cloud_sizes[0].id],
                          [s.id for s in servlist])
 
-        servlist = self.calculate(servcalc,
-                                  {'min_cores_per_node': 1},
-                                  {'min_cores_per_node': 1},
-                                  {'min_cores_per_node': 2})
+        servlist, _ = self.calculate(servcalc,
+                                     {'min_cores_per_node': 1},
+                                     {'min_cores_per_node': 1},
+                                     {'min_cores_per_node': 2})
         self.assertEqual([servcalc.cloud_sizes[0].id,
                           servcalc.cloud_sizes[0].id,
                           servcalc.cloud_sizes[1].id],
@@ -132,17 +137,34 @@ class JobQueueMonitorActorTestCase(testutil.RemotePollLoopActorTestMixin,
                                    unittest.TestCase):
     TEST_CLASS = jobqueue.JobQueueMonitorActor
 
+
     class MockCalculator(object):
         @staticmethod
         def servers_for_queue(queue):
             return ([testutil.MockSize(n) for n in queue], {})
 
 
+    class MockCalculatorUnsatisfiableJobs(object):
+        @staticmethod
+        def servers_for_queue(queue):
+            return ([], {k: "Unsatisfiable job mock" for k in queue})
+
+
     def build_monitor(self, side_effect, *args, **kwargs):
         super(JobQueueMonitorActorTestCase, self).build_monitor(*args, **kwargs)
         self.client.jobs().queue().execute.side_effect = side_effect
 
     @mock.patch("subprocess.check_output")
+    def test_unsatisfiable_jobs(self, mock_squeue):
+        mock_squeue.return_value = ""
+
+        self.build_monitor([{'items': ['job1']}],
+                           self.MockCalculatorUnsatisfiableJobs(), True, True)
+        self.monitor.subscribe(self.subscriber).get(self.TIMEOUT)
+        self.stop_proxy(self.monitor)
+        self.client.jobs().cancel.assert_called_with(uuid='job1')
+
+    @mock.patch("subprocess.check_output")
     def test_subscribers_get_server_lists(self, mock_squeue):
         mock_squeue.return_value = ""
 

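A related note on the calling convention exercised by the updated tests:
ServerCalculator.servers_for_queue() returns a (server_list,
unsatisfiable_jobs) pair, and the test helper now passes that pair straight
through so each test can inspect both values. A minimal sketch of that
contract, using a stub modeled on the MockCalculatorUnsatisfiableJobs test
double (the stub and sample queue are illustrative only):

    # Stub modeled on MockCalculatorUnsatisfiableJobs above; the real
    # ServerCalculator applies node-sizing logic instead of rejecting
    # every job unconditionally.
    class StubCalculator(object):
        @staticmethod
        def servers_for_queue(queue):
            return ([], {job['uuid']: "Unsatisfiable job (stub)"
                         for job in queue})

    queue = [{'uuid': 'zzzzz-jjjjj-000000000000000',
              'runtime_constraints': {'min_nodes': 1}}]
    server_list, unsatisfiable_jobs = StubCalculator.servers_for_queue(queue)
    assert server_list == []
    assert 'zzzzz-jjjjj-000000000000000' in unsatisfiable_jobs
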
-----------------------------------------------------------------------


hooks/post-receive
-- 

More information about the arvados-commits mailing list