[ARVADOS] created: 1.1.4-322-gd8dfc75
Git user
git at public.curoverse.com
Wed May 30 16:44:54 EDT 2018
at d8dfc75ec5c6cead3da0f3907466ce1b89373b69 (commit)
commit d8dfc75ec5c6cead3da0f3907466ce1b89373b69
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date: Wed May 30 16:32:20 2018 -0400
12061: Don't round down to 1s in timestamp parser.
ComputeNodeMonitorActor.offer_arvados_pair() pairs nodes only if
first_ping_time >= cloud_node_start_time
However, first_ping_time is passed through arvados_timestamp() before
this comparison, which was truncating the subsecond part -- so the
comparison was effectively
floor(first_ping_time) >= cloud_node_start_time
When first_ping_time (FPT) and cloud_node_start_time (CNST) differed
only in the subsecond part, this comparison failed, and the nodes could
never be paired. This caused sporadic failures in tests, where the two
values are often separated by less than a second.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>
diff --git a/services/nodemanager/arvnodeman/computenode/__init__.py b/services/nodemanager/arvnodeman/computenode/__init__.py
index 3c04118..b124c66 100644
--- a/services/nodemanager/arvnodeman/computenode/__init__.py
+++ b/services/nodemanager/arvnodeman/computenode/__init__.py
@@ -33,7 +33,7 @@ def arvados_timestamp(timestr):
subsecs = float(subsec_match.group(1))
timestr = timestr[:subsec_match.start()] + 'Z'
return calendar.timegm(time.strptime(timestr + 'UTC',
- ARVADOS_TIMEFMT + '%Z'))
+ ARVADOS_TIMEFMT + '%Z')) + subsecs
def timestamp_fresh(timestamp, fresh_time):
return (time.time() - timestamp) < fresh_time
diff --git a/services/nodemanager/tests/test_computenode.py b/services/nodemanager/tests/test_computenode.py
index 3f11ff6..898112b 100644
--- a/services/nodemanager/tests/test_computenode.py
+++ b/services/nodemanager/tests/test_computenode.py
@@ -37,3 +37,9 @@ class ShutdownTimerTestCase(unittest.TestCase):
time_mock.return_value += 200
self.assertEqual(961, timer.next_opening())
self.assertFalse(timer.window_open())
+
+
+class ArvadosTimestamp(unittest.TestCase):
+ def test_arvados_timestamp(self):
+ self.assertEqual(1527710178, cnode.arvados_timestamp('2018-05-30T19:56:18Z'))
+ self.assertEqual(1527710178.999371, cnode.arvados_timestamp('2018-05-30T19:56:18.999371Z'))
commit 67c988ea479ba47da2d8dc93695188f86833436d
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date: Wed May 30 16:30:45 2018 -0400
12061: Change busywait approach to preserve assertion messages.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>
diff --git a/services/nodemanager/tests/test_daemon.py b/services/nodemanager/tests/test_daemon.py
index 8050e69..d09cbf7 100644
--- a/services/nodemanager/tests/test_daemon.py
+++ b/services/nodemanager/tests/test_daemon.py
@@ -17,11 +17,24 @@ from arvnodeman.jobqueue import ServerCalculator
from arvnodeman.computenode.dispatch import ComputeNodeMonitorActor
from . import testutil
from . import test_status
+from . import pykka_timeout
import logging
class NodeManagerDaemonActorTestCase(testutil.ActorTestMixin,
unittest.TestCase):
+ def assertwait(self, f, timeout=pykka_timeout*2):
+ deadline = time.time() + timeout
+ while True:
+ try:
+ return f()
+ except AssertionError:
+ if time.time() > deadline:
+ raise
+ pass
+ time.sleep(.1)
+ self.daemon.ping().get(self.TIMEOUT)
+
def busywait(self, f):
for n in xrange(200):
ok = f()
@@ -146,8 +159,7 @@ class NodeManagerDaemonActorTestCase(testutil.ActorTestMixin,
self.assertIn('node_quota', status.tracker._latest)
def check_monitors_arvados_nodes(self, *arv_nodes):
- self.busywait(lambda: len(arv_nodes) == len(self.monitored_arvados_nodes()))
- self.assertItemsEqual(arv_nodes, self.monitored_arvados_nodes())
+ self.assertwait(lambda: self.assertItemsEqual(arv_nodes, self.monitored_arvados_nodes()))
def test_node_pairing(self):
cloud_node = testutil.cloud_node_mock(1)
@@ -257,7 +269,7 @@ class NodeManagerDaemonActorTestCase(testutil.ActorTestMixin,
arvados_nodes=[testutil.arvados_node_mock(1),
testutil.arvados_node_mock(2, last_ping_at='1970-01-01T01:02:03.04050607Z')],
want_sizes=[size])
- self.busywait(lambda: 2 == self.paired_monitor_count())
+ self.assertwait(lambda: self.assertEqual(2, self.paired_monitor_count()))
for mon_ref in self.monitor_list():
self.daemon.node_can_shutdown(mon_ref.proxy()).get(self.TIMEOUT)
self.assertEqual(1, self.node_shutdown.start.call_count)
@@ -269,7 +281,7 @@ class NodeManagerDaemonActorTestCase(testutil.ActorTestMixin,
arvados_nodes=[testutil.arvados_node_mock(1),
testutil.arvados_node_mock(2, last_ping_at='1970-01-01T01:02:03.04050607Z')],
want_sizes=[size])
- self.busywait(lambda: 2 == self.paired_monitor_count())
+ self.assertwait(lambda: self.assertEqual(2, self.paired_monitor_count()))
get_cloud_node = mock.MagicMock(name="get_cloud_node")
get_cloud_node.get.return_value = cloud_nodes[1]
mock_node_monitor = mock.MagicMock()
@@ -278,7 +290,7 @@ class NodeManagerDaemonActorTestCase(testutil.ActorTestMixin,
self.daemon.cloud_nodes.get()[cloud_nodes[1].id].shutdown_actor = mock_shutdown.proxy()
- self.busywait(lambda: 2 == self.alive_monitor_count())
+ self.assertwait(lambda: self.assertEqual(2, self.alive_monitor_count()))
for mon_ref in self.monitor_list():
self.daemon.node_can_shutdown(mon_ref.proxy()).get(self.TIMEOUT)
self.busywait(lambda: 1 == self.node_shutdown.start.call_count)
@@ -298,8 +310,8 @@ class NodeManagerDaemonActorTestCase(testutil.ActorTestMixin,
arv_node = testutil.arvados_node_mock(2, job_uuid=True)
self.make_daemon([testutil.cloud_node_mock(2, size=size)], [arv_node],
[size], avail_sizes=[(size, {"cores":1})])
- self.busywait(lambda: 1 == self.paired_monitor_count())
- self.busywait(lambda: self.node_setup.start.called)
+ self.assertwait(lambda: self.assertEqual(1, self.paired_monitor_count()))
+ self.assertwait(lambda: self.assertEqual(1, self.node_setup.start.called))
def test_boot_new_node_below_min_nodes(self):
min_size = testutil.MockSize(1)
@@ -543,7 +555,7 @@ class NodeManagerDaemonActorTestCase(testutil.ActorTestMixin,
arv_node = testutil.arvados_node_mock(1)
size = testutil.MockSize(1)
self.make_daemon(cloud_nodes=[cloud_node], arvados_nodes=[arv_node], want_sizes=[size])
- self.busywait(lambda: 1 == self.paired_monitor_count())
+ self.assertwait(lambda: self.assertEqual(1, self.paired_monitor_count()))
monitor = self.monitor_list()[0].proxy()
self.daemon.node_can_shutdown(monitor).get(self.TIMEOUT)
self.stop_proxy(self.daemon)
@@ -553,7 +565,7 @@ class NodeManagerDaemonActorTestCase(testutil.ActorTestMixin,
cloud_node = testutil.cloud_node_mock(1)
arv_node = testutil.arvados_node_mock(1)
self.make_daemon(cloud_nodes=[cloud_node], arvados_nodes=[arv_node], min_nodes=1)
- self.busywait(lambda: 1 == self.paired_monitor_count())
+ self.assertwait(lambda: self.assertEqual(1, self.paired_monitor_count()))
monitor = self.monitor_list()[0].proxy()
self.daemon.node_can_shutdown(monitor).get(self.TIMEOUT)
self.stop_proxy(self.daemon)
@@ -572,7 +584,7 @@ class NodeManagerDaemonActorTestCase(testutil.ActorTestMixin,
arv_nodes = [testutil.arvados_node_mock(3, job_uuid=True),
testutil.arvados_node_mock(4, job_uuid=None)]
self.make_daemon(cloud_nodes, arv_nodes, [size])
- self.busywait(lambda: 2 == self.paired_monitor_count())
+ self.assertwait(lambda: self.assertEqual(2, self.paired_monitor_count()))
for mon_ref in self.monitor_list():
monitor = mon_ref.proxy()
if monitor.cloud_node.get(self.TIMEOUT) is cloud_nodes[-1]:
@@ -591,13 +603,13 @@ class NodeManagerDaemonActorTestCase(testutil.ActorTestMixin,
self.daemon.node_can_shutdown(monitor).get(self.TIMEOUT)
self.last_shutdown.success.get.return_value = False
self.daemon.node_finished_shutdown(self.last_shutdown).get(self.TIMEOUT)
- self.busywait(lambda: 1 == self.paired_monitor_count())
+ self.assertwait(lambda: self.assertEqual(1, self.paired_monitor_count()))
self.daemon.node_can_shutdown(monitor).get(self.TIMEOUT)
self.last_shutdown.success.get.return_value = True
self.last_shutdown.stop.side_effect = lambda: monitor.stop()
self.daemon.node_finished_shutdown(self.last_shutdown).get(self.TIMEOUT)
- self.busywait(lambda: 0 == self.paired_monitor_count())
+ self.assertwait(lambda: self.assertEqual(0, self.paired_monitor_count()))
def test_nodes_shutting_down_replaced_below_max_nodes(self):
size = testutil.MockSize(6)
@@ -616,7 +628,7 @@ class NodeManagerDaemonActorTestCase(testutil.ActorTestMixin,
cloud_node = testutil.cloud_node_mock(7)
self.make_daemon([cloud_node], [testutil.arvados_node_mock(7)],
max_nodes=1)
- self.busywait(lambda: 1 == self.paired_monitor_count())
+ self.assertwait(lambda: self.assertEqual(1, self.paired_monitor_count()))
monitor = self.monitor_list()[0].proxy()
self.daemon.node_can_shutdown(monitor).get(self.TIMEOUT)
self.assertTrue(self.node_shutdown.start.called)
@@ -630,7 +642,7 @@ class NodeManagerDaemonActorTestCase(testutil.ActorTestMixin,
arv_nodes = [testutil.arvados_node_mock(n, size=size) for n in [8, 9]]
self.make_daemon(cloud_nodes, arv_nodes, [size],
avail_sizes=[(size, {"cores":1})])
- self.busywait(lambda: 2 == self.paired_monitor_count())
+ self.assertwait(lambda: self.assertEqual(2, self.paired_monitor_count()))
for mon_ref in self.monitor_list():
self.daemon.node_can_shutdown(mon_ref.proxy()).get(self.TIMEOUT)
self.assertEqual(1, self.node_shutdown.start.call_count)
@@ -671,7 +683,7 @@ class NodeManagerDaemonActorTestCase(testutil.ActorTestMixin,
cloud_nodes = [testutil.cloud_node_mock(1, size=size)]
arv_nodes = [testutil.arvados_node_mock(1, job_uuid=None)]
self.make_daemon(cloud_nodes, arv_nodes, [size])
- self.busywait(lambda: 1 == self.paired_monitor_count())
+ self.assertwait(lambda: self.assertEqual(1, self.paired_monitor_count()))
for mon_ref in self.monitor_list():
monitor = mon_ref.proxy()
if monitor.cloud_node.get(self.TIMEOUT) is cloud_nodes[-1]:
@@ -770,7 +782,7 @@ class NodeManagerDaemonActorTestCase(testutil.ActorTestMixin,
testutil.arvados_node_mock(3)],
want_sizes=[small, small, big],
avail_sizes=avail_sizes)
- self.busywait(lambda: 3 == self.paired_monitor_count())
+ self.assertwait(lambda: self.assertEqual(3, self.paired_monitor_count()))
self.daemon.update_server_wishlist([small, big, big]).get(self.TIMEOUT)
self.assertEqual(0, self.node_shutdown.start.call_count)
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list