[ARVADOS] updated: 6be95f5c3a2fcbe6321bba52c20393060e33e637

git at public.curoverse.com git at public.curoverse.com
Mon Mar 2 14:10:08 EST 2015


Summary of changes:
 services/nodemanager/arvnodeman/computenode/dispatch/__init__.py | 9 ++++++---
 services/nodemanager/arvnodeman/computenode/driver/gce.py        | 7 ++++++-
 services/nodemanager/setup.py                                    | 2 +-
 services/nodemanager/tests/test_computenode_dispatch.py          | 7 +++++++
 services/nodemanager/tests/test_computenode_driver_gce.py        | 8 +++++++-
 services/nodemanager/tests/test_daemon.py                        | 5 ++++-
 services/nodemanager/tests/testutil.py                           | 7 +++++--
 7 files changed, 36 insertions(+), 9 deletions(-)

       via  6be95f5c3a2fcbe6321bba52c20393060e33e637 (commit)
       via  212b58706f2dd0f5fc925cf058e384ca66149d75 (commit)
       via  3e5f2c7fef459bdbfda9da8f2eefd1d2bbe8e384 (commit)
       via  70f2a361c8029ee7e8616a34d61d0e45f6c29d98 (commit)
       via  a8ac39264817dddf73bf763bbafb944d44aa132c (commit)
      from  62fea2a463eb0d8c3aef84a56ba84561a196666b (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.


commit 6be95f5c3a2fcbe6321bba52c20393060e33e637
Merge: 3e5f2c7 212b587
Author: Brett Smith <brett at curoverse.com>
Date:   Mon Mar 2 14:08:02 2015 -0500

    Merge branch '4751-node-manager-stricter-node-pairing-wip'
    
    Closes #4751, #5351.


commit 212b58706f2dd0f5fc925cf058e384ca66149d75
Author: Brett Smith <brett at curoverse.com>
Date:   Mon Mar 2 11:27:59 2015 -0500

    4751: Node Manager considers ping times for stricter node pairing.
    
    Because the pairing decision is currently based on IP address alone,
    Node Manager will occasionally pair a cloud node with the wrong
    Arvados node after an IP address is reused.  Fix that by bringing the
    node's first_ping_at into consideration: if it's older than the cloud
    node, refuse to pair.

diff --git a/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py b/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py
index 1608b52..310e788 100644
--- a/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py
+++ b/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py
@@ -9,7 +9,8 @@ import time
 import libcloud.common.types as cloud_types
 import pykka
 
-from .. import arvados_node_fqdn, arvados_node_mtime, timestamp_fresh
+from .. import \
+    arvados_node_fqdn, arvados_node_mtime, arvados_timestamp, timestamp_fresh
 from ...clientactor import _notify_subscribers
 from ... import config
 
@@ -322,9 +323,11 @@ class ComputeNodeMonitorActor(config.actor_class):
             self.last_shutdown_opening = next_opening
 
     def offer_arvados_pair(self, arvados_node):
-        if self.arvados_node is not None:
+        first_ping_s = arvados_node.get('first_ping_at')
+        if (self.arvados_node is not None) or (not first_ping_s):
             return None
-        elif arvados_node['ip_address'] in self.cloud_node.private_ips:
+        elif ((arvados_node['ip_address'] in self.cloud_node.private_ips) and
+              (arvados_timestamp(first_ping_s) >= self.cloud_node_start_time)):
             self._later.update_arvados_node(arvados_node)
             return self.cloud_node.id
         else:
diff --git a/services/nodemanager/tests/test_computenode_dispatch.py b/services/nodemanager/tests/test_computenode_dispatch.py
index 4a72f47..8d69ea9 100644
--- a/services/nodemanager/tests/test_computenode_dispatch.py
+++ b/services/nodemanager/tests/test_computenode_dispatch.py
@@ -319,6 +319,13 @@ class ComputeNodeMonitorActorTestCase(testutil.ActorTestMixin,
         self.assertIsNone(
             self.node_actor.offer_arvados_pair(arv_node).get(self.TIMEOUT))
 
+    def test_arvados_node_mismatch_first_ping_too_early(self):
+        self.make_actor(4)
+        arv_node = testutil.arvados_node_mock(
+            4, first_ping_at='1971-03-02T14:15:16.1717282Z')
+        self.assertIsNone(
+            self.node_actor.offer_arvados_pair(arv_node).get(self.TIMEOUT))
+
     def test_update_cloud_node(self):
         self.make_actor(1)
         self.make_mocks(2)
diff --git a/services/nodemanager/tests/test_daemon.py b/services/nodemanager/tests/test_daemon.py
index bdba83a..dc8fdc3 100644
--- a/services/nodemanager/tests/test_daemon.py
+++ b/services/nodemanager/tests/test_daemon.py
@@ -94,8 +94,11 @@ class NodeManagerDaemonActorTestCase(testutil.ActorTestMixin,
         self.check_monitors_arvados_nodes(arv_node)
 
     def test_arvados_node_un_and_re_paired(self):
+        # We need to create the Arvados node mock after spinning up the daemon
+        # to make sure it's new enough to pair with the cloud node.
+        self.make_daemon([testutil.cloud_node_mock(3)], arvados_nodes=None)
         arv_node = testutil.arvados_node_mock(3)
-        self.make_daemon([testutil.cloud_node_mock(3)], [arv_node])
+        self.daemon.update_arvados_nodes([arv_node]).get(self.TIMEOUT)
         self.check_monitors_arvados_nodes(arv_node)
         self.daemon.update_cloud_nodes([]).get(self.TIMEOUT)
         self.assertEqual(0, self.alive_monitor_count())
diff --git a/services/nodemanager/tests/testutil.py b/services/nodemanager/tests/testutil.py
index 1c53c68..f0508e7 100644
--- a/services/nodemanager/tests/testutil.py
+++ b/services/nodemanager/tests/testutil.py
@@ -13,14 +13,17 @@ from . import pykka_timeout
 
 no_sleep = mock.patch('time.sleep', lambda n: None)
 
-def arvados_node_mock(node_num=99, job_uuid=None, age=0, **kwargs):
+def arvados_node_mock(node_num=99, job_uuid=None, age=-1, **kwargs):
     mod_time = datetime.datetime.utcnow() - datetime.timedelta(seconds=age)
+    mod_time_s = mod_time.strftime('%Y-%m-%dT%H:%M:%S.%fZ')
     if job_uuid is True:
         job_uuid = 'zzzzz-jjjjj-jobjobjobjobjob'
     crunch_worker_state = 'idle' if (job_uuid is None) else 'busy'
     node = {'uuid': 'zzzzz-yyyyy-{:015x}'.format(node_num),
             'created_at': '2014-01-01T01:02:03.04050607Z',
-            'modified_at': mod_time.strftime('%Y-%m-%dT%H:%M:%S.%fZ'),
+            'modified_at': mod_time_s,
+            'first_ping_at': kwargs.pop('first_ping_at', mod_time_s),
+            'last_ping_at': mod_time_s,
             'slot_number': node_num,
             'hostname': 'compute{}'.format(node_num),
             'domain': 'zzzzz.arvadosapi.com',

commit 3e5f2c7fef459bdbfda9da8f2eefd1d2bbe8e384
Merge: 62fea2a 70f2a36
Author: Brett Smith <brett at curoverse.com>
Date:   Mon Mar 2 14:07:29 2015 -0500

    Merge branch '5313-node-manager-gce-fixes-wip'
    
    Closes #5313, #5350.


-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list