[ARVADOS] updated: 1b03029be18e18f3155be51fd15e594b5e33a14c

git at public.curoverse.com git at public.curoverse.com
Sat Oct 4 22:29:45 EDT 2014


Summary of changes:
 services/nodemanager/arvnodeman/computenode/__init__.py | 12 +-----------
 services/nodemanager/arvnodeman/jobqueue.py             |  5 ++---
 services/nodemanager/doc/ec2.example.cfg                | 13 ++++++-------
 services/nodemanager/tests/test_computenode.py          |  4 ----
 services/nodemanager/tests/test_computenode_ec2.py      |  2 +-
 5 files changed, 10 insertions(+), 26 deletions(-)

       via  1b03029be18e18f3155be51fd15e594b5e33a14c (commit)
       via  18407aadc9d9f9f8b80730de93f66180f3461e65 (commit)
       via  0bd1834c1a1b0da2446a4f0262674d3d33042bc9 (commit)
      from  f0a929421c31d50d44fd30ac3a01b1cccc05e2d2 (commit)

Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.


commit 1b03029be18e18f3155be51fd15e594b5e33a14c
Author: Brett Smith <brett at curoverse.com>
Date:   Sat Oct 4 22:25:00 2014 -0400

    Fixup: Remove unused "post_create" hooks.

diff --git a/services/nodemanager/arvnodeman/computenode/__init__.py b/services/nodemanager/arvnodeman/computenode/__init__.py
index e31e7c1..7400dc1 100644
--- a/services/nodemanager/arvnodeman/computenode/__init__.py
+++ b/services/nodemanager/arvnodeman/computenode/__init__.py
@@ -75,9 +75,6 @@ class BaseComputeNodeDriver(object):
         kwargs['size'] = size
         return self.real.create_node(**kwargs)
 
-    def post_create_node(self, cloud_node, arvados_node):
-        pass
-
     @classmethod
     def node_start_time(cls, node):
         raise NotImplementedError("BaseComputeNodeDriver.node_start_time")
@@ -132,14 +129,7 @@ class ComputeNodeSetupActor(config.actor_class):
                           self.cloud_size.name)
         self.cloud_node = self._cloud.create_node(self.cloud_size,
                                                   self.arvados_node)
-        self._logger.info("Cloud node %s created.  Setting up.",
-                          self.cloud_node.id)
-        self._later.setup_cloud_node()
-
-    @_retry(config.CLOUD_ERRORS)
-    def setup_cloud_node(self):
-        self._cloud.post_create_node(self.cloud_node, self.arvados_node)
-        self._logger.info("Cloud node %s set up.", self.cloud_node.id)
+        self._logger.info("Cloud node %s created.", self.cloud_node.id)
         _notify_subscribers(self._later, self.subscribers)
         self.subscribers = None
 
diff --git a/services/nodemanager/tests/test_computenode.py b/services/nodemanager/tests/test_computenode.py
index e628b13..5879a8f 100644
--- a/services/nodemanager/tests/test_computenode.py
+++ b/services/nodemanager/tests/test_computenode.py
@@ -33,7 +33,6 @@ class ComputeNodeSetupActorTestCase(testutil.ActorTestMixin, unittest.TestCase):
         self.make_actor()
         self.wait_for_call(self.api_client.nodes().create().execute)
         self.wait_for_call(self.cloud_client.create_node)
-        self.wait_for_call(self.cloud_client.post_create_node)
 
     def test_creation_with_arvados_node(self):
         arv_node = testutil.arvados_node_mock()
@@ -41,9 +40,6 @@ class ComputeNodeSetupActorTestCase(testutil.ActorTestMixin, unittest.TestCase):
         self.make_actor(arv_node)
         self.wait_for_call(self.api_client.nodes().update().execute)
         self.wait_for_call(self.cloud_client.create_node)
-        self.wait_for_call(self.cloud_client.post_create_node)
-        self.cloud_client.post_create_node.assert_called_with(
-            self.cloud_client.create_node(), arv_node)
 
     def test_failed_calls_retried(self):
         self.make_mocks([
diff --git a/services/nodemanager/tests/test_computenode_ec2.py b/services/nodemanager/tests/test_computenode_ec2.py
index 20f1d1d..d6b36ad 100644
--- a/services/nodemanager/tests/test_computenode_ec2.py
+++ b/services/nodemanager/tests/test_computenode_ec2.py
@@ -53,7 +53,7 @@ class EC2ComputeNodeDriverTestCase(unittest.TestCase):
                       create_method.call_args[1].get('ex_userdata',
                                                      'arg missing'))
 
-    def test_post_create_tags_node(self):
+    def test_tags_created_from_arvados_node(self):
         arv_node = testutil.arvados_node_mock(8)
         cloud_node = testutil.cloud_node_mock(8)
         driver = self.new_driver(list_kwargs={'tag:list': 'test'})

commit 18407aadc9d9f9f8b80730de93f66180f3461e65
Author: Brett Smith <brett at curoverse.com>
Date:   Sat Oct 4 22:20:07 2014 -0400

    Fixup node_stale_time example.

diff --git a/services/nodemanager/doc/ec2.example.cfg b/services/nodemanager/doc/ec2.example.cfg
index f1bebdf..a56e69e 100644
--- a/services/nodemanager/doc/ec2.example.cfg
+++ b/services/nodemanager/doc/ec2.example.cfg
@@ -19,15 +19,14 @@ max_poll_time = 300
 poll_stale_after = 600
 
 # "Node stale time" affects two related behaviors.
-# 1. If a compute node fails to ping an Arvados node for this long,
-# assume that it failed to bootstrap correctly, and consider it eligible
-# for shutdown.
-# This setting is only considered when the compute node is in a normal
-# shutdown window (see below).  You probably want to set this so that a
-# shutdown window opens just after time expires.
+# 1. If a compute node has been running for at least this long, but it
+# isn't paired with an Arvados node, do not shut it down, but leave it alone.
+# This prevents the node manager from shutting down a node that might
+# actually be doing work, but is having temporary trouble contacting the
+# API server.
 # 2. When the Node Manager starts a new compute node, it will try to reuse
 # an Arvados node that hasn't been updated for this long.
-node_stale_after = 3000
+node_stale_after = 14400
 
 # File path for Certificate Authorities
 certs_file = /etc/ssl/certs/ca-certificates.crt

commit 0bd1834c1a1b0da2446a4f0262674d3d33042bc9
Author: Brett Smith <brett at curoverse.com>
Date:   Sat Oct 4 22:15:49 2014 -0400

    Fixup error spec in JobQueueMonitorActor.

diff --git a/services/nodemanager/arvnodeman/jobqueue.py b/services/nodemanager/arvnodeman/jobqueue.py
index 9daf556..9d29818 100644
--- a/services/nodemanager/arvnodeman/jobqueue.py
+++ b/services/nodemanager/arvnodeman/jobqueue.py
@@ -1,8 +1,7 @@
 #!/usr/bin/env python
 
-import arvados.errors as arverror
-
 from . import clientactor
+from .config import ARVADOS_ERRORS
 
 class ServerCalculator(object):
     class SizeWrapper(object):
@@ -60,7 +59,7 @@ class ServerCalculator(object):
 
 
 class JobQueueMonitorActor(clientactor.RemotePollLoopActor):
-    CLIENT_ERRORS = (arverror.ApiError,)
+    CLIENT_ERRORS = ARVADOS_ERRORS
     LOGGER_NAME = 'arvnodeman.jobqueue'
 
     def __init__(self, client, timer_actor, server_calc, *args, **kwargs):

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list