[ARVADOS] updated: 1b03029be18e18f3155be51fd15e594b5e33a14c
git at public.curoverse.com
git at public.curoverse.com
Sat Oct 4 22:29:45 EDT 2014
Summary of changes:
services/nodemanager/arvnodeman/computenode/__init__.py | 12 +-----------
services/nodemanager/arvnodeman/jobqueue.py | 5 ++---
services/nodemanager/doc/ec2.example.cfg | 13 ++++++-------
services/nodemanager/tests/test_computenode.py | 4 ----
services/nodemanager/tests/test_computenode_ec2.py | 2 +-
5 files changed, 10 insertions(+), 26 deletions(-)
via 1b03029be18e18f3155be51fd15e594b5e33a14c (commit)
via 18407aadc9d9f9f8b80730de93f66180f3461e65 (commit)
via 0bd1834c1a1b0da2446a4f0262674d3d33042bc9 (commit)
from f0a929421c31d50d44fd30ac3a01b1cccc05e2d2 (commit)
Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.
commit 1b03029be18e18f3155be51fd15e594b5e33a14c
Author: Brett Smith <brett at curoverse.com>
Date: Sat Oct 4 22:25:00 2014 -0400
Fixup: Remove unused "post_create" hooks.
diff --git a/services/nodemanager/arvnodeman/computenode/__init__.py b/services/nodemanager/arvnodeman/computenode/__init__.py
index e31e7c1..7400dc1 100644
--- a/services/nodemanager/arvnodeman/computenode/__init__.py
+++ b/services/nodemanager/arvnodeman/computenode/__init__.py
@@ -75,9 +75,6 @@ class BaseComputeNodeDriver(object):
kwargs['size'] = size
return self.real.create_node(**kwargs)
- def post_create_node(self, cloud_node, arvados_node):
- pass
-
@classmethod
def node_start_time(cls, node):
raise NotImplementedError("BaseComputeNodeDriver.node_start_time")
@@ -132,14 +129,7 @@ class ComputeNodeSetupActor(config.actor_class):
self.cloud_size.name)
self.cloud_node = self._cloud.create_node(self.cloud_size,
self.arvados_node)
- self._logger.info("Cloud node %s created. Setting up.",
- self.cloud_node.id)
- self._later.setup_cloud_node()
-
- @_retry(config.CLOUD_ERRORS)
- def setup_cloud_node(self):
- self._cloud.post_create_node(self.cloud_node, self.arvados_node)
- self._logger.info("Cloud node %s set up.", self.cloud_node.id)
+ self._logger.info("Cloud node %s created.", self.cloud_node.id)
_notify_subscribers(self._later, self.subscribers)
self.subscribers = None
diff --git a/services/nodemanager/tests/test_computenode.py b/services/nodemanager/tests/test_computenode.py
index e628b13..5879a8f 100644
--- a/services/nodemanager/tests/test_computenode.py
+++ b/services/nodemanager/tests/test_computenode.py
@@ -33,7 +33,6 @@ class ComputeNodeSetupActorTestCase(testutil.ActorTestMixin, unittest.TestCase):
self.make_actor()
self.wait_for_call(self.api_client.nodes().create().execute)
self.wait_for_call(self.cloud_client.create_node)
- self.wait_for_call(self.cloud_client.post_create_node)
def test_creation_with_arvados_node(self):
arv_node = testutil.arvados_node_mock()
@@ -41,9 +40,6 @@ class ComputeNodeSetupActorTestCase(testutil.ActorTestMixin, unittest.TestCase):
self.make_actor(arv_node)
self.wait_for_call(self.api_client.nodes().update().execute)
self.wait_for_call(self.cloud_client.create_node)
- self.wait_for_call(self.cloud_client.post_create_node)
- self.cloud_client.post_create_node.assert_called_with(
- self.cloud_client.create_node(), arv_node)
def test_failed_calls_retried(self):
self.make_mocks([
diff --git a/services/nodemanager/tests/test_computenode_ec2.py b/services/nodemanager/tests/test_computenode_ec2.py
index 20f1d1d..d6b36ad 100644
--- a/services/nodemanager/tests/test_computenode_ec2.py
+++ b/services/nodemanager/tests/test_computenode_ec2.py
@@ -53,7 +53,7 @@ class EC2ComputeNodeDriverTestCase(unittest.TestCase):
create_method.call_args[1].get('ex_userdata',
'arg missing'))
- def test_post_create_tags_node(self):
+ def test_tags_created_from_arvados_node(self):
arv_node = testutil.arvados_node_mock(8)
cloud_node = testutil.cloud_node_mock(8)
driver = self.new_driver(list_kwargs={'tag:list': 'test'})
commit 18407aadc9d9f9f8b80730de93f66180f3461e65
Author: Brett Smith <brett at curoverse.com>
Date: Sat Oct 4 22:20:07 2014 -0400
Fixup node_stale_time example.
diff --git a/services/nodemanager/doc/ec2.example.cfg b/services/nodemanager/doc/ec2.example.cfg
index f1bebdf..a56e69e 100644
--- a/services/nodemanager/doc/ec2.example.cfg
+++ b/services/nodemanager/doc/ec2.example.cfg
@@ -19,15 +19,14 @@ max_poll_time = 300
poll_stale_after = 600
# "Node stale time" affects two related behaviors.
-# 1. If a compute node fails to ping an Arvados node for this long,
-# assume that it failed to bootstrap correctly, and consider it eligible
-# for shutdown.
-# This setting is only considered when the compute node is in a normal
-# shutdown window (see below). You probably want to set this so that a
-# shutdown window opens just after time expires.
+# 1. If a compute node has been running for at least this long, but it
+# isn't paired with an Arvados node, do not shut it down, but leave it alone.
+# This prevents the node manager from shutting down a node that might
+# actually be doing work, but is having temporary trouble contacting the
+# API server.
# 2. When the Node Manager starts a new compute node, it will try to reuse
# an Arvados node that hasn't been updated for this long.
-node_stale_after = 3000
+node_stale_after = 14400
# File path for Certificate Authorities
certs_file = /etc/ssl/certs/ca-certificates.crt
commit 0bd1834c1a1b0da2446a4f0262674d3d33042bc9
Author: Brett Smith <brett at curoverse.com>
Date: Sat Oct 4 22:15:49 2014 -0400
Fixup error spec in JobQueueMonitorActor.
diff --git a/services/nodemanager/arvnodeman/jobqueue.py b/services/nodemanager/arvnodeman/jobqueue.py
index 9daf556..9d29818 100644
--- a/services/nodemanager/arvnodeman/jobqueue.py
+++ b/services/nodemanager/arvnodeman/jobqueue.py
@@ -1,8 +1,7 @@
#!/usr/bin/env python
-import arvados.errors as arverror
-
from . import clientactor
+from .config import ARVADOS_ERRORS
class ServerCalculator(object):
class SizeWrapper(object):
@@ -60,7 +59,7 @@ class ServerCalculator(object):
class JobQueueMonitorActor(clientactor.RemotePollLoopActor):
- CLIENT_ERRORS = (arverror.ApiError,)
+ CLIENT_ERRORS = ARVADOS_ERRORS
LOGGER_NAME = 'arvnodeman.jobqueue'
def __init__(self, client, timer_actor, server_calc, *args, **kwargs):
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list