[ARVADOS] updated: 02b41b35631329d9eff41f6f2e56456edb7f50a6
git at public.curoverse.com
git at public.curoverse.com
Tue Oct 7 10:49:52 EDT 2014
Summary of changes:
services/nodemanager/arvnodeman/clientactor.py | 17 ++++++++
.../nodemanager/arvnodeman/computenode/__init__.py | 49 ++++++++++++++++++++++
.../nodemanager/arvnodeman/computenode/dummy.py | 5 +++
services/nodemanager/arvnodeman/computenode/ec2.py | 6 +++
services/nodemanager/arvnodeman/config.py | 6 +++
services/nodemanager/arvnodeman/daemon.py | 14 +++++++
services/nodemanager/arvnodeman/jobqueue.py | 16 +++++++
services/nodemanager/arvnodeman/nodelist.py | 14 ++++++-
services/nodemanager/arvnodeman/timedcallback.py | 6 +++
9 files changed, 131 insertions(+), 2 deletions(-)
via 02b41b35631329d9eff41f6f2e56456edb7f50a6 (commit)
from eb902dd4ca618af0f2774db1fb13059ee028a398 (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
commit 02b41b35631329d9eff41f6f2e56456edb7f50a6
Author: Brett Smith <brett at curoverse.com>
Date: Tue Oct 7 10:51:23 2014 -0400
2881: Add docstrings to Node Manager.
diff --git a/services/nodemanager/arvnodeman/clientactor.py b/services/nodemanager/arvnodeman/clientactor.py
index e02485c..77d85d6 100644
--- a/services/nodemanager/arvnodeman/clientactor.py
+++ b/services/nodemanager/arvnodeman/clientactor.py
@@ -10,6 +10,11 @@ import pykka
from .config import actor_class
def _notify_subscribers(response, subscribers):
+ """Send the response to all the subscriber methods.
+
+ If any of the subscriber actors have stopped, remove them from the
+ subscriber set.
+ """
dead_subscribers = set()
for subscriber in subscribers:
try:
@@ -19,6 +24,16 @@ def _notify_subscribers(response, subscribers):
subscribers.difference_update(dead_subscribers)
class RemotePollLoopActor(actor_class):
+ """Abstract actor class to regularly poll a remote service.
+
+ This actor sends regular requests to a remote service, and sends each
+ response to subscribers. It takes care of error handling, and retrying
+ requests with exponential backoff.
+
+ To use this actor, define CLIENT_ERRORS and the _send_request method.
+ If you also define an _item_key method, this class will support
+ subscribing to a specific item by key in responses.
+ """
def __init__(self, client, timer_actor, poll_wait=60, max_poll_wait=180):
super(RemotePollLoopActor, self).__init__()
self._client = client
@@ -44,6 +59,8 @@ class RemotePollLoopActor(actor_class):
self._logger.debug("%r subscribed to all events", subscriber)
self._start_polling()
+ # __init__ exposes this method to the proxy if the subclass defines
+ # _item_key.
def _subscribe_to(self, key, subscriber):
self.key_subscribers.setdefault(key, set()).add(subscriber)
self._logger.debug("%r subscribed to events for '%s'", subscriber, key)
diff --git a/services/nodemanager/arvnodeman/computenode/__init__.py b/services/nodemanager/arvnodeman/computenode/__init__.py
index 2246831..8bedf88 100644
--- a/services/nodemanager/arvnodeman/computenode/__init__.py
+++ b/services/nodemanager/arvnodeman/computenode/__init__.py
@@ -20,6 +20,13 @@ def timestamp_fresh(timestamp, fresh_time):
return (time.time() - timestamp) < fresh_time
def _retry(errors):
+ """Retry decorator for an actor method that makes remote requests.
+
+ Use this function to decorate an actor method, and pass in a tuple of
+ exceptions to catch. This decorator will schedule retries of that method
+ with exponential backoff if the original method raises any of the given
+ errors.
+ """
def decorator(orig_func):
@functools.wraps(orig_func)
def wrapper(self, *args, **kwargs):
@@ -40,6 +47,18 @@ def _retry(errors):
return decorator
class BaseComputeNodeDriver(object):
+ """Abstract base class for compute node drivers.
+
+ libcloud abstracts away many of the differences between cloud providers,
+ but managing compute nodes requires some cloud-specific features (e.g.,
+ on EC2 we use tags to identify compute nodes). Compute node drivers
+ are responsible for translating the node manager's cloud requests to a
+ specific cloud's vocabulary.
+
+ Subclasses must implement arvados_create_kwargs (to update node creation
+ kwargs with information about the specific Arvados node record) and
+ node_start_time.
+ """
def __init__(self, auth_kwargs, list_kwargs, create_kwargs, driver_class):
self.real = driver_class(**auth_kwargs)
self.list_kwargs = list_kwargs
@@ -85,6 +104,13 @@ class BaseComputeNodeDriver(object):
ComputeNodeDriverClass = BaseComputeNodeDriver
class ComputeNodeSetupActor(config.actor_class):
+ """Actor to create and set up a cloud compute node.
+
+ This actor prepares an Arvados node record for a new compute node
+ (either creating one or cleaning one passed in), then boots the
+ actual compute node. It notifies subscribers when the node finishes
+ booting.
+ """
def __init__(self, timer_actor, arvados_client, cloud_client,
cloud_size, arvados_node=None,
retry_wait=1, max_retry_wait=180):
@@ -150,6 +176,10 @@ class ComputeNodeSetupActor(config.actor_class):
class ComputeNodeShutdownActor(config.actor_class):
+ """Actor to shut down a compute node.
+
+ This actor simply destroys a cloud node, retrying as needed.
+ """
def __init__(self, timer_actor, cloud_client, cloud_node,
retry_wait=1, max_retry_wait=180):
super(ComputeNodeShutdownActor, self).__init__()
@@ -170,7 +200,20 @@ class ComputeNodeShutdownActor(config.actor_class):
class ShutdownTimer(object):
+ """Keep track of a cloud node's shutdown windows.
+
+ Instantiate this class with a timestamp of when a cloud node started,
+ and a list of durations (in minutes) of when the node must not and may
+ be shut down, alternating. The class will tell you when a shutdown
+ window is open, and when the next open window will start.
+ """
def __init__(self, start_time, shutdown_windows):
+ # The implementation is easiest if we have an even number of windows,
+ # because then windows always alternate between open and closed.
+ # Rig that up: calculate the first shutdown window based on what's
+ # passed in. Then, if we were given an odd number of windows, merge
+ # that first window into the last one, since they both represent
+ # closed state.
first_window = shutdown_windows[0]
shutdown_windows = list(shutdown_windows[1:])
self._next_opening = start_time + (60 * first_window)
@@ -199,6 +242,12 @@ class ShutdownTimer(object):
class ComputeNodeActor(config.actor_class):
+ """Actor to manage a running compute node.
+
+ This actor gets updates about a compute node's cloud and Arvados records.
+ It uses this information to notify subscribers when the node is eligible
+ for shutdown.
+ """
def __init__(self, cloud_node, cloud_node_start_time, shutdown_timer,
timer_actor, arvados_node=None,
poll_stale_after=600, node_stale_after=3600):
diff --git a/services/nodemanager/arvnodeman/computenode/dummy.py b/services/nodemanager/arvnodeman/computenode/dummy.py
index 30e1d9a..e1a2da0 100644
--- a/services/nodemanager/arvnodeman/computenode/dummy.py
+++ b/services/nodemanager/arvnodeman/computenode/dummy.py
@@ -10,6 +10,11 @@ import libcloud.compute.types as cloud_types
from . import BaseComputeNodeDriver
class ComputeNodeDriver(BaseComputeNodeDriver):
+ """Compute node driver wrapper for libcloud's dummy driver.
+
+ This class provides the glue necessary to run the node manager with a
+ dummy cloud. It's useful for testing.
+ """
DEFAULT_DRIVER = cloud_provider.get_driver(cloud_types.Provider.DUMMY)
DEFAULT_REAL = DEFAULT_DRIVER('ComputeNodeDriver')
DUMMY_START_TIME = time.time()
diff --git a/services/nodemanager/arvnodeman/computenode/ec2.py b/services/nodemanager/arvnodeman/computenode/ec2.py
index cfd7f84..4269ed7 100644
--- a/services/nodemanager/arvnodeman/computenode/ec2.py
+++ b/services/nodemanager/arvnodeman/computenode/ec2.py
@@ -12,6 +12,8 @@ from libcloud.compute.drivers import ec2 as cloud_ec2
from . import BaseComputeNodeDriver
### Monkeypatch libcloud to support AWS' new SecurityGroup API.
+# These classes can be removed when libcloud supports specifying
+# security groups with the SecurityGroupId parameter.
class ANMEC2Connection(cloud_ec2.EC2Connection):
def request(self, *args, **kwargs):
params = kwargs.get('params')
@@ -29,6 +31,10 @@ class ANMEC2NodeDriver(cloud_ec2.EC2NodeDriver):
class ComputeNodeDriver(BaseComputeNodeDriver):
+ """Compute node driver wrapper for EC2.
+
+ This translates cloud driver requests to EC2's specific parameters.
+ """
DEFAULT_DRIVER = ANMEC2NodeDriver
### End monkeypatch
SEARCH_CACHE = {}
diff --git a/services/nodemanager/arvnodeman/config.py b/services/nodemanager/arvnodeman/config.py
index d796cb7..07504e2 100644
--- a/services/nodemanager/arvnodeman/config.py
+++ b/services/nodemanager/arvnodeman/config.py
@@ -23,6 +23,12 @@ CLOUD_ERRORS = NETWORK_ERRORS + (cloud_types.LibcloudError,)
actor_class = pykka.ThreadingActor
class NodeManagerConfig(ConfigParser.SafeConfigParser):
+ """Node Manager Configuration class.
+
+ This is a standard Python ConfigParser, with additional helper methods to
+ create objects instantiated with configuration information.
+ """
+
LOGGING_NONLEVELS = frozenset(['file'])
def __init__(self, *args, **kwargs):
diff --git a/services/nodemanager/arvnodeman/daemon.py b/services/nodemanager/arvnodeman/daemon.py
index ab5b2aa..8219af6 100644
--- a/services/nodemanager/arvnodeman/daemon.py
+++ b/services/nodemanager/arvnodeman/daemon.py
@@ -12,6 +12,14 @@ from . import computenode as cnode
from .config import actor_class
class NodeManagerDaemonActor(actor_class):
+ """Node Manager daemon.
+
+ This actor subscribes to all information polls about cloud nodes,
+ Arvados nodes, and the job queue. It creates a ComputeNodeActor
+ for every cloud node, subscribing them to poll updates
+ appropriately, and starts and stops cloud nodes based on job queue
+ demand.
+ """
class PairingTracker(object):
def __init__(self, key_func, paired_items, unpaired_items):
self.key_func = key_func
@@ -173,6 +181,12 @@ class NodeManagerDaemonActor(actor_class):
self._later.stop_booting_node()
def _check_poll_freshness(orig_func):
+ """Decorator to inhibit a method when poll information is stale.
+
+ This decorator checks the timestamps of all the poll information the
+ daemon has received. The decorated method is only called if none
+ of the timestamps are considered stale.
+ """
@functools.wraps(orig_func)
def wrapper(self, *args, **kwargs):
now = time.time()
diff --git a/services/nodemanager/arvnodeman/jobqueue.py b/services/nodemanager/arvnodeman/jobqueue.py
index 88d4b53..bae4930 100644
--- a/services/nodemanager/arvnodeman/jobqueue.py
+++ b/services/nodemanager/arvnodeman/jobqueue.py
@@ -6,6 +6,15 @@ from . import clientactor
from .config import ARVADOS_ERRORS
class ServerCalculator(object):
+ """Generate cloud server wishlists from an Arvados job queue.
+
+ Instantiate this class with a list of cloud node sizes you're willing to
+ use, plus keyword overrides from the configuration. Then you can pass
+ job queues to servers_for_queue. It will return a list of node sizes
+ that would best satisfy the jobs, choosing the cheapest size that
+ satisfies each job, and ignoring jobs that can't be satisfied.
+ """
+
class SizeWrapper(object):
def __init__(self, real_size, **kwargs):
self.real = real_size
@@ -61,6 +70,13 @@ class ServerCalculator(object):
class JobQueueMonitorActor(clientactor.RemotePollLoopActor):
+ """Actor to generate server wishlists from the job queue.
+
+ This actor regularly polls Arvados' job queue, and uses the provided
+ ServerCalculator to turn that into a list of requested node sizes. That
+ list is sent to subscribers on every poll.
+ """
+
CLIENT_ERRORS = ARVADOS_ERRORS
LOGGER_NAME = 'arvnodeman.jobqueue'
diff --git a/services/nodemanager/arvnodeman/nodelist.py b/services/nodemanager/arvnodeman/nodelist.py
index d0d2aac..7ddfb7c 100644
--- a/services/nodemanager/arvnodeman/nodelist.py
+++ b/services/nodemanager/arvnodeman/nodelist.py
@@ -2,12 +2,16 @@
from __future__ import absolute_import, print_function
-import arvados.errors as arverror
-
from . import clientactor
from . import config
class ArvadosNodeListMonitorActor(clientactor.RemotePollLoopActor):
+ """Actor to poll the Arvados node list.
+
+ This actor regularly polls the list of Arvados node records, and
+ sends it to subscribers.
+ """
+
CLIENT_ERRORS = config.ARVADOS_ERRORS
LOGGER_NAME = 'arvnodeman.arvados_nodes'
@@ -19,6 +23,12 @@ class ArvadosNodeListMonitorActor(clientactor.RemotePollLoopActor):
class CloudNodeListMonitorActor(clientactor.RemotePollLoopActor):
+ """Actor to poll the cloud node list.
+
+ This actor regularly polls the cloud to get a list of running compute
+ nodes, and sends it to subscribers.
+ """
+
CLIENT_ERRORS = config.CLOUD_ERRORS
LOGGER_NAME = 'arvnodeman.cloud_nodes'
diff --git a/services/nodemanager/arvnodeman/timedcallback.py b/services/nodemanager/arvnodeman/timedcallback.py
index 51afbd2..a1df8ec 100644
--- a/services/nodemanager/arvnodeman/timedcallback.py
+++ b/services/nodemanager/arvnodeman/timedcallback.py
@@ -10,6 +10,12 @@ import pykka
from .config import actor_class
class TimedCallBackActor(actor_class):
+ """Send messages to other actors on a schedule.
+
+ Other actors can call the schedule() method to schedule delivery of a
+ message at a later time. This actor runs the necessary event loop for
+ delivery.
+ """
def __init__(self, max_sleep=1):
super(TimedCallBackActor, self).__init__()
self._proxy = self.actor_ref.proxy()
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list