[ARVADOS] updated: 02b41b35631329d9eff41f6f2e56456edb7f50a6

git at public.curoverse.com git at public.curoverse.com
Tue Oct 7 10:49:52 EDT 2014


Summary of changes:
 services/nodemanager/arvnodeman/clientactor.py     | 17 ++++++++
 .../nodemanager/arvnodeman/computenode/__init__.py | 49 ++++++++++++++++++++++
 .../nodemanager/arvnodeman/computenode/dummy.py    |  5 +++
 services/nodemanager/arvnodeman/computenode/ec2.py |  6 +++
 services/nodemanager/arvnodeman/config.py          |  6 +++
 services/nodemanager/arvnodeman/daemon.py          | 14 +++++++
 services/nodemanager/arvnodeman/jobqueue.py        | 16 +++++++
 services/nodemanager/arvnodeman/nodelist.py        | 14 ++++++-
 services/nodemanager/arvnodeman/timedcallback.py   |  6 +++
 9 files changed, 131 insertions(+), 2 deletions(-)

       via  02b41b35631329d9eff41f6f2e56456edb7f50a6 (commit)
      from  eb902dd4ca618af0f2774db1fb13059ee028a398 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.


commit 02b41b35631329d9eff41f6f2e56456edb7f50a6
Author: Brett Smith <brett at curoverse.com>
Date:   Tue Oct 7 10:51:23 2014 -0400

    2881: Add docstrings to Node Manager.

diff --git a/services/nodemanager/arvnodeman/clientactor.py b/services/nodemanager/arvnodeman/clientactor.py
index e02485c..77d85d6 100644
--- a/services/nodemanager/arvnodeman/clientactor.py
+++ b/services/nodemanager/arvnodeman/clientactor.py
@@ -10,6 +10,11 @@ import pykka
 from .config import actor_class
 
 def _notify_subscribers(response, subscribers):
+    """Send the response to all the subscriber methods.
+
+    If any of the subscriber actors have stopped, remove them from the
+    subscriber set.
+    """
     dead_subscribers = set()
     for subscriber in subscribers:
         try:
@@ -19,6 +24,16 @@ def _notify_subscribers(response, subscribers):
     subscribers.difference_update(dead_subscribers)
 
 class RemotePollLoopActor(actor_class):
+    """Abstract actor class to regularly poll a remote service.
+
+    This actor sends regular requests to a remote service, and sends each
+    response to subscribers.  It takes care of error handling, and retrying
+    requests with exponential backoff.
+
+    To use this actor, define CLIENT_ERRORS and the _send_request method.
+    If you also define an _item_key method, this class will support
+    subscribing to a specific item by key in responses.
+    """
     def __init__(self, client, timer_actor, poll_wait=60, max_poll_wait=180):
         super(RemotePollLoopActor, self).__init__()
         self._client = client
@@ -44,6 +59,8 @@ class RemotePollLoopActor(actor_class):
         self._logger.debug("%r subscribed to all events", subscriber)
         self._start_polling()
 
+    # __init__ exposes this method to the proxy if the subclass defines
+    # _item_key.
     def _subscribe_to(self, key, subscriber):
         self.key_subscribers.setdefault(key, set()).add(subscriber)
         self._logger.debug("%r subscribed to events for '%s'", subscriber, key)
diff --git a/services/nodemanager/arvnodeman/computenode/__init__.py b/services/nodemanager/arvnodeman/computenode/__init__.py
index 2246831..8bedf88 100644
--- a/services/nodemanager/arvnodeman/computenode/__init__.py
+++ b/services/nodemanager/arvnodeman/computenode/__init__.py
@@ -20,6 +20,13 @@ def timestamp_fresh(timestamp, fresh_time):
     return (time.time() - timestamp) < fresh_time
 
 def _retry(errors):
+    """Retry decorator for an actor method that makes remote requests.
+
+    Use this function to decorate an actor method, and pass in a tuple of
+    exceptions to catch.  This decorator will schedule retries of that method
+    with exponential backoff if the original method raises any of the given
+    errors.
+    """
     def decorator(orig_func):
         @functools.wraps(orig_func)
         def wrapper(self, *args, **kwargs):
@@ -40,6 +47,18 @@ def _retry(errors):
     return decorator
 
 class BaseComputeNodeDriver(object):
+    """Abstract base class for compute node drivers.
+
+    libcloud abstracts away many of the differences between cloud providers,
+    but managing compute nodes requires some cloud-specific features (e.g.,
+    on EC2 we use tags to identify compute nodes).  Compute node drivers
+    are responsible for translating the node manager's cloud requests to a
+    specific cloud's vocabulary.
+
+    Subclasses must implement arvados_create_kwargs (to update node creation
+    kwargs with information about the specific Arvados node record) and
+    node_start_time.
+    """
     def __init__(self, auth_kwargs, list_kwargs, create_kwargs, driver_class):
         self.real = driver_class(**auth_kwargs)
         self.list_kwargs = list_kwargs
@@ -85,6 +104,13 @@ class BaseComputeNodeDriver(object):
 ComputeNodeDriverClass = BaseComputeNodeDriver
 
 class ComputeNodeSetupActor(config.actor_class):
+    """Actor to create and set up a cloud compute node.
+
+    This actor prepares an Arvados node record for a new compute node
+    (either creating one or cleaning one passed in), then boots the
+    actual compute node.  It notifies subscribers when the node finishes
+    booting.
+    """
     def __init__(self, timer_actor, arvados_client, cloud_client,
                  cloud_size, arvados_node=None,
                  retry_wait=1, max_retry_wait=180):
@@ -150,6 +176,10 @@ class ComputeNodeSetupActor(config.actor_class):
 
 
 class ComputeNodeShutdownActor(config.actor_class):
+    """Actor to shut down a compute node.
+
+    This actor simply destroys a cloud node, retrying as needed.
+    """
     def __init__(self, timer_actor, cloud_client, cloud_node,
                  retry_wait=1, max_retry_wait=180):
         super(ComputeNodeShutdownActor, self).__init__()
@@ -170,7 +200,20 @@ class ComputeNodeShutdownActor(config.actor_class):
 
 
 class ShutdownTimer(object):
+    """Keep track of a cloud node's shutdown windows.
+
+    Instantiate this class with a timestamp of when a cloud node started,
+    and a list of durations (in minutes) of when the node must not and may
+    be shut down, alternating.  The class will tell you when a shutdown
+    window is open, and when the next open window will start.
+    """
     def __init__(self, start_time, shutdown_windows):
+        # The implementation is easiest if we have an even number of windows,
+        # because then windows always alternate between open and closed.
+        # Rig that up: calculate the first shutdown window based on what's
+        # passed in.  Then, if we were given an odd number of windows, merge
+        # that first window into the last one, since they both represent
+        # closed state.
         first_window = shutdown_windows[0]
         shutdown_windows = list(shutdown_windows[1:])
         self._next_opening = start_time + (60 * first_window)
@@ -199,6 +242,12 @@ class ShutdownTimer(object):
 
 
 class ComputeNodeActor(config.actor_class):
+    """Actor to manage a running compute node.
+
+    This actor gets updates about a compute node's cloud and Arvados records.
+    It uses this information to notify subscribers when the node is eligible
+    for shutdown.
+    """
     def __init__(self, cloud_node, cloud_node_start_time, shutdown_timer,
                  timer_actor, arvados_node=None,
                  poll_stale_after=600, node_stale_after=3600):
diff --git a/services/nodemanager/arvnodeman/computenode/dummy.py b/services/nodemanager/arvnodeman/computenode/dummy.py
index 30e1d9a..e1a2da0 100644
--- a/services/nodemanager/arvnodeman/computenode/dummy.py
+++ b/services/nodemanager/arvnodeman/computenode/dummy.py
@@ -10,6 +10,11 @@ import libcloud.compute.types as cloud_types
 from . import BaseComputeNodeDriver
 
 class ComputeNodeDriver(BaseComputeNodeDriver):
+    """Compute node driver wrapper for libcloud's dummy driver.
+
+    This class provides the glue necessary to run the node manager with a
+    dummy cloud.  It's useful for testing.
+    """
     DEFAULT_DRIVER = cloud_provider.get_driver(cloud_types.Provider.DUMMY)
     DEFAULT_REAL = DEFAULT_DRIVER('ComputeNodeDriver')
     DUMMY_START_TIME = time.time()
diff --git a/services/nodemanager/arvnodeman/computenode/ec2.py b/services/nodemanager/arvnodeman/computenode/ec2.py
index cfd7f84..4269ed7 100644
--- a/services/nodemanager/arvnodeman/computenode/ec2.py
+++ b/services/nodemanager/arvnodeman/computenode/ec2.py
@@ -12,6 +12,8 @@ from libcloud.compute.drivers import ec2 as cloud_ec2
 from . import BaseComputeNodeDriver
 
 ### Monkeypatch libcloud to support AWS' new SecurityGroup API.
+# These classes can be removed when libcloud supports specifying
+# security groups with the SecurityGroupId parameter.
 class ANMEC2Connection(cloud_ec2.EC2Connection):
     def request(self, *args, **kwargs):
         params = kwargs.get('params')
@@ -29,6 +31,10 @@ class ANMEC2NodeDriver(cloud_ec2.EC2NodeDriver):
 
 
 class ComputeNodeDriver(BaseComputeNodeDriver):
+    """Compute node driver wrapper for EC2.
+
+    This translates cloud driver requests to EC2's specific parameters.
+    """
     DEFAULT_DRIVER = ANMEC2NodeDriver
 ### End monkeypatch
     SEARCH_CACHE = {}
diff --git a/services/nodemanager/arvnodeman/config.py b/services/nodemanager/arvnodeman/config.py
index d796cb7..07504e2 100644
--- a/services/nodemanager/arvnodeman/config.py
+++ b/services/nodemanager/arvnodeman/config.py
@@ -23,6 +23,12 @@ CLOUD_ERRORS = NETWORK_ERRORS + (cloud_types.LibcloudError,)
 actor_class = pykka.ThreadingActor
 
 class NodeManagerConfig(ConfigParser.SafeConfigParser):
+    """Node Manager Configuration class.
+
+    This is a standard Python ConfigParser, with additional helper methods to
+    create objects instantiated with configuration information.
+    """
+
     LOGGING_NONLEVELS = frozenset(['file'])
 
     def __init__(self, *args, **kwargs):
diff --git a/services/nodemanager/arvnodeman/daemon.py b/services/nodemanager/arvnodeman/daemon.py
index ab5b2aa..8219af6 100644
--- a/services/nodemanager/arvnodeman/daemon.py
+++ b/services/nodemanager/arvnodeman/daemon.py
@@ -12,6 +12,14 @@ from . import computenode as cnode
 from .config import actor_class
 
 class NodeManagerDaemonActor(actor_class):
+    """Node Manager daemon.
+
+    This actor subscribes to all information polls about cloud nodes,
+    Arvados nodes, and the job queue.  It creates a ComputeNodeActor
+    for every cloud node, subscribing them to poll updates
+    appropriately, and starts and stops cloud nodes based on job queue
+    demand.
+    """
     class PairingTracker(object):
         def __init__(self, key_func, paired_items, unpaired_items):
             self.key_func = key_func
@@ -173,6 +181,12 @@ class NodeManagerDaemonActor(actor_class):
             self._later.stop_booting_node()
 
     def _check_poll_freshness(orig_func):
+        """Decorator to inhibit a method when poll information is stale.
+
+        This decorator checks the timestamps of all the poll information the
+        daemon has received.  The decorated method is only called if none
+        of the timestamps are considered stale.
+        """
         @functools.wraps(orig_func)
         def wrapper(self, *args, **kwargs):
             now = time.time()
diff --git a/services/nodemanager/arvnodeman/jobqueue.py b/services/nodemanager/arvnodeman/jobqueue.py
index 88d4b53..bae4930 100644
--- a/services/nodemanager/arvnodeman/jobqueue.py
+++ b/services/nodemanager/arvnodeman/jobqueue.py
@@ -6,6 +6,15 @@ from . import clientactor
 from .config import ARVADOS_ERRORS
 
 class ServerCalculator(object):
+    """Generate cloud server wishlists from an Arvados job queue.
+
+    Instantiate this class with a list of cloud node sizes you're willing to
+    use, plus keyword overrides from the configuration.  Then you can pass
+    job queues to servers_for_queue.  It will return a list of node sizes
+    that would best satisfy the jobs, choosing the cheapest size that
+    satisfies each job, and ignoring jobs that can't be satisfied.
+    """
+
     class SizeWrapper(object):
         def __init__(self, real_size, **kwargs):
             self.real = real_size
@@ -61,6 +70,13 @@ class ServerCalculator(object):
 
 
 class JobQueueMonitorActor(clientactor.RemotePollLoopActor):
+    """Actor to generate server wishlists from the job queue.
+
+    This actor regularly polls Arvados' job queue, and uses the provided
+    ServerCalculator to turn that into a list of requested node sizes.  That
+    list is sent to subscribers on every poll.
+    """
+
     CLIENT_ERRORS = ARVADOS_ERRORS
     LOGGER_NAME = 'arvnodeman.jobqueue'
 
diff --git a/services/nodemanager/arvnodeman/nodelist.py b/services/nodemanager/arvnodeman/nodelist.py
index d0d2aac..7ddfb7c 100644
--- a/services/nodemanager/arvnodeman/nodelist.py
+++ b/services/nodemanager/arvnodeman/nodelist.py
@@ -2,12 +2,16 @@
 
 from __future__ import absolute_import, print_function
 
-import arvados.errors as arverror
-
 from . import clientactor
 from . import config
 
 class ArvadosNodeListMonitorActor(clientactor.RemotePollLoopActor):
+    """Actor to poll the Arvados node list.
+
+    This actor regularly polls the list of Arvados node records, and
+    sends it to subscribers.
+    """
+
     CLIENT_ERRORS = config.ARVADOS_ERRORS
     LOGGER_NAME = 'arvnodeman.arvados_nodes'
 
@@ -19,6 +23,12 @@ class ArvadosNodeListMonitorActor(clientactor.RemotePollLoopActor):
 
 
 class CloudNodeListMonitorActor(clientactor.RemotePollLoopActor):
+    """Actor to poll the cloud node list.
+
+    This actor regularly polls the cloud to get a list of running compute
+    nodes, and sends it to subscribers.
+    """
+
     CLIENT_ERRORS = config.CLOUD_ERRORS
     LOGGER_NAME = 'arvnodeman.cloud_nodes'
 
diff --git a/services/nodemanager/arvnodeman/timedcallback.py b/services/nodemanager/arvnodeman/timedcallback.py
index 51afbd2..a1df8ec 100644
--- a/services/nodemanager/arvnodeman/timedcallback.py
+++ b/services/nodemanager/arvnodeman/timedcallback.py
@@ -10,6 +10,12 @@ import pykka
 from .config import actor_class
 
 class TimedCallBackActor(actor_class):
+    """Send messages to other actors on a schedule.
+
+    Other actors can call the schedule() method to schedule delivery of a
+    message at a later time.  This actor runs the necessary event loop for
+    delivery.
+    """
     def __init__(self, max_sleep=1):
         super(TimedCallBackActor, self).__init__()
         self._proxy = self.actor_ref.proxy()

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list