[ARVADOS] created: ea47e67af0e7528f0bcb23f3b34019b308eaa68a

Git user git at public.curoverse.com
Thu Jun 1 17:37:32 EDT 2017


        at  ea47e67af0e7528f0bcb23f3b34019b308eaa68a (commit)


commit ea47e67af0e7528f0bcb23f3b34019b308eaa68a
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date:   Thu Jun 1 17:37:09 2017 -0400

    10312: Add the ability to substitute a fake libcloud driver while still running the full node manager, for integration testing.
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curoverse.com>
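
    The config.py change below resolves a dotted "module.Class" string from the
    new driver_class option at runtime.  A standalone sketch of the same idea
    (resolve_driver_class is just an illustrative helper, not part of the change;
    the example path comes from the test config added later in this commit):

        import importlib

        def resolve_driver_class(dotted_path):
            # "arvnodeman.fake_driver.FakeDriver" -> import arvnodeman.fake_driver,
            # then look up the FakeDriver attribute on that module.
            mod_path, _, cls_name = dotted_path.rpartition('.')
            return getattr(importlib.import_module(mod_path), cls_name)

        driver_class = resolve_driver_class('arvnodeman.fake_driver.FakeDriver')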

diff --git a/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py b/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py
index ec8b1d1..63dac3f 100644
--- a/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py
+++ b/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py
@@ -5,6 +5,7 @@ from __future__ import absolute_import, print_function
 import functools
 import logging
 import time
+import re
 
 import libcloud.common.types as cloud_types
 import pykka
diff --git a/services/nodemanager/arvnodeman/config.py b/services/nodemanager/arvnodeman/config.py
index 30e8995..ac0d0bd 100644
--- a/services/nodemanager/arvnodeman/config.py
+++ b/services/nodemanager/arvnodeman/config.py
@@ -103,12 +103,19 @@ class NodeManagerConfig(ConfigParser.SafeConfigParser):
     def new_cloud_client(self):
         module = importlib.import_module('arvnodeman.computenode.driver.' +
                                          self.get('Cloud', 'provider'))
+        driver_class = module.ComputeNodeDriver.DEFAULT_DRIVER
+        if self.get('Cloud', 'driver_class'):
+            d = self.get('Cloud', 'driver_class').split('.')
+            mod = '.'.join(d[:-1])
+            cls = d[-1]
+            driver_class = importlib.import_module(mod).__dict__[cls]
         auth_kwargs = self.get_section('Cloud Credentials')
         if 'timeout' in auth_kwargs:
             auth_kwargs['timeout'] = int(auth_kwargs['timeout'])
         return module.ComputeNodeDriver(auth_kwargs,
                                         self.get_section('Cloud List'),
-                                        self.get_section('Cloud Create'))
+                                        self.get_section('Cloud Create'),
+                                        driver_class=driver_class)
 
     def node_sizes(self, all_sizes):
         """Finds all acceptable NodeSizes for our installation.
diff --git a/services/nodemanager/arvnodeman/fake_driver.py b/services/nodemanager/arvnodeman/fake_driver.py
new file mode 100644
index 0000000..89a3dbb
--- /dev/null
+++ b/services/nodemanager/arvnodeman/fake_driver.py
@@ -0,0 +1,47 @@
+import re
+import urllib
+import ssl
+
+from libcloud.compute.base import NodeSize, Node, NodeDriver, NodeState
+
+all_nodes = []
+
+class FakeDriver(NodeDriver):
+    def __init__(self, *args, **kwargs):
+        self.name = "FakeDriver"
+
+    def list_sizes(self, **kwargs):
+        return [NodeSize("Standard_D3", "Standard_D3", 3500, 200, 0, 0, self),
+                NodeSize("Standard_D4", "Standard_D4", 7000, 400, 0, 0, self)]
+
+    def list_nodes(self, **kwargs):
+        return all_nodes
+
+    def create_node(self, name=None,
+                    size=None,
+                    image=None,
+                    auth=None,
+                    ex_storage_account=None,
+                    ex_customdata=None,
+                    ex_resource_group=None,
+                    ex_user_name=None,
+                    ex_tags=None,
+                    ex_network=None):
+        all_nodes.append(Node(name, name, NodeState.RUNNING, [], [], self, size=size, extra={"tags": ex_tags}))
+        ping_url = re.search(r"echo '(.*)' > /var/tmp/arv-node-data/arv-ping-url", ex_customdata).groups(1)[0] + "&instance_id=" + name
+        print(ping_url)
+        ctx = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
+        ctx.verify_mode = ssl.CERT_NONE
+        f = urllib.urlopen(ping_url, "", context=ctx)
+        print(f.read())
+        f.close()
+        return all_nodes[-1]
+
+    def destroy_node(self, cloud_node):
+        return None
+
+    def get_image(self, img):
+        pass
+
+    def ex_create_tags(self, cloud_node, tags):
+        pass
diff --git a/services/nodemanager/arvnodeman/jobqueue.py b/services/nodemanager/arvnodeman/jobqueue.py
index 0340918..c0de691 100644
--- a/services/nodemanager/arvnodeman/jobqueue.py
+++ b/services/nodemanager/arvnodeman/jobqueue.py
@@ -136,16 +136,19 @@ class JobQueueMonitorActor(clientactor.RemotePollLoopActor):
         squeue_out = subprocess.check_output(["squeue", "--state=PENDING", "--noheader", "--format=%c|%m|%d|%r|%j"])
         queuelist = []
         for out in squeue_out.splitlines():
-            cpu, ram, disk, reason, jobname = out.split("|", 4)
-            if ("ReqNodeNotAvail" in reason) or ("Resources" in reason):
-                queuelist.append({
-                    "uuid": jobname,
-                    "runtime_constraints": {
-                        "min_cores_per_node": cpu,
-                        "min_ram_mb_per_node": self.coerce_to_mb(ram),
-                        "min_scratch_mb_per_node": self.coerce_to_mb(disk)
-                    }
-                })
+            try:
+                cpu, ram, disk, reason, jobname = out.split("|", 4)
+                if ("ReqNodeNotAvail" in reason) or ("Resources" in reason):
+                    queuelist.append({
+                        "uuid": jobname,
+                        "runtime_constraints": {
+                            "min_cores_per_node": cpu,
+                            "min_ram_mb_per_node": self.coerce_to_mb(ram),
+                            "min_scratch_mb_per_node": self.coerce_to_mb(disk)
+                        }
+                    })
+            except ValueError:
+                pass
 
         queuelist.extend(self._client.jobs().queue().execute()['items'])
 
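The --format string above produces pipe-delimited lines like the one emitted by
the fake squeue stub added below; an illustrative parse using the stub's values:

    line = "1|100|100|ReqNodeNotAvail|34t0i-dz642-h42bg3hq4bdfpf9"
    cpu, ram, disk, reason, jobname = line.split("|", 4)
    # cpu='1', ram='100', disk='100', reason='ReqNodeNotAvail',
    # jobname='34t0i-dz642-h42bg3hq4bdfpf9'

Lines that do not split into five fields (for example, a blank line when the
queue is empty) now raise ValueError and are skipped instead of aborting the
poll.
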
diff --git a/services/nodemanager/arvnodeman/nodelist.py b/services/nodemanager/arvnodeman/nodelist.py
index 6bf1a8b..7bc3a5e 100644
--- a/services/nodemanager/arvnodeman/nodelist.py
+++ b/services/nodemanager/arvnodeman/nodelist.py
@@ -29,16 +29,19 @@ class ArvadosNodeListMonitorActor(clientactor.RemotePollLoopActor):
         sinfo_out = subprocess.check_output(["sinfo", "--noheader", "--format=%n %t"])
         nodestates = {}
         for out in sinfo_out.splitlines():
-            nodename, state = out.split(" ", 2)
-            if state in ('alloc', 'alloc*',
-                         'comp',  'comp*',
-                         'mix',   'mix*',
-                         'drng',  'drng*'):
-                nodestates[nodename] = 'busy'
-            elif state == 'idle':
-                nodestates[nodename] = 'idle'
-            else:
-                nodestates[nodename] = 'down'
+            try:
+                nodename, state = out.split(" ", 2)
+                if state in ('alloc', 'alloc*',
+                             'comp',  'comp*',
+                             'mix',   'mix*',
+                             'drng',  'drng*'):
+                    nodestates[nodename] = 'busy'
+                elif state == 'idle':
+                    nodestates[nodename] = 'idle'
+                else:
+                    nodestates[nodename] = 'down'
+            except ValueError:
+                pass
 
         for n in nodelist:
             if n["slot_number"] and n["hostname"] and n["hostname"] in nodestates:
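
The fake sinfo stub added below prints only an empty line, which is exactly the
case the new try/except guards against; a quick illustration:

    out = ""                        # what the fake sinfo emits
    try:
        nodename, state = out.split(" ", 2)
    except ValueError:
        pass                        # blank or malformed lines are now skipped
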
diff --git a/services/nodemanager/fake_slurm/sinfo b/services/nodemanager/fake_slurm/sinfo
new file mode 100755
index 0000000..e57d0d3
--- /dev/null
+++ b/services/nodemanager/fake_slurm/sinfo
@@ -0,0 +1,2 @@
+#!/bin/sh
+echo
\ No newline at end of file
diff --git a/services/nodemanager/fake_slurm/squeue b/services/nodemanager/fake_slurm/squeue
new file mode 100755
index 0000000..dd114a0
--- /dev/null
+++ b/services/nodemanager/fake_slurm/squeue
@@ -0,0 +1,2 @@
+#!/bin/sh
+echo '1|100|100|ReqNodeNotAvail|34t0i-dz642-h42bg3hq4bdfpf9'
diff --git a/services/nodemanager/tests/fake.azure.cfg b/services/nodemanager/tests/fake.azure.cfg
new file mode 100644
index 0000000..7f7629f
--- /dev/null
+++ b/services/nodemanager/tests/fake.azure.cfg
@@ -0,0 +1,187 @@
+# Azure configuration for Arvados Node Manager.
+# All times are in seconds unless specified otherwise.
+
+[Manage]
+# The management server responds to http://addr:port/status.json with
+# a snapshot of internal state.
+
+# Management server listening address (default 127.0.0.1)
+#address = 0.0.0.0
+
+# Management server port number (default -1, server is disabled)
+#port = 8989
+
+[Daemon]
+# The dispatcher can customize the start and stop procedure for
+# cloud nodes.  For example, the SLURM dispatcher drains nodes
+# through SLURM before shutting them down.
+#dispatcher = slurm
+
+# Node Manager will ensure that there are at least this many nodes running at
+# all times.  If node manager needs to start new idle nodes for the purpose of
+# satisfying min_nodes, it will use the cheapest node type.  However, depending
+# on usage patterns, it may also satisfy min_nodes by keeping alive some
+# more-expensive nodes.
+min_nodes = 0
+
+# Node Manager will not start any compute nodes when at least this
+# many are running.
+max_nodes = 8
+
+# Upper limit on rate of spending (in $/hr), will not boot additional nodes
+# if total price of already running nodes meets or exceeds this threshold.
+# default 0 means no limit.
+max_total_price = 0
+
+# Poll Azure nodes and Arvados for new information every N seconds.
+poll_time = 15
+
+# Polls have exponential backoff when services fail to respond.
+# This is the longest time to wait between polls.
+max_poll_time = 300
+
+# If Node Manager can't successfully poll a service for this long,
+# it will never start or stop compute nodes, on the assumption that its
+# information is too outdated.
+poll_stale_after = 600
+
+# If Node Manager boots a cloud node, and it does not pair with an Arvados
+# node before this long, assume that there was a cloud bootstrap failure and
+# shut it down.  Note that normal shutdown windows apply (see the Cloud
+# section), so this should be shorter than the first shutdown window value.
+boot_fail_after = 1800
+
+# "Node stale time" affects two related behaviors.
+# 1. If a compute node has been running for at least this long, but it
+# isn't paired with an Arvados node, do not shut it down, but leave it alone.
+# This prevents the node manager from shutting down a node that might
+# actually be doing work, but is having temporary trouble contacting the
+# API server.
+# 2. When the Node Manager starts a new compute node, it will try to reuse
+# an Arvados node that hasn't been updated for this long.
+node_stale_after = 14400
+
+# Scaling factor to be applied to nodes' available RAM size. Usually there's a
+# variable discrepancy between the advertised RAM value on cloud nodes and the
+# actual amount available.
+# If not set, this value will be set to 0.95
+node_mem_scaling = 0.95
+
+# File path for Certificate Authorities
+certs_file = /etc/ssl/certs/ca-certificates.crt
+
+[Logging]
+# Log file path
+#file = node-manager.log
+
+# Log level for most Node Manager messages.
+# Choose one of DEBUG, INFO, WARNING, ERROR, or CRITICAL.
+# WARNING lets you know when polling a service fails.
+# INFO additionally lets you know when a compute node is started or stopped.
+level = DEBUG
+
+# You can also set different log levels for specific libraries.
+# Pykka is the Node Manager's actor library.
+# Setting this to DEBUG will display tracebacks for uncaught
+# exceptions in the actors, but it's also very chatty.
+pykka = WARNING
+
+# Setting apiclient to INFO will log the URL of every Arvados API request.
+apiclient = WARNING
+
+[Arvados]
+host = 192.168.5.2:8000
+token = 2tnmn9ou33o3vk3bynzyzrc7aedhijo7ufa11j9kyv7509cygx
+timeout = 15
+
+# Accept an untrusted SSL certificate from the API server?
+insecure = yes
+
+[Cloud]
+provider = azure
+driver_class = arvnodeman.fake_driver.FakeDriver
+
+# Shutdown windows define periods of time when a node may and may not be shut
+# down.  These are windows in full minutes, separated by commas.  Counting from
+# the time the node is booted, the node WILL NOT shut down for N1 minutes; then
+# it MAY shut down for N2 minutes; then it WILL NOT shut down for N3 minutes;
+# and so on.  For example, "20, 999999" means the node may shut down between
+# the 20th and 999999th minutes of uptime.
+# Azure bills by the minute, so it makes sense to aggressively shut down idle
+# nodes.  Specify at least two windows.  You can add as many as you need beyond
+# that.
+shutdown_windows = 5, 999999
+
+[Cloud Credentials]
+# Use "azure account list" with the azure CLI to get these values.
+tenant_id = 00000000-0000-0000-0000-000000000000
+subscription_id = 00000000-0000-0000-0000-000000000000
+
+# The following directions are based on
+# https://azure.microsoft.com/en-us/documentation/articles/resource-group-authenticate-service-principal/
+#
+# azure config mode arm
+# azure ad app create --name "<Your Application Display Name>" --home-page "<https://YourApplicationHomePage>" --identifier-uris "<https://YouApplicationUri>" --password <Your_Password>
+# azure ad sp create "<Application_Id>"
+# azure role assignment create --objectId "<Object_Id>" -o Owner -c /subscriptions/{subscriptionId}/
+#
+# Use <Application_Id> for "key" and the <Your_Password> for "secret"
+#
+key = 00000000-0000-0000-0000-000000000000
+secret = PASSWORD
+timeout = 60
+region = East US
+
+[Cloud List]
+# The resource group in which the compute node virtual machines will be created
+# and listed.
+ex_resource_group = ArvadosResourceGroup
+
+[Cloud Create]
+# The image id, in the form "Publisher:Offer:SKU:Version"
+image = Canonical:UbuntuServer:14.04.3-LTS:14.04.201508050
+
+# Path to a local ssh key file that will be used to provision new nodes.
+ssh_key = /home/peter/.ssh/id_rsa.pub
+
+# The account name for the admin user that will be provisioned on new nodes.
+ex_user_name = arvadosuser
+
+# The Azure storage account that will be used to store the node OS disk images.
+ex_storage_account = arvadosstorage
+
+# The virtual network the VMs will be associated with.
+ex_network = ArvadosNetwork
+
+# Optional subnet of the virtual network.
+#ex_subnet = default
+
+# Node tags
+tag_arvados-class = dynamic-compute
+tag_cluster = zyxwv
+
+# the API server to ping
+ping_host = 192.168.5.2:8000
+
+# You can define any number of Size sections to list Azure sizes you're willing
+# to use.  The Node Manager should boot the cheapest size(s) that can run jobs
+# in the queue.  You must also provide the price per hour, as the Azure compute
+# driver currently does not report prices.
+#
+# See https://azure.microsoft.com/en-us/pricing/details/virtual-machines/
+# for a list of known machine types that may be used as a Size parameter.
+#
+# Each size section MUST define the number of cores available in this
+# size class (since libcloud does not provide any consistent API for exposing
+# this setting).
+# You may also want to define the amount of scratch space (expressed
+# in GB) for Crunch jobs.  You can also override Microsoft's provided
+# data fields by setting them here.
+
+[Size Standard_D3]
+cores = 4
+price = 0.56
+
+[Size Standard_D4]
+cores = 8
+price = 1.12
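
A quick way to sanity-check the Size sections in this file (an illustrative
snippet, using the same ConfigParser machinery the node manager's config class
is built on):

    import ConfigParser

    config = ConfigParser.SafeConfigParser()
    config.read("services/nodemanager/tests/fake.azure.cfg")
    for section in config.sections():
        if section.startswith("Size "):
            print("%s: %s cores at $%s/hr" % (
                section, config.get(section, "cores"), config.get(section, "price")))
    # Size Standard_D3: 4 cores at $0.56/hr
    # Size Standard_D4: 8 cores at $1.12/hr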

commit e4c7e6b368cf6d922db341580a2402a07c6cb079
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date:   Thu Jun 1 10:07:49 2017 -0400

    10312: Identify error messages that look like we are hitting a quota or account limit.  Set a soft node quota to stop trying to boot new nodes until the total node count goes down.  Probe the node quota upward when at the soft limit and able to boot nodes successfully.
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curoverse.com>

diff --git a/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py b/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py
index 9ee26e3..ec8b1d1 100644
--- a/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py
+++ b/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py
@@ -16,6 +16,8 @@ from ...clientactor import _notify_subscribers
 from ... import config
 from .transitions import transitions
 
+QuotaExceeded = "QuotaExceeded"
+
 class ComputeNodeStateChangeBase(config.actor_class, RetryMixin):
     """Base class for actors that change a compute node's state.
 
@@ -96,6 +98,7 @@ class ComputeNodeSetupActor(ComputeNodeStateChangeBase):
         self.cloud_size = cloud_size
         self.arvados_node = None
         self.cloud_node = None
+        self.error = None
         if arvados_node is None:
             self._later.create_arvados_node()
         else:
@@ -119,11 +122,23 @@ class ComputeNodeSetupActor(ComputeNodeStateChangeBase):
     def create_cloud_node(self):
         self._logger.info("Sending create_node request for node size %s.",
                           self.cloud_size.name)
-        self.cloud_node = self._cloud.create_node(self.cloud_size,
-                                                  self.arvados_node)
+        try:
+            self.cloud_node = self._cloud.create_node(self.cloud_size,
+                                                      self.arvados_node)
+        except Exception as e:
+            # The set of possible error codes / messages isn't documented for
+            # all clouds, so use a keyword heuristic to determine if the
+            # failure is likely due to a quota.
+            if re.search(r'(exceed|quota|limit)', e.message, re.I):
+                self.error = QuotaExceeded
+                self._logger.warning("Quota exceeded: %s", e)
+                self._finished()
+                return
+            else:
+                raise
 
         # The information included in the node size object we get from libcloud
-        # is inconsistent between cloud providers.  Replace libcloud NodeSize
+        # is inconsistent between cloud drivers.  Replace libcloud NodeSize
         # object with compatible CloudSizeWrapper object which merges the size
         # info reported from the cloud with size information from the
         # configuration file.
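
The keyword heuristic above can be checked in isolation; a small sketch (the
messages are made-up examples, not actual cloud error strings):

    import re

    def looks_like_quota_error(message):
        return bool(re.search(r'(exceed|quota|limit)', message, re.I))

    looks_like_quota_error("Operation would exceed quota of 20 cores")   # True
    looks_like_quota_error("Connection timed out")                       # False
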
diff --git a/services/nodemanager/arvnodeman/daemon.py b/services/nodemanager/arvnodeman/daemon.py
index 9bfee79..8f9207e 100644
--- a/services/nodemanager/arvnodeman/daemon.py
+++ b/services/nodemanager/arvnodeman/daemon.py
@@ -122,6 +122,7 @@ class NodeManagerDaemonActor(actor_class):
         self.min_cloud_size = self.server_calculator.cheapest_size()
         self.min_nodes = min_nodes
         self.max_nodes = max_nodes
+        self.node_quota = max_nodes
         self.max_total_price = max_total_price
         self.poll_stale_after = poll_stale_after
         self.boot_fail_after = boot_fail_after
@@ -298,16 +299,17 @@ class NodeManagerDaemonActor(actor_class):
     def _nodes_wanted(self, size):
         total_node_count = self._nodes_booting(None) + len(self.cloud_nodes)
         under_min = self.min_nodes - total_node_count
-        over_max = total_node_count - self.max_nodes
+        over_max = total_node_count - self.node_quota
         total_price = self._total_price()
 
         counts = self._state_counts(size)
 
         up_count = self._nodes_up(counts)
         busy_count = counts["busy"]
+        wishlist_count = self._size_wishlist(size)
 
         self._logger.info("%s: wishlist %i, up %i (booting %i, unpaired %i, idle %i, busy %i), down %i, shutdown %i", size.name,
-                          self._size_wishlist(size),
+                          wishlist_count,
                           up_count,
                           counts["booting"],
                           counts["unpaired"],
@@ -321,7 +323,7 @@ class NodeManagerDaemonActor(actor_class):
         elif under_min > 0 and size.id == self.min_cloud_size.id:
             return under_min
 
-        wanted = self._size_wishlist(size) - (up_count - busy_count)
+        wanted = wishlist_count - (up_count - busy_count)
         if wanted > 0 and self.max_total_price and ((total_price + (size.price*wanted)) > self.max_total_price):
             can_boot = int((self.max_total_price - total_price) / size.price)
             if can_boot == 0:
@@ -392,25 +394,46 @@ class NodeManagerDaemonActor(actor_class):
         if arvados_node is not None:
             self.arvados_nodes[arvados_node['uuid']].assignment_time = (
                 time.time())
-        new_setup.subscribe(self._later.node_up)
+        new_setup.subscribe(self._later.node_setup_finished)
         if nodes_wanted > 1:
             self._later.start_node(cloud_size)
 
     def _get_actor_attrs(self, actor, *attr_names):
         return pykka.get_all([getattr(actor, name) for name in attr_names])
 
-    def node_up(self, setup_proxy):
+    def node_setup_finished(self, setup_proxy):
         # Called when a SetupActor has completed.
-        cloud_node, arvados_node = self._get_actor_attrs(
-            setup_proxy, 'cloud_node', 'arvados_node')
+        cloud_node, arvados_node, error = self._get_actor_attrs(
+            setup_proxy, 'cloud_node', 'arvados_node', 'error')
         setup_proxy.stop()
 
-        # If cloud_node is None then the node create wasn't
-        # successful and so there isn't anything to do.
-        if cloud_node is not None:
+        total_node_count = self._nodes_booting(None) + len(self.cloud_nodes)
+        if cloud_node is None:
+            # If cloud_node is None then the node create wasn't successful.
+            if error == dispatch.QuotaExceeded:
+                # We've hit a quota limit, so adjust node_quota to stop trying to
+                # boot new nodes until the node count goes down.
+                self.node_quota = min(total_node_count-1, self.max_nodes)
+                self._logger.warning("Setting node quota to %s", self.node_quota)
+        else:
             # Node creation succeeded.  Update cloud node list.
             cloud_node._nodemanager_recently_booted = True
             self._register_cloud_node(cloud_node)
+
+            # Different quota policies may be in force depending on the cloud
+            # provider, account limits, and the specific mix of node sizes
+            # that are already created.  If we are right at the quota limit,
+            # we want to probe to see if the last quota still applies or if we
+            # are allowed to create more nodes.
+            #
+            # For example, if the quota is actually based on core count, the
+            # quota might be 20 single-core machines or 10 dual-core machines.
+            # If we previously set node_quota to 10 dual-core machines, but are
+            # now booting single core machines (actual quota 20), we want to
+            # allow the quota to expand so we don't get stuck at 10 machines
+            # forever.
+            if total_node_count == self.node_quota and self.node_quota < self.max_nodes:
+                self.node_quota += 1
         del self.booting[setup_proxy.actor_ref.actor_urn]
         del self.sizes_booting[setup_proxy.actor_ref.actor_urn]
 

-----------------------------------------------------------------------


hooks/post-receive
-- 