[ARVADOS] created: 8f28c5303bce12a562eb1e0ee861c7168d1c3f09

git at public.curoverse.com git at public.curoverse.com
Mon Feb 16 11:15:09 EST 2015


        at  8f28c5303bce12a562eb1e0ee861c7168d1c3f09 (commit)


commit 8f28c5303bce12a562eb1e0ee861c7168d1c3f09
Author: Brett Smith <brett at curoverse.com>
Date:   Mon Feb 16 11:06:41 2015 -0500

    4138: Prepare Node Manager GCE driver for production.
    
    * Set node metadata in more appropriate places.
    * Bridge more differences between GCE and EC2, like the fact that
      sizes are listed for each location they're available.
    * Use more infrastructure from BaseComputeNodeDriver to reduce code
      duplication.
    * Load as many objects as possible at initialization time, to reduce
      API overhead of creating nodes.

diff --git a/services/nodemanager/arvnodeman/computenode/driver/gce.py b/services/nodemanager/arvnodeman/computenode/driver/gce.py
index a8edc43..6921839 100644
--- a/services/nodemanager/arvnodeman/computenode/driver/gce.py
+++ b/services/nodemanager/arvnodeman/computenode/driver/gce.py
@@ -6,10 +6,8 @@ import functools
 import json
 import time
 
-import libcloud.compute.base as cloud_base
 import libcloud.compute.providers as cloud_provider
 import libcloud.compute.types as cloud_types
-from libcloud.compute.drivers import gce
 
 from . import BaseComputeNodeDriver
 from .. import arvados_node_fqdn
@@ -21,73 +19,94 @@ class ComputeNodeDriver(BaseComputeNodeDriver):
     """
     DEFAULT_DRIVER = cloud_provider.get_driver(cloud_types.Provider.GCE)
     SEARCH_CACHE = {}
-    ssh_key = None
-    service_accounts = None
+    node_start_times = {}
 
     def __init__(self, auth_kwargs, list_kwargs, create_kwargs,
                  driver_class=DEFAULT_DRIVER):
+        list_kwargs = list_kwargs.copy()
+        tags_str = list_kwargs.pop('tags', '')
+        if not tags_str.strip():
+            self.node_tags = frozenset()
+        else:
+            self.node_tags = frozenset(t.strip() for t in tags_str.split(','))
+        create_kwargs = create_kwargs.copy()
+        create_kwargs.setdefault('external_ip', None)
+        create_kwargs.setdefault('ex_metadata', {})
         super(ComputeNodeDriver, self).__init__(
             auth_kwargs, list_kwargs, create_kwargs,
             driver_class)
 
-        for key in self.create_kwargs.keys():
-            init_method = getattr(self, '_init_' + key, None)
-            if init_method is not None:
-                new_pair = init_method(self.create_kwargs.pop(key))
-                if new_pair is not None:
-                    self.create_kwargs[new_pair[0]] = new_pair[1]
+    @staticmethod
+    def _name_key(cloud_object):
+        return cloud_object.name
 
     def _init_image(self, image_name):
-        return 'image', image_name
+        return 'image', self.search_for(
+            image_name, 'list_images', self._name_key)
 
-    def _init_location(self, location):
-        return 'location', location
+    def _init_location(self, location_name):
+        return 'location', self.search_for(
+            location_name, 'list_locations', self._name_key)
 
-    def _init_ping_host(self, ping_host):
-        self.ping_host = ping_host
+    def _init_network(self, network_name):
+        return 'ex_network', self.search_for(
+            network_name, 'ex_list_networks', self._name_key)
 
     def _init_service_accounts(self, service_accounts_str):
-        self.service_accounts = json.loads(service_accounts_str)
+        return 'ex_service_accounts', json.loads(service_accounts_str)
 
     def _init_ssh_key(self, filename):
+        # SSH keys are delivered to GCE nodes via ex_metadata: see
+        # http://stackoverflow.com/questions/26752617/creating-sshkeys-for-gce-instance-using-libcloud
         with open(filename) as ssh_file:
-            self.ssh_key = ssh_file.read().strip()
+            self.create_kwargs['ex_metadata']['sshKeys'] = (
+                'root:' + ssh_file.read().strip())
 
-    def arvados_create_kwargs(self, arvados_node):
-        result = {'ex_metadata': self.list_kwargs.copy() }
-        ping_secret = arvados_node['info'].get('ping_secret')
-        if ping_secret is not None:
-            ping_url = ('https://{}/arvados/v1/nodes/{}/ping?ping_secret={}'.
-                        format(self.ping_host, arvados_node['uuid'],
-                               ping_secret))
-            result['ex_metadata']['pingUrl'] = ping_url
-        if self.service_accounts is not None:
-            result['ex_service_accounts'] = self.service_accounts
+    def list_sizes(self):
+        return super(ComputeNodeDriver, self).list_sizes(
+            self.create_kwargs['location'])
 
-        # SSH keys are delivered to GCE nodes via ex_metadata: see
-        # http://stackoverflow.com/questions/26752617/creating-sshkeys-for-gce-instance-using-libcloud
-        if self.ssh_key is not None:
-            result['ex_metadata']['sshKeys'] = 'root:{}'.format(self.ssh_key)
+    def arvados_create_kwargs(self, arvados_node):
+        cluster_id, _, node_id = arvados_node['uuid'].split('-')
+        result = {'name': 'compute-{}-{}'.format(node_id, cluster_id),
+                  'ex_metadata': self.create_kwargs['ex_metadata'].copy(),
+                  'ex_tags': list(self.node_tags)}
+        result['ex_metadata']['hostname'] = arvados_node_fqdn(arvados_node)
+        result['ex_metadata']['user-data'] = self._make_ping_url(arvados_node)
         return result
 
-    def create_node(self, size, arvados_node):
-        kwargs = self.create_kwargs.copy()
-        kwargs.update(self.arvados_create_kwargs(arvados_node))
-        kwargs.setdefault('name', 'arv-{}'.format(arvados_node['uuid']))
-        kwargs['size'] = size
-        return self.real.create_node(**kwargs)
-
-    # When an Arvados node is synced with a GCE node, the Arvados hostname
-    # is forwarded in a GCE tag 'hostname-foo'.
-    # TODO(twp): implement an ex_set_metadata method (at least until
-    # libcloud supports the API setMetadata method) so we can pass this
-    # sensibly in the node metadata.
+    def list_nodes(self):
+        # The GCE libcloud driver only supports filtering node lists by zone.
+        # Do our own filtering based on tag list.
+        return [node for node in
+                super(ComputeNodeDriver, self).list_nodes()
+                if self.node_tags.issubset(node.extra.get('tags', []))]
+
     def sync_node(self, cloud_node, arvados_node):
-        tags = ['hostname-{}'.format(arvados_node_fqdn(arvados_node))]
-        self.real.ex_set_node_tags(cloud_node, tags)
+        hostname = arvados_node_fqdn(arvados_node)
+        metadata_req = cloud_node.extra['metadata'].copy()
+        for data_dict in metadata_req.setdefault('items', []):
+            if data_dict['key'] == 'hostname':
+                data_dict['value'] = hostname
+                break
+        else:
+            metadata_req['items'].append({'key': 'hostname', 'value': hostname})
+        response = self.real.connection.async_request(
+            '/zones/{}/instances/{}/setMetadata'.format(
+                cloud_node.extra['zone'].name, cloud_node.name),
+            method='POST', data=metadata_req)
+        if not response.success():
+            raise Exception("setMetadata error: {}".format(response.error))
+
+    def destroy_node(self, node):
+        success = super(ComputeNodeDriver, self).destroy_node(node)
+        if success:
+            self.node_start_times.pop(node.id, None)
+        return success
 
     @classmethod
     def node_start_time(cls, node):
-        time_str = node.extra['launch_time'].split('.', 2)[0] + 'UTC'
-        return time.mktime(time.strptime(
-                time_str,'%Y-%m-%dT%H:%M:%S%Z')) - time.timezone
+        # Launch time isn't available on GCE node records.  Thankfully that's
+        # not too big a deal because they have by-minute billing.
+        # Fake an answer based on the first time we see it.
+        return cls.node_start_times.setdefault(node.id, time.time())
diff --git a/services/nodemanager/doc/gce.example.cfg b/services/nodemanager/doc/gce.example.cfg
index 6369383..3789cea 100644
--- a/services/nodemanager/doc/gce.example.cfg
+++ b/services/nodemanager/doc/gce.example.cfg
@@ -65,18 +65,20 @@ insecure = no
 [Cloud]
 provider = gce
 
-# XXX(twp): figure out good default settings for GCE
-# It's usually most cost-effective to shut down compute nodes during narrow
-# windows of time.  For example, EC2 bills each node by the hour, so the best
-# time to shut down a node is right before a new hour of uptime starts.
-# Shutdown windows define these periods of time.  These are windows in
-# full minutes, separated by commas.  Counting from the time the node is
-# booted, the node WILL NOT shut down for N1 minutes; then it MAY shut down
-# for N2 minutes; then it WILL NOT shut down for N3 minutes; and so on.
-# For example, "54, 5, 1" means the node may shut down from the 54th to the
-# 59th minute of each hour of uptime.
-# Specify at least two windows.  You can add as many as you need beyond that.
-shutdown_windows = 54, 5, 1
+# Shutdown windows define periods of time when a node may and may not
+# be shut down.  These are windows in full minutes, separated by
+# commas.  Counting from the time the node is booted, the node WILL
+# NOT shut down for N1 minutes; then it MAY shut down for N2 minutes;
+# then it WILL NOT shut down for N3 minutes; and so on.  For example,
+# "54, 5, 1" means the node may shut down from the 54th to the 59th
+# minute of each hour of uptime.
+# GCE bills by the minute, and does not provide information about
+# when a node booted.  Because of that, Node Manager will pretend a
+# node booted when it's first seen by the process.  To help
+# counterbalance that, these shutdown settings are very aggressive.
+# You may want to adjust this if you want more continuity of service
+# from a single node.
+shutdown_windows = 20, 999999
 
 [Cloud Credentials]
 user_id = client_email_address at developer.gserviceaccount.com
@@ -93,7 +95,10 @@ timeout = 60
 # credential_file =
 
 [Cloud List]
-# Keywords here will be used to populate the metadata field for a GCE node.
+# A comma-separated list of tags that must be applied to a node for it to
+# be considered a compute node.
+# The driver will automatically apply these tags to nodes it creates.
+tags = zyxwv, compute
 
 [Cloud Create]
 # New compute nodes will send pings to Arvados at this host.
@@ -108,6 +113,7 @@ ping_host = hostname:port
 # * Valid location (zone) names: https://cloud.google.com/compute/docs/zones
 image = debian-7
 location = us-central1-a
+# network = your_network_name
 
 # JSON string of service account authorizations for this cluster.
 # See http://libcloud.readthedocs.org/en/latest/compute/drivers/gce.html#specifying-service-account-scopes
diff --git a/services/nodemanager/tests/test_computenode_driver_gce.py b/services/nodemanager/tests/test_computenode_driver_gce.py
index d4b73f7..26f8ca7 100644
--- a/services/nodemanager/tests/test_computenode_driver_gce.py
+++ b/services/nodemanager/tests/test_computenode_driver_gce.py
@@ -2,6 +2,7 @@
 
 from __future__ import absolute_import, print_function
 
+import json
 import time
 import unittest
 
@@ -19,80 +20,145 @@ class GCEComputeNodeDriverTestCase(testutil.DriverTestMixin, unittest.TestCase):
         self.assertTrue(self.driver_mock.called)
         self.assertEqual(kwargs, self.driver_mock.call_args[1])
 
-    def test_create_location_loaded_at_initialization(self):
-        kwargs = {'location': 'testregion'}
-        driver = self.new_driver(create_kwargs=kwargs)
-        self.assertTrue(self.driver_mock().list_locations)
-
-    def test_create_image_loaded_at_initialization(self):
-        kwargs = {'image': 'testimage'}
-        driver = self.new_driver(create_kwargs=kwargs)
-        self.assertTrue(self.driver_mock().list_images)
+    def test_create_image_loaded_at_initialization_by_name(self):
+        image_mocks = [testutil.cloud_object_mock(c) for c in 'abc']
+        list_method = self.driver_mock().list_images
+        list_method.return_value = image_mocks
+        driver = self.new_driver(create_kwargs={'image': 'B'})
+        self.assertEqual(1, list_method.call_count)
+
+    def test_list_sizes_requires_location_match(self):
+        locations = [testutil.cloud_object_mock(name)
+                     for name in ['there', 'here', 'other']]
+        self.driver_mock().list_locations.return_value = locations
+        driver = self.new_driver(create_kwargs={'location': 'HERE'})
+        driver.list_sizes()
+        self.assertIs(locations[1],
+                      self.driver_mock().list_sizes.call_args[0][0])
 
     def test_create_includes_ping_secret(self):
         arv_node = testutil.arvados_node_mock(info={'ping_secret': 'ssshh'})
         driver = self.new_driver()
         driver.create_node(testutil.MockSize(1), arv_node)
-        create_method = self.driver_mock().create_node
-        self.assertTrue(create_method.called)
-        create_metadata = create_method.call_args[1].get('ex_metadata')
-        self.assertIsInstance(create_metadata, dict)
-        self.assertIn('ping_secret=ssshh',
-                      create_metadata.get('pingUrl', 'arg missing'))
-
-    def test_generate_metadata_for_new_arvados_node(self):
-        arv_node = testutil.arvados_node_mock(8)
-        driver = self.new_driver(list_kwargs={'list': 'test'})
-        self.assertEqual({'ex_metadata': {'list': 'test'}},
-                         driver.arvados_create_kwargs(arv_node))
+        metadata = self.driver_mock().create_node.call_args[1]['ex_metadata']
+        self.assertIn('ping_secret=ssshh', metadata.get('user-data'))
 
-    def test_tags_set_default_hostname_from_new_arvados_node(self):
-        arv_node = testutil.arvados_node_mock(hostname=None)
-        cloud_node = testutil.cloud_node_mock(1)
+    def test_create_sets_default_hostname(self):
         driver = self.new_driver()
-        driver.sync_node(cloud_node, arv_node)
-        tag_mock = self.driver_mock().ex_set_node_tags
-        self.assertTrue(tag_mock.called)
-        self.assertEqual(['hostname-dynamic.compute.zzzzz.arvadosapi.com'],
-                         tag_mock.call_args[0][1])
-
-    def test_sync_node_sets_static_hostname(self):
+        driver.create_node(testutil.MockSize(1),
+                           testutil.arvados_node_mock(254, hostname=None))
+        create_kwargs = self.driver_mock().create_node.call_args[1]
+        self.assertEqual('compute-0000000000000fe-zzzzz',
+                         create_kwargs.get('name'))
+        self.assertEqual('dynamic.compute.zzzzz.arvadosapi.com',
+                         create_kwargs.get('ex_metadata', {}).get('hostname'))
+
+    def test_create_tags_from_list_tags(self):
+        driver = self.new_driver(list_kwargs={'tags': 'testA, testB'})
+        driver.create_node(testutil.MockSize(1), testutil.arvados_node_mock())
+        self.assertEqual(['testA', 'testB'],
+                         self.driver_mock().create_node.call_args[1]['ex_tags'])
+
+    def test_list_nodes_requires_tags_match(self):
+        # A node matches if our list tags are a subset of the node's tags.
+        # Test behavior with no tags, no match, partial matches, different
+        # order, and strict supersets.
+        cloud_mocks = [
+            testutil.cloud_node_mock(node_num, tags=tag_set)
+            for node_num, tag_set in enumerate(
+                [[], ['bad'], ['good'], ['great'], ['great', 'ok'],
+                 ['great', 'good'], ['good', 'fantastic', 'great']])]
+        cloud_mocks.append(testutil.cloud_node_mock())
+        self.driver_mock().list_nodes.return_value = cloud_mocks
+        driver = self.new_driver(list_kwargs={'tags': 'good, great'})
+        self.assertItemsEqual(['5', '6'], [n.id for n in driver.list_nodes()])
+
+    def check_sync_node_updates_hostname_tag(self, plain_metadata):
+        start_metadata = {
+            'kind': 'compute#metadata',
+            'fingerprint': 'testprint',
+            'items': [{'key': key, 'value': plain_metadata[key]}
+                      for key in plain_metadata],
+            }
         arv_node = testutil.arvados_node_mock(1)
-        cloud_node = testutil.cloud_node_mock(2)
+        cloud_node = testutil.cloud_node_mock(
+            2, metadata=start_metadata.copy(),
+            zone=testutil.cloud_object_mock('testzone'))
         driver = self.new_driver()
         driver.sync_node(cloud_node, arv_node)
-        tag_mock = self.driver_mock().ex_set_node_tags
-        self.assertTrue(tag_mock.called)
-        self.assertEqual(['hostname-compute1.zzzzz.arvadosapi.com'],
-                         tag_mock.call_args[0][1])
-
-    def test_node_create_time(self):
-        refsecs = int(time.time())
-        reftuple = time.gmtime(refsecs)
-        node = testutil.cloud_node_mock()
-        node.extra = {'launch_time': time.strftime('%Y-%m-%dT%H:%M:%S.000Z',
-                                                   reftuple)}
-        self.assertEqual(refsecs, gce.ComputeNodeDriver.node_start_time(node))
-
-    def test_generate_metadata_for_new_arvados_node(self):
+        args, kwargs = self.driver_mock().connection.async_request.call_args
+        self.assertEqual('/zones/TESTZONE/instances/2/setMetadata', args[0])
+        for key in ['kind', 'fingerprint']:
+            self.assertEqual(start_metadata[key], kwargs['data'][key])
+        plain_metadata['hostname'] = 'compute1.zzzzz.arvadosapi.com'
+        self.assertEqual(
+            plain_metadata,
+            {item['key']: item['value'] for item in kwargs['data']['items']})
+
+    def test_sync_node_updates_hostname_tag(self):
+        self.check_sync_node_updates_hostname_tag(
+            {'testkey': 'testvalue', 'hostname': 'startvalue'})
+
+    def test_sync_node_adds_hostname_tag(self):
+        self.check_sync_node_updates_hostname_tag({'testkey': 'testval'})
+
+    def test_sync_node_raises_exception_on_failure(self):
         arv_node = testutil.arvados_node_mock(8)
-        driver = self.new_driver(list_kwargs={'list': 'test'})
-        self.assertEqual({'ex_metadata': {'list': 'test'}},
-                         driver.arvados_create_kwargs(arv_node))
+        cloud_node = testutil.cloud_node_mock(
+            9, metadata={}, zone=testutil.cloud_object_mock('failzone'))
+        mock_response = self.driver_mock().connection.async_request()
+        mock_response.success.return_value = False
+        mock_response.error = 'sync error test'
+        driver = self.new_driver()
+        with self.assertRaises(Exception) as err_check:
+            driver.sync_node(cloud_node, arv_node)
+        self.assertIs(err_check.exception.__class__, Exception)
+        self.assertIn('sync error test', str(err_check.exception))
+
+    def test_node_create_time_static(self):
+        node = testutil.cloud_node_mock()
+        with mock.patch('time.time', return_value=1) as time_mock:
+            result1 = gce.ComputeNodeDriver.node_start_time(node)
+            time_mock.return_value += 1
+            self.assertEqual(result1,
+                             gce.ComputeNodeDriver.node_start_time(node))
+
+    def test_node_create_time_varies_by_node(self):
+        node1 = testutil.cloud_node_mock(1)
+        node2 = testutil.cloud_node_mock(2)
+        with mock.patch('time.time', return_value=1) as time_mock:
+            start_time1 = gce.ComputeNodeDriver.node_start_time(node1)
+            time_mock.return_value += 1
+            self.assertNotEqual(start_time1,
+                                gce.ComputeNodeDriver.node_start_time(node2))
+
+    def test_node_create_time_not_remembered_after_delete(self):
+        node = testutil.cloud_node_mock()
+        driver = self.new_driver()
+        with mock.patch('time.time', return_value=1) as time_mock:
+            result1 = gce.ComputeNodeDriver.node_start_time(node)
+            driver.destroy_node(node)
+            time_mock.return_value += 1
+            self.assertNotEqual(result1,
+                                gce.ComputeNodeDriver.node_start_time(node))
 
     def test_deliver_ssh_key_in_metadata(self):
         test_ssh_key = 'ssh-rsa-foo'
         arv_node = testutil.arvados_node_mock(1)
-        with mock.patch('__builtin__.open', mock.mock_open(read_data=test_ssh_key)) as mock_file:
+        with mock.patch('__builtin__.open',
+                        mock.mock_open(read_data=test_ssh_key)) as mock_file:
             driver = self.new_driver(create_kwargs={'ssh_key': 'ssh-key-file'})
         mock_file.assert_called_once_with('ssh-key-file')
-        self.assertEqual({'ex_metadata': {'sshKeys': 'root:ssh-rsa-foo'}},
-                         driver.arvados_create_kwargs(arv_node))
+        driver.create_node(testutil.MockSize(1), arv_node)
+        metadata = self.driver_mock().create_node.call_args[1]['ex_metadata']
+        self.assertEqual('root:ssh-rsa-foo', metadata.get('sshKeys'))
 
     def test_create_driver_with_service_accounts(self):
-        srv_acct_config = { 'service_accounts': '{ "email": "foo at bar", "scopes":["storage-full"]}' }
+        service_accounts = {'email': 'foo at bar', 'scopes': ['storage-full']}
+        srv_acct_config = {'service_accounts': json.dumps(service_accounts)}
         arv_node = testutil.arvados_node_mock(1)
         driver = self.new_driver(create_kwargs=srv_acct_config)
-        create_kwargs = driver.arvados_create_kwargs(arv_node)
-        self.assertEqual({u'email': u'foo at bar', u'scopes': [u'storage-full']},
-                         create_kwargs['ex_service_accounts'])
+        driver.create_node(testutil.MockSize(1), arv_node)
+        self.assertEqual(
+            service_accounts,
+            self.driver_mock().create_node.call_args[1]['ex_service_accounts'])
diff --git a/services/nodemanager/tests/testutil.py b/services/nodemanager/tests/testutil.py
index 633aac6..d51aab2 100644
--- a/services/nodemanager/tests/testutil.py
+++ b/services/nodemanager/tests/testutil.py
@@ -39,7 +39,7 @@ def cloud_object_mock(name_id):
     cloud_object.name = cloud_object.id.upper()
     return cloud_object
 
-def cloud_node_mock(node_num=99):
+def cloud_node_mock(node_num=99, **extra):
     node = mock.NonCallableMagicMock(
         ['id', 'name', 'state', 'public_ips', 'private_ips', 'driver', 'size',
          'image', 'extra'],
@@ -48,6 +48,7 @@ def cloud_node_mock(node_num=99):
     node.name = node.id
     node.public_ips = []
     node.private_ips = [ip_address_mock(node_num)]
+    node.extra = extra
     return node
 
 def ip_address_mock(last_octet):

commit 82bb5282db6a64cc6523c5bdbc31d4b8247ed5d9
Author: Brett Smith <brett at curoverse.com>
Date:   Fri Feb 13 15:24:04 2015 -0500

    4138: Revamp Node Manager driver proxying in BaseComputeNodeDriver.
    
    Accessing attributes through a super() proxy does not invoke
    __getattr__ on base classes, so the old implementation made it
    impossible for subclasses to be agnostic about whether a method was
    implemented in BaseComputeNodeDriver or the real libcloud driver.
    This version makes that possible.  It's also a little nicer because
    now the class will report these method names to dir(), hasattr(), etc.

diff --git a/services/nodemanager/arvnodeman/computenode/driver/__init__.py b/services/nodemanager/arvnodeman/computenode/driver/__init__.py
index c06bba3..369bb8f 100644
--- a/services/nodemanager/arvnodeman/computenode/driver/__init__.py
+++ b/services/nodemanager/arvnodeman/computenode/driver/__init__.py
@@ -3,6 +3,7 @@
 from __future__ import absolute_import, print_function
 
 import libcloud.common.types as cloud_types
+from libcloud.compute.base import NodeDriver
 
 from ...config import NETWORK_ERRORS
 
@@ -35,14 +36,6 @@ class BaseComputeNodeDriver(object):
     def _init_ping_host(self, ping_host):
         self.ping_host = ping_host
 
-    def __getattr__(self, name):
-        # Proxy non-extension methods to the real driver.
-        if (not name.startswith('_') and not name.startswith('ex_')
-              and hasattr(self.real, name)):
-            return getattr(self.real, name)
-        else:
-            return super(BaseComputeNodeDriver, self).__getattr__(name)
-
     def search_for(self, term, list_method, key=lambda item: item.id):
         cache_key = (list_method, term)
         if cache_key not in self.SEARCH_CACHE:
@@ -96,3 +89,16 @@ class BaseComputeNodeDriver(object):
         # exactly an Exception, or a better-known higher-level exception.
         return (isinstance(exception, cls.CLOUD_ERRORS) or
                 getattr(exception, '__class__', None) is Exception)
+
+    # Now that we've defined all our own methods, delegate generic, public
+    # attributes of libcloud drivers that we haven't defined ourselves.
+    def _delegate_to_real(attr_name):
+        return property(
+            lambda self: getattr(self.real, attr_name),
+            lambda self, value: setattr(self.real, attr_name, value),
+            doc=getattr(getattr(NodeDriver, attr_name), '__doc__', None))
+
+    _locals = locals()
+    for _attr_name in dir(NodeDriver):
+        if (not _attr_name.startswith('_')) and (_attr_name not in _locals):
+            _locals[_attr_name] = _delegate_to_real(_attr_name)

commit 1e0fa5965513b3396296568d45c5b97b27a96dd8
Author: Brett Smith <brett at curoverse.com>
Date:   Thu Feb 12 17:22:12 2015 -0500

    4138: Refactor out Node Manager DriverTestMixin.

diff --git a/services/nodemanager/tests/test_computenode_driver_ec2.py b/services/nodemanager/tests/test_computenode_driver_ec2.py
index 10a41a8..8e52824 100644
--- a/services/nodemanager/tests/test_computenode_driver_ec2.py
+++ b/services/nodemanager/tests/test_computenode_driver_ec2.py
@@ -12,15 +12,8 @@ import mock
 import arvnodeman.computenode.driver.ec2 as ec2
 from . import testutil
 
-class EC2ComputeNodeDriverTestCase(unittest.TestCase):
-    def setUp(self):
-        self.driver_mock = mock.MagicMock(name='driver_mock')
-
-    def new_driver(self, auth_kwargs={}, list_kwargs={}, create_kwargs={}):
-        create_kwargs.setdefault('ping_host', '100::')
-        return ec2.ComputeNodeDriver(
-            auth_kwargs, list_kwargs, create_kwargs,
-            driver_class=self.driver_mock)
+class EC2ComputeNodeDriverTestCase(testutil.DriverTestMixin, unittest.TestCase):
+    TEST_CLASS = ec2.ComputeNodeDriver
 
     def test_driver_instantiation(self):
         kwargs = {'key': 'testkey'}
diff --git a/services/nodemanager/tests/test_computenode_driver_gce.py b/services/nodemanager/tests/test_computenode_driver_gce.py
index 0e10e2f..d4b73f7 100644
--- a/services/nodemanager/tests/test_computenode_driver_gce.py
+++ b/services/nodemanager/tests/test_computenode_driver_gce.py
@@ -10,15 +10,8 @@ import mock
 import arvnodeman.computenode.driver.gce as gce
 from . import testutil
 
-class GCEComputeNodeDriverTestCase(unittest.TestCase):
-    def setUp(self):
-        self.driver_mock = mock.MagicMock(name='driver_mock')
-
-    def new_driver(self, auth_kwargs={}, list_kwargs={}, create_kwargs={}):
-        create_kwargs.setdefault('ping_host', '100::')
-        return gce.ComputeNodeDriver(
-            auth_kwargs, list_kwargs, create_kwargs,
-            driver_class=self.driver_mock)
+class GCEComputeNodeDriverTestCase(testutil.DriverTestMixin, unittest.TestCase):
+    TEST_CLASS = gce.ComputeNodeDriver
 
     def test_driver_instantiation(self):
         kwargs = {'user_id': 'foo'}
diff --git a/services/nodemanager/tests/testutil.py b/services/nodemanager/tests/testutil.py
index c5b6539..633aac6 100644
--- a/services/nodemanager/tests/testutil.py
+++ b/services/nodemanager/tests/testutil.py
@@ -115,6 +115,21 @@ class ActorTestMixin(object):
                 return result
 
 
+class DriverTestMixin(object):
+    def setUp(self):
+        self.driver_mock = mock.MagicMock(name='driver_mock')
+        super(DriverTestMixin, self).setUp()
+
+    def new_driver(self, auth_kwargs={}, list_kwargs={}, create_kwargs={}):
+        create_kwargs.setdefault('ping_host', '100::')
+        return self.TEST_CLASS(
+            auth_kwargs, list_kwargs, create_kwargs,
+            driver_class=self.driver_mock)
+
+    def driver_method_args(self, method_name):
+        return getattr(self.driver_mock(), method_name).call_args
+
+
 class RemotePollLoopActorTestMixin(ActorTestMixin):
     def build_monitor(self, *args, **kwargs):
         self.timer = mock.MagicMock(name='timer_mock')

commit 58be160ab5549fc03c7c09b90c8fad7ff1234096
Author: Brett Smith <brett at curoverse.com>
Date:   Thu Feb 12 15:53:16 2015 -0500

    4138: Fix noop Node Manager EC2 driver tests.
    
    The previous tests simply instantiated the driver, then checked that a
    mock method was truthy (which it will always be).  This makes the test
    work as intended.

diff --git a/services/nodemanager/tests/test_computenode_driver_ec2.py b/services/nodemanager/tests/test_computenode_driver_ec2.py
index fae63a5..10a41a8 100644
--- a/services/nodemanager/tests/test_computenode_driver_ec2.py
+++ b/services/nodemanager/tests/test_computenode_driver_ec2.py
@@ -37,15 +37,12 @@ class EC2ComputeNodeDriverTestCase(unittest.TestCase):
         self.assertEqual({'tag:test': 'true'},
                           list_method.call_args[1].get('ex_filters'))
 
-    def test_create_location_loaded_at_initialization(self):
-        kwargs = {'location': 'testregion'}
-        driver = self.new_driver(create_kwargs=kwargs)
-        self.assertTrue(self.driver_mock().list_locations)
-
     def test_create_image_loaded_at_initialization(self):
-        kwargs = {'image': 'testimage'}
-        driver = self.new_driver(create_kwargs=kwargs)
-        self.assertTrue(self.driver_mock().list_images)
+        list_method = self.driver_mock().list_images
+        list_method.return_value = [testutil.cloud_object_mock(c)
+                                    for c in 'abc']
+        driver = self.new_driver(create_kwargs={'image_id': 'b'})
+        self.assertEqual(1, list_method.call_count)
 
     def test_create_includes_ping_secret(self):
         arv_node = testutil.arvados_node_mock(info={'ping_secret': 'ssshh'})
diff --git a/services/nodemanager/tests/testutil.py b/services/nodemanager/tests/testutil.py
index ff525f0..c5b6539 100644
--- a/services/nodemanager/tests/testutil.py
+++ b/services/nodemanager/tests/testutil.py
@@ -30,6 +30,15 @@ def arvados_node_mock(node_num=99, job_uuid=None, age=0, **kwargs):
     node.update(kwargs)
     return node
 
+def cloud_object_mock(name_id):
+    # A very generic mock, useful for stubbing libcloud objects we
+    # only search for and pass around, like locations, subnets, etc.
+    cloud_object = mock.NonCallableMagicMock(['id', 'name'],
+                                             name='cloud_object')
+    cloud_object.id = str(name_id)
+    cloud_object.name = cloud_object.id.upper()
+    return cloud_object
+
 def cloud_node_mock(node_num=99):
     node = mock.NonCallableMagicMock(
         ['id', 'name', 'state', 'public_ips', 'private_ips', 'driver', 'size',

commit 6bd3ba9693f500162bd2744b2e79edf91fab6b02
Author: Brett Smith <brett at curoverse.com>
Date:   Fri Feb 13 16:00:30 2015 -0500

    4138: Refactor common Node Manager driver initialization to base driver.

diff --git a/services/nodemanager/arvnodeman/computenode/driver/__init__.py b/services/nodemanager/arvnodeman/computenode/driver/__init__.py
index 3a0c206..c06bba3 100644
--- a/services/nodemanager/arvnodeman/computenode/driver/__init__.py
+++ b/services/nodemanager/arvnodeman/computenode/driver/__init__.py
@@ -25,6 +25,15 @@ class BaseComputeNodeDriver(object):
         self.real = driver_class(**auth_kwargs)
         self.list_kwargs = list_kwargs
         self.create_kwargs = create_kwargs
+        for key in self.create_kwargs.keys():
+            init_method = getattr(self, '_init_' + key, None)
+            if init_method is not None:
+                new_pair = init_method(self.create_kwargs.pop(key))
+                if new_pair is not None:
+                    self.create_kwargs[new_pair[0]] = new_pair[1]
+
+    def _init_ping_host(self, ping_host):
+        self.ping_host = ping_host
 
     def __getattr__(self, name):
         # Proxy non-extension methods to the real driver.
@@ -52,6 +61,11 @@ class BaseComputeNodeDriver(object):
     def arvados_create_kwargs(self, arvados_node):
         raise NotImplementedError("BaseComputeNodeDriver.arvados_create_kwargs")
 
+    def _make_ping_url(self, arvados_node):
+        return 'https://{}/arvados/v1/nodes/{}/ping?ping_secret={}'.format(
+            self.ping_host, arvados_node['uuid'],
+            arvados_node['info']['ping_secret'])
+
     def create_node(self, size, arvados_node):
         kwargs = self.create_kwargs.copy()
         kwargs.update(self.arvados_create_kwargs(arvados_node))
diff --git a/services/nodemanager/arvnodeman/computenode/driver/ec2.py b/services/nodemanager/arvnodeman/computenode/driver/ec2.py
index 255a948..9db3d89 100644
--- a/services/nodemanager/arvnodeman/computenode/driver/ec2.py
+++ b/services/nodemanager/arvnodeman/computenode/driver/ec2.py
@@ -52,19 +52,10 @@ class ComputeNodeDriver(BaseComputeNodeDriver):
         super(ComputeNodeDriver, self).__init__(
             auth_kwargs, {'ex_filters': list_kwargs}, create_kwargs,
             driver_class)
-        for key in self.create_kwargs.keys():
-            init_method = getattr(self, '_init_' + key, None)
-            if init_method is not None:
-                new_pair = init_method(self.create_kwargs.pop(key))
-                if new_pair is not None:
-                    self.create_kwargs[new_pair[0]] = new_pair[1]
 
     def _init_image_id(self, image_id):
         return 'image', self.search_for(image_id, 'list_images')
 
-    def _init_ping_host(self, ping_host):
-        self.ping_host = ping_host
-
     def _init_security_groups(self, group_names):
         return 'ex_security_groups', [
             self.search_for(gname.strip(), 'ex_get_security_groups')
@@ -79,14 +70,8 @@ class ComputeNodeDriver(BaseComputeNodeDriver):
         return 'auth', key
 
     def arvados_create_kwargs(self, arvados_node):
-        result = {'name': arvados_node_fqdn(arvados_node)}
-        ping_secret = arvados_node['info'].get('ping_secret')
-        if ping_secret is not None:
-            ping_url = ('https://{}/arvados/v1/nodes/{}/ping?ping_secret={}'.
-                        format(self.ping_host, arvados_node['uuid'],
-                               ping_secret))
-            result['ex_userdata'] = ping_url
-        return result
+        return {'name': arvados_node_fqdn(arvados_node),
+                'ex_userdata': self._make_ping_url(arvados_node)}
 
     def post_create_node(self, cloud_node):
         self.real.ex_create_tags(cloud_node, self.tags)
diff --git a/services/nodemanager/tests/testutil.py b/services/nodemanager/tests/testutil.py
index f254e72..ff525f0 100644
--- a/services/nodemanager/tests/testutil.py
+++ b/services/nodemanager/tests/testutil.py
@@ -26,7 +26,7 @@ def arvados_node_mock(node_num=99, job_uuid=None, age=0, **kwargs):
             'ip_address': ip_address_mock(node_num),
             'job_uuid': job_uuid,
             'crunch_worker_state': crunch_worker_state,
-            'info': {}}
+            'info': {'ping_secret': 'defaulttestsecret'}}
     node.update(kwargs)
     return node
 

commit 0d0a1fe02eb3775fd48e54a5d2408d3220090ed8
Author: Brett Smith <brett at curoverse.com>
Date:   Wed Feb 11 15:12:37 2015 -0500

    4138: Simplify Node Manager GCE credential handling.
    
    Because libcloud's GCE driver accepts a key path as a constructor
    argument, it's relatively straightforward to put all the constructor
    arguments directly in the Node Manager configuration.  No need to
    parse out JSON.

diff --git a/services/nodemanager/arvnodeman/config.py b/services/nodemanager/arvnodeman/config.py
index f386653..315df1c 100644
--- a/services/nodemanager/arvnodeman/config.py
+++ b/services/nodemanager/arvnodeman/config.py
@@ -4,7 +4,6 @@ from __future__ import absolute_import, print_function
 
 import ConfigParser
 import importlib
-import json
 import logging
 import ssl
 import sys
@@ -99,13 +98,6 @@ class NodeManagerConfig(ConfigParser.SafeConfigParser):
         module = importlib.import_module('arvnodeman.computenode.driver.' +
                                          self.get('Cloud', 'provider'))
         auth_kwargs = self.get_section('Cloud Credentials')
-        # GCE credentials are delivered in a JSON file.
-        if 'json_credential_file' in auth_kwargs:
-            with open(auth_kwargs['json_credential_file']) as jf:
-                json_creds = json.load(jf)
-            auth_kwargs['user_id'] = json_creds['client_email']
-            auth_kwargs['key'] = json_creds['private_key']
-
         if 'timeout' in auth_kwargs:
             auth_kwargs['timeout'] = int(auth_kwargs['timeout'])
         return module.ComputeNodeDriver(auth_kwargs,
diff --git a/services/nodemanager/doc/gce.example.cfg b/services/nodemanager/doc/gce.example.cfg
index 73cd2e1..6369383 100644
--- a/services/nodemanager/doc/gce.example.cfg
+++ b/services/nodemanager/doc/gce.example.cfg
@@ -79,7 +79,8 @@ provider = gce
 shutdown_windows = 54, 5, 1
 
 [Cloud Credentials]
-json_credential_file = /path/to/credential_file.json
+user_id = client_email_address at developer.gserviceaccount.com
+key = path_to_certificate.pem
 project = project-id-from-google-cloud-dashboard
 timeout = 60
 

commit 36929497f4825cde6c9c386217300cf4fd43f4f4
Author: Tim Pierce <twp at curoverse.com>
Date:   Fri Jan 23 17:44:41 2015 -0500

    4138: updated unit test
    
    Corrected test_create_includes_ping_secret to account for delivering the
    ping secret via metadata in GCE.

diff --git a/services/nodemanager/tests/test_computenode_driver_gce.py b/services/nodemanager/tests/test_computenode_driver_gce.py
index 075760a..0e10e2f 100644
--- a/services/nodemanager/tests/test_computenode_driver_gce.py
+++ b/services/nodemanager/tests/test_computenode_driver_gce.py
@@ -42,9 +42,10 @@ class GCEComputeNodeDriverTestCase(unittest.TestCase):
         driver.create_node(testutil.MockSize(1), arv_node)
         create_method = self.driver_mock().create_node
         self.assertTrue(create_method.called)
+        create_metadata = create_method.call_args[1].get('ex_metadata')
+        self.assertIsInstance(create_metadata, dict)
         self.assertIn('ping_secret=ssshh',
-                      create_method.call_args[1].get('ex_userdata',
-                                                     'arg missing'))
+                      create_metadata.get('pingUrl', 'arg missing'))
 
     def test_generate_metadata_for_new_arvados_node(self):
         arv_node = testutil.arvados_node_mock(8)

commit 0d0de3867410b415620573ae13706192ddcd8961
Author: Tim Pierce <twp at curoverse.com>
Date:   Fri Jan 23 17:24:54 2015 -0500

    4138: GCE fixes
    
    The 'network_id' parameter needs to be delivered as 'location' in GCE.
    
    The ping_url parameter is now delivered in the node metadata as
    'pingUrl'.
    
    When creating a new GCE instance, 'name' is a required parameter and
    must begin with a letter. The default name is the UUID of the
    corresponding Arvados node, prepended with 'arv-'.

diff --git a/services/nodemanager/arvnodeman/computenode/driver/gce.py b/services/nodemanager/arvnodeman/computenode/driver/gce.py
index 125a090..a8edc43 100644
--- a/services/nodemanager/arvnodeman/computenode/driver/gce.py
+++ b/services/nodemanager/arvnodeman/computenode/driver/gce.py
@@ -37,8 +37,11 @@ class ComputeNodeDriver(BaseComputeNodeDriver):
                 if new_pair is not None:
                     self.create_kwargs[new_pair[0]] = new_pair[1]
 
-    def _init_image_id(self, image_id):
-        return 'image', image_id
+    def _init_image(self, image_name):
+        return 'image', image_name
+
+    def _init_location(self, location):
+        return 'location', location
 
     def _init_ping_host(self, ping_host):
         self.ping_host = ping_host
@@ -46,9 +49,6 @@ class ComputeNodeDriver(BaseComputeNodeDriver):
     def _init_service_accounts(self, service_accounts_str):
         self.service_accounts = json.loads(service_accounts_str)
 
-    def _init_network_id(self, subnet_id):
-        return 'ex_network', subnet_id
-
     def _init_ssh_key(self, filename):
         with open(filename) as ssh_file:
             self.ssh_key = ssh_file.read().strip()
@@ -60,7 +60,7 @@ class ComputeNodeDriver(BaseComputeNodeDriver):
             ping_url = ('https://{}/arvados/v1/nodes/{}/ping?ping_secret={}'.
                         format(self.ping_host, arvados_node['uuid'],
                                ping_secret))
-            result['ex_userdata'] = ping_url
+            result['ex_metadata']['pingUrl'] = ping_url
         if self.service_accounts is not None:
             result['ex_service_accounts'] = self.service_accounts
 
@@ -70,6 +70,13 @@ class ComputeNodeDriver(BaseComputeNodeDriver):
             result['ex_metadata']['sshKeys'] = 'root:{}'.format(self.ssh_key)
         return result
 
+    def create_node(self, size, arvados_node):
+        kwargs = self.create_kwargs.copy()
+        kwargs.update(self.arvados_create_kwargs(arvados_node))
+        kwargs.setdefault('name', 'arv-{}'.format(arvados_node['uuid']))
+        kwargs['size'] = size
+        return self.real.create_node(**kwargs)
+
     # When an Arvados node is synced with a GCE node, the Arvados hostname
     # is forwarded in a GCE tag 'hostname-foo'.
     # TODO(twp): implement an ex_set_metadata method (at least until
diff --git a/services/nodemanager/doc/gce.example.cfg b/services/nodemanager/doc/gce.example.cfg
index 11289bc..73cd2e1 100644
--- a/services/nodemanager/doc/gce.example.cfg
+++ b/services/nodemanager/doc/gce.example.cfg
@@ -104,9 +104,9 @@ ping_host = hostname:port
 
 # The GCE image name and network zone name to use when creating new nodes.
 # * Valid image aliases: https://cloud.google.com/sdk/gcloud/reference/compute/instances/create
-# * Valid network zones: https://cloud.google.com/compute/docs/zones
-image_id = debian-7
-network_id = us-central1-a
+# * Valid location (zone) names: https://cloud.google.com/compute/docs/zones
+image = debian-7
+location = us-central1-a
 
 # JSON string of service account authorizations for this cluster.
 # See http://libcloud.readthedocs.org/en/latest/compute/drivers/gce.html#specifying-service-account-scopes

commit 2b6cfad3ac09667655e5e5a745cffe97b215ab4b
Author: Tim Pierce <twp at curoverse.com>
Date:   Wed Jan 21 13:06:35 2015 -0500

    4138: general GCE fixes
    
    * JSON credential file
    ** GCE credentials are delivered as a JSON string (and the key is formatted as a multi-line RSA private key). Let the GCE config file specify a path to the JSON credential file for simplicity.
    * Accept NodeSizes addressed by id or name
    ** In EC2, NodeSizes are identified by the 'id' field.  In GCE they are identified by the 'name' field.  Allow the Node Manager config module to accept either.

diff --git a/services/nodemanager/arvnodeman/computenode/driver/gce.py b/services/nodemanager/arvnodeman/computenode/driver/gce.py
index a4fd57d..125a090 100644
--- a/services/nodemanager/arvnodeman/computenode/driver/gce.py
+++ b/services/nodemanager/arvnodeman/computenode/driver/gce.py
@@ -38,7 +38,7 @@ class ComputeNodeDriver(BaseComputeNodeDriver):
                     self.create_kwargs[new_pair[0]] = new_pair[1]
 
     def _init_image_id(self, image_id):
-        return 'image', self.search_for(image_id, 'list_images')
+        return 'image', image_id
 
     def _init_ping_host(self, ping_host):
         self.ping_host = ping_host
@@ -47,7 +47,7 @@ class ComputeNodeDriver(BaseComputeNodeDriver):
         self.service_accounts = json.loads(service_accounts_str)
 
     def _init_network_id(self, subnet_id):
-        return 'ex_network', self.search_for(subnet_id, 'ex_list_networks')
+        return 'ex_network', subnet_id
 
     def _init_ssh_key(self, filename):
         with open(filename) as ssh_file:
diff --git a/services/nodemanager/arvnodeman/config.py b/services/nodemanager/arvnodeman/config.py
index e15f023..f386653 100644
--- a/services/nodemanager/arvnodeman/config.py
+++ b/services/nodemanager/arvnodeman/config.py
@@ -4,6 +4,7 @@ from __future__ import absolute_import, print_function
 
 import ConfigParser
 import importlib
+import json
 import logging
 import ssl
 import sys
@@ -98,6 +99,13 @@ class NodeManagerConfig(ConfigParser.SafeConfigParser):
         module = importlib.import_module('arvnodeman.computenode.driver.' +
                                          self.get('Cloud', 'provider'))
         auth_kwargs = self.get_section('Cloud Credentials')
+        # GCE credentials are delivered in a JSON file.
+        if 'json_credential_file' in auth_kwargs:
+            with open(auth_kwargs['json_credential_file']) as jf:
+                json_creds = json.load(jf)
+            auth_kwargs['user_id'] = json_creds['client_email']
+            auth_kwargs['key'] = json_creds['private_key']
+
         if 'timeout' in auth_kwargs:
             auth_kwargs['timeout'] = int(auth_kwargs['timeout'])
         return module.ComputeNodeDriver(auth_kwargs,
@@ -105,14 +113,26 @@ class NodeManagerConfig(ConfigParser.SafeConfigParser):
                                         self.get_section('Cloud Create'))
 
     def node_sizes(self, all_sizes):
+        """Finds all acceptable NodeSizes for our installation.
+
+        Returns a list of (NodeSize, kwargs) pairs for each NodeSize object
+        returned by libcloud that matches a size listed in our config file.
+        """
+
         size_kwargs = {}
         for sec_name in self.sections():
             sec_words = sec_name.split(None, 2)
             if sec_words[0] != 'Size':
                 continue
             size_kwargs[sec_words[1]] = self.get_section(sec_name, int)
-        return [(size, size_kwargs[size.id]) for size in all_sizes
-                if size.id in size_kwargs]
+        # EC2 node sizes are identified by id. GCE sizes are identified by name.
+        matching_sizes = []
+        for size in all_sizes:
+            if size.id in size_kwargs:
+                matching_sizes.append((size, size_kwargs[size.id]))
+            elif size.name in size_kwargs:
+                matching_sizes.append((size, size_kwargs[size.name]))
+        return matching_sizes
 
     def shutdown_windows(self):
         return [int(n)
diff --git a/services/nodemanager/doc/gce.example.cfg b/services/nodemanager/doc/gce.example.cfg
index adc0300..11289bc 100644
--- a/services/nodemanager/doc/gce.example.cfg
+++ b/services/nodemanager/doc/gce.example.cfg
@@ -79,9 +79,8 @@ provider = gce
 shutdown_windows = 54, 5, 1
 
 [Cloud Credentials]
-user_id = USERID
-key = SECRET_KEY
-project = project_name
+json_credential_file = /path/to/credential_file.json
+project = project-id-from-google-cloud-dashboard
 timeout = 60
 
 # Optional settings. For full documentation see
@@ -103,9 +102,11 @@ ping_host = hostname:port
 # A file path for an SSH key that can log in to the compute node.
 # ssh_key = path
 
-# The GCE IDs of the image and network compute nodes should use.
-image_id = idstring
-network_id = idstring
+# The GCE image name and network zone name to use when creating new nodes.
+# * Valid image aliases: https://cloud.google.com/sdk/gcloud/reference/compute/instances/create
+# * Valid network zones: https://cloud.google.com/compute/docs/zones
+image_id = debian-7
+network_id = us-central1-a
 
 # JSON string of service account authorizations for this cluster.
 # See http://libcloud.readthedocs.org/en/latest/compute/drivers/gce.html#specifying-service-account-scopes
@@ -120,6 +121,9 @@ network_id = idstring
 # The Size fields are interpreted the same way as with a libcloud NodeSize:
 # http://libcloud.readthedocs.org/en/latest/compute/api.html#libcloud.compute.base.NodeSize
 #
+# See https://cloud.google.com/compute/docs/machine-types for a list
+# of known machine types that may be used as a Size parameter.
+#
 # Each size section MUST define the number of cores are available in this
 # size class (since libcloud does not provide any consistent API for exposing
 # this setting).

commit c2fb3f5752b87617cb42761e4963126c549b3edc
Author: Tim Pierce <twp at curoverse.com>
Date:   Mon Nov 24 17:12:07 2014 -0500

    4138: code review feedback

diff --git a/services/nodemanager/doc/gce.example.cfg b/services/nodemanager/doc/gce.example.cfg
index 4886cb2..adc0300 100644
--- a/services/nodemanager/doc/gce.example.cfg
+++ b/services/nodemanager/doc/gce.example.cfg
@@ -82,15 +82,15 @@ shutdown_windows = 54, 5, 1
 user_id = USERID
 key = SECRET_KEY
 project = project_name
-timeout = 60             # used by NodeManagerConfig
+timeout = 60
 
 # Optional settings. For full documentation see
 # http://libcloud.readthedocs.org/en/latest/compute/drivers/gce.html#libcloud.compute.drivers.gce.GCENodeDriver
 #
-# datacenter = 'us-central1-a'
-# auth_type = 'SA'               # SA, IA or GCE
+# datacenter = us-central1-a
+# auth_type = SA               # SA, IA or GCE
 # scopes = https://www.googleapis.com/auth/compute
-# credential_file = 
+# credential_file =
 
 [Cloud List]
 # Keywords here will be used to populate the metadata field for a GCE node.

commit 650bd4aea584857b5ba8e1e3657a07ff540e9ab8
Author: Tim Pierce <twp at curoverse.com>
Date:   Tue Nov 18 13:49:10 2014 -0500

    4138: support for Google Cloud Engine.
    
    * Added:
    ** nodemanager/arvnodeman/computenode/drivers/gce.py
    ** nodemanager/doc/gce.example.cfg
    ** nodemanager/tests/test_computenode_driver_gce.py
    
    Updated comment in nodemanager/arvnodeman/computenode/drivers/ec2.py.

diff --git a/services/nodemanager/arvnodeman/computenode/driver/gce.py b/services/nodemanager/arvnodeman/computenode/driver/gce.py
new file mode 100644
index 0000000..a4fd57d
--- /dev/null
+++ b/services/nodemanager/arvnodeman/computenode/driver/gce.py
@@ -0,0 +1,86 @@
+#!/usr/bin/env python
+
+from __future__ import absolute_import, print_function
+
+import functools
+import json
+import time
+
+import libcloud.compute.base as cloud_base
+import libcloud.compute.providers as cloud_provider
+import libcloud.compute.types as cloud_types
+from libcloud.compute.drivers import gce
+
+from . import BaseComputeNodeDriver
+from .. import arvados_node_fqdn
+
+class ComputeNodeDriver(BaseComputeNodeDriver):
+    """Compute node driver wrapper for GCE
+
+    This translates cloud driver requests to GCE's specific parameters.
+    """
+    DEFAULT_DRIVER = cloud_provider.get_driver(cloud_types.Provider.GCE)
+    SEARCH_CACHE = {}
+    ssh_key = None
+    service_accounts = None
+
+    def __init__(self, auth_kwargs, list_kwargs, create_kwargs,
+                 driver_class=DEFAULT_DRIVER):
+        super(ComputeNodeDriver, self).__init__(
+            auth_kwargs, list_kwargs, create_kwargs,
+            driver_class)
+
+        for key in self.create_kwargs.keys():
+            init_method = getattr(self, '_init_' + key, None)
+            if init_method is not None:
+                new_pair = init_method(self.create_kwargs.pop(key))
+                if new_pair is not None:
+                    self.create_kwargs[new_pair[0]] = new_pair[1]
+
+    def _init_image_id(self, image_id):
+        return 'image', self.search_for(image_id, 'list_images')
+
+    def _init_ping_host(self, ping_host):
+        self.ping_host = ping_host
+
+    def _init_service_accounts(self, service_accounts_str):
+        self.service_accounts = json.loads(service_accounts_str)
+
+    def _init_network_id(self, subnet_id):
+        return 'ex_network', self.search_for(subnet_id, 'ex_list_networks')
+
+    def _init_ssh_key(self, filename):
+        with open(filename) as ssh_file:
+            self.ssh_key = ssh_file.read().strip()
+
+    def arvados_create_kwargs(self, arvados_node):
+        result = {'ex_metadata': self.list_kwargs.copy() }
+        ping_secret = arvados_node['info'].get('ping_secret')
+        if ping_secret is not None:
+            ping_url = ('https://{}/arvados/v1/nodes/{}/ping?ping_secret={}'.
+                        format(self.ping_host, arvados_node['uuid'],
+                               ping_secret))
+            result['ex_userdata'] = ping_url
+        if self.service_accounts is not None:
+            result['ex_service_accounts'] = self.service_accounts
+
+        # SSH keys are delivered to GCE nodes via ex_metadata: see
+        # http://stackoverflow.com/questions/26752617/creating-sshkeys-for-gce-instance-using-libcloud
+        if self.ssh_key is not None:
+            result['ex_metadata']['sshKeys'] = 'root:{}'.format(self.ssh_key)
+        return result
+
+    # When an Arvados node is synced with a GCE node, the Arvados hostname
+    # is forwarded in a GCE tag 'hostname-foo'.
+    # TODO(twp): implement an ex_set_metadata method (at least until
+    # libcloud supports the API setMetadata method) so we can pass this
+    # sensibly in the node metadata.
+    def sync_node(self, cloud_node, arvados_node):
+        tags = ['hostname-{}'.format(arvados_node_fqdn(arvados_node))]
+        self.real.ex_set_node_tags(cloud_node, tags)
+
+    @classmethod
+    def node_start_time(cls, node):
+        time_str = node.extra['launch_time'].split('.', 2)[0] + 'UTC'
+        return time.mktime(time.strptime(
+                time_str,'%Y-%m-%dT%H:%M:%S%Z')) - time.timezone
diff --git a/services/nodemanager/doc/ec2.example.cfg b/services/nodemanager/doc/ec2.example.cfg
index 024ed2b..9b41ca1 100644
--- a/services/nodemanager/doc/ec2.example.cfg
+++ b/services/nodemanager/doc/ec2.example.cfg
@@ -128,9 +128,11 @@ security_groups = idstring1, idstring2
 # willing to use.  The Node Manager should boot the cheapest size(s) that
 # can run jobs in the queue (N.B.: defining more than one size has not been
 # tested yet).
-# Each size section MUST define the number of cores it has.  You may also
-# want to define the number of mebibytes of scratch space for Crunch jobs.
-# You can also override Amazon's provided data fields by setting the same
-# names here.
+# Each size section MUST define the number of cores are available in this
+# size class (since libcloud does not provide any consistent API for exposing
+# this setting).
+# You may also want to define the amount of scratch space (expressed
+# in GB) for Crunch jobs.  You can also override Amazon's provided
+# data fields by setting the same names here.
 cores = 2
-scratch = 100
\ No newline at end of file
+scratch = 100
diff --git a/services/nodemanager/doc/ec2.example.cfg b/services/nodemanager/doc/gce.example.cfg
similarity index 64%
copy from services/nodemanager/doc/ec2.example.cfg
copy to services/nodemanager/doc/gce.example.cfg
index 024ed2b..4886cb2 100644
--- a/services/nodemanager/doc/ec2.example.cfg
+++ b/services/nodemanager/doc/gce.example.cfg
@@ -1,12 +1,7 @@
-# EC2 configuration for Arvados Node Manager.
+# Google Compute Engine configuration for Arvados Node Manager.
 # All times are in seconds unless specified otherwise.
 
 [Daemon]
-# The dispatcher can customize the start and stop procedure for
-# cloud nodes.  For example, the SLURM dispatcher drains nodes
-# through SLURM before shutting them down.
-#dispatcher = slurm
-
 # Node Manager will ensure that there are at least this many nodes
 # running at all times.
 min_nodes = 0
@@ -15,7 +10,7 @@ min_nodes = 0
 # many are running.
 max_nodes = 8
 
-# Poll EC2 nodes and Arvados for new information every N seconds.
+# Poll compute nodes and Arvados for new information every N seconds.
 poll_time = 60
 
 # Polls have exponential backoff when services fail to respond.
@@ -27,12 +22,6 @@ max_poll_time = 300
 # information is too outdated.
 poll_stale_after = 600
 
-# If Node Manager boots a cloud node, and it does not pair with an Arvados
-# node before this long, assume that there was a cloud bootstrap failure and
-# shut it down.  Note that normal shutdown windows apply (see the Cloud
-# section), so this should be shorter than the first shutdown window value.
-boot_fail_after = 1800
-
 # "Node stale time" affects two related behaviors.
 # 1. If a compute node has been running for at least this long, but it
 # isn't paired with an Arvados node, do not shut it down, but leave it alone.
@@ -74,8 +63,9 @@ timeout = 15
 insecure = no
 
 [Cloud]
-provider = ec2
+provider = gce
 
+# XXX(twp): figure out good default settings for GCE
 # It's usually most cost-effective to shut down compute nodes during narrow
 # windows of time.  For example, EC2 bills each node by the hour, so the best
 # time to shut down a node is right before a new hour of uptime starts.
@@ -89,48 +79,52 @@ provider = ec2
 shutdown_windows = 54, 5, 1
 
 [Cloud Credentials]
-key = KEY
-secret = SECRET_KEY
-region = us-east-1
-timeout = 60
+user_id = USERID
+key = SECRET_KEY
+project = project_name
+timeout = 60             # used by NodeManagerConfig
+
+# Optional settings. For full documentation see
+# http://libcloud.readthedocs.org/en/latest/compute/drivers/gce.html#libcloud.compute.drivers.gce.GCENodeDriver
+#
+# datacenter = 'us-central1-a'
+# auth_type = 'SA'               # SA, IA or GCE
+# scopes = https://www.googleapis.com/auth/compute
+# credential_file = 
 
 [Cloud List]
-# This section defines filters that find compute nodes.
-# Tags that you specify here will automatically be added to nodes you create.
-# Replace colons in Amazon filters with underscores
-# (e.g., write "tag:mytag" as "tag_mytag").
-instance-state-name = running
-tag_arvados-class = dynamic-compute
-tag_cluster = zyxwv
+# Keywords here will be used to populate the metadata field for a GCE node.
 
 [Cloud Create]
 # New compute nodes will send pings to Arvados at this host.
 # You may specify a port, and use brackets to disambiguate IPv6 addresses.
 ping_host = hostname:port
 
-# Give the name of an SSH key on AWS...
-ex_keyname = string
-
-# ... or a file path for an SSH key that can log in to the compute node.
-# (One or the other, not both.)
+# A file path for an SSH key that can log in to the compute node.
 # ssh_key = path
 
-# The EC2 IDs of the image and subnet compute nodes should use.
+# The GCE IDs of the image and network compute nodes should use.
 image_id = idstring
-subnet_id = idstring
+network_id = idstring
 
-# Comma-separated EC2 IDs for the security group(s) assigned to each
-# compute node.
-security_groups = idstring1, idstring2
+# JSON string of service account authorizations for this cluster.
+# See http://libcloud.readthedocs.org/en/latest/compute/drivers/gce.html#specifying-service-account-scopes
+# service_accounts = [ { 'email': 'ex at mple.com', 'scopes': ['storage-ro'] } ]
 
-[Size t2.medium]
-# You can define any number of Size sections to list EC2 sizes you're
+[Size n1-standard-2]
+# You can define any number of Size sections to list node sizes you're
 # willing to use.  The Node Manager should boot the cheapest size(s) that
 # can run jobs in the queue (N.B.: defining more than one size has not been
 # tested yet).
-# Each size section MUST define the number of cores it has.  You may also
-# want to define the number of mebibytes of scratch space for Crunch jobs.
-# You can also override Amazon's provided data fields by setting the same
-# names here.
+#
+# The Size fields are interpreted the same way as with a libcloud NodeSize:
+# http://libcloud.readthedocs.org/en/latest/compute/api.html#libcloud.compute.base.NodeSize
+#
+# Each size section MUST define the number of cores are available in this
+# size class (since libcloud does not provide any consistent API for exposing
+# this setting).
+# You may also want to define the amount of scratch space (expressed
+# in GB) for Crunch jobs.
 cores = 2
-scratch = 100
\ No newline at end of file
+scratch = 100
+ram = 512
diff --git a/services/nodemanager/tests/test_computenode_driver_gce.py b/services/nodemanager/tests/test_computenode_driver_gce.py
new file mode 100644
index 0000000..075760a
--- /dev/null
+++ b/services/nodemanager/tests/test_computenode_driver_gce.py
@@ -0,0 +1,104 @@
+#!/usr/bin/env python
+
+from __future__ import absolute_import, print_function
+
+import time
+import unittest
+
+import mock
+
+import arvnodeman.computenode.driver.gce as gce
+from . import testutil
+
+class GCEComputeNodeDriverTestCase(unittest.TestCase):
+    def setUp(self):
+        self.driver_mock = mock.MagicMock(name='driver_mock')
+
+    def new_driver(self, auth_kwargs={}, list_kwargs={}, create_kwargs={}):
+        create_kwargs.setdefault('ping_host', '100::')
+        return gce.ComputeNodeDriver(
+            auth_kwargs, list_kwargs, create_kwargs,
+            driver_class=self.driver_mock)
+
+    def test_driver_instantiation(self):
+        kwargs = {'user_id': 'foo'}
+        driver = self.new_driver(auth_kwargs=kwargs)
+        self.assertTrue(self.driver_mock.called)
+        self.assertEqual(kwargs, self.driver_mock.call_args[1])
+
+    def test_create_location_loaded_at_initialization(self):
+        kwargs = {'location': 'testregion'}
+        driver = self.new_driver(create_kwargs=kwargs)
+        self.assertTrue(self.driver_mock().list_locations)
+
+    def test_create_image_loaded_at_initialization(self):
+        kwargs = {'image': 'testimage'}
+        driver = self.new_driver(create_kwargs=kwargs)
+        self.assertTrue(self.driver_mock().list_images)
+
+    def test_create_includes_ping_secret(self):
+        arv_node = testutil.arvados_node_mock(info={'ping_secret': 'ssshh'})
+        driver = self.new_driver()
+        driver.create_node(testutil.MockSize(1), arv_node)
+        create_method = self.driver_mock().create_node
+        self.assertTrue(create_method.called)
+        self.assertIn('ping_secret=ssshh',
+                      create_method.call_args[1].get('ex_userdata',
+                                                     'arg missing'))
+
+    def test_generate_metadata_for_new_arvados_node(self):
+        arv_node = testutil.arvados_node_mock(8)
+        driver = self.new_driver(list_kwargs={'list': 'test'})
+        self.assertEqual({'ex_metadata': {'list': 'test'}},
+                         driver.arvados_create_kwargs(arv_node))
+
+    def test_tags_set_default_hostname_from_new_arvados_node(self):
+        arv_node = testutil.arvados_node_mock(hostname=None)
+        cloud_node = testutil.cloud_node_mock(1)
+        driver = self.new_driver()
+        driver.sync_node(cloud_node, arv_node)
+        tag_mock = self.driver_mock().ex_set_node_tags
+        self.assertTrue(tag_mock.called)
+        self.assertEqual(['hostname-dynamic.compute.zzzzz.arvadosapi.com'],
+                         tag_mock.call_args[0][1])
+
+    def test_sync_node_sets_static_hostname(self):
+        arv_node = testutil.arvados_node_mock(1)
+        cloud_node = testutil.cloud_node_mock(2)
+        driver = self.new_driver()
+        driver.sync_node(cloud_node, arv_node)
+        tag_mock = self.driver_mock().ex_set_node_tags
+        self.assertTrue(tag_mock.called)
+        self.assertEqual(['hostname-compute1.zzzzz.arvadosapi.com'],
+                         tag_mock.call_args[0][1])
+
+    def test_node_create_time(self):
+        refsecs = int(time.time())
+        reftuple = time.gmtime(refsecs)
+        node = testutil.cloud_node_mock()
+        node.extra = {'launch_time': time.strftime('%Y-%m-%dT%H:%M:%S.000Z',
+                                                   reftuple)}
+        self.assertEqual(refsecs, gce.ComputeNodeDriver.node_start_time(node))
+
+    def test_generate_metadata_for_new_arvados_node(self):
+        arv_node = testutil.arvados_node_mock(8)
+        driver = self.new_driver(list_kwargs={'list': 'test'})
+        self.assertEqual({'ex_metadata': {'list': 'test'}},
+                         driver.arvados_create_kwargs(arv_node))
+
+    def test_deliver_ssh_key_in_metadata(self):
+        test_ssh_key = 'ssh-rsa-foo'
+        arv_node = testutil.arvados_node_mock(1)
+        with mock.patch('__builtin__.open', mock.mock_open(read_data=test_ssh_key)) as mock_file:
+            driver = self.new_driver(create_kwargs={'ssh_key': 'ssh-key-file'})
+        mock_file.assert_called_once_with('ssh-key-file')
+        self.assertEqual({'ex_metadata': {'sshKeys': 'root:ssh-rsa-foo'}},
+                         driver.arvados_create_kwargs(arv_node))
+
+    def test_create_driver_with_service_accounts(self):
+        srv_acct_config = { 'service_accounts': '{ "email": "foo at bar", "scopes":["storage-full"]}' }
+        arv_node = testutil.arvados_node_mock(1)
+        driver = self.new_driver(create_kwargs=srv_acct_config)
+        create_kwargs = driver.arvados_create_kwargs(arv_node)
+        self.assertEqual({u'email': u'foo at bar', u'scopes': [u'storage-full']},
+                         create_kwargs['ex_service_accounts'])

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list