[ARVADOS] updated: 319503f1a8eda9fb9cea0bff038ad437e88ebeac

git at public.curoverse.com git at public.curoverse.com
Tue Feb 17 17:56:40 EST 2015


Summary of changes:
 .../nodemanager/arvnodeman/computenode/__init__.py |   9 +-
 .../arvnodeman/computenode/driver/__init__.py      |  34 ++++-
 .../arvnodeman/computenode/driver/ec2.py           |  19 +--
 .../arvnodeman/computenode/driver/gce.py           | 126 ++++++++++++++++
 services/nodemanager/arvnodeman/config.py          |  16 ++-
 services/nodemanager/doc/ec2.example.cfg           |  12 +-
 services/nodemanager/doc/gce.example.cfg           | 141 ++++++++++++++++++
 .../tests/test_computenode_driver_ec2.py           |  24 +---
 .../tests/test_computenode_driver_gce.py           | 158 +++++++++++++++++++++
 services/nodemanager/tests/testutil.py             |  29 +++-
 10 files changed, 516 insertions(+), 52 deletions(-)
 create mode 100644 services/nodemanager/arvnodeman/computenode/driver/gce.py
 create mode 100644 services/nodemanager/doc/gce.example.cfg
 create mode 100644 services/nodemanager/tests/test_computenode_driver_gce.py

       via  319503f1a8eda9fb9cea0bff038ad437e88ebeac (commit)
       via  d6d290bfc01d90d160cecf72d86aff40d7f63f3f (commit)
       via  d32bbed004b76333c3e72e6c1f97dcde88e11edd (commit)
       via  c51fe8e859262a8c534c2d3265fabe54555ac462 (commit)
       via  5884b7c433e6f682c089956917f31a587e75363d (commit)
       via  b5249ac7c8ccc1bec4ae751d1ff6816677e6b2e7 (commit)
       via  708630d0303948874d231a8d6b6d080adcdf6d2c (commit)
       via  a6837612f9678bb983f634b518bde16b8921a0a4 (commit)
       via  51d417f941214512c0cbd6d56687ce2b9a0869bc (commit)
       via  45fdc95efb2467b0ad7d21d82aa08b26a43cfaa0 (commit)
       via  af52e4975a52d4eeec356e5dd0ab4dbb5957ea28 (commit)
       via  9d6a6eca3a634e4090d7e0fc4f094c411ab5817a (commit)
      from  1713f54c6b41cb18a69c09f361d97ca6384a9492 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.


commit 319503f1a8eda9fb9cea0bff038ad437e88ebeac
Merge: 1713f54 d6d290b
Author: Brett Smith <brett at curoverse.com>
Date:   Tue Feb 17 17:54:59 2015 -0500

    Merge branch '4138-node-manager-gce-wip'
    
    Closes #4138, #5222.
    
    Thanks, Tim.


commit d6d290bfc01d90d160cecf72d86aff40d7f63f3f
Author: Brett Smith <brett at curoverse.com>
Date:   Mon Feb 16 11:06:41 2015 -0500

    4138: Prepare Node Manager GCE driver for production.
    
    * Set node metadata in more appropriate places.
    * Bridge more differences between GCE and EC2, like the fact that
      sizes are listed for each location they're available, and GCE
      doesn't provide node boot times.
    * Use more infrastructure from BaseComputeNodeDriver to reduce code
      duplication.
    * Load as many objects as possible at initialization time, to reduce
      API overhead of creating nodes.

diff --git a/services/nodemanager/arvnodeman/computenode/__init__.py b/services/nodemanager/arvnodeman/computenode/__init__.py
index 4955992..30fe516 100644
--- a/services/nodemanager/arvnodeman/computenode/__init__.py
+++ b/services/nodemanager/arvnodeman/computenode/__init__.py
@@ -5,13 +5,18 @@ from __future__ import absolute_import, print_function
 import itertools
 import time
 
+ARVADOS_TIMEFMT = '%Y-%m-%dT%H:%M:%SZ'
+
 def arvados_node_fqdn(arvados_node, default_hostname='dynamic.compute'):
     hostname = arvados_node.get('hostname') or default_hostname
     return '{}.{}'.format(hostname, arvados_node['domain'])
 
 def arvados_node_mtime(node):
-    return time.mktime(time.strptime(node['modified_at'] + 'UTC',
-                                     '%Y-%m-%dT%H:%M:%SZ%Z')) - time.timezone
+    return arvados_timestamp(node['modified_at'])
+
+def arvados_timestamp(timestr):
+    return time.mktime(time.strptime(timestr + 'UTC',
+                                     ARVADOS_TIMEFMT + '%Z')) - time.timezone
 
 def timestamp_fresh(timestamp, fresh_time):
     return (time.time() - timestamp) < fresh_time
diff --git a/services/nodemanager/arvnodeman/computenode/driver/gce.py b/services/nodemanager/arvnodeman/computenode/driver/gce.py
index a8edc43..d6ea2b2 100644
--- a/services/nodemanager/arvnodeman/computenode/driver/gce.py
+++ b/services/nodemanager/arvnodeman/computenode/driver/gce.py
@@ -6,13 +6,11 @@ import functools
 import json
 import time
 
-import libcloud.compute.base as cloud_base
 import libcloud.compute.providers as cloud_provider
 import libcloud.compute.types as cloud_types
-from libcloud.compute.drivers import gce
 
 from . import BaseComputeNodeDriver
-from .. import arvados_node_fqdn
+from .. import arvados_node_fqdn, arvados_timestamp, ARVADOS_TIMEFMT
 
 class ComputeNodeDriver(BaseComputeNodeDriver):
     """Compute node driver wrapper for GCE
@@ -21,73 +19,108 @@ class ComputeNodeDriver(BaseComputeNodeDriver):
     """
     DEFAULT_DRIVER = cloud_provider.get_driver(cloud_types.Provider.GCE)
     SEARCH_CACHE = {}
-    ssh_key = None
-    service_accounts = None
 
     def __init__(self, auth_kwargs, list_kwargs, create_kwargs,
                  driver_class=DEFAULT_DRIVER):
+        list_kwargs = list_kwargs.copy()
+        tags_str = list_kwargs.pop('tags', '')
+        if not tags_str.strip():
+            self.node_tags = frozenset()
+        else:
+            self.node_tags = frozenset(t.strip() for t in tags_str.split(','))
+        create_kwargs = create_kwargs.copy()
+        create_kwargs.setdefault('external_ip', None)
+        create_kwargs.setdefault('ex_metadata', {})
         super(ComputeNodeDriver, self).__init__(
             auth_kwargs, list_kwargs, create_kwargs,
             driver_class)
 
-        for key in self.create_kwargs.keys():
-            init_method = getattr(self, '_init_' + key, None)
-            if init_method is not None:
-                new_pair = init_method(self.create_kwargs.pop(key))
-                if new_pair is not None:
-                    self.create_kwargs[new_pair[0]] = new_pair[1]
+    @staticmethod
+    def _name_key(cloud_object):
+        return cloud_object.name
 
     def _init_image(self, image_name):
-        return 'image', image_name
+        return 'image', self.search_for(
+            image_name, 'list_images', self._name_key)
 
-    def _init_location(self, location):
-        return 'location', location
+    def _init_location(self, location_name):
+        return 'location', self.search_for(
+            location_name, 'list_locations', self._name_key)
 
-    def _init_ping_host(self, ping_host):
-        self.ping_host = ping_host
+    def _init_network(self, network_name):
+        return 'ex_network', self.search_for(
+            network_name, 'ex_list_networks', self._name_key)
 
     def _init_service_accounts(self, service_accounts_str):
-        self.service_accounts = json.loads(service_accounts_str)
+        return 'ex_service_accounts', json.loads(service_accounts_str)
 
     def _init_ssh_key(self, filename):
+        # SSH keys are delivered to GCE nodes via ex_metadata: see
+        # http://stackoverflow.com/questions/26752617/creating-sshkeys-for-gce-instance-using-libcloud
         with open(filename) as ssh_file:
-            self.ssh_key = ssh_file.read().strip()
+            self.create_kwargs['ex_metadata']['sshKeys'] = (
+                'root:' + ssh_file.read().strip())
 
-    def arvados_create_kwargs(self, arvados_node):
-        result = {'ex_metadata': self.list_kwargs.copy() }
-        ping_secret = arvados_node['info'].get('ping_secret')
-        if ping_secret is not None:
-            ping_url = ('https://{}/arvados/v1/nodes/{}/ping?ping_secret={}'.
-                        format(self.ping_host, arvados_node['uuid'],
-                               ping_secret))
-            result['ex_metadata']['pingUrl'] = ping_url
-        if self.service_accounts is not None:
-            result['ex_service_accounts'] = self.service_accounts
+    def list_sizes(self):
+        return super(ComputeNodeDriver, self).list_sizes(
+            self.create_kwargs['location'])
 
-        # SSH keys are delivered to GCE nodes via ex_metadata: see
-        # http://stackoverflow.com/questions/26752617/creating-sshkeys-for-gce-instance-using-libcloud
-        if self.ssh_key is not None:
-            result['ex_metadata']['sshKeys'] = 'root:{}'.format(self.ssh_key)
+    def arvados_create_kwargs(self, arvados_node):
+        cluster_id, _, node_id = arvados_node['uuid'].split('-')
+        result = {'name': 'compute-{}-{}'.format(node_id, cluster_id),
+                  'ex_metadata': self.create_kwargs['ex_metadata'].copy(),
+                  'ex_tags': list(self.node_tags)}
+        result['ex_metadata']['booted_at'] = time.strftime(ARVADOS_TIMEFMT,
+                                                           time.gmtime())
+        result['ex_metadata']['hostname'] = arvados_node_fqdn(arvados_node)
+        result['ex_metadata']['user-data'] = self._make_ping_url(arvados_node)
         return result
 
-    def create_node(self, size, arvados_node):
-        kwargs = self.create_kwargs.copy()
-        kwargs.update(self.arvados_create_kwargs(arvados_node))
-        kwargs.setdefault('name', 'arv-{}'.format(arvados_node['uuid']))
-        kwargs['size'] = size
-        return self.real.create_node(**kwargs)
-
-    # When an Arvados node is synced with a GCE node, the Arvados hostname
-    # is forwarded in a GCE tag 'hostname-foo'.
-    # TODO(twp): implement an ex_set_metadata method (at least until
-    # libcloud supports the API setMetadata method) so we can pass this
-    # sensibly in the node metadata.
+    def list_nodes(self):
+        # The GCE libcloud driver only supports filtering node lists by zone.
+        # Do our own filtering based on tag list.
+        return [node for node in
+                super(ComputeNodeDriver, self).list_nodes()
+                if self.node_tags.issubset(node.extra.get('tags', []))]
+
+    @classmethod
+    def _find_metadata(cls, metadata_items, key):
+        # Given a list of two-item metadata dictonaries, return the one with
+        # the named key.  Raise KeyError if not found.
+        try:
+            return next(data_dict for data_dict in metadata_items
+                        if data_dict.get('key') == key)
+        except StopIteration:
+            raise KeyError(key)
+
+    @classmethod
+    def _get_metadata(cls, metadata_items, key, *default):
+        try:
+            return cls._find_metadata(metadata_items, key)['value']
+        except KeyError:
+            if default:
+                return default[0]
+            raise
+
     def sync_node(self, cloud_node, arvados_node):
-        tags = ['hostname-{}'.format(arvados_node_fqdn(arvados_node))]
-        self.real.ex_set_node_tags(cloud_node, tags)
+        hostname = arvados_node_fqdn(arvados_node)
+        metadata_req = cloud_node.extra['metadata'].copy()
+        metadata_items = metadata_req.setdefault('items', [])
+        try:
+            self._find_metadata(metadata_items, 'hostname')['value'] = hostname
+        except KeyError:
+            metadata_items.append({'key': 'hostname', 'value': hostname})
+        response = self.real.connection.async_request(
+            '/zones/{}/instances/{}/setMetadata'.format(
+                cloud_node.extra['zone'].name, cloud_node.name),
+            method='POST', data=metadata_req)
+        if not response.success():
+            raise Exception("setMetadata error: {}".format(response.error))
 
     @classmethod
     def node_start_time(cls, node):
-        time_str = node.extra['launch_time'].split('.', 2)[0] + 'UTC'
-        return time.mktime(time.strptime(
-                time_str,'%Y-%m-%dT%H:%M:%S%Z')) - time.timezone
+        try:
+            return arvados_timestamp(cls._get_metadata(
+                    node.extra['metadata']['items'], 'booted_at'))
+        except KeyError:
+            return 0
diff --git a/services/nodemanager/doc/gce.example.cfg b/services/nodemanager/doc/gce.example.cfg
index 5bf033d..7e7813c 100644
--- a/services/nodemanager/doc/gce.example.cfg
+++ b/services/nodemanager/doc/gce.example.cfg
@@ -65,18 +65,20 @@ insecure = no
 [Cloud]
 provider = gce
 
-# XXX(twp): figure out good default settings for GCE
-# It's usually most cost-effective to shut down compute nodes during narrow
-# windows of time.  For example, EC2 bills each node by the hour, so the best
-# time to shut down a node is right before a new hour of uptime starts.
-# Shutdown windows define these periods of time.  These are windows in
-# full minutes, separated by commas.  Counting from the time the node is
-# booted, the node WILL NOT shut down for N1 minutes; then it MAY shut down
-# for N2 minutes; then it WILL NOT shut down for N3 minutes; and so on.
-# For example, "54, 5, 1" means the node may shut down from the 54th to the
-# 59th minute of each hour of uptime.
-# Specify at least two windows.  You can add as many as you need beyond that.
-shutdown_windows = 54, 5, 1
+# Shutdown windows define periods of time when a node may and may not
+# be shut down.  These are windows in full minutes, separated by
+# commas.  Counting from the time the node is booted, the node WILL
+# NOT shut down for N1 minutes; then it MAY shut down for N2 minutes;
+# then it WILL NOT shut down for N3 minutes; and so on.  For example,
+# "54, 5, 1" means the node may shut down from the 54th to the 59th
+# minute of each hour of uptime.
+# GCE bills by the minute, and does not provide information about when
+# a node booted.  Node Manager will store this information in metadata
+# when it boots a node; if that information is not available, it will
+# assume the node booted at the epoch.  These shutdown settings are
+# very aggressive.  You may want to adjust this if you want more
+# continuity of service from a single node.
+shutdown_windows = 20, 999999
 
 [Cloud Credentials]
 user_id = client_email_address at developer.gserviceaccount.com
@@ -93,7 +95,10 @@ timeout = 60
 # credential_file =
 
 [Cloud List]
-# Keywords here will be used to populate the metadata field for a GCE node.
+# A comma-separated list of tags that must be applied to a node for it to
+# be considered a compute node.
+# The driver will automatically apply these tags to nodes it creates.
+tags = zyxwv, compute
 
 [Cloud Create]
 # New compute nodes will send pings to Arvados at this host.
@@ -108,6 +113,7 @@ ping_host = hostname:port
 # * Valid location (zone) names: https://cloud.google.com/compute/docs/zones
 image = debian-7
 location = us-central1-a
+# network = your_network_name
 
 # JSON string of service account authorizations for this cluster.
 # See http://libcloud.readthedocs.org/en/latest/compute/drivers/gce.html#specifying-service-account-scopes
diff --git a/services/nodemanager/tests/test_computenode_driver_gce.py b/services/nodemanager/tests/test_computenode_driver_gce.py
index d4b73f7..f995a8d 100644
--- a/services/nodemanager/tests/test_computenode_driver_gce.py
+++ b/services/nodemanager/tests/test_computenode_driver_gce.py
@@ -2,6 +2,7 @@
 
 from __future__ import absolute_import, print_function
 
+import json
 import time
 import unittest
 
@@ -19,80 +20,139 @@ class GCEComputeNodeDriverTestCase(testutil.DriverTestMixin, unittest.TestCase):
         self.assertTrue(self.driver_mock.called)
         self.assertEqual(kwargs, self.driver_mock.call_args[1])
 
-    def test_create_location_loaded_at_initialization(self):
-        kwargs = {'location': 'testregion'}
-        driver = self.new_driver(create_kwargs=kwargs)
-        self.assertTrue(self.driver_mock().list_locations)
-
-    def test_create_image_loaded_at_initialization(self):
-        kwargs = {'image': 'testimage'}
-        driver = self.new_driver(create_kwargs=kwargs)
-        self.assertTrue(self.driver_mock().list_images)
+    def test_create_image_loaded_at_initialization_by_name(self):
+        image_mocks = [testutil.cloud_object_mock(c) for c in 'abc']
+        list_method = self.driver_mock().list_images
+        list_method.return_value = image_mocks
+        driver = self.new_driver(create_kwargs={'image': 'B'})
+        self.assertEqual(1, list_method.call_count)
+
+    def test_list_sizes_requires_location_match(self):
+        locations = [testutil.cloud_object_mock(name)
+                     for name in ['there', 'here', 'other']]
+        self.driver_mock().list_locations.return_value = locations
+        driver = self.new_driver(create_kwargs={'location': 'HERE'})
+        driver.list_sizes()
+        self.assertIs(locations[1],
+                      self.driver_mock().list_sizes.call_args[0][0])
 
     def test_create_includes_ping_secret(self):
         arv_node = testutil.arvados_node_mock(info={'ping_secret': 'ssshh'})
         driver = self.new_driver()
         driver.create_node(testutil.MockSize(1), arv_node)
-        create_method = self.driver_mock().create_node
-        self.assertTrue(create_method.called)
-        create_metadata = create_method.call_args[1].get('ex_metadata')
-        self.assertIsInstance(create_metadata, dict)
-        self.assertIn('ping_secret=ssshh',
-                      create_metadata.get('pingUrl', 'arg missing'))
-
-    def test_generate_metadata_for_new_arvados_node(self):
-        arv_node = testutil.arvados_node_mock(8)
-        driver = self.new_driver(list_kwargs={'list': 'test'})
-        self.assertEqual({'ex_metadata': {'list': 'test'}},
-                         driver.arvados_create_kwargs(arv_node))
+        metadata = self.driver_mock().create_node.call_args[1]['ex_metadata']
+        self.assertIn('ping_secret=ssshh', metadata.get('user-data'))
 
-    def test_tags_set_default_hostname_from_new_arvados_node(self):
-        arv_node = testutil.arvados_node_mock(hostname=None)
-        cloud_node = testutil.cloud_node_mock(1)
+    def test_create_sets_default_hostname(self):
         driver = self.new_driver()
-        driver.sync_node(cloud_node, arv_node)
-        tag_mock = self.driver_mock().ex_set_node_tags
-        self.assertTrue(tag_mock.called)
-        self.assertEqual(['hostname-dynamic.compute.zzzzz.arvadosapi.com'],
-                         tag_mock.call_args[0][1])
-
-    def test_sync_node_sets_static_hostname(self):
+        driver.create_node(testutil.MockSize(1),
+                           testutil.arvados_node_mock(254, hostname=None))
+        create_kwargs = self.driver_mock().create_node.call_args[1]
+        self.assertEqual('compute-0000000000000fe-zzzzz',
+                         create_kwargs.get('name'))
+        self.assertEqual('dynamic.compute.zzzzz.arvadosapi.com',
+                         create_kwargs.get('ex_metadata', {}).get('hostname'))
+
+    def test_create_tags_from_list_tags(self):
+        driver = self.new_driver(list_kwargs={'tags': 'testA, testB'})
+        driver.create_node(testutil.MockSize(1), testutil.arvados_node_mock())
+        self.assertEqual(['testA', 'testB'],
+                         self.driver_mock().create_node.call_args[1]['ex_tags'])
+
+    def test_list_nodes_requires_tags_match(self):
+        # A node matches if our list tags are a subset of the node's tags.
+        # Test behavior with no tags, no match, partial matches, different
+        # order, and strict supersets.
+        cloud_mocks = [
+            testutil.cloud_node_mock(node_num, tags=tag_set)
+            for node_num, tag_set in enumerate(
+                [[], ['bad'], ['good'], ['great'], ['great', 'ok'],
+                 ['great', 'good'], ['good', 'fantastic', 'great']])]
+        cloud_mocks.append(testutil.cloud_node_mock())
+        self.driver_mock().list_nodes.return_value = cloud_mocks
+        driver = self.new_driver(list_kwargs={'tags': 'good, great'})
+        self.assertItemsEqual(['5', '6'], [n.id for n in driver.list_nodes()])
+
+    def build_gce_metadata(self, metadata_dict):
+        # Convert a plain metadata dictionary to the GCE data structure.
+        return {
+            'kind': 'compute#metadata',
+            'fingerprint': 'testprint',
+            'items': [{'key': key, 'value': metadata_dict[key]}
+                      for key in metadata_dict],
+            }
+
+    def check_sync_node_updates_hostname_tag(self, plain_metadata):
+        start_metadata = self.build_gce_metadata(plain_metadata)
         arv_node = testutil.arvados_node_mock(1)
-        cloud_node = testutil.cloud_node_mock(2)
+        cloud_node = testutil.cloud_node_mock(
+            2, metadata=start_metadata.copy(),
+            zone=testutil.cloud_object_mock('testzone'))
         driver = self.new_driver()
         driver.sync_node(cloud_node, arv_node)
-        tag_mock = self.driver_mock().ex_set_node_tags
-        self.assertTrue(tag_mock.called)
-        self.assertEqual(['hostname-compute1.zzzzz.arvadosapi.com'],
-                         tag_mock.call_args[0][1])
-
-    def test_node_create_time(self):
-        refsecs = int(time.time())
-        reftuple = time.gmtime(refsecs)
+        args, kwargs = self.driver_mock().connection.async_request.call_args
+        self.assertEqual('/zones/TESTZONE/instances/2/setMetadata', args[0])
+        for key in ['kind', 'fingerprint']:
+            self.assertEqual(start_metadata[key], kwargs['data'][key])
+        plain_metadata['hostname'] = 'compute1.zzzzz.arvadosapi.com'
+        self.assertEqual(
+            plain_metadata,
+            {item['key']: item['value'] for item in kwargs['data']['items']})
+
+    def test_sync_node_updates_hostname_tag(self):
+        self.check_sync_node_updates_hostname_tag(
+            {'testkey': 'testvalue', 'hostname': 'startvalue'})
+
+    def test_sync_node_adds_hostname_tag(self):
+        self.check_sync_node_updates_hostname_tag({'testkey': 'testval'})
+
+    def test_sync_node_raises_exception_on_failure(self):
+        arv_node = testutil.arvados_node_mock(8)
+        cloud_node = testutil.cloud_node_mock(
+            9, metadata={}, zone=testutil.cloud_object_mock('failzone'))
+        mock_response = self.driver_mock().connection.async_request()
+        mock_response.success.return_value = False
+        mock_response.error = 'sync error test'
+        driver = self.new_driver()
+        with self.assertRaises(Exception) as err_check:
+            driver.sync_node(cloud_node, arv_node)
+        self.assertIs(err_check.exception.__class__, Exception)
+        self.assertIn('sync error test', str(err_check.exception))
+
+    def test_node_create_time_zero_for_unknown_nodes(self):
         node = testutil.cloud_node_mock()
-        node.extra = {'launch_time': time.strftime('%Y-%m-%dT%H:%M:%S.000Z',
-                                                   reftuple)}
-        self.assertEqual(refsecs, gce.ComputeNodeDriver.node_start_time(node))
+        self.assertEqual(0, gce.ComputeNodeDriver.node_start_time(node))
 
-    def test_generate_metadata_for_new_arvados_node(self):
-        arv_node = testutil.arvados_node_mock(8)
-        driver = self.new_driver(list_kwargs={'list': 'test'})
-        self.assertEqual({'ex_metadata': {'list': 'test'}},
-                         driver.arvados_create_kwargs(arv_node))
+    def test_node_create_time_for_known_node(self):
+        node = testutil.cloud_node_mock(metadata=self.build_gce_metadata(
+                {'booted_at': '1970-01-01T00:01:05Z'}))
+        self.assertEqual(65, gce.ComputeNodeDriver.node_start_time(node))
+
+    def test_node_create_time_recorded_when_node_boots(self):
+        start_time = time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime())
+        arv_node = testutil.arvados_node_mock()
+        driver = self.new_driver()
+        driver.create_node(testutil.MockSize(1), arv_node)
+        metadata = self.driver_mock().create_node.call_args[1]['ex_metadata']
+        self.assertLessEqual(start_time, metadata.get('booted_at'))
 
     def test_deliver_ssh_key_in_metadata(self):
         test_ssh_key = 'ssh-rsa-foo'
         arv_node = testutil.arvados_node_mock(1)
-        with mock.patch('__builtin__.open', mock.mock_open(read_data=test_ssh_key)) as mock_file:
+        with mock.patch('__builtin__.open',
+                        mock.mock_open(read_data=test_ssh_key)) as mock_file:
             driver = self.new_driver(create_kwargs={'ssh_key': 'ssh-key-file'})
         mock_file.assert_called_once_with('ssh-key-file')
-        self.assertEqual({'ex_metadata': {'sshKeys': 'root:ssh-rsa-foo'}},
-                         driver.arvados_create_kwargs(arv_node))
+        driver.create_node(testutil.MockSize(1), arv_node)
+        metadata = self.driver_mock().create_node.call_args[1]['ex_metadata']
+        self.assertEqual('root:ssh-rsa-foo', metadata.get('sshKeys'))
 
     def test_create_driver_with_service_accounts(self):
-        srv_acct_config = { 'service_accounts': '{ "email": "foo at bar", "scopes":["storage-full"]}' }
+        service_accounts = {'email': 'foo at bar', 'scopes': ['storage-full']}
+        srv_acct_config = {'service_accounts': json.dumps(service_accounts)}
         arv_node = testutil.arvados_node_mock(1)
         driver = self.new_driver(create_kwargs=srv_acct_config)
-        create_kwargs = driver.arvados_create_kwargs(arv_node)
-        self.assertEqual({u'email': u'foo at bar', u'scopes': [u'storage-full']},
-                         create_kwargs['ex_service_accounts'])
+        driver.create_node(testutil.MockSize(1), arv_node)
+        self.assertEqual(
+            service_accounts,
+            self.driver_mock().create_node.call_args[1]['ex_service_accounts'])
diff --git a/services/nodemanager/tests/testutil.py b/services/nodemanager/tests/testutil.py
index 633aac6..d51aab2 100644
--- a/services/nodemanager/tests/testutil.py
+++ b/services/nodemanager/tests/testutil.py
@@ -39,7 +39,7 @@ def cloud_object_mock(name_id):
     cloud_object.name = cloud_object.id.upper()
     return cloud_object
 
-def cloud_node_mock(node_num=99):
+def cloud_node_mock(node_num=99, **extra):
     node = mock.NonCallableMagicMock(
         ['id', 'name', 'state', 'public_ips', 'private_ips', 'driver', 'size',
          'image', 'extra'],
@@ -48,6 +48,7 @@ def cloud_node_mock(node_num=99):
     node.name = node.id
     node.public_ips = []
     node.private_ips = [ip_address_mock(node_num)]
+    node.extra = extra
     return node
 
 def ip_address_mock(last_octet):

commit d32bbed004b76333c3e72e6c1f97dcde88e11edd
Author: Brett Smith <brett at curoverse.com>
Date:   Fri Feb 13 15:24:04 2015 -0500

    4138: Revamp Node Manager driver proxying in BaseComputeNodeDriver.
    
    Accessing attributes through a super() proxy does not invoke
    __getattr__ on base classes, so the old implementation made it
    impossible for subclasses to be agnostic about whether a method was
    implemented in BaseComputeNodeDriver or the real libcloud driver.
    This version makes that possible.  It's also a little nicer because
    now the class will report these method names to dir(), hasattr(), etc.

diff --git a/services/nodemanager/arvnodeman/computenode/driver/__init__.py b/services/nodemanager/arvnodeman/computenode/driver/__init__.py
index c06bba3..369bb8f 100644
--- a/services/nodemanager/arvnodeman/computenode/driver/__init__.py
+++ b/services/nodemanager/arvnodeman/computenode/driver/__init__.py
@@ -3,6 +3,7 @@
 from __future__ import absolute_import, print_function
 
 import libcloud.common.types as cloud_types
+from libcloud.compute.base import NodeDriver
 
 from ...config import NETWORK_ERRORS
 
@@ -35,14 +36,6 @@ class BaseComputeNodeDriver(object):
     def _init_ping_host(self, ping_host):
         self.ping_host = ping_host
 
-    def __getattr__(self, name):
-        # Proxy non-extension methods to the real driver.
-        if (not name.startswith('_') and not name.startswith('ex_')
-              and hasattr(self.real, name)):
-            return getattr(self.real, name)
-        else:
-            return super(BaseComputeNodeDriver, self).__getattr__(name)
-
     def search_for(self, term, list_method, key=lambda item: item.id):
         cache_key = (list_method, term)
         if cache_key not in self.SEARCH_CACHE:
@@ -96,3 +89,16 @@ class BaseComputeNodeDriver(object):
         # exactly an Exception, or a better-known higher-level exception.
         return (isinstance(exception, cls.CLOUD_ERRORS) or
                 getattr(exception, '__class__', None) is Exception)
+
+    # Now that we've defined all our own methods, delegate generic, public
+    # attributes of libcloud drivers that we haven't defined ourselves.
+    def _delegate_to_real(attr_name):
+        return property(
+            lambda self: getattr(self.real, attr_name),
+            lambda self, value: setattr(self.real, attr_name, value),
+            doc=getattr(getattr(NodeDriver, attr_name), '__doc__', None))
+
+    _locals = locals()
+    for _attr_name in dir(NodeDriver):
+        if (not _attr_name.startswith('_')) and (_attr_name not in _locals):
+            _locals[_attr_name] = _delegate_to_real(_attr_name)

commit c51fe8e859262a8c534c2d3265fabe54555ac462
Author: Brett Smith <brett at curoverse.com>
Date:   Thu Feb 12 17:22:12 2015 -0500

    4138: Refactor out Node Manager DriverTestMixin.

diff --git a/services/nodemanager/tests/test_computenode_driver_ec2.py b/services/nodemanager/tests/test_computenode_driver_ec2.py
index 10a41a8..8e52824 100644
--- a/services/nodemanager/tests/test_computenode_driver_ec2.py
+++ b/services/nodemanager/tests/test_computenode_driver_ec2.py
@@ -12,15 +12,8 @@ import mock
 import arvnodeman.computenode.driver.ec2 as ec2
 from . import testutil
 
-class EC2ComputeNodeDriverTestCase(unittest.TestCase):
-    def setUp(self):
-        self.driver_mock = mock.MagicMock(name='driver_mock')
-
-    def new_driver(self, auth_kwargs={}, list_kwargs={}, create_kwargs={}):
-        create_kwargs.setdefault('ping_host', '100::')
-        return ec2.ComputeNodeDriver(
-            auth_kwargs, list_kwargs, create_kwargs,
-            driver_class=self.driver_mock)
+class EC2ComputeNodeDriverTestCase(testutil.DriverTestMixin, unittest.TestCase):
+    TEST_CLASS = ec2.ComputeNodeDriver
 
     def test_driver_instantiation(self):
         kwargs = {'key': 'testkey'}
diff --git a/services/nodemanager/tests/test_computenode_driver_gce.py b/services/nodemanager/tests/test_computenode_driver_gce.py
index 0e10e2f..d4b73f7 100644
--- a/services/nodemanager/tests/test_computenode_driver_gce.py
+++ b/services/nodemanager/tests/test_computenode_driver_gce.py
@@ -10,15 +10,8 @@ import mock
 import arvnodeman.computenode.driver.gce as gce
 from . import testutil
 
-class GCEComputeNodeDriverTestCase(unittest.TestCase):
-    def setUp(self):
-        self.driver_mock = mock.MagicMock(name='driver_mock')
-
-    def new_driver(self, auth_kwargs={}, list_kwargs={}, create_kwargs={}):
-        create_kwargs.setdefault('ping_host', '100::')
-        return gce.ComputeNodeDriver(
-            auth_kwargs, list_kwargs, create_kwargs,
-            driver_class=self.driver_mock)
+class GCEComputeNodeDriverTestCase(testutil.DriverTestMixin, unittest.TestCase):
+    TEST_CLASS = gce.ComputeNodeDriver
 
     def test_driver_instantiation(self):
         kwargs = {'user_id': 'foo'}
diff --git a/services/nodemanager/tests/testutil.py b/services/nodemanager/tests/testutil.py
index c5b6539..633aac6 100644
--- a/services/nodemanager/tests/testutil.py
+++ b/services/nodemanager/tests/testutil.py
@@ -115,6 +115,21 @@ class ActorTestMixin(object):
                 return result
 
 
+class DriverTestMixin(object):
+    def setUp(self):
+        self.driver_mock = mock.MagicMock(name='driver_mock')
+        super(DriverTestMixin, self).setUp()
+
+    def new_driver(self, auth_kwargs={}, list_kwargs={}, create_kwargs={}):
+        create_kwargs.setdefault('ping_host', '100::')
+        return self.TEST_CLASS(
+            auth_kwargs, list_kwargs, create_kwargs,
+            driver_class=self.driver_mock)
+
+    def driver_method_args(self, method_name):
+        return getattr(self.driver_mock(), method_name).call_args
+
+
 class RemotePollLoopActorTestMixin(ActorTestMixin):
     def build_monitor(self, *args, **kwargs):
         self.timer = mock.MagicMock(name='timer_mock')

commit 5884b7c433e6f682c089956917f31a587e75363d
Author: Brett Smith <brett at curoverse.com>
Date:   Thu Feb 12 15:53:16 2015 -0500

    4138: Fix noop Node Manager EC2 driver tests.
    
    The previous tests simply instantiated the driver, then checked that a
    mock method was truthy (which it will always be).  This makes the test
    work as intended.

diff --git a/services/nodemanager/tests/test_computenode_driver_ec2.py b/services/nodemanager/tests/test_computenode_driver_ec2.py
index fae63a5..10a41a8 100644
--- a/services/nodemanager/tests/test_computenode_driver_ec2.py
+++ b/services/nodemanager/tests/test_computenode_driver_ec2.py
@@ -37,15 +37,12 @@ class EC2ComputeNodeDriverTestCase(unittest.TestCase):
         self.assertEqual({'tag:test': 'true'},
                           list_method.call_args[1].get('ex_filters'))
 
-    def test_create_location_loaded_at_initialization(self):
-        kwargs = {'location': 'testregion'}
-        driver = self.new_driver(create_kwargs=kwargs)
-        self.assertTrue(self.driver_mock().list_locations)
-
     def test_create_image_loaded_at_initialization(self):
-        kwargs = {'image': 'testimage'}
-        driver = self.new_driver(create_kwargs=kwargs)
-        self.assertTrue(self.driver_mock().list_images)
+        list_method = self.driver_mock().list_images
+        list_method.return_value = [testutil.cloud_object_mock(c)
+                                    for c in 'abc']
+        driver = self.new_driver(create_kwargs={'image_id': 'b'})
+        self.assertEqual(1, list_method.call_count)
 
     def test_create_includes_ping_secret(self):
         arv_node = testutil.arvados_node_mock(info={'ping_secret': 'ssshh'})
diff --git a/services/nodemanager/tests/testutil.py b/services/nodemanager/tests/testutil.py
index ff525f0..c5b6539 100644
--- a/services/nodemanager/tests/testutil.py
+++ b/services/nodemanager/tests/testutil.py
@@ -30,6 +30,15 @@ def arvados_node_mock(node_num=99, job_uuid=None, age=0, **kwargs):
     node.update(kwargs)
     return node
 
+def cloud_object_mock(name_id):
+    # A very generic mock, useful for stubbing libcloud objects we
+    # only search for and pass around, like locations, subnets, etc.
+    cloud_object = mock.NonCallableMagicMock(['id', 'name'],
+                                             name='cloud_object')
+    cloud_object.id = str(name_id)
+    cloud_object.name = cloud_object.id.upper()
+    return cloud_object
+
 def cloud_node_mock(node_num=99):
     node = mock.NonCallableMagicMock(
         ['id', 'name', 'state', 'public_ips', 'private_ips', 'driver', 'size',

commit b5249ac7c8ccc1bec4ae751d1ff6816677e6b2e7
Author: Brett Smith <brett at curoverse.com>
Date:   Fri Feb 13 16:00:30 2015 -0500

    4138: Refactor common Node Manager driver initialization to base driver.

diff --git a/services/nodemanager/arvnodeman/computenode/driver/__init__.py b/services/nodemanager/arvnodeman/computenode/driver/__init__.py
index 3a0c206..c06bba3 100644
--- a/services/nodemanager/arvnodeman/computenode/driver/__init__.py
+++ b/services/nodemanager/arvnodeman/computenode/driver/__init__.py
@@ -25,6 +25,15 @@ class BaseComputeNodeDriver(object):
         self.real = driver_class(**auth_kwargs)
         self.list_kwargs = list_kwargs
         self.create_kwargs = create_kwargs
+        for key in self.create_kwargs.keys():
+            init_method = getattr(self, '_init_' + key, None)
+            if init_method is not None:
+                new_pair = init_method(self.create_kwargs.pop(key))
+                if new_pair is not None:
+                    self.create_kwargs[new_pair[0]] = new_pair[1]
+
+    def _init_ping_host(self, ping_host):
+        self.ping_host = ping_host
 
     def __getattr__(self, name):
         # Proxy non-extension methods to the real driver.
@@ -52,6 +61,11 @@ class BaseComputeNodeDriver(object):
     def arvados_create_kwargs(self, arvados_node):
         raise NotImplementedError("BaseComputeNodeDriver.arvados_create_kwargs")
 
+    def _make_ping_url(self, arvados_node):
+        return 'https://{}/arvados/v1/nodes/{}/ping?ping_secret={}'.format(
+            self.ping_host, arvados_node['uuid'],
+            arvados_node['info']['ping_secret'])
+
     def create_node(self, size, arvados_node):
         kwargs = self.create_kwargs.copy()
         kwargs.update(self.arvados_create_kwargs(arvados_node))
diff --git a/services/nodemanager/arvnodeman/computenode/driver/ec2.py b/services/nodemanager/arvnodeman/computenode/driver/ec2.py
index 255a948..9db3d89 100644
--- a/services/nodemanager/arvnodeman/computenode/driver/ec2.py
+++ b/services/nodemanager/arvnodeman/computenode/driver/ec2.py
@@ -52,19 +52,10 @@ class ComputeNodeDriver(BaseComputeNodeDriver):
         super(ComputeNodeDriver, self).__init__(
             auth_kwargs, {'ex_filters': list_kwargs}, create_kwargs,
             driver_class)
-        for key in self.create_kwargs.keys():
-            init_method = getattr(self, '_init_' + key, None)
-            if init_method is not None:
-                new_pair = init_method(self.create_kwargs.pop(key))
-                if new_pair is not None:
-                    self.create_kwargs[new_pair[0]] = new_pair[1]
 
     def _init_image_id(self, image_id):
         return 'image', self.search_for(image_id, 'list_images')
 
-    def _init_ping_host(self, ping_host):
-        self.ping_host = ping_host
-
     def _init_security_groups(self, group_names):
         return 'ex_security_groups', [
             self.search_for(gname.strip(), 'ex_get_security_groups')
@@ -79,14 +70,8 @@ class ComputeNodeDriver(BaseComputeNodeDriver):
         return 'auth', key
 
     def arvados_create_kwargs(self, arvados_node):
-        result = {'name': arvados_node_fqdn(arvados_node)}
-        ping_secret = arvados_node['info'].get('ping_secret')
-        if ping_secret is not None:
-            ping_url = ('https://{}/arvados/v1/nodes/{}/ping?ping_secret={}'.
-                        format(self.ping_host, arvados_node['uuid'],
-                               ping_secret))
-            result['ex_userdata'] = ping_url
-        return result
+        return {'name': arvados_node_fqdn(arvados_node),
+                'ex_userdata': self._make_ping_url(arvados_node)}
 
     def post_create_node(self, cloud_node):
         self.real.ex_create_tags(cloud_node, self.tags)
diff --git a/services/nodemanager/tests/testutil.py b/services/nodemanager/tests/testutil.py
index f254e72..ff525f0 100644
--- a/services/nodemanager/tests/testutil.py
+++ b/services/nodemanager/tests/testutil.py
@@ -26,7 +26,7 @@ def arvados_node_mock(node_num=99, job_uuid=None, age=0, **kwargs):
             'ip_address': ip_address_mock(node_num),
             'job_uuid': job_uuid,
             'crunch_worker_state': crunch_worker_state,
-            'info': {}}
+            'info': {'ping_secret': 'defaulttestsecret'}}
     node.update(kwargs)
     return node
 

commit 708630d0303948874d231a8d6b6d080adcdf6d2c
Author: Brett Smith <brett at curoverse.com>
Date:   Wed Feb 11 15:12:37 2015 -0500

    4138: Simplify Node Manager GCE credential handling.
    
    Because libcloud's GCE driver accepts a key path as a constructor
    argument, it's relatively straightforward to put all the constructor
    arguments directly in the Node Manager configuration.  No need to
    parse out JSON.

diff --git a/services/nodemanager/arvnodeman/config.py b/services/nodemanager/arvnodeman/config.py
index f386653..315df1c 100644
--- a/services/nodemanager/arvnodeman/config.py
+++ b/services/nodemanager/arvnodeman/config.py
@@ -4,7 +4,6 @@ from __future__ import absolute_import, print_function
 
 import ConfigParser
 import importlib
-import json
 import logging
 import ssl
 import sys
@@ -99,13 +98,6 @@ class NodeManagerConfig(ConfigParser.SafeConfigParser):
         module = importlib.import_module('arvnodeman.computenode.driver.' +
                                          self.get('Cloud', 'provider'))
         auth_kwargs = self.get_section('Cloud Credentials')
-        # GCE credentials are delivered in a JSON file.
-        if 'json_credential_file' in auth_kwargs:
-            with open(auth_kwargs['json_credential_file']) as jf:
-                json_creds = json.load(jf)
-            auth_kwargs['user_id'] = json_creds['client_email']
-            auth_kwargs['key'] = json_creds['private_key']
-
         if 'timeout' in auth_kwargs:
             auth_kwargs['timeout'] = int(auth_kwargs['timeout'])
         return module.ComputeNodeDriver(auth_kwargs,
diff --git a/services/nodemanager/doc/gce.example.cfg b/services/nodemanager/doc/gce.example.cfg
index 0d8fbb7..5bf033d 100644
--- a/services/nodemanager/doc/gce.example.cfg
+++ b/services/nodemanager/doc/gce.example.cfg
@@ -79,7 +79,8 @@ provider = gce
 shutdown_windows = 54, 5, 1
 
 [Cloud Credentials]
-json_credential_file = /path/to/credential_file.json
+user_id = client_email_address at developer.gserviceaccount.com
+key = path_to_certificate.pem
 project = project-id-from-google-cloud-dashboard
 timeout = 60
 

commit a6837612f9678bb983f634b518bde16b8921a0a4
Author: Tim Pierce <twp at curoverse.com>
Date:   Fri Jan 23 17:44:41 2015 -0500

    4138: updated unit test
    
    Corrected test_create_includes_ping_secret to account for delivering the
    ping secret via metadata in GCE.

diff --git a/services/nodemanager/tests/test_computenode_driver_gce.py b/services/nodemanager/tests/test_computenode_driver_gce.py
index 075760a..0e10e2f 100644
--- a/services/nodemanager/tests/test_computenode_driver_gce.py
+++ b/services/nodemanager/tests/test_computenode_driver_gce.py
@@ -42,9 +42,10 @@ class GCEComputeNodeDriverTestCase(unittest.TestCase):
         driver.create_node(testutil.MockSize(1), arv_node)
         create_method = self.driver_mock().create_node
         self.assertTrue(create_method.called)
+        create_metadata = create_method.call_args[1].get('ex_metadata')
+        self.assertIsInstance(create_metadata, dict)
         self.assertIn('ping_secret=ssshh',
-                      create_method.call_args[1].get('ex_userdata',
-                                                     'arg missing'))
+                      create_metadata.get('pingUrl', 'arg missing'))
 
     def test_generate_metadata_for_new_arvados_node(self):
         arv_node = testutil.arvados_node_mock(8)

commit 51d417f941214512c0cbd6d56687ce2b9a0869bc
Author: Tim Pierce <twp at curoverse.com>
Date:   Fri Jan 23 17:24:54 2015 -0500

    4138: GCE fixes
    
    The 'network_id' parameter needs to be delivered as 'location' in GCE.
    
    The ping_url parameter is now delivered in the node metadata as
    'pingUrl'.
    
    When creating a new GCE instance, 'name' is a required parameter and
    must begin with a letter. The default name is the UUID of the
    corresponding Arvados node, prepended with 'arv-'.

diff --git a/services/nodemanager/arvnodeman/computenode/driver/gce.py b/services/nodemanager/arvnodeman/computenode/driver/gce.py
index 125a090..a8edc43 100644
--- a/services/nodemanager/arvnodeman/computenode/driver/gce.py
+++ b/services/nodemanager/arvnodeman/computenode/driver/gce.py
@@ -37,8 +37,11 @@ class ComputeNodeDriver(BaseComputeNodeDriver):
                 if new_pair is not None:
                     self.create_kwargs[new_pair[0]] = new_pair[1]
 
-    def _init_image_id(self, image_id):
-        return 'image', image_id
+    def _init_image(self, image_name):
+        return 'image', image_name
+
+    def _init_location(self, location):
+        return 'location', location
 
     def _init_ping_host(self, ping_host):
         self.ping_host = ping_host
@@ -46,9 +49,6 @@ class ComputeNodeDriver(BaseComputeNodeDriver):
     def _init_service_accounts(self, service_accounts_str):
         self.service_accounts = json.loads(service_accounts_str)
 
-    def _init_network_id(self, subnet_id):
-        return 'ex_network', subnet_id
-
     def _init_ssh_key(self, filename):
         with open(filename) as ssh_file:
             self.ssh_key = ssh_file.read().strip()
@@ -60,7 +60,7 @@ class ComputeNodeDriver(BaseComputeNodeDriver):
             ping_url = ('https://{}/arvados/v1/nodes/{}/ping?ping_secret={}'.
                         format(self.ping_host, arvados_node['uuid'],
                                ping_secret))
-            result['ex_userdata'] = ping_url
+            result['ex_metadata']['pingUrl'] = ping_url
         if self.service_accounts is not None:
             result['ex_service_accounts'] = self.service_accounts
 
@@ -70,6 +70,13 @@ class ComputeNodeDriver(BaseComputeNodeDriver):
             result['ex_metadata']['sshKeys'] = 'root:{}'.format(self.ssh_key)
         return result
 
+    def create_node(self, size, arvados_node):
+        kwargs = self.create_kwargs.copy()
+        kwargs.update(self.arvados_create_kwargs(arvados_node))
+        kwargs.setdefault('name', 'arv-{}'.format(arvados_node['uuid']))
+        kwargs['size'] = size
+        return self.real.create_node(**kwargs)
+
     # When an Arvados node is synced with a GCE node, the Arvados hostname
     # is forwarded in a GCE tag 'hostname-foo'.
     # TODO(twp): implement an ex_set_metadata method (at least until
diff --git a/services/nodemanager/doc/gce.example.cfg b/services/nodemanager/doc/gce.example.cfg
index d969336..0d8fbb7 100644
--- a/services/nodemanager/doc/gce.example.cfg
+++ b/services/nodemanager/doc/gce.example.cfg
@@ -104,9 +104,9 @@ ping_host = hostname:port
 
 # The GCE image name and network zone name to use when creating new nodes.
 # * Valid image aliases: https://cloud.google.com/sdk/gcloud/reference/compute/instances/create
-# * Valid network zones: https://cloud.google.com/compute/docs/zones
-image_id = debian-7
-network_id = us-central1-a
+# * Valid location (zone) names: https://cloud.google.com/compute/docs/zones
+image = debian-7
+location = us-central1-a
 
 # JSON string of service account authorizations for this cluster.
 # See http://libcloud.readthedocs.org/en/latest/compute/drivers/gce.html#specifying-service-account-scopes

commit 45fdc95efb2467b0ad7d21d82aa08b26a43cfaa0
Author: Tim Pierce <twp at curoverse.com>
Date:   Wed Jan 21 13:06:35 2015 -0500

    4138: general GCE fixes
    
    * JSON credential file
    ** GCE credentials are delivered as a JSON string (and the key is formatted as a multi-line RSA private key). Let the GCE config file specify a path to the JSON credential file for simplicity.
    * Accept NodeSizes addressed by id or name
    ** In EC2, NodeSizes are identified by the 'id' field.  In GCE they are identified by the 'name' field.  Allow the Node Manager config module to accept either.

diff --git a/services/nodemanager/arvnodeman/computenode/driver/gce.py b/services/nodemanager/arvnodeman/computenode/driver/gce.py
index a4fd57d..125a090 100644
--- a/services/nodemanager/arvnodeman/computenode/driver/gce.py
+++ b/services/nodemanager/arvnodeman/computenode/driver/gce.py
@@ -38,7 +38,7 @@ class ComputeNodeDriver(BaseComputeNodeDriver):
                     self.create_kwargs[new_pair[0]] = new_pair[1]
 
     def _init_image_id(self, image_id):
-        return 'image', self.search_for(image_id, 'list_images')
+        return 'image', image_id
 
     def _init_ping_host(self, ping_host):
         self.ping_host = ping_host
@@ -47,7 +47,7 @@ class ComputeNodeDriver(BaseComputeNodeDriver):
         self.service_accounts = json.loads(service_accounts_str)
 
     def _init_network_id(self, subnet_id):
-        return 'ex_network', self.search_for(subnet_id, 'ex_list_networks')
+        return 'ex_network', subnet_id
 
     def _init_ssh_key(self, filename):
         with open(filename) as ssh_file:
diff --git a/services/nodemanager/arvnodeman/config.py b/services/nodemanager/arvnodeman/config.py
index e15f023..f386653 100644
--- a/services/nodemanager/arvnodeman/config.py
+++ b/services/nodemanager/arvnodeman/config.py
@@ -4,6 +4,7 @@ from __future__ import absolute_import, print_function
 
 import ConfigParser
 import importlib
+import json
 import logging
 import ssl
 import sys
@@ -98,6 +99,13 @@ class NodeManagerConfig(ConfigParser.SafeConfigParser):
         module = importlib.import_module('arvnodeman.computenode.driver.' +
                                          self.get('Cloud', 'provider'))
         auth_kwargs = self.get_section('Cloud Credentials')
+        # GCE credentials are delivered in a JSON file.
+        if 'json_credential_file' in auth_kwargs:
+            with open(auth_kwargs['json_credential_file']) as jf:
+                json_creds = json.load(jf)
+            auth_kwargs['user_id'] = json_creds['client_email']
+            auth_kwargs['key'] = json_creds['private_key']
+
         if 'timeout' in auth_kwargs:
             auth_kwargs['timeout'] = int(auth_kwargs['timeout'])
         return module.ComputeNodeDriver(auth_kwargs,
@@ -105,14 +113,26 @@ class NodeManagerConfig(ConfigParser.SafeConfigParser):
                                         self.get_section('Cloud Create'))
 
     def node_sizes(self, all_sizes):
+        """Finds all acceptable NodeSizes for our installation.
+
+        Returns a list of (NodeSize, kwargs) pairs for each NodeSize object
+        returned by libcloud that matches a size listed in our config file.
+        """
+
         size_kwargs = {}
         for sec_name in self.sections():
             sec_words = sec_name.split(None, 2)
             if sec_words[0] != 'Size':
                 continue
             size_kwargs[sec_words[1]] = self.get_section(sec_name, int)
-        return [(size, size_kwargs[size.id]) for size in all_sizes
-                if size.id in size_kwargs]
+        # EC2 node sizes are identified by id. GCE sizes are identified by name.
+        matching_sizes = []
+        for size in all_sizes:
+            if size.id in size_kwargs:
+                matching_sizes.append((size, size_kwargs[size.id]))
+            elif size.name in size_kwargs:
+                matching_sizes.append((size, size_kwargs[size.name]))
+        return matching_sizes
 
     def shutdown_windows(self):
         return [int(n)
diff --git a/services/nodemanager/doc/gce.example.cfg b/services/nodemanager/doc/gce.example.cfg
index 92bc44c..d969336 100644
--- a/services/nodemanager/doc/gce.example.cfg
+++ b/services/nodemanager/doc/gce.example.cfg
@@ -79,9 +79,8 @@ provider = gce
 shutdown_windows = 54, 5, 1
 
 [Cloud Credentials]
-user_id = USERID
-key = SECRET_KEY
-project = project_name
+json_credential_file = /path/to/credential_file.json
+project = project-id-from-google-cloud-dashboard
 timeout = 60
 
 # Optional settings. For full documentation see
@@ -103,9 +102,11 @@ ping_host = hostname:port
 # A file path for an SSH key that can log in to the compute node.
 # ssh_key = path
 
-# The GCE IDs of the image and network compute nodes should use.
-image_id = idstring
-network_id = idstring
+# The GCE image name and network zone name to use when creating new nodes.
+# * Valid image aliases: https://cloud.google.com/sdk/gcloud/reference/compute/instances/create
+# * Valid network zones: https://cloud.google.com/compute/docs/zones
+image_id = debian-7
+network_id = us-central1-a
 
 # JSON string of service account authorizations for this cluster.
 # See http://libcloud.readthedocs.org/en/latest/compute/drivers/gce.html#specifying-service-account-scopes
@@ -120,6 +121,9 @@ network_id = idstring
 # The Size fields are interpreted the same way as with a libcloud NodeSize:
 # http://libcloud.readthedocs.org/en/latest/compute/api.html#libcloud.compute.base.NodeSize
 #
+# See https://cloud.google.com/compute/docs/machine-types for a list
+# of known machine types that may be used as a Size parameter.
+#
 # Each size section MUST define the number of cores are available in this
 # size class (since libcloud does not provide any consistent API for exposing
 # this setting).

commit af52e4975a52d4eeec356e5dd0ab4dbb5957ea28
Author: Tim Pierce <twp at curoverse.com>
Date:   Mon Nov 24 17:12:07 2014 -0500

    4138: code review feedback

diff --git a/services/nodemanager/doc/gce.example.cfg b/services/nodemanager/doc/gce.example.cfg
index d09396f..92bc44c 100644
--- a/services/nodemanager/doc/gce.example.cfg
+++ b/services/nodemanager/doc/gce.example.cfg
@@ -82,15 +82,15 @@ shutdown_windows = 54, 5, 1
 user_id = USERID
 key = SECRET_KEY
 project = project_name
-timeout = 60             # used by NodeManagerConfig
+timeout = 60
 
 # Optional settings. For full documentation see
 # http://libcloud.readthedocs.org/en/latest/compute/drivers/gce.html#libcloud.compute.drivers.gce.GCENodeDriver
 #
-# datacenter = 'us-central1-a'
-# auth_type = 'SA'               # SA, IA or GCE
+# datacenter = us-central1-a
+# auth_type = SA               # SA, IA or GCE
 # scopes = https://www.googleapis.com/auth/compute
-# credential_file = 
+# credential_file =
 
 [Cloud List]
 # Keywords here will be used to populate the metadata field for a GCE node.

commit 9d6a6eca3a634e4090d7e0fc4f094c411ab5817a
Author: Tim Pierce <twp at curoverse.com>
Date:   Tue Nov 18 13:49:10 2014 -0500

    4138: support for Google Cloud Engine.
    
    * Added:
    ** nodemanager/arvnodeman/computenode/drivers/gce.py
    ** nodemanager/doc/gce.example.cfg
    ** nodemanager/tests/test_computenode_driver_gce.py
    
    Updated comment in nodemanager/arvnodeman/computenode/drivers/ec2.py.

diff --git a/services/nodemanager/arvnodeman/computenode/driver/gce.py b/services/nodemanager/arvnodeman/computenode/driver/gce.py
new file mode 100644
index 0000000..a4fd57d
--- /dev/null
+++ b/services/nodemanager/arvnodeman/computenode/driver/gce.py
@@ -0,0 +1,86 @@
+#!/usr/bin/env python
+
+from __future__ import absolute_import, print_function
+
+import functools
+import json
+import time
+
+import libcloud.compute.base as cloud_base
+import libcloud.compute.providers as cloud_provider
+import libcloud.compute.types as cloud_types
+from libcloud.compute.drivers import gce
+
+from . import BaseComputeNodeDriver
+from .. import arvados_node_fqdn
+
+class ComputeNodeDriver(BaseComputeNodeDriver):
+    """Compute node driver wrapper for GCE
+
+    This translates cloud driver requests to GCE's specific parameters.
+    """
+    DEFAULT_DRIVER = cloud_provider.get_driver(cloud_types.Provider.GCE)
+    SEARCH_CACHE = {}
+    ssh_key = None
+    service_accounts = None
+
+    def __init__(self, auth_kwargs, list_kwargs, create_kwargs,
+                 driver_class=DEFAULT_DRIVER):
+        super(ComputeNodeDriver, self).__init__(
+            auth_kwargs, list_kwargs, create_kwargs,
+            driver_class)
+
+        for key in self.create_kwargs.keys():
+            init_method = getattr(self, '_init_' + key, None)
+            if init_method is not None:
+                new_pair = init_method(self.create_kwargs.pop(key))
+                if new_pair is not None:
+                    self.create_kwargs[new_pair[0]] = new_pair[1]
+
+    def _init_image_id(self, image_id):
+        return 'image', self.search_for(image_id, 'list_images')
+
+    def _init_ping_host(self, ping_host):
+        self.ping_host = ping_host
+
+    def _init_service_accounts(self, service_accounts_str):
+        self.service_accounts = json.loads(service_accounts_str)
+
+    def _init_network_id(self, subnet_id):
+        return 'ex_network', self.search_for(subnet_id, 'ex_list_networks')
+
+    def _init_ssh_key(self, filename):
+        with open(filename) as ssh_file:
+            self.ssh_key = ssh_file.read().strip()
+
+    def arvados_create_kwargs(self, arvados_node):
+        result = {'ex_metadata': self.list_kwargs.copy() }
+        ping_secret = arvados_node['info'].get('ping_secret')
+        if ping_secret is not None:
+            ping_url = ('https://{}/arvados/v1/nodes/{}/ping?ping_secret={}'.
+                        format(self.ping_host, arvados_node['uuid'],
+                               ping_secret))
+            result['ex_userdata'] = ping_url
+        if self.service_accounts is not None:
+            result['ex_service_accounts'] = self.service_accounts
+
+        # SSH keys are delivered to GCE nodes via ex_metadata: see
+        # http://stackoverflow.com/questions/26752617/creating-sshkeys-for-gce-instance-using-libcloud
+        if self.ssh_key is not None:
+            result['ex_metadata']['sshKeys'] = 'root:{}'.format(self.ssh_key)
+        return result
+
+    # When an Arvados node is synced with a GCE node, the Arvados hostname
+    # is forwarded in a GCE tag 'hostname-foo'.
+    # TODO(twp): implement an ex_set_metadata method (at least until
+    # libcloud supports the API setMetadata method) so we can pass this
+    # sensibly in the node metadata.
+    def sync_node(self, cloud_node, arvados_node):
+        tags = ['hostname-{}'.format(arvados_node_fqdn(arvados_node))]
+        self.real.ex_set_node_tags(cloud_node, tags)
+
+    @classmethod
+    def node_start_time(cls, node):
+        time_str = node.extra['launch_time'].split('.', 2)[0] + 'UTC'
+        return time.mktime(time.strptime(
+                time_str,'%Y-%m-%dT%H:%M:%S%Z')) - time.timezone
diff --git a/services/nodemanager/doc/ec2.example.cfg b/services/nodemanager/doc/ec2.example.cfg
index 024ed2b..9b41ca1 100644
--- a/services/nodemanager/doc/ec2.example.cfg
+++ b/services/nodemanager/doc/ec2.example.cfg
@@ -128,9 +128,11 @@ security_groups = idstring1, idstring2
 # willing to use.  The Node Manager should boot the cheapest size(s) that
 # can run jobs in the queue (N.B.: defining more than one size has not been
 # tested yet).
-# Each size section MUST define the number of cores it has.  You may also
-# want to define the number of mebibytes of scratch space for Crunch jobs.
-# You can also override Amazon's provided data fields by setting the same
-# names here.
+# Each size section MUST define the number of cores are available in this
+# size class (since libcloud does not provide any consistent API for exposing
+# this setting).
+# You may also want to define the amount of scratch space (expressed
+# in GB) for Crunch jobs.  You can also override Amazon's provided
+# data fields by setting the same names here.
 cores = 2
-scratch = 100
\ No newline at end of file
+scratch = 100
diff --git a/services/nodemanager/doc/ec2.example.cfg b/services/nodemanager/doc/gce.example.cfg
similarity index 64%
copy from services/nodemanager/doc/ec2.example.cfg
copy to services/nodemanager/doc/gce.example.cfg
index 024ed2b..d09396f 100644
--- a/services/nodemanager/doc/ec2.example.cfg
+++ b/services/nodemanager/doc/gce.example.cfg
@@ -1,12 +1,7 @@
-# EC2 configuration for Arvados Node Manager.
+# Google Compute Engine configuration for Arvados Node Manager.
 # All times are in seconds unless specified otherwise.
 
 [Daemon]
-# The dispatcher can customize the start and stop procedure for
-# cloud nodes.  For example, the SLURM dispatcher drains nodes
-# through SLURM before shutting them down.
-#dispatcher = slurm
-
 # Node Manager will ensure that there are at least this many nodes
 # running at all times.
 min_nodes = 0
@@ -15,7 +10,7 @@ min_nodes = 0
 # many are running.
 max_nodes = 8
 
-# Poll EC2 nodes and Arvados for new information every N seconds.
+# Poll compute nodes and Arvados for new information every N seconds.
 poll_time = 60
 
 # Polls have exponential backoff when services fail to respond.
@@ -27,12 +22,6 @@ max_poll_time = 300
 # information is too outdated.
 poll_stale_after = 600
 
-# If Node Manager boots a cloud node, and it does not pair with an Arvados
-# node before this long, assume that there was a cloud bootstrap failure and
-# shut it down.  Note that normal shutdown windows apply (see the Cloud
-# section), so this should be shorter than the first shutdown window value.
-boot_fail_after = 1800
-
 # "Node stale time" affects two related behaviors.
 # 1. If a compute node has been running for at least this long, but it
 # isn't paired with an Arvados node, do not shut it down, but leave it alone.
@@ -74,8 +63,9 @@ timeout = 15
 insecure = no
 
 [Cloud]
-provider = ec2
+provider = gce
 
+# XXX(twp): figure out good default settings for GCE
 # It's usually most cost-effective to shut down compute nodes during narrow
 # windows of time.  For example, EC2 bills each node by the hour, so the best
 # time to shut down a node is right before a new hour of uptime starts.
@@ -89,48 +79,52 @@ provider = ec2
 shutdown_windows = 54, 5, 1
 
 [Cloud Credentials]
-key = KEY
-secret = SECRET_KEY
-region = us-east-1
-timeout = 60
+user_id = USERID
+key = SECRET_KEY
+project = project_name
+timeout = 60             # used by NodeManagerConfig
+
+# Optional settings. For full documentation see
+# http://libcloud.readthedocs.org/en/latest/compute/drivers/gce.html#libcloud.compute.drivers.gce.GCENodeDriver
+#
+# datacenter = 'us-central1-a'
+# auth_type = 'SA'               # SA, IA or GCE
+# scopes = https://www.googleapis.com/auth/compute
+# credential_file = 
 
 [Cloud List]
-# This section defines filters that find compute nodes.
-# Tags that you specify here will automatically be added to nodes you create.
-# Replace colons in Amazon filters with underscores
-# (e.g., write "tag:mytag" as "tag_mytag").
-instance-state-name = running
-tag_arvados-class = dynamic-compute
-tag_cluster = zyxwv
+# Keywords here will be used to populate the metadata field for a GCE node.
 
 [Cloud Create]
 # New compute nodes will send pings to Arvados at this host.
 # You may specify a port, and use brackets to disambiguate IPv6 addresses.
 ping_host = hostname:port
 
-# Give the name of an SSH key on AWS...
-ex_keyname = string
-
-# ... or a file path for an SSH key that can log in to the compute node.
-# (One or the other, not both.)
+# A file path for an SSH key that can log in to the compute node.
 # ssh_key = path
 
-# The EC2 IDs of the image and subnet compute nodes should use.
+# The GCE IDs of the image and network compute nodes should use.
 image_id = idstring
-subnet_id = idstring
+network_id = idstring
 
-# Comma-separated EC2 IDs for the security group(s) assigned to each
-# compute node.
-security_groups = idstring1, idstring2
+# JSON string of service account authorizations for this cluster.
+# See http://libcloud.readthedocs.org/en/latest/compute/drivers/gce.html#specifying-service-account-scopes
+# service_accounts = [{'email':'account at example.com', 'scopes':['storage-ro']}]
 
-[Size t2.medium]
-# You can define any number of Size sections to list EC2 sizes you're
+[Size n1-standard-2]
+# You can define any number of Size sections to list node sizes you're
 # willing to use.  The Node Manager should boot the cheapest size(s) that
 # can run jobs in the queue (N.B.: defining more than one size has not been
 # tested yet).
-# Each size section MUST define the number of cores it has.  You may also
-# want to define the number of mebibytes of scratch space for Crunch jobs.
-# You can also override Amazon's provided data fields by setting the same
-# names here.
+#
+# The Size fields are interpreted the same way as with a libcloud NodeSize:
+# http://libcloud.readthedocs.org/en/latest/compute/api.html#libcloud.compute.base.NodeSize
+#
+# Each size section MUST define the number of cores are available in this
+# size class (since libcloud does not provide any consistent API for exposing
+# this setting).
+# You may also want to define the amount of scratch space (expressed
+# in GB) for Crunch jobs.
 cores = 2
-scratch = 100
\ No newline at end of file
+scratch = 100
+ram = 512
diff --git a/services/nodemanager/tests/test_computenode_driver_gce.py b/services/nodemanager/tests/test_computenode_driver_gce.py
new file mode 100644
index 0000000..075760a
--- /dev/null
+++ b/services/nodemanager/tests/test_computenode_driver_gce.py
@@ -0,0 +1,104 @@
+#!/usr/bin/env python
+
+from __future__ import absolute_import, print_function
+
+import time
+import unittest
+
+import mock
+
+import arvnodeman.computenode.driver.gce as gce
+from . import testutil
+
+class GCEComputeNodeDriverTestCase(unittest.TestCase):
+    def setUp(self):
+        self.driver_mock = mock.MagicMock(name='driver_mock')
+
+    def new_driver(self, auth_kwargs={}, list_kwargs={}, create_kwargs={}):
+        create_kwargs.setdefault('ping_host', '100::')
+        return gce.ComputeNodeDriver(
+            auth_kwargs, list_kwargs, create_kwargs,
+            driver_class=self.driver_mock)
+
+    def test_driver_instantiation(self):
+        kwargs = {'user_id': 'foo'}
+        driver = self.new_driver(auth_kwargs=kwargs)
+        self.assertTrue(self.driver_mock.called)
+        self.assertEqual(kwargs, self.driver_mock.call_args[1])
+
+    def test_create_location_loaded_at_initialization(self):
+        kwargs = {'location': 'testregion'}
+        driver = self.new_driver(create_kwargs=kwargs)
+        self.assertTrue(self.driver_mock().list_locations)
+
+    def test_create_image_loaded_at_initialization(self):
+        kwargs = {'image': 'testimage'}
+        driver = self.new_driver(create_kwargs=kwargs)
+        self.assertTrue(self.driver_mock().list_images)
+
+    def test_create_includes_ping_secret(self):
+        arv_node = testutil.arvados_node_mock(info={'ping_secret': 'ssshh'})
+        driver = self.new_driver()
+        driver.create_node(testutil.MockSize(1), arv_node)
+        create_method = self.driver_mock().create_node
+        self.assertTrue(create_method.called)
+        self.assertIn('ping_secret=ssshh',
+                      create_method.call_args[1].get('ex_userdata',
+                                                     'arg missing'))
+
+    def test_generate_metadata_for_new_arvados_node(self):
+        arv_node = testutil.arvados_node_mock(8)
+        driver = self.new_driver(list_kwargs={'list': 'test'})
+        self.assertEqual({'ex_metadata': {'list': 'test'}},
+                         driver.arvados_create_kwargs(arv_node))
+
+    def test_tags_set_default_hostname_from_new_arvados_node(self):
+        arv_node = testutil.arvados_node_mock(hostname=None)
+        cloud_node = testutil.cloud_node_mock(1)
+        driver = self.new_driver()
+        driver.sync_node(cloud_node, arv_node)
+        tag_mock = self.driver_mock().ex_set_node_tags
+        self.assertTrue(tag_mock.called)
+        self.assertEqual(['hostname-dynamic.compute.zzzzz.arvadosapi.com'],
+                         tag_mock.call_args[0][1])
+
+    def test_sync_node_sets_static_hostname(self):
+        arv_node = testutil.arvados_node_mock(1)
+        cloud_node = testutil.cloud_node_mock(2)
+        driver = self.new_driver()
+        driver.sync_node(cloud_node, arv_node)
+        tag_mock = self.driver_mock().ex_set_node_tags
+        self.assertTrue(tag_mock.called)
+        self.assertEqual(['hostname-compute1.zzzzz.arvadosapi.com'],
+                         tag_mock.call_args[0][1])
+
+    def test_node_create_time(self):
+        refsecs = int(time.time())
+        reftuple = time.gmtime(refsecs)
+        node = testutil.cloud_node_mock()
+        node.extra = {'launch_time': time.strftime('%Y-%m-%dT%H:%M:%S.000Z',
+                                                   reftuple)}
+        self.assertEqual(refsecs, gce.ComputeNodeDriver.node_start_time(node))
+
+    def test_generate_metadata_for_new_arvados_node(self):
+        arv_node = testutil.arvados_node_mock(8)
+        driver = self.new_driver(list_kwargs={'list': 'test'})
+        self.assertEqual({'ex_metadata': {'list': 'test'}},
+                         driver.arvados_create_kwargs(arv_node))
+
+    def test_deliver_ssh_key_in_metadata(self):
+        test_ssh_key = 'ssh-rsa-foo'
+        arv_node = testutil.arvados_node_mock(1)
+        with mock.patch('__builtin__.open', mock.mock_open(read_data=test_ssh_key)) as mock_file:
+            driver = self.new_driver(create_kwargs={'ssh_key': 'ssh-key-file'})
+        mock_file.assert_called_once_with('ssh-key-file')
+        self.assertEqual({'ex_metadata': {'sshKeys': 'root:ssh-rsa-foo'}},
+                         driver.arvados_create_kwargs(arv_node))
+
+    def test_create_driver_with_service_accounts(self):
+        srv_acct_config = { 'service_accounts': '{ "email": "foo at bar", "scopes":["storage-full"]}' }
+        arv_node = testutil.arvados_node_mock(1)
+        driver = self.new_driver(create_kwargs=srv_acct_config)
+        create_kwargs = driver.arvados_create_kwargs(arv_node)
+        self.assertEqual({u'email': u'foo at bar', u'scopes': [u'storage-full']},
+                         create_kwargs['ex_service_accounts'])

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list