[ARVADOS] created: ab8acfe2a31b34a850b6bf9a776cfcc0b1a3bbef

git at public.curoverse.com git at public.curoverse.com
Tue Nov 18 17:35:05 EST 2014


        at  ab8acfe2a31b34a850b6bf9a776cfcc0b1a3bbef (commit)


commit ab8acfe2a31b34a850b6bf9a776cfcc0b1a3bbef
Author: Tim Pierce <twp@curoverse.com>
Date:   Tue Nov 18 13:49:10 2014 -0500

    4138: support for Google Compute Engine (GCE).
    
    * Added:
    ** nodemanager/arvnodeman/computenode/driver/gce.py
    ** nodemanager/doc/gce.example.cfg
    ** nodemanager/tests/test_computenode_driver_gce.py
    
    Updated comment in nodemanager/doc/ec2.example.cfg.

diff --git a/services/nodemanager/arvnodeman/computenode/driver/gce.py b/services/nodemanager/arvnodeman/computenode/driver/gce.py
new file mode 100644
index 0000000..a4fd57d
--- /dev/null
+++ b/services/nodemanager/arvnodeman/computenode/driver/gce.py
@@ -0,0 +1,86 @@
+#!/usr/bin/env python
+
+from __future__ import absolute_import, print_function
+
+import functools
+import json
+import time
+
+import libcloud.compute.base as cloud_base
+import libcloud.compute.providers as cloud_provider
+import libcloud.compute.types as cloud_types
+from libcloud.compute.drivers import gce
+
+from . import BaseComputeNodeDriver
+from .. import arvados_node_fqdn
+
+class ComputeNodeDriver(BaseComputeNodeDriver):
+    """Compute node driver wrapper for GCE
+
+    This translates cloud driver requests to GCE's specific parameters.
+    """
+    DEFAULT_DRIVER = cloud_provider.get_driver(cloud_types.Provider.GCE)
+    SEARCH_CACHE = {}
+    ssh_key = None
+    service_accounts = None
+
+    def __init__(self, auth_kwargs, list_kwargs, create_kwargs,
+                 driver_class=DEFAULT_DRIVER):
+        super(ComputeNodeDriver, self).__init__(
+            auth_kwargs, list_kwargs, create_kwargs,
+            driver_class)
+
+        for key in self.create_kwargs.keys():
+            init_method = getattr(self, '_init_' + key, None)
+            if init_method is not None:
+                new_pair = init_method(self.create_kwargs.pop(key))
+                if new_pair is not None:
+                    self.create_kwargs[new_pair[0]] = new_pair[1]
+
+    def _init_image_id(self, image_id):
+        return 'image', self.search_for(image_id, 'list_images')
+
+    def _init_ping_host(self, ping_host):
+        self.ping_host = ping_host
+
+    def _init_service_accounts(self, service_accounts_str):
+        self.service_accounts = json.loads(service_accounts_str)
+
+    def _init_network_id(self, subnet_id):
+        return 'ex_network', self.search_for(subnet_id, 'ex_list_networks')
+
+    def _init_ssh_key(self, filename):
+        with open(filename) as ssh_file:
+            self.ssh_key = ssh_file.read().strip()
+
+    def arvados_create_kwargs(self, arvados_node):
+        result = {'ex_metadata': self.list_kwargs.copy() }
+        ping_secret = arvados_node['info'].get('ping_secret')
+        if ping_secret is not None:
+            ping_url = ('https://{}/arvados/v1/nodes/{}/ping?ping_secret={}'.
+                        format(self.ping_host, arvados_node['uuid'],
+                               ping_secret))
+            result['ex_userdata'] = ping_url
+        if self.service_accounts is not None:
+            result['ex_service_accounts'] = self.service_accounts
+
+        # SSH keys are delivered to GCE nodes via ex_metadata: see
+        # http://stackoverflow.com/questions/26752617/creating-sshkeys-for-gce-instance-using-libcloud
+        if self.ssh_key is not None:
+            result['ex_metadata']['sshKeys'] = 'root:{}'.format(self.ssh_key)
+        return result
+
+    # When an Arvados node is synced with a GCE node, the Arvados hostname
+    # is forwarded in a GCE tag 'hostname-foo'.
+    # TODO(twp): implement an ex_set_metadata method (at least until
+    # libcloud supports the API setMetadata method) so we can pass this
+    # sensibly in the node metadata.
+    def sync_node(self, cloud_node, arvados_node):
+        tags = ['hostname-{}'.format(arvados_node_fqdn(arvados_node))]
+        self.real.ex_set_node_tags(cloud_node, tags)
+
+    @classmethod
+    def node_start_time(cls, node):
+        time_str = node.extra['launch_time'].split('.', 2)[0] + 'UTC'
+        return time.mktime(time.strptime(
+                time_str,'%Y-%m-%dT%H:%M:%S%Z')) - time.timezone
diff --git a/services/nodemanager/doc/ec2.example.cfg b/services/nodemanager/doc/ec2.example.cfg
index 0f9caca..31ea6ea 100644
--- a/services/nodemanager/doc/ec2.example.cfg
+++ b/services/nodemanager/doc/ec2.example.cfg
@@ -122,9 +122,11 @@ security_groups = idstring1, idstring2
 # willing to use.  The Node Manager should boot the cheapest size(s) that
 # can run jobs in the queue (N.B.: defining more than one size has not been
 # tested yet).
-# Each size section MUST define the number of cores it has.  You may also
-# want to define the number of mebibytes of scratch space for Crunch jobs.
-# You can also override Amazon's provided data fields by setting the same
-# names here.
+# Each size section MUST define the number of cores available in this
+# size class (since libcloud does not provide any consistent API for exposing
+# this setting).
+# You may also want to define the amount of scratch space (expressed
+# in GB) for Crunch jobs.  You can also override Amazon's provided
+# data fields by setting the same names here.
 cores = 2
-scratch = 100
\ No newline at end of file
+scratch = 100
diff --git a/services/nodemanager/doc/ec2.example.cfg b/services/nodemanager/doc/gce.example.cfg
similarity index 65%
copy from services/nodemanager/doc/ec2.example.cfg
copy to services/nodemanager/doc/gce.example.cfg
index 0f9caca..4886cb2 100644
--- a/services/nodemanager/doc/ec2.example.cfg
+++ b/services/nodemanager/doc/gce.example.cfg
@@ -1,12 +1,7 @@
-# EC2 configuration for Arvados Node Manager.
+# Google Compute Engine configuration for Arvados Node Manager.
 # All times are in seconds unless specified otherwise.
 
 [Daemon]
-# The dispatcher can customize the start and stop procedure for
-# cloud nodes.  For example, the SLURM dispatcher drains nodes
-# through SLURM before shutting them down.
-#dispatcher = slurm
-
 # Node Manager will ensure that there are at least this many nodes
 # running at all times.
 min_nodes = 0
@@ -15,7 +10,7 @@ min_nodes = 0
 # many are running.
 max_nodes = 8
 
-# Poll EC2 nodes and Arvados for new information every N seconds.
+# Poll compute nodes and Arvados for new information every N seconds.
 poll_time = 60
 
 # Polls have exponential backoff when services fail to respond.
@@ -68,8 +63,9 @@ timeout = 15
 insecure = no
 
 [Cloud]
-provider = ec2
+provider = gce
 
+# XXX(twp): figure out good default settings for GCE
 # It's usually most cost-effective to shut down compute nodes during narrow
 # windows of time.  For example, EC2 bills each node by the hour, so the best
 # time to shut down a node is right before a new hour of uptime starts.
@@ -83,48 +79,52 @@ provider = ec2
 shutdown_windows = 54, 5, 1
 
 [Cloud Credentials]
-key = KEY
-secret = SECRET_KEY
-region = us-east-1
-timeout = 60
+user_id = USERID
+key = SECRET_KEY
+project = project_name
+timeout = 60             # used by NodeManagerConfig
+
+# Optional settings. For full documentation see
+# http://libcloud.readthedocs.org/en/latest/compute/drivers/gce.html#libcloud.compute.drivers.gce.GCENodeDriver
+#
+# datacenter = 'us-central1-a'
+# auth_type = 'SA'               # SA, IA or GCE
+# scopes = https://www.googleapis.com/auth/compute
+# credential_file = 
 
 [Cloud List]
-# This section defines filters that find compute nodes.
-# Tags that you specify here will automatically be added to nodes you create.
-# Replace colons in Amazon filters with underscores
-# (e.g., write "tag:mytag" as "tag_mytag").
-instance-state-name = running
-tag_arvados-class = dynamic-compute
-tag_cluster = zyxwv
+# Keywords here will be used to populate the metadata field for a GCE node.
 
 [Cloud Create]
 # New compute nodes will send pings to Arvados at this host.
 # You may specify a port, and use brackets to disambiguate IPv6 addresses.
 ping_host = hostname:port
 
-# Give the name of an SSH key on AWS...
-ex_keyname = string
-
-# ... or a file path for an SSH key that can log in to the compute node.
-# (One or the other, not both.)
+# A file path for an SSH key that can log in to the compute node.
 # ssh_key = path
 
-# The EC2 IDs of the image and subnet compute nodes should use.
+# The GCE IDs of the image and network compute nodes should use.
 image_id = idstring
-subnet_id = idstring
+network_id = idstring
 
-# Comma-separated EC2 IDs for the security group(s) assigned to each
-# compute node.
-security_groups = idstring1, idstring2
+# JSON string of service account authorizations for this cluster.
+# See http://libcloud.readthedocs.org/en/latest/compute/drivers/gce.html#specifying-service-account-scopes
+# service_accounts = [ { 'email': 'ex@mple.com', 'scopes': ['storage-ro'] } ]
 
-[Size t2.medium]
-# You can define any number of Size sections to list EC2 sizes you're
+[Size n1-standard-2]
+# You can define any number of Size sections to list node sizes you're
 # willing to use.  The Node Manager should boot the cheapest size(s) that
 # can run jobs in the queue (N.B.: defining more than one size has not been
 # tested yet).
-# Each size section MUST define the number of cores it has.  You may also
-# want to define the number of mebibytes of scratch space for Crunch jobs.
-# You can also override Amazon's provided data fields by setting the same
-# names here.
+#
+# The Size fields are interpreted the same way as with a libcloud NodeSize:
+# http://libcloud.readthedocs.org/en/latest/compute/api.html#libcloud.compute.base.NodeSize
+#
+# Each size section MUST define the number of cores available in this
+# size class (since libcloud does not provide any consistent API for exposing
+# this setting).
+# You may also want to define the amount of scratch space (expressed
+# in GB) for Crunch jobs.
 cores = 2
-scratch = 100
\ No newline at end of file
+scratch = 100
+ram = 512
diff --git a/services/nodemanager/tests/test_computenode_driver_gce.py b/services/nodemanager/tests/test_computenode_driver_gce.py
new file mode 100644
index 0000000..075760a
--- /dev/null
+++ b/services/nodemanager/tests/test_computenode_driver_gce.py
@@ -0,0 +1,104 @@
+#!/usr/bin/env python
+
+from __future__ import absolute_import, print_function
+
+import time
+import unittest
+
+import mock
+
+import arvnodeman.computenode.driver.gce as gce
+from . import testutil
+
+class GCEComputeNodeDriverTestCase(unittest.TestCase):
+    def setUp(self):
+        self.driver_mock = mock.MagicMock(name='driver_mock')
+
+    def new_driver(self, auth_kwargs={}, list_kwargs={}, create_kwargs={}):
+        create_kwargs.setdefault('ping_host', '100::')
+        return gce.ComputeNodeDriver(
+            auth_kwargs, list_kwargs, create_kwargs,
+            driver_class=self.driver_mock)
+
+    def test_driver_instantiation(self):
+        kwargs = {'user_id': 'foo'}
+        driver = self.new_driver(auth_kwargs=kwargs)
+        self.assertTrue(self.driver_mock.called)
+        self.assertEqual(kwargs, self.driver_mock.call_args[1])
+
+    def test_create_location_loaded_at_initialization(self):
+        kwargs = {'location': 'testregion'}
+        driver = self.new_driver(create_kwargs=kwargs)
+        self.assertTrue(self.driver_mock().list_locations)
+
+    def test_create_image_loaded_at_initialization(self):
+        kwargs = {'image': 'testimage'}
+        driver = self.new_driver(create_kwargs=kwargs)
+        self.assertTrue(self.driver_mock().list_images)
+
+    def test_create_includes_ping_secret(self):
+        arv_node = testutil.arvados_node_mock(info={'ping_secret': 'ssshh'})
+        driver = self.new_driver()
+        driver.create_node(testutil.MockSize(1), arv_node)
+        create_method = self.driver_mock().create_node
+        self.assertTrue(create_method.called)
+        self.assertIn('ping_secret=ssshh',
+                      create_method.call_args[1].get('ex_userdata',
+                                                     'arg missing'))
+
+    def test_generate_metadata_for_new_arvados_node(self):
+        arv_node = testutil.arvados_node_mock(8)
+        driver = self.new_driver(list_kwargs={'list': 'test'})
+        self.assertEqual({'ex_metadata': {'list': 'test'}},
+                         driver.arvados_create_kwargs(arv_node))
+
+    def test_tags_set_default_hostname_from_new_arvados_node(self):
+        arv_node = testutil.arvados_node_mock(hostname=None)
+        cloud_node = testutil.cloud_node_mock(1)
+        driver = self.new_driver()
+        driver.sync_node(cloud_node, arv_node)
+        tag_mock = self.driver_mock().ex_set_node_tags
+        self.assertTrue(tag_mock.called)
+        self.assertEqual(['hostname-dynamic.compute.zzzzz.arvadosapi.com'],
+                         tag_mock.call_args[0][1])
+
+    def test_sync_node_sets_static_hostname(self):
+        arv_node = testutil.arvados_node_mock(1)
+        cloud_node = testutil.cloud_node_mock(2)
+        driver = self.new_driver()
+        driver.sync_node(cloud_node, arv_node)
+        tag_mock = self.driver_mock().ex_set_node_tags
+        self.assertTrue(tag_mock.called)
+        self.assertEqual(['hostname-compute1.zzzzz.arvadosapi.com'],
+                         tag_mock.call_args[0][1])
+
+    def test_node_create_time(self):
+        refsecs = int(time.time())
+        reftuple = time.gmtime(refsecs)
+        node = testutil.cloud_node_mock()
+        node.extra = {'launch_time': time.strftime('%Y-%m-%dT%H:%M:%S.000Z',
+                                                   reftuple)}
+        self.assertEqual(refsecs, gce.ComputeNodeDriver.node_start_time(node))
+
+    def test_generate_metadata_for_new_arvados_node(self):
+        arv_node = testutil.arvados_node_mock(8)
+        driver = self.new_driver(list_kwargs={'list': 'test'})
+        self.assertEqual({'ex_metadata': {'list': 'test'}},
+                         driver.arvados_create_kwargs(arv_node))
+
+    def test_deliver_ssh_key_in_metadata(self):
+        test_ssh_key = 'ssh-rsa-foo'
+        arv_node = testutil.arvados_node_mock(1)
+        with mock.patch('__builtin__.open', mock.mock_open(read_data=test_ssh_key)) as mock_file:
+            driver = self.new_driver(create_kwargs={'ssh_key': 'ssh-key-file'})
+        mock_file.assert_called_once_with('ssh-key-file')
+        self.assertEqual({'ex_metadata': {'sshKeys': 'root:ssh-rsa-foo'}},
+                         driver.arvados_create_kwargs(arv_node))
+
+    def test_create_driver_with_service_accounts(self):
+        srv_acct_config = { 'service_accounts': '{ "email": "foo at bar", "scopes":["storage-full"]}' }
+        arv_node = testutil.arvados_node_mock(1)
+        driver = self.new_driver(create_kwargs=srv_acct_config)
+        create_kwargs = driver.arvados_create_kwargs(arv_node)
+        self.assertEqual({u'email': u'foo at bar', u'scopes': [u'storage-full']},
+                         create_kwargs['ex_service_accounts'])

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list