[ARVADOS] created: ab8acfe2a31b34a850b6bf9a776cfcc0b1a3bbef
git at public.curoverse.com
git at public.curoverse.com
Tue Nov 18 17:35:05 EST 2014
at ab8acfe2a31b34a850b6bf9a776cfcc0b1a3bbef (commit)
commit ab8acfe2a31b34a850b6bf9a776cfcc0b1a3bbef
Author: Tim Pierce <twp at curoverse.com>
Date: Tue Nov 18 13:49:10 2014 -0500
4138: support for Google Compute Engine.
* Added:
** nodemanager/arvnodeman/computenode/drivers/gce.py
** nodemanager/doc/gce.example.cfg
** nodemanager/tests/test_computenode_driver_gce.py
Updated comment in nodemanager/arvnodeman/computenode/drivers/ec2.py.
diff --git a/services/nodemanager/arvnodeman/computenode/driver/gce.py b/services/nodemanager/arvnodeman/computenode/driver/gce.py
new file mode 100644
index 0000000..a4fd57d
--- /dev/null
+++ b/services/nodemanager/arvnodeman/computenode/driver/gce.py
@@ -0,0 +1,86 @@
+#!/usr/bin/env python
+
+from __future__ import absolute_import, print_function
+
+import functools
+import json
+import time
+
+import libcloud.compute.base as cloud_base
+import libcloud.compute.providers as cloud_provider
+import libcloud.compute.types as cloud_types
+from libcloud.compute.drivers import gce
+
+from . import BaseComputeNodeDriver
+from .. import arvados_node_fqdn
+
+class ComputeNodeDriver(BaseComputeNodeDriver):
+ """Compute node driver wrapper for GCE
+
+ This translates cloud driver requests to GCE's specific parameters.
+ """
+ DEFAULT_DRIVER = cloud_provider.get_driver(cloud_types.Provider.GCE)
+ SEARCH_CACHE = {}
+ ssh_key = None
+ service_accounts = None
+
+ def __init__(self, auth_kwargs, list_kwargs, create_kwargs,
+ driver_class=DEFAULT_DRIVER):
+ super(ComputeNodeDriver, self).__init__(
+ auth_kwargs, list_kwargs, create_kwargs,
+ driver_class)
+
+ for key in self.create_kwargs.keys():
+ init_method = getattr(self, '_init_' + key, None)
+ if init_method is not None:
+ new_pair = init_method(self.create_kwargs.pop(key))
+ if new_pair is not None:
+ self.create_kwargs[new_pair[0]] = new_pair[1]
+
+ def _init_image_id(self, image_id):
+ return 'image', self.search_for(image_id, 'list_images')
+
+ def _init_ping_host(self, ping_host):
+ self.ping_host = ping_host
+
+ def _init_service_accounts(self, service_accounts_str):
+ self.service_accounts = json.loads(service_accounts_str)
+
+ def _init_network_id(self, subnet_id):
+ return 'ex_network', self.search_for(subnet_id, 'ex_list_networks')
+
+ def _init_ssh_key(self, filename):
+ with open(filename) as ssh_file:
+ self.ssh_key = ssh_file.read().strip()
+
+ def arvados_create_kwargs(self, arvados_node):
+ result = {'ex_metadata': self.list_kwargs.copy() }
+ ping_secret = arvados_node['info'].get('ping_secret')
+ if ping_secret is not None:
+ ping_url = ('https://{}/arvados/v1/nodes/{}/ping?ping_secret={}'.
+ format(self.ping_host, arvados_node['uuid'],
+ ping_secret))
+ result['ex_userdata'] = ping_url
+ if self.service_accounts is not None:
+ result['ex_service_accounts'] = self.service_accounts
+
+ # SSH keys are delivered to GCE nodes via ex_metadata: see
+ # http://stackoverflow.com/questions/26752617/creating-sshkeys-for-gce-instance-using-libcloud
+ if self.ssh_key is not None:
+ result['ex_metadata']['sshKeys'] = 'root:{}'.format(self.ssh_key)
+ return result
+
+ # When an Arvados node is synced with a GCE node, the Arvados hostname
+ # is forwarded in a GCE tag 'hostname-foo'.
+ # TODO(twp): implement an ex_set_metadata method (at least until
+ # libcloud supports the API setMetadata method) so we can pass this
+ # sensibly in the node metadata.
+ def sync_node(self, cloud_node, arvados_node):
+ tags = ['hostname-{}'.format(arvados_node_fqdn(arvados_node))]
+ self.real.ex_set_node_tags(cloud_node, tags)
+
+ @classmethod
+ def node_start_time(cls, node):
+ time_str = node.extra['launch_time'].split('.', 2)[0] + 'UTC'
+ return time.mktime(time.strptime(
+ time_str,'%Y-%m-%dT%H:%M:%S%Z')) - time.timezone
diff --git a/services/nodemanager/doc/ec2.example.cfg b/services/nodemanager/doc/ec2.example.cfg
index 0f9caca..31ea6ea 100644
--- a/services/nodemanager/doc/ec2.example.cfg
+++ b/services/nodemanager/doc/ec2.example.cfg
@@ -122,9 +122,11 @@ security_groups = idstring1, idstring2
# willing to use. The Node Manager should boot the cheapest size(s) that
# can run jobs in the queue (N.B.: defining more than one size has not been
# tested yet).
-# Each size section MUST define the number of cores it has. You may also
-# want to define the number of mebibytes of scratch space for Crunch jobs.
-# You can also override Amazon's provided data fields by setting the same
-# names here.
+# Each size section MUST define the number of cores available in this
+# size class (since libcloud does not provide any consistent API for
+# exposing this setting).
+# You may also want to define the amount of scratch space (expressed
+# in GB) for Crunch jobs. You can also override Amazon's provided
+# data fields by setting the same names here.
cores = 2
-scratch = 100
\ No newline at end of file
+scratch = 100
diff --git a/services/nodemanager/doc/ec2.example.cfg b/services/nodemanager/doc/gce.example.cfg
similarity index 65%
copy from services/nodemanager/doc/ec2.example.cfg
copy to services/nodemanager/doc/gce.example.cfg
index 0f9caca..4886cb2 100644
--- a/services/nodemanager/doc/ec2.example.cfg
+++ b/services/nodemanager/doc/gce.example.cfg
@@ -1,12 +1,7 @@
-# EC2 configuration for Arvados Node Manager.
+# Google Compute Engine configuration for Arvados Node Manager.
# All times are in seconds unless specified otherwise.
[Daemon]
-# The dispatcher can customize the start and stop procedure for
-# cloud nodes. For example, the SLURM dispatcher drains nodes
-# through SLURM before shutting them down.
-#dispatcher = slurm
-
# Node Manager will ensure that there are at least this many nodes
# running at all times.
min_nodes = 0
@@ -15,7 +10,7 @@ min_nodes = 0
# many are running.
max_nodes = 8
-# Poll EC2 nodes and Arvados for new information every N seconds.
+# Poll compute nodes and Arvados for new information every N seconds.
poll_time = 60
# Polls have exponential backoff when services fail to respond.
@@ -68,8 +63,9 @@ timeout = 15
insecure = no
[Cloud]
-provider = ec2
+provider = gce
+# XXX(twp): figure out good default settings for GCE
# It's usually most cost-effective to shut down compute nodes during narrow
# windows of time. For example, EC2 bills each node by the hour, so the best
# time to shut down a node is right before a new hour of uptime starts.
@@ -83,48 +79,52 @@ provider = ec2
shutdown_windows = 54, 5, 1
[Cloud Credentials]
-key = KEY
-secret = SECRET_KEY
-region = us-east-1
-timeout = 60
+user_id = USERID
+key = SECRET_KEY
+project = project_name
+timeout = 60 # used by NodeManagerConfig
+
+# Optional settings. For full documentation see
+# http://libcloud.readthedocs.org/en/latest/compute/drivers/gce.html#libcloud.compute.drivers.gce.GCENodeDriver
+#
+# datacenter = 'us-central1-a'
+# auth_type = 'SA' # SA, IA or GCE
+# scopes = https://www.googleapis.com/auth/compute
+# credential_file =
[Cloud List]
-# This section defines filters that find compute nodes.
-# Tags that you specify here will automatically be added to nodes you create.
-# Replace colons in Amazon filters with underscores
-# (e.g., write "tag:mytag" as "tag_mytag").
-instance-state-name = running
-tag_arvados-class = dynamic-compute
-tag_cluster = zyxwv
+# Keywords here will be used to populate the metadata field for a GCE node.
[Cloud Create]
# New compute nodes will send pings to Arvados at this host.
# You may specify a port, and use brackets to disambiguate IPv6 addresses.
ping_host = hostname:port
-# Give the name of an SSH key on AWS...
-ex_keyname = string
-
-# ... or a file path for an SSH key that can log in to the compute node.
-# (One or the other, not both.)
+# A file path for an SSH key that can log in to the compute node.
# ssh_key = path
-# The EC2 IDs of the image and subnet compute nodes should use.
+# The GCE IDs of the image and network compute nodes should use.
image_id = idstring
-subnet_id = idstring
+network_id = idstring
-# Comma-separated EC2 IDs for the security group(s) assigned to each
-# compute node.
-security_groups = idstring1, idstring2
+# JSON string of service account authorizations for this cluster.
+# See http://libcloud.readthedocs.org/en/latest/compute/drivers/gce.html#specifying-service-account-scopes
+# service_accounts = [ { 'email': 'ex@mple.com', 'scopes': ['storage-ro'] } ]
-[Size t2.medium]
-# You can define any number of Size sections to list EC2 sizes you're
+[Size n1-standard-2]
+# You can define any number of Size sections to list node sizes you're
# willing to use. The Node Manager should boot the cheapest size(s) that
# can run jobs in the queue (N.B.: defining more than one size has not been
# tested yet).
-# Each size section MUST define the number of cores it has. You may also
-# want to define the number of mebibytes of scratch space for Crunch jobs.
-# You can also override Amazon's provided data fields by setting the same
-# names here.
+#
+# The Size fields are interpreted the same way as with a libcloud NodeSize:
+# http://libcloud.readthedocs.org/en/latest/compute/api.html#libcloud.compute.base.NodeSize
+#
+# Each size section MUST define the number of cores available in this
+# size class (since libcloud does not provide any consistent API for
+# exposing this setting).
+# You may also want to define the amount of scratch space (expressed
+# in GB) for Crunch jobs.
cores = 2
-scratch = 100
\ No newline at end of file
+scratch = 100
+ram = 512
diff --git a/services/nodemanager/tests/test_computenode_driver_gce.py b/services/nodemanager/tests/test_computenode_driver_gce.py
new file mode 100644
index 0000000..075760a
--- /dev/null
+++ b/services/nodemanager/tests/test_computenode_driver_gce.py
@@ -0,0 +1,104 @@
+#!/usr/bin/env python
+
+from __future__ import absolute_import, print_function
+
+import time
+import unittest
+
+import mock
+
+import arvnodeman.computenode.driver.gce as gce
+from . import testutil
+
+class GCEComputeNodeDriverTestCase(unittest.TestCase):
+ def setUp(self):
+ self.driver_mock = mock.MagicMock(name='driver_mock')
+
+ def new_driver(self, auth_kwargs={}, list_kwargs={}, create_kwargs={}):
+ create_kwargs.setdefault('ping_host', '100::')
+ return gce.ComputeNodeDriver(
+ auth_kwargs, list_kwargs, create_kwargs,
+ driver_class=self.driver_mock)
+
+ def test_driver_instantiation(self):
+ kwargs = {'user_id': 'foo'}
+ driver = self.new_driver(auth_kwargs=kwargs)
+ self.assertTrue(self.driver_mock.called)
+ self.assertEqual(kwargs, self.driver_mock.call_args[1])
+
+ def test_create_location_loaded_at_initialization(self):
+ kwargs = {'location': 'testregion'}
+ driver = self.new_driver(create_kwargs=kwargs)
+ self.assertTrue(self.driver_mock().list_locations)
+
+ def test_create_image_loaded_at_initialization(self):
+ kwargs = {'image': 'testimage'}
+ driver = self.new_driver(create_kwargs=kwargs)
+ self.assertTrue(self.driver_mock().list_images)
+
+ def test_create_includes_ping_secret(self):
+ arv_node = testutil.arvados_node_mock(info={'ping_secret': 'ssshh'})
+ driver = self.new_driver()
+ driver.create_node(testutil.MockSize(1), arv_node)
+ create_method = self.driver_mock().create_node
+ self.assertTrue(create_method.called)
+ self.assertIn('ping_secret=ssshh',
+ create_method.call_args[1].get('ex_userdata',
+ 'arg missing'))
+
+ def test_generate_metadata_for_new_arvados_node(self):
+ arv_node = testutil.arvados_node_mock(8)
+ driver = self.new_driver(list_kwargs={'list': 'test'})
+ self.assertEqual({'ex_metadata': {'list': 'test'}},
+ driver.arvados_create_kwargs(arv_node))
+
+ def test_tags_set_default_hostname_from_new_arvados_node(self):
+ arv_node = testutil.arvados_node_mock(hostname=None)
+ cloud_node = testutil.cloud_node_mock(1)
+ driver = self.new_driver()
+ driver.sync_node(cloud_node, arv_node)
+ tag_mock = self.driver_mock().ex_set_node_tags
+ self.assertTrue(tag_mock.called)
+ self.assertEqual(['hostname-dynamic.compute.zzzzz.arvadosapi.com'],
+ tag_mock.call_args[0][1])
+
+ def test_sync_node_sets_static_hostname(self):
+ arv_node = testutil.arvados_node_mock(1)
+ cloud_node = testutil.cloud_node_mock(2)
+ driver = self.new_driver()
+ driver.sync_node(cloud_node, arv_node)
+ tag_mock = self.driver_mock().ex_set_node_tags
+ self.assertTrue(tag_mock.called)
+ self.assertEqual(['hostname-compute1.zzzzz.arvadosapi.com'],
+ tag_mock.call_args[0][1])
+
+ def test_node_create_time(self):
+ refsecs = int(time.time())
+ reftuple = time.gmtime(refsecs)
+ node = testutil.cloud_node_mock()
+ node.extra = {'launch_time': time.strftime('%Y-%m-%dT%H:%M:%S.000Z',
+ reftuple)}
+ self.assertEqual(refsecs, gce.ComputeNodeDriver.node_start_time(node))
+
+ def test_generate_metadata_for_new_arvados_node(self):
+ arv_node = testutil.arvados_node_mock(8)
+ driver = self.new_driver(list_kwargs={'list': 'test'})
+ self.assertEqual({'ex_metadata': {'list': 'test'}},
+ driver.arvados_create_kwargs(arv_node))
+
+ def test_deliver_ssh_key_in_metadata(self):
+ test_ssh_key = 'ssh-rsa-foo'
+ arv_node = testutil.arvados_node_mock(1)
+ with mock.patch('__builtin__.open', mock.mock_open(read_data=test_ssh_key)) as mock_file:
+ driver = self.new_driver(create_kwargs={'ssh_key': 'ssh-key-file'})
+ mock_file.assert_called_once_with('ssh-key-file')
+ self.assertEqual({'ex_metadata': {'sshKeys': 'root:ssh-rsa-foo'}},
+ driver.arvados_create_kwargs(arv_node))
+
+ def test_create_driver_with_service_accounts(self):
+ srv_acct_config = { 'service_accounts': '{ "email": "foo@bar", "scopes":["storage-full"]}' }
+ arv_node = testutil.arvados_node_mock(1)
+ driver = self.new_driver(create_kwargs=srv_acct_config)
+ create_kwargs = driver.arvados_create_kwargs(arv_node)
+ self.assertEqual({u'email': u'foo@bar', u'scopes': [u'storage-full']},
+ create_kwargs['ex_service_accounts'])
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list