[ARVADOS] created: 10b25cacfc521b3dc74c2204fc2b29aca8ad2631
git at public.curoverse.com
git at public.curoverse.com
Thu Aug 27 14:33:54 EDT 2015
at 10b25cacfc521b3dc74c2204fc2b29aca8ad2631 (commit)
commit 10b25cacfc521b3dc74c2204fc2b29aca8ad2631
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date: Thu Aug 27 14:23:06 2015 -0400
6507: Change version number for libcloud fork to 0.18.1.dev1 because local tags create too much trouble.
diff --git a/services/nodemanager/setup.py b/services/nodemanager/setup.py
index d2de38f..16df376 100644
--- a/services/nodemanager/setup.py
+++ b/services/nodemanager/setup.py
@@ -31,11 +31,11 @@ setup(name='arvados-node-manager',
'python-daemon',
],
dependency_links = [
- "https://github.com/curoverse/libcloud/archive/apache-libcloud-0.18.0+azurearm.zip"
+ "https://github.com/curoverse/libcloud/archive/apache-libcloud-0.18.1.dev1.zip"
],
scripts=['bin/arvados-node-manager'],
test_suite='tests',
- tests_require=['mock>=1.0', "apache-libcloud==0.18.0+azurearm"],
+ tests_require=['mock>=1.0', "apache-libcloud==0.18.1.dev1"],
zip_safe=False,
cmdclass={'egg_info': tagger},
)
commit 9b914107504ece419ee2f7d72be7d6262037ff52
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date: Wed Aug 26 15:24:11 2015 -0400
6507: Filter out "tag_" from create_kwargs.
diff --git a/services/nodemanager/arvnodeman/computenode/driver/azure.py b/services/nodemanager/arvnodeman/computenode/driver/azure.py
index f4686b9..014b92c 100644
--- a/services/nodemanager/arvnodeman/computenode/driver/azure.py
+++ b/services/nodemanager/arvnodeman/computenode/driver/azure.py
@@ -27,6 +27,10 @@ class ComputeNodeDriver(BaseComputeNodeDriver):
self.tags = {key[4:]: value
for key, value in create_kwargs.iteritems()
if key.startswith('tag_')}
+ # filter out tags from create_kwargs
+ create_kwargs = {key: value
+ for key, value in create_kwargs.iteritems()
+ if not key.startswith('tag_')}
super(ComputeNodeDriver, self).__init__(
auth_kwargs, list_kwargs, create_kwargs,
driver_class)
commit 03980b49e2fb3cac357e417acea64cd342d1065e
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date: Wed Aug 26 15:23:35 2015 -0400
6507: Use dependency_links to specify custom libcloud package needed for azure support.
diff --git a/services/nodemanager/setup.py b/services/nodemanager/setup.py
index 502690b..d2de38f 100644
--- a/services/nodemanager/setup.py
+++ b/services/nodemanager/setup.py
@@ -25,14 +25,17 @@ setup(name='arvados-node-manager',
license='GNU Affero General Public License, version 3.0',
packages=find_packages(),
install_requires=[
- 'apache-libcloud>=0.18',
+ 'apache-libcloud>=0.16',
'arvados-python-client>=0.1.20150206225333',
'pykka',
'python-daemon',
],
+ dependency_links = [
+ "https://github.com/curoverse/libcloud/archive/apache-libcloud-0.18.0+azurearm.zip"
+ ],
scripts=['bin/arvados-node-manager'],
test_suite='tests',
- tests_require=['mock>=1.0'],
+ tests_require=['mock>=1.0', "apache-libcloud==0.18.0+azurearm"],
zip_safe=False,
cmdclass={'egg_info': tagger},
)
commit 19a2e9a97939126293ce33d72f576f6f54da574f
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date: Tue Aug 25 13:02:55 2015 -0400
6507: Remove unused imports, move _init_ssh_key() into base, set hostname tag on sync_node(), tweak tests.
diff --git a/services/nodemanager/arvnodeman/computenode/driver/__init__.py b/services/nodemanager/arvnodeman/computenode/driver/__init__.py
index 16134a2..e3298a7 100644
--- a/services/nodemanager/arvnodeman/computenode/driver/__init__.py
+++ b/services/nodemanager/arvnodeman/computenode/driver/__init__.py
@@ -9,8 +9,6 @@ from libcloud.compute.base import NodeDriver
from ...config import NETWORK_ERRORS
-import pprint
-
class BaseComputeNodeDriver(object):
"""Abstract base class for compute node drivers.
@@ -58,6 +56,11 @@ class BaseComputeNodeDriver(object):
def _init_ping_host(self, ping_host):
self.ping_host = ping_host
+ def _init_ssh_key(self, filename):
+ with open(filename) as ssh_file:
+ key = cloud_base.NodeAuthSSHKey(ssh_file.read())
+ return 'auth', key
+
def search_for(self, term, list_method, key=attrgetter('id'), **kwargs):
"""Return one matching item from a list of cloud objects.
diff --git a/services/nodemanager/arvnodeman/computenode/driver/azure.py b/services/nodemanager/arvnodeman/computenode/driver/azure.py
index d075bdb..f4686b9 100644
--- a/services/nodemanager/arvnodeman/computenode/driver/azure.py
+++ b/services/nodemanager/arvnodeman/computenode/driver/azure.py
@@ -3,7 +3,6 @@
from __future__ import absolute_import, print_function
import time
-from operator import attrgetter
import libcloud.compute.base as cloud_base
import libcloud.compute.providers as cloud_provider
@@ -37,8 +36,7 @@ class ComputeNodeDriver(BaseComputeNodeDriver):
name = 'compute-{}-{}'.format(node_id, cluster_id)
tags = {
'booted_at': time.strftime(ARVADOS_TIMEFMT, time.gmtime()),
- 'arv-ping-url': self._make_ping_url(arvados_node),
- 'hostname': arvados_node_fqdn(arvados_node)
+ 'arv-ping-url': self._make_ping_url(arvados_node)
}
tags.update(self.tags)
return {
@@ -47,16 +45,12 @@ class ComputeNodeDriver(BaseComputeNodeDriver):
}
def sync_node(self, cloud_node, arvados_node):
- pass
+ self.real.ex_create_tags(cloud_node,
+ {'hostname': arvados_node_fqdn(arvados_node)})
def _init_image(self, urn):
return "image", self.get_image(urn)
- def _init_ssh_key(self, filename):
- with open(filename) as ssh_file:
- key = cloud_base.NodeAuthSSHKey(ssh_file.read())
- return 'auth', key
-
def list_nodes(self):
# Azure only supports filtering node lists by resource group.
# Do our own filtering based on tag.
diff --git a/services/nodemanager/arvnodeman/computenode/driver/ec2.py b/services/nodemanager/arvnodeman/computenode/driver/ec2.py
index 588ca51..6afe316 100644
--- a/services/nodemanager/arvnodeman/computenode/driver/ec2.py
+++ b/services/nodemanager/arvnodeman/computenode/driver/ec2.py
@@ -64,11 +64,6 @@ class ComputeNodeDriver(BaseComputeNodeDriver):
def _init_subnet_id(self, subnet_id):
return 'ex_subnet', self.search_for(subnet_id, 'ex_list_subnets')
- def _init_ssh_key(self, filename):
- with open(filename) as ssh_file:
- key = cloud_base.NodeAuthSSHKey(ssh_file.read())
- return 'auth', key
-
def arvados_create_kwargs(self, arvados_node):
return {'name': arvados_node_fqdn(arvados_node),
'ex_userdata': self._make_ping_url(arvados_node)}
diff --git a/services/nodemanager/tests/test_computenode_driver_azure.py b/services/nodemanager/tests/test_computenode_driver_azure.py
index 97f2d84..90fea0c 100644
--- a/services/nodemanager/tests/test_computenode_driver_azure.py
+++ b/services/nodemanager/tests/test_computenode_driver_azure.py
@@ -27,21 +27,19 @@ class AzureComputeNodeDriverTestCase(testutil.DriverTestMixin, unittest.TestCase
def test_create_image_loaded_at_initialization(self):
get_method = self.driver_mock().get_image
- get_method.return_value = [testutil.cloud_object_mock('id_b')]
+ get_method.return_value = testutil.cloud_object_mock('id_b')
driver = self.new_driver(create_kwargs={'image': 'id_b'})
self.assertEqual(1, get_method.call_count)
- def test_create_includes_ping_and_hostname(self):
+ def test_create_includes_ping(self):
arv_node = testutil.arvados_node_mock(info={'ping_secret': 'ssshh'})
+ arv_node["hostname"] = None
driver = self.new_driver()
driver.create_node(testutil.MockSize(1), arv_node)
create_method = self.driver_mock().create_node
self.assertTrue(create_method.called)
- print(create_method.call_args[1])
self.assertIn('ping_secret=ssshh',
create_method.call_args[1].get('ex_tags', {}).get('arv-ping-url', ""))
- self.assertEqual('compute99.zzzzz.arvadosapi.com',
- create_method.call_args[1].get('ex_tags', {}).get('hostname', ""))
def test_name_from_new_arvados_node(self):
arv_node = testutil.arvados_node_mock(hostname=None)
@@ -81,3 +79,11 @@ class AzureComputeNodeDriverTestCase(testutil.DriverTestMixin, unittest.TestCase
self.assertFalse(
azure.ComputeNodeDriver.is_cloud_exception(ValueError("test error")),
"ValueError flagged as cloud exception")
+
+ def test_sync_node(self):
+ arv_node = testutil.arvados_node_mock(1)
+ cloud_node = testutil.cloud_node_mock(2)
+ driver = self.new_driver()
+ driver.sync_node(cloud_node, arv_node)
+ self.check_node_tagged(cloud_node,
+ {'hostname': 'compute1.zzzzz.arvadosapi.com'})
commit b95518608653185f96f378ea3df4cf1ad7b05817
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date: Thu Aug 20 14:05:37 2015 -0400
6507: Fix test
diff --git a/services/nodemanager/tests/test_computenode_driver_azure.py b/services/nodemanager/tests/test_computenode_driver_azure.py
index 3b7cca2..97f2d84 100644
--- a/services/nodemanager/tests/test_computenode_driver_azure.py
+++ b/services/nodemanager/tests/test_computenode_driver_azure.py
@@ -26,11 +26,10 @@ class AzureComputeNodeDriverTestCase(testutil.DriverTestMixin, unittest.TestCase
self.assertEqual(kwargs, self.driver_mock.call_args[1])
def test_create_image_loaded_at_initialization(self):
- list_method = self.driver_mock().list_images
- list_method.return_value = [testutil.cloud_object_mock(c)
- for c in 'abc']
+ get_method = self.driver_mock().get_image
+ get_method.return_value = [testutil.cloud_object_mock('id_b')]
driver = self.new_driver(create_kwargs={'image': 'id_b'})
- self.assertEqual(1, list_method.call_count)
+ self.assertEqual(1, get_method.call_count)
def test_create_includes_ping_and_hostname(self):
arv_node = testutil.arvados_node_mock(info={'ping_secret': 'ssshh'})
commit 44494089c502572ee231bb421da70889b68fae4f
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date: Thu Aug 20 14:04:03 2015 -0400
6507: Use get_image() instead of list_images() for image lookup.
diff --git a/services/nodemanager/arvnodeman/computenode/driver/azure.py b/services/nodemanager/arvnodeman/computenode/driver/azure.py
index dd2b636..d075bdb 100644
--- a/services/nodemanager/arvnodeman/computenode/driver/azure.py
+++ b/services/nodemanager/arvnodeman/computenode/driver/azure.py
@@ -50,7 +50,7 @@ class ComputeNodeDriver(BaseComputeNodeDriver):
pass
def _init_image(self, urn):
- return "image", self.list_images(ex_urn=urn)[0]
+ return "image", self.get_image(urn)
def _init_ssh_key(self, filename):
with open(filename) as ssh_file:
commit 5467329d6822455de4644a277f741068cf5f1ec9
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date: Wed Aug 19 11:22:35 2015 -0400
6507: Fix tests
diff --git a/services/nodemanager/tests/test_computenode_driver_azure.py b/services/nodemanager/tests/test_computenode_driver_azure.py
index 049e708..3b7cca2 100644
--- a/services/nodemanager/tests/test_computenode_driver_azure.py
+++ b/services/nodemanager/tests/test_computenode_driver_azure.py
@@ -32,7 +32,7 @@ class AzureComputeNodeDriverTestCase(testutil.DriverTestMixin, unittest.TestCase
driver = self.new_driver(create_kwargs={'image': 'id_b'})
self.assertEqual(1, list_method.call_count)
- def test_create_includes_ping_url(self):
+ def test_create_includes_ping_and_hostname(self):
arv_node = testutil.arvados_node_mock(info={'ping_secret': 'ssshh'})
driver = self.new_driver()
driver.create_node(testutil.MockSize(1), arv_node)
@@ -41,6 +41,8 @@ class AzureComputeNodeDriverTestCase(testutil.DriverTestMixin, unittest.TestCase
print(create_method.call_args[1])
self.assertIn('ping_secret=ssshh',
create_method.call_args[1].get('ex_tags', {}).get('arv-ping-url', ""))
+ self.assertEqual('compute99.zzzzz.arvadosapi.com',
+ create_method.call_args[1].get('ex_tags', {}).get('hostname', ""))
def test_name_from_new_arvados_node(self):
arv_node = testutil.arvados_node_mock(hostname=None)
@@ -54,14 +56,6 @@ class AzureComputeNodeDriverTestCase(testutil.DriverTestMixin, unittest.TestCase
self.assertIs(cloud_node, tag_mock.call_args[0][0])
self.assertEqual(expected_tags, tag_mock.call_args[0][1])
- def test_sync_node(self):
- arv_node = testutil.arvados_node_mock(1)
- cloud_node = testutil.cloud_node_mock(2)
- driver = self.new_driver()
- driver.sync_node(cloud_node, arv_node)
- self.check_node_tagged(cloud_node,
- {'hostname': 'compute1.zzzzz.arvadosapi.com'})
-
def test_node_create_time(self):
refsecs = int(time.time())
reftuple = time.gmtime(refsecs)
commit 0600e45775658866f624b87efeef6a1067db5c39
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date: Wed Aug 19 11:19:45 2015 -0400
6507: Set hostname tag in arvados_create_kwargs instead of sync_node.
diff --git a/services/nodemanager/arvnodeman/computenode/driver/azure.py b/services/nodemanager/arvnodeman/computenode/driver/azure.py
index 725020e..dd2b636 100644
--- a/services/nodemanager/arvnodeman/computenode/driver/azure.py
+++ b/services/nodemanager/arvnodeman/computenode/driver/azure.py
@@ -38,6 +38,7 @@ class ComputeNodeDriver(BaseComputeNodeDriver):
tags = {
'booted_at': time.strftime(ARVADOS_TIMEFMT, time.gmtime()),
'arv-ping-url': self._make_ping_url(arvados_node),
+ 'hostname': arvados_node_fqdn(arvados_node)
}
tags.update(self.tags)
return {
@@ -46,8 +47,7 @@ class ComputeNodeDriver(BaseComputeNodeDriver):
}
def sync_node(self, cloud_node, arvados_node):
- hostname = arvados_node_fqdn(arvados_node)
- self.real.ex_create_tags(cloud_node, {"hostname": hostname})
+ pass
def _init_image(self, urn):
return "image", self.list_images(ex_urn=urn)[0]
commit 8552d32092e45a1f6ee1424e92882ec84b51cb8a
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date: Wed Aug 19 11:13:34 2015 -0400
6507: Paste in command line for creating service principals instead of just
referencing generic documentation.
diff --git a/services/nodemanager/doc/azure.example.cfg b/services/nodemanager/doc/azure.example.cfg
index 6bfd28b..e92431f 100644
--- a/services/nodemanager/doc/azure.example.cfg
+++ b/services/nodemanager/doc/azure.example.cfg
@@ -88,18 +88,18 @@ provider = azure
shutdown_windows = 20, 999999
[Cloud Credentials]
-# Get these using "azure account list" with the azure CLI.
+# Use "azure account list" with the azure CLI to get these values.
tenant_id = 00000000-0000-0000-0000-000000000000
subscription_id = 00000000-0000-0000-0000-000000000000
-# Follow the directions on the following page to create an Active Directory
-# "service principal" that NodeManager will use to authenticate. Note that the
-# role must be "Owner" to manipulate resources.
-#
+# The following directions are based on
# https://azure.microsoft.com/en-us/documentation/articles/resource-group-authenticate-service-principal/
#
-# The key is the the "Application Id" and the secret is the password provided
-# when creating the service principal.
+# azure ad app create --name "<Your Application Display Name>" --home-page "<https://YourApplicationHomePage>" --identifier-uris "<https://YourApplicationUri>" --password <Your_Password>
+# azure ad sp create "<Application_Id>"
+# azure role assignment create --objectId "<Object_Id>" -o Owner -c /subscriptions/{subscriptionId}/
+#
+# Use <Application_Id> for "key" and the <Your_Password> for "secret"
#
key = 00000000-0000-0000-0000-000000000000
secret = PASSWORD
commit 0fde046a6c68909ae25af809557fcd64eb7264d7
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date: Wed Aug 19 11:07:53 2015 -0400
6507: Added notes to sample azure configuration file
diff --git a/services/nodemanager/doc/azure.example.cfg b/services/nodemanager/doc/azure.example.cfg
index 8fad85d..6bfd28b 100644
--- a/services/nodemanager/doc/azure.example.cfg
+++ b/services/nodemanager/doc/azure.example.cfg
@@ -88,24 +88,56 @@ provider = azure
shutdown_windows = 20, 999999
[Cloud Credentials]
-subscription_id = SUBSCRIPTION_ID
-key_file = PATH_TO_PEM_FILE
+# Get these using "azure account list" with the azure CLI.
+tenant_id = 00000000-0000-0000-0000-000000000000
+subscription_id = 00000000-0000-0000-0000-000000000000
+
+# Follow the directions on the following page to create an Active Directory
+# "service principal" that NodeManager will use to authenticate. Note that the
+# role must be "Owner" to manipulate resources.
+#
+# https://azure.microsoft.com/en-us/documentation/articles/resource-group-authenticate-service-principal/
+#
+# The key is the the "Application Id" and the secret is the password provided
+# when creating the service principal.
+#
+key = 00000000-0000-0000-0000-000000000000
+secret = PASSWORD
timeout = 60
+region = East US
[Cloud List]
-# This section defines filters that find compute nodes.
-# Tags that you specify here will automatically be added to nodes you create.
-# Replace colons in Microsoft filters with underscores
-# (e.g., write "tag:mytag" as "tag_mytag").
-instance-state-name = running
+# The resource group in which the compute node virtual machines will be created
+# and listed.
+ex_resource_group = ArvadosResourceGroup
+
+[Cloud Create]
+# The image id, in the form "Publisher:Offer:SKU:Version"
+image = Canonical:UbuntuServer:14.04.3-LTS:14.04.201508050
+
+# Path to a local ssh key file that will be used to provision new nodes.
+ssh_key = /home/arvadosuser/.ssh/id_rsa.pub
+
+# The account name for the admin user that will be provisioned on new nodes.
+ex_user_name = arvadosuser
+
+# The Azure storage account that will be used to store the node OS disk images.
+ex_storage_account = arvadosstorage
+
+# The virtual network the VMs will be associated with.
+ex_network = ArvadosNetwork
+
+# Optional subnet of the virtual network.
+#ex_subnet = default
+
+# Node tags
tag_arvados-class = dynamic-compute
tag_cluster = zyxwv
-[Cloud Create]
-image: ???
-ex_cloud_service_name: ???
+# the API server to ping
+ping_host = hostname:port
-[Size A3]
+[Size Standard_A2]
# You can define any number of Size sections to list Azure sizes you're
# willing to use. The Node Manager should boot the cheapest size(s) that
# can run jobs in the queue (N.B.: defining more than one size has not been
commit 45a172b5f59ea7464b7241212464bf9113a18f36
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date: Wed Aug 19 10:36:42 2015 -0400
6507: Add basic azure driver tests.
diff --git a/services/nodemanager/arvnodeman/computenode/driver/azure.py b/services/nodemanager/arvnodeman/computenode/driver/azure.py
index ecab75c..725020e 100644
--- a/services/nodemanager/arvnodeman/computenode/driver/azure.py
+++ b/services/nodemanager/arvnodeman/computenode/driver/azure.py
@@ -19,7 +19,11 @@ class ComputeNodeDriver(BaseComputeNodeDriver):
def __init__(self, auth_kwargs, list_kwargs, create_kwargs,
driver_class=DEFAULT_DRIVER):
- list_kwargs["ex_resource_group"] = create_kwargs["ex_resource_group"]
+
+ if not list_kwargs.get("ex_resource_group"):
+ raise Exception("Must include ex_resource_group in Cloud List configuration (list_kwargs)")
+
+ create_kwargs["ex_resource_group"] = list_kwargs["ex_resource_group"]
self.tags = {key[4:]: value
for key, value in create_kwargs.iteritems()
@@ -43,7 +47,7 @@ class ComputeNodeDriver(BaseComputeNodeDriver):
def sync_node(self, cloud_node, arvados_node):
hostname = arvados_node_fqdn(arvados_node)
- self.real.ex_create_tags(cloud_node.id, {"hostname": hostname})
+ self.real.ex_create_tags(cloud_node, {"hostname": hostname})
def _init_image(self, urn):
return "image", self.list_images(ex_urn=urn)[0]
diff --git a/services/nodemanager/tests/test_computenode_driver_azure.py b/services/nodemanager/tests/test_computenode_driver_azure.py
new file mode 100644
index 0000000..049e708
--- /dev/null
+++ b/services/nodemanager/tests/test_computenode_driver_azure.py
@@ -0,0 +1,90 @@
+#!/usr/bin/env python
+
+from __future__ import absolute_import, print_function
+
+import ssl
+import time
+import unittest
+
+import libcloud.common.types as cloud_types
+import mock
+
+import arvnodeman.computenode.driver.azure as azure
+from . import testutil
+
+class AzureComputeNodeDriverTestCase(testutil.DriverTestMixin, unittest.TestCase):
+ TEST_CLASS = azure.ComputeNodeDriver
+
+ def new_driver(self, auth_kwargs={}, list_kwargs={}, create_kwargs={}):
+ list_kwargs.setdefault("ex_resource_group", "TestResourceGroup")
+ return super(AzureComputeNodeDriverTestCase, self).new_driver(auth_kwargs, list_kwargs, create_kwargs)
+
+ def test_driver_instantiation(self):
+ kwargs = {'key': 'testkey'}
+ driver = self.new_driver(auth_kwargs=kwargs)
+ self.assertTrue(self.driver_mock.called)
+ self.assertEqual(kwargs, self.driver_mock.call_args[1])
+
+ def test_create_image_loaded_at_initialization(self):
+ list_method = self.driver_mock().list_images
+ list_method.return_value = [testutil.cloud_object_mock(c)
+ for c in 'abc']
+ driver = self.new_driver(create_kwargs={'image': 'id_b'})
+ self.assertEqual(1, list_method.call_count)
+
+ def test_create_includes_ping_url(self):
+ arv_node = testutil.arvados_node_mock(info={'ping_secret': 'ssshh'})
+ driver = self.new_driver()
+ driver.create_node(testutil.MockSize(1), arv_node)
+ create_method = self.driver_mock().create_node
+ self.assertTrue(create_method.called)
+ print(create_method.call_args[1])
+ self.assertIn('ping_secret=ssshh',
+ create_method.call_args[1].get('ex_tags', {}).get('arv-ping-url', ""))
+
+ def test_name_from_new_arvados_node(self):
+ arv_node = testutil.arvados_node_mock(hostname=None)
+ driver = self.new_driver()
+ self.assertEqual('compute-000000000000063-zzzzz',
+ driver.arvados_create_kwargs(arv_node)['name'])
+
+ def check_node_tagged(self, cloud_node, expected_tags):
+ tag_mock = self.driver_mock().ex_create_tags
+ self.assertTrue(tag_mock.called)
+ self.assertIs(cloud_node, tag_mock.call_args[0][0])
+ self.assertEqual(expected_tags, tag_mock.call_args[0][1])
+
+ def test_sync_node(self):
+ arv_node = testutil.arvados_node_mock(1)
+ cloud_node = testutil.cloud_node_mock(2)
+ driver = self.new_driver()
+ driver.sync_node(cloud_node, arv_node)
+ self.check_node_tagged(cloud_node,
+ {'hostname': 'compute1.zzzzz.arvadosapi.com'})
+
+ def test_node_create_time(self):
+ refsecs = int(time.time())
+ reftuple = time.gmtime(refsecs)
+ node = testutil.cloud_node_mock()
+ node.extra = {'tags': {'booted_at': time.strftime('%Y-%m-%dT%H:%M:%S.000Z',
+ reftuple)}}
+ self.assertEqual(refsecs, azure.ComputeNodeDriver.node_start_time(node))
+
+ def test_node_fqdn(self):
+ name = 'fqdntest.zzzzz.arvadosapi.com'
+ node = testutil.cloud_node_mock()
+ node.extra = {'tags': {"hostname": name}}
+ self.assertEqual(name, azure.ComputeNodeDriver.node_fqdn(node))
+
+ def test_cloud_exceptions(self):
+ for error in [Exception("test exception"),
+ IOError("test exception"),
+ ssl.SSLError("test exception"),
+ cloud_types.LibcloudError("test exception")]:
+ self.assertTrue(azure.ComputeNodeDriver.is_cloud_exception(error),
+ "{} not flagged as cloud exception".format(error))
+
+ def test_noncloud_exceptions(self):
+ self.assertFalse(
+ azure.ComputeNodeDriver.is_cloud_exception(ValueError("test error")),
+ "ValueError flagged as cloud exception")
commit c46ab7c622127315b4e90c98e859588a3403267a
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date: Tue Aug 18 17:23:14 2015 -0400
Create, destroy, tag nodes.
diff --git a/services/nodemanager/arvnodeman/computenode/driver/azure.py b/services/nodemanager/arvnodeman/computenode/driver/azure.py
index c054fac..ecab75c 100644
--- a/services/nodemanager/arvnodeman/computenode/driver/azure.py
+++ b/services/nodemanager/arvnodeman/computenode/driver/azure.py
@@ -3,41 +3,67 @@
from __future__ import absolute_import, print_function
import time
+from operator import attrgetter
import libcloud.compute.base as cloud_base
import libcloud.compute.providers as cloud_provider
import libcloud.compute.types as cloud_types
from . import BaseComputeNodeDriver
-from .. import arvados_node_fqdn
+from .. import arvados_node_fqdn, arvados_timestamp, ARVADOS_TIMEFMT
class ComputeNodeDriver(BaseComputeNodeDriver):
- DEFAULT_DRIVER = cloud_provider.get_driver(cloud_types.Provider.AZURE)
+ DEFAULT_DRIVER = cloud_provider.get_driver(cloud_types.Provider.AZURE_ARM)
SEARCH_CACHE = {}
def __init__(self, auth_kwargs, list_kwargs, create_kwargs,
driver_class=DEFAULT_DRIVER):
+ list_kwargs["ex_resource_group"] = create_kwargs["ex_resource_group"]
+
+ self.tags = {key[4:]: value
+ for key, value in create_kwargs.iteritems()
+ if key.startswith('tag_')}
super(ComputeNodeDriver, self).__init__(
auth_kwargs, list_kwargs, create_kwargs,
driver_class)
def arvados_create_kwargs(self, arvados_node):
- return {'name': arvados_node["uuid"]}
+ cluster_id, _, node_id = arvados_node['uuid'].split('-')
+ name = 'compute-{}-{}'.format(node_id, cluster_id)
+ tags = {
+ 'booted_at': time.strftime(ARVADOS_TIMEFMT, time.gmtime()),
+ 'arv-ping-url': self._make_ping_url(arvados_node),
+ }
+ tags.update(self.tags)
+ return {
+ 'name': name,
+ 'ex_tags': tags,
+ }
def sync_node(self, cloud_node, arvados_node):
- print("In sync_node")
+ hostname = arvados_node_fqdn(arvados_node)
+ self.real.ex_create_tags(cloud_node.id, {"hostname": hostname})
+
+ def _init_image(self, urn):
+ return "image", self.list_images(ex_urn=urn)[0]
- def _init_image(self, image):
- return 'image', self.search_for(image, 'list_images')
+ def _init_ssh_key(self, filename):
+ with open(filename) as ssh_file:
+ key = cloud_base.NodeAuthSSHKey(ssh_file.read())
+ return 'auth', key
- def _init_password(self, password):
- return 'auth', cloud_base.NodeAuthPassword(password)
+ def list_nodes(self):
+ # Azure only supports filtering node lists by resource group.
+ # Do our own filtering based on tag.
+ return [node for node in
+ super(ComputeNodeDriver, self).list_nodes()
+ if node.extra["tags"].get("arvados-class") == self.tags["arvados-class"]]
@classmethod
def node_fqdn(cls, node):
- return node.name
+ return node.extra["tags"].get("hostname")
@classmethod
def node_start_time(cls, node):
- pass
+ return arvados_timestamp(node.extra["tags"].get("booted_at"))
commit d5341150545efd0960acf34186ca18b98a1b1860
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date: Fri Aug 14 13:48:23 2015 -0400
6507: Initial commit
diff --git a/services/nodemanager/arvnodeman/computenode/driver/__init__.py b/services/nodemanager/arvnodeman/computenode/driver/__init__.py
index 042f6a5..16134a2 100644
--- a/services/nodemanager/arvnodeman/computenode/driver/__init__.py
+++ b/services/nodemanager/arvnodeman/computenode/driver/__init__.py
@@ -9,6 +9,8 @@ from libcloud.compute.base import NodeDriver
from ...config import NETWORK_ERRORS
+import pprint
+
class BaseComputeNodeDriver(object):
"""Abstract base class for compute node drivers.
@@ -56,7 +58,7 @@ class BaseComputeNodeDriver(object):
def _init_ping_host(self, ping_host):
self.ping_host = ping_host
- def search_for(self, term, list_method, key=attrgetter('id')):
+ def search_for(self, term, list_method, key=attrgetter('id'), **kwargs):
"""Return one matching item from a list of cloud objects.
Raises ValueError if the number of matching objects is not exactly 1.
@@ -71,7 +73,8 @@ class BaseComputeNodeDriver(object):
"""
cache_key = (list_method, term)
if cache_key not in self.SEARCH_CACHE:
- results = [item for item in getattr(self.real, list_method)()
+ items = getattr(self.real, list_method)(**kwargs)
+ results = [item for item in items
if key(item) == term]
count = len(results)
if count != 1:
diff --git a/services/nodemanager/arvnodeman/computenode/driver/azure.py b/services/nodemanager/arvnodeman/computenode/driver/azure.py
new file mode 100644
index 0000000..c054fac
--- /dev/null
+++ b/services/nodemanager/arvnodeman/computenode/driver/azure.py
@@ -0,0 +1,43 @@
+#!/usr/bin/env python
+
+from __future__ import absolute_import, print_function
+
+import time
+
+import libcloud.compute.base as cloud_base
+import libcloud.compute.providers as cloud_provider
+import libcloud.compute.types as cloud_types
+
+from . import BaseComputeNodeDriver
+from .. import arvados_node_fqdn
+
+class ComputeNodeDriver(BaseComputeNodeDriver):
+
+ DEFAULT_DRIVER = cloud_provider.get_driver(cloud_types.Provider.AZURE)
+ SEARCH_CACHE = {}
+
+ def __init__(self, auth_kwargs, list_kwargs, create_kwargs,
+ driver_class=DEFAULT_DRIVER):
+ super(ComputeNodeDriver, self).__init__(
+ auth_kwargs, list_kwargs, create_kwargs,
+ driver_class)
+
+ def arvados_create_kwargs(self, arvados_node):
+ return {'name': arvados_node["uuid"]}
+
+ def sync_node(self, cloud_node, arvados_node):
+ print("In sync_node")
+
+ def _init_image(self, image):
+ return 'image', self.search_for(image, 'list_images')
+
+ def _init_password(self, password):
+ return 'auth', cloud_base.NodeAuthPassword(password)
+
+ @classmethod
+ def node_fqdn(cls, node):
+ return node.name
+
+ @classmethod
+ def node_start_time(cls, node):
+ pass
diff --git a/services/nodemanager/doc/azure.example.cfg b/services/nodemanager/doc/azure.example.cfg
new file mode 100644
index 0000000..8fad85d
--- /dev/null
+++ b/services/nodemanager/doc/azure.example.cfg
@@ -0,0 +1,119 @@
+# Azure configuration for Arvados Node Manager.
+# All times are in seconds unless specified otherwise.
+
+[Daemon]
+# The dispatcher can customize the start and stop procedure for
+# cloud nodes. For example, the SLURM dispatcher drains nodes
+# through SLURM before shutting them down.
+#dispatcher = slurm
+
+# Node Manager will ensure that there are at least this many nodes
+# running at all times.
+min_nodes = 0
+
+# Node Manager will not start any compute nodes when at least this
+# many are running.
+max_nodes = 8
+
+# Poll Azure nodes and Arvados for new information every N seconds.
+poll_time = 60
+
+# Polls have exponential backoff when services fail to respond.
+# This is the longest time to wait between polls.
+max_poll_time = 300
+
+# If Node Manager can't successfully poll a service for this long,
+# it will never start or stop compute nodes, on the assumption that its
+# information is too outdated.
+poll_stale_after = 600
+
+# If Node Manager boots a cloud node, and it does not pair with an Arvados
+# node before this long, assume that there was a cloud bootstrap failure and
+# shut it down. Note that normal shutdown windows apply (see the Cloud
+# section), so this should be shorter than the first shutdown window value.
+boot_fail_after = 1800
+
+# "Node stale time" affects two related behaviors.
+# 1. If a compute node has been running for at least this long, but it
+# isn't paired with an Arvados node, do not shut it down, but leave it alone.
+# This prevents the node manager from shutting down a node that might
+# actually be doing work, but is having temporary trouble contacting the
+# API server.
+# 2. When the Node Manager starts a new compute node, it will try to reuse
+# an Arvados node that hasn't been updated for this long.
+node_stale_after = 14400
+
+# File path for Certificate Authorities
+certs_file = /etc/ssl/certs/ca-certificates.crt
+
+[Logging]
+# Log file path
+file = /var/log/arvados/node-manager.log
+
+# Log level for most Node Manager messages.
+# Choose one of DEBUG, INFO, WARNING, ERROR, or CRITICAL.
+# WARNING lets you know when polling a service fails.
+# INFO additionally lets you know when a compute node is started or stopped.
+level = INFO
+
+# You can also set different log levels for specific libraries.
+# Pykka is the Node Manager's actor library.
+# Setting this to DEBUG will display tracebacks for uncaught
+# exceptions in the actors, but it's also very chatty.
+pykka = WARNING
+
+# Setting apiclient to INFO will log the URL of every Arvados API request.
+apiclient = WARNING
+
+[Arvados]
+host = zyxwv.arvadosapi.com
+token = ARVADOS_TOKEN
+timeout = 15
+
+# Accept an untrusted SSL certificate from the API server?
+insecure = no
+
+[Cloud]
+provider = azure
+
+# Shutdown windows define periods of time when a node may and may not be shut
+# down. These are windows in full minutes, separated by commas. Counting from
+# the time the node is booted, the node WILL NOT shut down for N1 minutes; then
+# it MAY shut down for N2 minutes; then it WILL NOT shut down for N3 minutes;
+# and so on. For example, "20, 999999" means the node may shut down between
+# the 20th and 999999th minutes of uptime.
+# Azure bills by the minute, so it makes sense to aggressively shut down idle
+# nodes. Specify at least two windows. You can add as many as you need beyond
+# that.
+shutdown_windows = 20, 999999
+
+[Cloud Credentials]
+subscription_id = SUBSCRIPTION_ID
+key_file = PATH_TO_PEM_FILE
+timeout = 60
+
+[Cloud List]
+# This section defines filters that find compute nodes.
+# Tags that you specify here will automatically be added to nodes you create.
+# Replace colons in Microsoft filters with underscores
+# (e.g., write "tag:mytag" as "tag_mytag").
+instance-state-name = running
+tag_arvados-class = dynamic-compute
+tag_cluster = zyxwv
+
+[Cloud Create]
+image: ???
+ex_cloud_service_name: ???
+
+[Size A3]
+# You can define any number of Size sections to list Azure sizes you're
+# willing to use. The Node Manager should boot the cheapest size(s) that
+# can run jobs in the queue (N.B.: defining more than one size has not been
+# tested yet).
+# Each size section MUST define the number of cores available in this
+# size class (since libcloud does not provide any consistent API for exposing
+# this setting).
+# You may also want to define the amount of scratch space (expressed
+# in GB) for Crunch jobs. You can also override Microsoft's provided
+# data fields by setting the same names here.
+cores = 4
diff --git a/services/nodemanager/setup.py b/services/nodemanager/setup.py
index d9fcbcf..502690b 100644
--- a/services/nodemanager/setup.py
+++ b/services/nodemanager/setup.py
@@ -25,7 +25,7 @@ setup(name='arvados-node-manager',
license='GNU Affero General Public License, version 3.0',
packages=find_packages(),
install_requires=[
- 'apache-libcloud>=0.16',
+ 'apache-libcloud>=0.18',
'arvados-python-client>=0.1.20150206225333',
'pykka',
'python-daemon',
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list