[ARVADOS] created: 10b25cacfc521b3dc74c2204fc2b29aca8ad2631

git at public.curoverse.com git at public.curoverse.com
Thu Aug 27 14:33:54 EDT 2015


        at  10b25cacfc521b3dc74c2204fc2b29aca8ad2631 (commit)


commit 10b25cacfc521b3dc74c2204fc2b29aca8ad2631
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date:   Thu Aug 27 14:23:06 2015 -0400

    6507: Change version number for libcloud fork to 0.18.1.dev1 because local tags create too much trouble.

diff --git a/services/nodemanager/setup.py b/services/nodemanager/setup.py
index d2de38f..16df376 100644
--- a/services/nodemanager/setup.py
+++ b/services/nodemanager/setup.py
@@ -31,11 +31,11 @@ setup(name='arvados-node-manager',
         'python-daemon',
         ],
       dependency_links = [
-          "https://github.com/curoverse/libcloud/archive/apache-libcloud-0.18.0+azurearm.zip"
+          "https://github.com/curoverse/libcloud/archive/apache-libcloud-0.18.1.dev1.zip"
       ],
       scripts=['bin/arvados-node-manager'],
       test_suite='tests',
-      tests_require=['mock>=1.0', "apache-libcloud==0.18.0+azurearm"],
+      tests_require=['mock>=1.0', "apache-libcloud==0.18.1.dev1"],
       zip_safe=False,
       cmdclass={'egg_info': tagger},
       )

commit 9b914107504ece419ee2f7d72be7d6262037ff52
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date:   Wed Aug 26 15:24:11 2015 -0400

    6507: Filter out "tag_" from create_kwargs.

diff --git a/services/nodemanager/arvnodeman/computenode/driver/azure.py b/services/nodemanager/arvnodeman/computenode/driver/azure.py
index f4686b9..014b92c 100644
--- a/services/nodemanager/arvnodeman/computenode/driver/azure.py
+++ b/services/nodemanager/arvnodeman/computenode/driver/azure.py
@@ -27,6 +27,10 @@ class ComputeNodeDriver(BaseComputeNodeDriver):
         self.tags = {key[4:]: value
                      for key, value in create_kwargs.iteritems()
                      if key.startswith('tag_')}
+        # filter out tags from create_kwargs
+        create_kwargs = {key: value
+                         for key, value in create_kwargs.iteritems()
+                         if not key.startswith('tag_')}
         super(ComputeNodeDriver, self).__init__(
             auth_kwargs, list_kwargs, create_kwargs,
             driver_class)

commit 03980b49e2fb3cac357e417acea64cd342d1065e
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date:   Wed Aug 26 15:23:35 2015 -0400

    6507: Use dependency_links to specify custom libcloud package needed for azure support.

diff --git a/services/nodemanager/setup.py b/services/nodemanager/setup.py
index 502690b..d2de38f 100644
--- a/services/nodemanager/setup.py
+++ b/services/nodemanager/setup.py
@@ -25,14 +25,17 @@ setup(name='arvados-node-manager',
       license='GNU Affero General Public License, version 3.0',
       packages=find_packages(),
       install_requires=[
-        'apache-libcloud>=0.18',
+        'apache-libcloud>=0.16',
         'arvados-python-client>=0.1.20150206225333',
         'pykka',
         'python-daemon',
         ],
+      dependency_links = [
+          "https://github.com/curoverse/libcloud/archive/apache-libcloud-0.18.0+azurearm.zip"
+      ],
       scripts=['bin/arvados-node-manager'],
       test_suite='tests',
-      tests_require=['mock>=1.0'],
+      tests_require=['mock>=1.0', "apache-libcloud==0.18.0+azurearm"],
       zip_safe=False,
       cmdclass={'egg_info': tagger},
       )

commit 19a2e9a97939126293ce33d72f576f6f54da574f
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date:   Tue Aug 25 13:02:55 2015 -0400

    6507: Remove unused imports, move _init_ssh_key() into base, set hostname tag on sync_node(), tweak tests.

diff --git a/services/nodemanager/arvnodeman/computenode/driver/__init__.py b/services/nodemanager/arvnodeman/computenode/driver/__init__.py
index 16134a2..e3298a7 100644
--- a/services/nodemanager/arvnodeman/computenode/driver/__init__.py
+++ b/services/nodemanager/arvnodeman/computenode/driver/__init__.py
@@ -9,8 +9,6 @@ from libcloud.compute.base import NodeDriver
 
 from ...config import NETWORK_ERRORS
 
-import pprint
-
 class BaseComputeNodeDriver(object):
     """Abstract base class for compute node drivers.
 
@@ -58,6 +56,11 @@ class BaseComputeNodeDriver(object):
     def _init_ping_host(self, ping_host):
         self.ping_host = ping_host
 
+    def _init_ssh_key(self, filename):
+        with open(filename) as ssh_file:
+            key = cloud_base.NodeAuthSSHKey(ssh_file.read())
+        return 'auth', key
+
     def search_for(self, term, list_method, key=attrgetter('id'), **kwargs):
         """Return one matching item from a list of cloud objects.
 
diff --git a/services/nodemanager/arvnodeman/computenode/driver/azure.py b/services/nodemanager/arvnodeman/computenode/driver/azure.py
index d075bdb..f4686b9 100644
--- a/services/nodemanager/arvnodeman/computenode/driver/azure.py
+++ b/services/nodemanager/arvnodeman/computenode/driver/azure.py
@@ -3,7 +3,6 @@
 from __future__ import absolute_import, print_function
 
 import time
-from operator import attrgetter
 
 import libcloud.compute.base as cloud_base
 import libcloud.compute.providers as cloud_provider
@@ -37,8 +36,7 @@ class ComputeNodeDriver(BaseComputeNodeDriver):
         name = 'compute-{}-{}'.format(node_id, cluster_id)
         tags = {
             'booted_at': time.strftime(ARVADOS_TIMEFMT, time.gmtime()),
-            'arv-ping-url': self._make_ping_url(arvados_node),
-            'hostname': arvados_node_fqdn(arvados_node)
+            'arv-ping-url': self._make_ping_url(arvados_node)
         }
         tags.update(self.tags)
         return {
@@ -47,16 +45,12 @@ class ComputeNodeDriver(BaseComputeNodeDriver):
         }
 
     def sync_node(self, cloud_node, arvados_node):
-        pass
+        self.real.ex_create_tags(cloud_node,
+                                 {'hostname': arvados_node_fqdn(arvados_node)})
 
     def _init_image(self, urn):
         return "image", self.get_image(urn)
 
-    def _init_ssh_key(self, filename):
-        with open(filename) as ssh_file:
-            key = cloud_base.NodeAuthSSHKey(ssh_file.read())
-        return 'auth', key
-
     def list_nodes(self):
         # Azure only supports filtering node lists by resource group.
         # Do our own filtering based on tag.
diff --git a/services/nodemanager/arvnodeman/computenode/driver/ec2.py b/services/nodemanager/arvnodeman/computenode/driver/ec2.py
index 588ca51..6afe316 100644
--- a/services/nodemanager/arvnodeman/computenode/driver/ec2.py
+++ b/services/nodemanager/arvnodeman/computenode/driver/ec2.py
@@ -64,11 +64,6 @@ class ComputeNodeDriver(BaseComputeNodeDriver):
     def _init_subnet_id(self, subnet_id):
         return 'ex_subnet', self.search_for(subnet_id, 'ex_list_subnets')
 
-    def _init_ssh_key(self, filename):
-        with open(filename) as ssh_file:
-            key = cloud_base.NodeAuthSSHKey(ssh_file.read())
-        return 'auth', key
-
     def arvados_create_kwargs(self, arvados_node):
         return {'name': arvados_node_fqdn(arvados_node),
                 'ex_userdata': self._make_ping_url(arvados_node)}
diff --git a/services/nodemanager/tests/test_computenode_driver_azure.py b/services/nodemanager/tests/test_computenode_driver_azure.py
index 97f2d84..90fea0c 100644
--- a/services/nodemanager/tests/test_computenode_driver_azure.py
+++ b/services/nodemanager/tests/test_computenode_driver_azure.py
@@ -27,21 +27,19 @@ class AzureComputeNodeDriverTestCase(testutil.DriverTestMixin, unittest.TestCase
 
     def test_create_image_loaded_at_initialization(self):
         get_method = self.driver_mock().get_image
-        get_method.return_value = [testutil.cloud_object_mock('id_b')]
+        get_method.return_value = testutil.cloud_object_mock('id_b')
         driver = self.new_driver(create_kwargs={'image': 'id_b'})
         self.assertEqual(1, get_method.call_count)
 
-    def test_create_includes_ping_and_hostname(self):
+    def test_create_includes_ping(self):
         arv_node = testutil.arvados_node_mock(info={'ping_secret': 'ssshh'})
+        arv_node["hostname"] = None
         driver = self.new_driver()
         driver.create_node(testutil.MockSize(1), arv_node)
         create_method = self.driver_mock().create_node
         self.assertTrue(create_method.called)
-        print(create_method.call_args[1])
         self.assertIn('ping_secret=ssshh',
                       create_method.call_args[1].get('ex_tags', {}).get('arv-ping-url', ""))
-        self.assertEqual('compute99.zzzzz.arvadosapi.com',
-                      create_method.call_args[1].get('ex_tags', {}).get('hostname', ""))
 
     def test_name_from_new_arvados_node(self):
         arv_node = testutil.arvados_node_mock(hostname=None)
@@ -81,3 +79,11 @@ class AzureComputeNodeDriverTestCase(testutil.DriverTestMixin, unittest.TestCase
         self.assertFalse(
             azure.ComputeNodeDriver.is_cloud_exception(ValueError("test error")),
             "ValueError flagged as cloud exception")
+
+    def test_sync_node(self):
+        arv_node = testutil.arvados_node_mock(1)
+        cloud_node = testutil.cloud_node_mock(2)
+        driver = self.new_driver()
+        driver.sync_node(cloud_node, arv_node)
+        self.check_node_tagged(cloud_node,
+                               {'hostname': 'compute1.zzzzz.arvadosapi.com'})

commit b95518608653185f96f378ea3df4cf1ad7b05817
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date:   Thu Aug 20 14:05:37 2015 -0400

    6507: Fix test

diff --git a/services/nodemanager/tests/test_computenode_driver_azure.py b/services/nodemanager/tests/test_computenode_driver_azure.py
index 3b7cca2..97f2d84 100644
--- a/services/nodemanager/tests/test_computenode_driver_azure.py
+++ b/services/nodemanager/tests/test_computenode_driver_azure.py
@@ -26,11 +26,10 @@ class AzureComputeNodeDriverTestCase(testutil.DriverTestMixin, unittest.TestCase
         self.assertEqual(kwargs, self.driver_mock.call_args[1])
 
     def test_create_image_loaded_at_initialization(self):
-        list_method = self.driver_mock().list_images
-        list_method.return_value = [testutil.cloud_object_mock(c)
-                                    for c in 'abc']
+        get_method = self.driver_mock().get_image
+        get_method.return_value = [testutil.cloud_object_mock('id_b')]
         driver = self.new_driver(create_kwargs={'image': 'id_b'})
-        self.assertEqual(1, list_method.call_count)
+        self.assertEqual(1, get_method.call_count)
 
     def test_create_includes_ping_and_hostname(self):
         arv_node = testutil.arvados_node_mock(info={'ping_secret': 'ssshh'})

commit 44494089c502572ee231bb421da70889b68fae4f
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date:   Thu Aug 20 14:04:03 2015 -0400

    6507: Use get_image() instead of list_images() for image lookup.

diff --git a/services/nodemanager/arvnodeman/computenode/driver/azure.py b/services/nodemanager/arvnodeman/computenode/driver/azure.py
index dd2b636..d075bdb 100644
--- a/services/nodemanager/arvnodeman/computenode/driver/azure.py
+++ b/services/nodemanager/arvnodeman/computenode/driver/azure.py
@@ -50,7 +50,7 @@ class ComputeNodeDriver(BaseComputeNodeDriver):
         pass
 
     def _init_image(self, urn):
-        return "image", self.list_images(ex_urn=urn)[0]
+        return "image", self.get_image(urn)
 
     def _init_ssh_key(self, filename):
         with open(filename) as ssh_file:

commit 5467329d6822455de4644a277f741068cf5f1ec9
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date:   Wed Aug 19 11:22:35 2015 -0400

    6507: Fix tests

diff --git a/services/nodemanager/tests/test_computenode_driver_azure.py b/services/nodemanager/tests/test_computenode_driver_azure.py
index 049e708..3b7cca2 100644
--- a/services/nodemanager/tests/test_computenode_driver_azure.py
+++ b/services/nodemanager/tests/test_computenode_driver_azure.py
@@ -32,7 +32,7 @@ class AzureComputeNodeDriverTestCase(testutil.DriverTestMixin, unittest.TestCase
         driver = self.new_driver(create_kwargs={'image': 'id_b'})
         self.assertEqual(1, list_method.call_count)
 
-    def test_create_includes_ping_url(self):
+    def test_create_includes_ping_and_hostname(self):
         arv_node = testutil.arvados_node_mock(info={'ping_secret': 'ssshh'})
         driver = self.new_driver()
         driver.create_node(testutil.MockSize(1), arv_node)
@@ -41,6 +41,8 @@ class AzureComputeNodeDriverTestCase(testutil.DriverTestMixin, unittest.TestCase
         print(create_method.call_args[1])
         self.assertIn('ping_secret=ssshh',
                       create_method.call_args[1].get('ex_tags', {}).get('arv-ping-url', ""))
+        self.assertEqual('compute99.zzzzz.arvadosapi.com',
+                      create_method.call_args[1].get('ex_tags', {}).get('hostname', ""))
 
     def test_name_from_new_arvados_node(self):
         arv_node = testutil.arvados_node_mock(hostname=None)
@@ -54,14 +56,6 @@ class AzureComputeNodeDriverTestCase(testutil.DriverTestMixin, unittest.TestCase
         self.assertIs(cloud_node, tag_mock.call_args[0][0])
         self.assertEqual(expected_tags, tag_mock.call_args[0][1])
 
-    def test_sync_node(self):
-        arv_node = testutil.arvados_node_mock(1)
-        cloud_node = testutil.cloud_node_mock(2)
-        driver = self.new_driver()
-        driver.sync_node(cloud_node, arv_node)
-        self.check_node_tagged(cloud_node,
-                               {'hostname': 'compute1.zzzzz.arvadosapi.com'})
-
     def test_node_create_time(self):
         refsecs = int(time.time())
         reftuple = time.gmtime(refsecs)

commit 0600e45775658866f624b87efeef6a1067db5c39
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date:   Wed Aug 19 11:19:45 2015 -0400

    6507: Set hostname tag in arvados_create_kwargs instead of sync_node.

diff --git a/services/nodemanager/arvnodeman/computenode/driver/azure.py b/services/nodemanager/arvnodeman/computenode/driver/azure.py
index 725020e..dd2b636 100644
--- a/services/nodemanager/arvnodeman/computenode/driver/azure.py
+++ b/services/nodemanager/arvnodeman/computenode/driver/azure.py
@@ -38,6 +38,7 @@ class ComputeNodeDriver(BaseComputeNodeDriver):
         tags = {
             'booted_at': time.strftime(ARVADOS_TIMEFMT, time.gmtime()),
             'arv-ping-url': self._make_ping_url(arvados_node),
+            'hostname': arvados_node_fqdn(arvados_node)
         }
         tags.update(self.tags)
         return {
@@ -46,8 +47,7 @@ class ComputeNodeDriver(BaseComputeNodeDriver):
         }
 
     def sync_node(self, cloud_node, arvados_node):
-        hostname = arvados_node_fqdn(arvados_node)
-        self.real.ex_create_tags(cloud_node, {"hostname": hostname})
+        pass
 
     def _init_image(self, urn):
         return "image", self.list_images(ex_urn=urn)[0]

commit 8552d32092e45a1f6ee1424e92882ec84b51cb8a
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date:   Wed Aug 19 11:13:34 2015 -0400

    6507: Paste in command line for creating service principals instead of just
    referencing generic documentation.

diff --git a/services/nodemanager/doc/azure.example.cfg b/services/nodemanager/doc/azure.example.cfg
index 6bfd28b..e92431f 100644
--- a/services/nodemanager/doc/azure.example.cfg
+++ b/services/nodemanager/doc/azure.example.cfg
@@ -88,18 +88,18 @@ provider = azure
 shutdown_windows = 20, 999999
 
 [Cloud Credentials]
-# Get these using "azure account list" with the azure CLI.
+# Use "azure account list" with the azure CLI to get these values.
 tenant_id = 00000000-0000-0000-0000-000000000000
 subscription_id = 00000000-0000-0000-0000-000000000000
 
-# Follow the directions on the following page to create an Active Directory
-# "service principal" that NodeManager will use to authenticate.  Note that the
-# role must be "Owner" to manipulate resources.
-#
+# The following directions are based on
 # https://azure.microsoft.com/en-us/documentation/articles/resource-group-authenticate-service-principal/
 #
-# The key is the "Application Id" and the secret is the password provided
-# when creating the service principal.
+# azure ad app create --name "<Your Application Display Name>" --home-page "<https://YourApplicationHomePage>" --identifier-uris "<https://YourApplicationUri>" --password <Your_Password>
+# azure ad sp create "<Application_Id>"
+# azure role assignment create --objectId "<Object_Id>" -o Owner -c /subscriptions/{subscriptionId}/
+#
+# Use <Application_Id> for "key" and the <Your_Password> for "secret"
 #
 key = 00000000-0000-0000-0000-000000000000
 secret = PASSWORD

commit 0fde046a6c68909ae25af809557fcd64eb7264d7
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date:   Wed Aug 19 11:07:53 2015 -0400

    6507: Added notes to sample azure configuration file

diff --git a/services/nodemanager/doc/azure.example.cfg b/services/nodemanager/doc/azure.example.cfg
index 8fad85d..6bfd28b 100644
--- a/services/nodemanager/doc/azure.example.cfg
+++ b/services/nodemanager/doc/azure.example.cfg
@@ -88,24 +88,56 @@ provider = azure
 shutdown_windows = 20, 999999
 
 [Cloud Credentials]
-subscription_id = SUBSCRIPTION_ID
-key_file = PATH_TO_PEM_FILE
+# Get these using "azure account list" with the azure CLI.
+tenant_id = 00000000-0000-0000-0000-000000000000
+subscription_id = 00000000-0000-0000-0000-000000000000
+
+# Follow the directions on the following page to create an Active Directory
+# "service principal" that NodeManager will use to authenticate.  Note that the
+# role must be "Owner" to manipulate resources.
+#
+# https://azure.microsoft.com/en-us/documentation/articles/resource-group-authenticate-service-principal/
+#
+# The key is the "Application Id" and the secret is the password provided
+# when creating the service principal.
+#
+key = 00000000-0000-0000-0000-000000000000
+secret = PASSWORD
 timeout = 60
+region = East US
 
 [Cloud List]
-# This section defines filters that find compute nodes.
-# Tags that you specify here will automatically be added to nodes you create.
-# Replace colons in Microsoft filters with underscores
-# (e.g., write "tag:mytag" as "tag_mytag").
-instance-state-name = running
+# The resource group in which the compute node virtual machines will be created
+# and listed.
+ex_resource_group = ArvadosResourceGroup
+
+[Cloud Create]
+# The image id, in the form "Publisher:Offer:SKU:Version"
+image = Canonical:UbuntuServer:14.04.3-LTS:14.04.201508050
+
+# Path to a local ssh key file that will be used to provision new nodes.
+ssh_key = /home/arvadosuser/.ssh/id_rsa.pub
+
+# The account name for the admin user that will be provisioned on new nodes.
+ex_user_name = arvadosuser
+
+# The Azure storage account that will be used to store the node OS disk images.
+ex_storage_account = arvadosstorage
+
+# The virtual network the VMs will be associated with.
+ex_network = ArvadosNetwork
+
+# Optional subnet of the virtual network.
+#ex_subnet = default
+
+# Node tags
 tag_arvados-class = dynamic-compute
 tag_cluster = zyxwv
 
-[Cloud Create]
-image: ???
-ex_cloud_service_name: ???
+# the API server to ping
+ping_host = hostname:port
 
-[Size A3]
+[Size Standard_A2]
 # You can define any number of Size sections to list Azure sizes you're
 # willing to use.  The Node Manager should boot the cheapest size(s) that
 # can run jobs in the queue (N.B.: defining more than one size has not been

commit 45a172b5f59ea7464b7241212464bf9113a18f36
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date:   Wed Aug 19 10:36:42 2015 -0400

    6507: Add basic azure driver tests.

diff --git a/services/nodemanager/arvnodeman/computenode/driver/azure.py b/services/nodemanager/arvnodeman/computenode/driver/azure.py
index ecab75c..725020e 100644
--- a/services/nodemanager/arvnodeman/computenode/driver/azure.py
+++ b/services/nodemanager/arvnodeman/computenode/driver/azure.py
@@ -19,7 +19,11 @@ class ComputeNodeDriver(BaseComputeNodeDriver):
 
     def __init__(self, auth_kwargs, list_kwargs, create_kwargs,
                  driver_class=DEFAULT_DRIVER):
-        list_kwargs["ex_resource_group"] = create_kwargs["ex_resource_group"]
+
+        if not list_kwargs.get("ex_resource_group"):
+            raise Exception("Must include ex_resource_group in Cloud List configuration (list_kwargs)")
+
+        create_kwargs["ex_resource_group"] = list_kwargs["ex_resource_group"]
 
         self.tags = {key[4:]: value
                      for key, value in create_kwargs.iteritems()
@@ -43,7 +47,7 @@ class ComputeNodeDriver(BaseComputeNodeDriver):
 
     def sync_node(self, cloud_node, arvados_node):
         hostname = arvados_node_fqdn(arvados_node)
-        self.real.ex_create_tags(cloud_node.id, {"hostname": hostname})
+        self.real.ex_create_tags(cloud_node, {"hostname": hostname})
 
     def _init_image(self, urn):
         return "image", self.list_images(ex_urn=urn)[0]
diff --git a/services/nodemanager/tests/test_computenode_driver_azure.py b/services/nodemanager/tests/test_computenode_driver_azure.py
new file mode 100644
index 0000000..049e708
--- /dev/null
+++ b/services/nodemanager/tests/test_computenode_driver_azure.py
@@ -0,0 +1,90 @@
+#!/usr/bin/env python
+
+from __future__ import absolute_import, print_function
+
+import ssl
+import time
+import unittest
+
+import libcloud.common.types as cloud_types
+import mock
+
+import arvnodeman.computenode.driver.azure as azure
+from . import testutil
+
+class AzureComputeNodeDriverTestCase(testutil.DriverTestMixin, unittest.TestCase):
+    TEST_CLASS = azure.ComputeNodeDriver
+
+    def new_driver(self, auth_kwargs={}, list_kwargs={}, create_kwargs={}):
+        list_kwargs.setdefault("ex_resource_group", "TestResourceGroup")
+        return super(AzureComputeNodeDriverTestCase, self).new_driver(auth_kwargs, list_kwargs, create_kwargs)
+
+    def test_driver_instantiation(self):
+        kwargs = {'key': 'testkey'}
+        driver = self.new_driver(auth_kwargs=kwargs)
+        self.assertTrue(self.driver_mock.called)
+        self.assertEqual(kwargs, self.driver_mock.call_args[1])
+
+    def test_create_image_loaded_at_initialization(self):
+        list_method = self.driver_mock().list_images
+        list_method.return_value = [testutil.cloud_object_mock(c)
+                                    for c in 'abc']
+        driver = self.new_driver(create_kwargs={'image': 'id_b'})
+        self.assertEqual(1, list_method.call_count)
+
+    def test_create_includes_ping_url(self):
+        arv_node = testutil.arvados_node_mock(info={'ping_secret': 'ssshh'})
+        driver = self.new_driver()
+        driver.create_node(testutil.MockSize(1), arv_node)
+        create_method = self.driver_mock().create_node
+        self.assertTrue(create_method.called)
+        print(create_method.call_args[1])
+        self.assertIn('ping_secret=ssshh',
+                      create_method.call_args[1].get('ex_tags', {}).get('arv-ping-url', ""))
+
+    def test_name_from_new_arvados_node(self):
+        arv_node = testutil.arvados_node_mock(hostname=None)
+        driver = self.new_driver()
+        self.assertEqual('compute-000000000000063-zzzzz',
+                         driver.arvados_create_kwargs(arv_node)['name'])
+
+    def check_node_tagged(self, cloud_node, expected_tags):
+        tag_mock = self.driver_mock().ex_create_tags
+        self.assertTrue(tag_mock.called)
+        self.assertIs(cloud_node, tag_mock.call_args[0][0])
+        self.assertEqual(expected_tags, tag_mock.call_args[0][1])
+
+    def test_sync_node(self):
+        arv_node = testutil.arvados_node_mock(1)
+        cloud_node = testutil.cloud_node_mock(2)
+        driver = self.new_driver()
+        driver.sync_node(cloud_node, arv_node)
+        self.check_node_tagged(cloud_node,
+                               {'hostname': 'compute1.zzzzz.arvadosapi.com'})
+
+    def test_node_create_time(self):
+        refsecs = int(time.time())
+        reftuple = time.gmtime(refsecs)
+        node = testutil.cloud_node_mock()
+        node.extra = {'tags': {'booted_at': time.strftime('%Y-%m-%dT%H:%M:%S.000Z',
+                                                   reftuple)}}
+        self.assertEqual(refsecs, azure.ComputeNodeDriver.node_start_time(node))
+
+    def test_node_fqdn(self):
+        name = 'fqdntest.zzzzz.arvadosapi.com'
+        node = testutil.cloud_node_mock()
+        node.extra = {'tags': {"hostname": name}}
+        self.assertEqual(name, azure.ComputeNodeDriver.node_fqdn(node))
+
+    def test_cloud_exceptions(self):
+        for error in [Exception("test exception"),
+                      IOError("test exception"),
+                      ssl.SSLError("test exception"),
+                      cloud_types.LibcloudError("test exception")]:
+            self.assertTrue(azure.ComputeNodeDriver.is_cloud_exception(error),
+                            "{} not flagged as cloud exception".format(error))
+
+    def test_noncloud_exceptions(self):
+        self.assertFalse(
+            azure.ComputeNodeDriver.is_cloud_exception(ValueError("test error")),
+            "ValueError flagged as cloud exception")

commit c46ab7c622127315b4e90c98e859588a3403267a
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date:   Tue Aug 18 17:23:14 2015 -0400

    Create, destroy, tag nodes.

diff --git a/services/nodemanager/arvnodeman/computenode/driver/azure.py b/services/nodemanager/arvnodeman/computenode/driver/azure.py
index c054fac..ecab75c 100644
--- a/services/nodemanager/arvnodeman/computenode/driver/azure.py
+++ b/services/nodemanager/arvnodeman/computenode/driver/azure.py
@@ -3,41 +3,67 @@
 from __future__ import absolute_import, print_function
 
 import time
+from operator import attrgetter
 
 import libcloud.compute.base as cloud_base
 import libcloud.compute.providers as cloud_provider
 import libcloud.compute.types as cloud_types
 
 from . import BaseComputeNodeDriver
-from .. import arvados_node_fqdn
+from .. import arvados_node_fqdn, arvados_timestamp, ARVADOS_TIMEFMT
 
 class ComputeNodeDriver(BaseComputeNodeDriver):
 
-    DEFAULT_DRIVER = cloud_provider.get_driver(cloud_types.Provider.AZURE)
+    DEFAULT_DRIVER = cloud_provider.get_driver(cloud_types.Provider.AZURE_ARM)
     SEARCH_CACHE = {}
 
     def __init__(self, auth_kwargs, list_kwargs, create_kwargs,
                  driver_class=DEFAULT_DRIVER):
+        list_kwargs["ex_resource_group"] = create_kwargs["ex_resource_group"]
+
+        self.tags = {key[4:]: value
+                     for key, value in create_kwargs.iteritems()
+                     if key.startswith('tag_')}
         super(ComputeNodeDriver, self).__init__(
             auth_kwargs, list_kwargs, create_kwargs,
             driver_class)
 
     def arvados_create_kwargs(self, arvados_node):
-        return {'name': arvados_node["uuid"]}
+        cluster_id, _, node_id = arvados_node['uuid'].split('-')
+        name = 'compute-{}-{}'.format(node_id, cluster_id)
+        tags = {
+            'booted_at': time.strftime(ARVADOS_TIMEFMT, time.gmtime()),
+            'arv-ping-url': self._make_ping_url(arvados_node),
+        }
+        tags.update(self.tags)
+        return {
+            'name': name,
+            'ex_tags': tags,
+        }
 
     def sync_node(self, cloud_node, arvados_node):
-        print("In sync_node")
+        hostname = arvados_node_fqdn(arvados_node)
+        self.real.ex_create_tags(cloud_node.id, {"hostname": hostname})
+
+    def _init_image(self, urn):
+        return "image", self.list_images(ex_urn=urn)[0]
 
-    def _init_image(self, image):
-        return 'image', self.search_for(image, 'list_images')
+    def _init_ssh_key(self, filename):
+        with open(filename) as ssh_file:
+            key = cloud_base.NodeAuthSSHKey(ssh_file.read())
+        return 'auth', key
 
-    def _init_password(self, password):
-        return 'auth', cloud_base.NodeAuthPassword(password)
+    def list_nodes(self):
+        # Azure only supports filtering node lists by resource group.
+        # Do our own filtering based on tag.
+        return [node for node in
+                super(ComputeNodeDriver, self).list_nodes()
+                if node.extra["tags"].get("arvados-class") == self.tags["arvados-class"]]
 
     @classmethod
     def node_fqdn(cls, node):
-        return node.name
+        return node.extra["tags"].get("hostname")
 
     @classmethod
     def node_start_time(cls, node):
-        pass
+        return arvados_timestamp(node.extra["tags"].get("booted_at"))

commit d5341150545efd0960acf34186ca18b98a1b1860
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date:   Fri Aug 14 13:48:23 2015 -0400

    6507: Initial commit

diff --git a/services/nodemanager/arvnodeman/computenode/driver/__init__.py b/services/nodemanager/arvnodeman/computenode/driver/__init__.py
index 042f6a5..16134a2 100644
--- a/services/nodemanager/arvnodeman/computenode/driver/__init__.py
+++ b/services/nodemanager/arvnodeman/computenode/driver/__init__.py
@@ -9,6 +9,8 @@ from libcloud.compute.base import NodeDriver
 
 from ...config import NETWORK_ERRORS
 
+import pprint
+
 class BaseComputeNodeDriver(object):
     """Abstract base class for compute node drivers.
 
@@ -56,7 +58,7 @@ class BaseComputeNodeDriver(object):
     def _init_ping_host(self, ping_host):
         self.ping_host = ping_host
 
-    def search_for(self, term, list_method, key=attrgetter('id')):
+    def search_for(self, term, list_method, key=attrgetter('id'), **kwargs):
         """Return one matching item from a list of cloud objects.
 
         Raises ValueError if the number of matching objects is not exactly 1.
@@ -71,7 +73,8 @@ class BaseComputeNodeDriver(object):
         """
         cache_key = (list_method, term)
         if cache_key not in self.SEARCH_CACHE:
-            results = [item for item in getattr(self.real, list_method)()
+            items = getattr(self.real, list_method)(**kwargs)
+            results = [item for item in items
                        if key(item) == term]
             count = len(results)
             if count != 1:
diff --git a/services/nodemanager/arvnodeman/computenode/driver/azure.py b/services/nodemanager/arvnodeman/computenode/driver/azure.py
new file mode 100644
index 0000000..c054fac
--- /dev/null
+++ b/services/nodemanager/arvnodeman/computenode/driver/azure.py
@@ -0,0 +1,43 @@
+#!/usr/bin/env python
+
+from __future__ import absolute_import, print_function
+
+import time
+
+import libcloud.compute.base as cloud_base
+import libcloud.compute.providers as cloud_provider
+import libcloud.compute.types as cloud_types
+
+from . import BaseComputeNodeDriver
+from .. import arvados_node_fqdn
+
+class ComputeNodeDriver(BaseComputeNodeDriver):
+
+    DEFAULT_DRIVER = cloud_provider.get_driver(cloud_types.Provider.AZURE)
+    SEARCH_CACHE = {}
+
+    def __init__(self, auth_kwargs, list_kwargs, create_kwargs,
+                 driver_class=DEFAULT_DRIVER):
+        super(ComputeNodeDriver, self).__init__(
+            auth_kwargs, list_kwargs, create_kwargs,
+            driver_class)
+
+    def arvados_create_kwargs(self, arvados_node):
+        return {'name': arvados_node["uuid"]}
+
+    def sync_node(self, cloud_node, arvados_node):
+        print("In sync_node")
+
+    def _init_image(self, image):
+        return 'image', self.search_for(image, 'list_images')
+
+    def _init_password(self, password):
+        return 'auth', cloud_base.NodeAuthPassword(password)
+
+    @classmethod
+    def node_fqdn(cls, node):
+        return node.name
+
+    @classmethod
+    def node_start_time(cls, node):
+        pass
diff --git a/services/nodemanager/doc/azure.example.cfg b/services/nodemanager/doc/azure.example.cfg
new file mode 100644
index 0000000..8fad85d
--- /dev/null
+++ b/services/nodemanager/doc/azure.example.cfg
@@ -0,0 +1,119 @@
+# Azure configuration for Arvados Node Manager.
+# All times are in seconds unless specified otherwise.
+
+[Daemon]
+# The dispatcher can customize the start and stop procedure for
+# cloud nodes.  For example, the SLURM dispatcher drains nodes
+# through SLURM before shutting them down.
+#dispatcher = slurm
+
+# Node Manager will ensure that there are at least this many nodes
+# running at all times.
+min_nodes = 0
+
+# Node Manager will not start any compute nodes when at least this
+# many are running.
+max_nodes = 8
+
+# Poll Azure nodes and Arvados for new information every N seconds.
+poll_time = 60
+
+# Polls have exponential backoff when services fail to respond.
+# This is the longest time to wait between polls.
+max_poll_time = 300
+
+# If Node Manager can't successfully poll a service for this long,
+# it will never start or stop compute nodes, on the assumption that its
+# information is too outdated.
+poll_stale_after = 600
+
+# If Node Manager boots a cloud node, and it does not pair with an Arvados
+# node before this long, assume that there was a cloud bootstrap failure and
+# shut it down.  Note that normal shutdown windows apply (see the Cloud
+# section), so this should be shorter than the first shutdown window value.
+boot_fail_after = 1800
+
+# "Node stale time" affects two related behaviors.
+# 1. If a compute node has been running for at least this long, but it
+# isn't paired with an Arvados node, do not shut it down, but leave it alone.
+# This prevents the node manager from shutting down a node that might
+# actually be doing work, but is having temporary trouble contacting the
+# API server.
+# 2. When the Node Manager starts a new compute node, it will try to reuse
+# an Arvados node that hasn't been updated for this long.
+node_stale_after = 14400
+
+# File path for Certificate Authorities
+certs_file = /etc/ssl/certs/ca-certificates.crt
+
+[Logging]
+# Log file path
+file = /var/log/arvados/node-manager.log
+
+# Log level for most Node Manager messages.
+# Choose one of DEBUG, INFO, WARNING, ERROR, or CRITICAL.
+# WARNING lets you know when polling a service fails.
+# INFO additionally lets you know when a compute node is started or stopped.
+level = INFO
+
+# You can also set different log levels for specific libraries.
+# Pykka is the Node Manager's actor library.
+# Setting this to DEBUG will display tracebacks for uncaught
+# exceptions in the actors, but it's also very chatty.
+pykka = WARNING
+
+# Setting apiclient to INFO will log the URL of every Arvados API request.
+apiclient = WARNING
+
+[Arvados]
+host = zyxwv.arvadosapi.com
+token = ARVADOS_TOKEN
+timeout = 15
+
+# Accept an untrusted SSL certificate from the API server?
+insecure = no
+
+[Cloud]
+provider = azure
+
+# Shutdown windows define periods of time when a node may and may not be shut
+# down.  These are windows in full minutes, separated by commas.  Counting from
+# the time the node is booted, the node WILL NOT shut down for N1 minutes; then
+# it MAY shut down for N2 minutes; then it WILL NOT shut down for N3 minutes;
+# and so on.  For example, "20, 999999" means the node may shut down between
+# the 20th and 999999th minutes of uptime.
+# Azure bills by the minute, so it makes sense to aggressively shut down idle
+# nodes.  Specify at least two windows.  You can add as many as you need beyond
+# that.
+shutdown_windows = 20, 999999
+
+[Cloud Credentials]
+subscription_id = SUBSCRIPTION_ID
+key_file = PATH_TO_PEM_FILE
+timeout = 60
+
+[Cloud List]
+# This section defines filters that find compute nodes.
+# Tags that you specify here will automatically be added to nodes you create.
+# Replace colons in Microsoft filters with underscores
+# (e.g., write "tag:mytag" as "tag_mytag").
+instance-state-name = running
+tag_arvados-class = dynamic-compute
+tag_cluster = zyxwv
+
+[Cloud Create]
+image: ???
+ex_cloud_service_name: ???
+
+[Size A3]
+# You can define any number of Size sections to list Azure sizes you're
+# willing to use.  The Node Manager should boot the cheapest size(s) that
+# can run jobs in the queue (N.B.: defining more than one size has not been
+# tested yet).
+# Each size section MUST define the number of cores available in this
+# size class (since libcloud does not provide any consistent API for exposing
+# this setting).
+# You may also want to define the amount of scratch space (expressed
+# in GB) for Crunch jobs.  You can also override Microsoft's provided
+# data fields by setting the same names here.
+cores = 4
diff --git a/services/nodemanager/setup.py b/services/nodemanager/setup.py
index d9fcbcf..502690b 100644
--- a/services/nodemanager/setup.py
+++ b/services/nodemanager/setup.py
@@ -25,7 +25,7 @@ setup(name='arvados-node-manager',
       license='GNU Affero General Public License, version 3.0',
       packages=find_packages(),
       install_requires=[
-        'apache-libcloud>=0.16',
+        'apache-libcloud>=0.18',
         'arvados-python-client>=0.1.20150206225333',
         'pykka',
         'python-daemon',

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list