[ARVADOS] updated: 053de78cd0599647dfc40bff3252d2f17d959217

git at public.curoverse.com git at public.curoverse.com
Wed Oct 7 09:09:04 EDT 2015


Summary of changes:
 services/keepstore/azure_blob_volume.go            | 243 ++++++++++++++
 services/keepstore/azure_blob_volume_test.go       | 353 +++++++++++++++++++++
 services/keepstore/collision.go                    |  35 ++
 services/keepstore/handler_test.go                 |  16 +
 services/keepstore/handlers.go                     |  38 +--
 .../keepstore/handlers_with_generic_volume_test.go |   6 +-
 services/keepstore/keepstore.go                    | 101 +-----
 services/keepstore/keepstore_test.go               |  30 +-
 services/keepstore/pull_worker.go                  |   2 +-
 services/keepstore/volume.go                       |   5 +
 services/keepstore/volume_generic_test.go          |  44 ++-
 services/keepstore/volume_test.go                  |   4 +
 services/keepstore/volume_unix.go                  | 135 ++++++--
 .../nodemanager/arvnodeman/computenode/__init__.py |  13 +
 .../arvnodeman/computenode/dispatch/__init__.py    |  21 +-
 .../arvnodeman/computenode/dispatch/slurm.py       |   8 +-
 .../arvnodeman/computenode/driver/__init__.py      |   4 +
 .../arvnodeman/computenode/driver/azure.py         |   7 +
 services/nodemanager/arvnodeman/daemon.py          |  18 +-
 services/nodemanager/setup.py                      |   4 +-
 .../nodemanager/tests/test_computenode_dispatch.py |  51 ++-
 .../tests/test_computenode_dispatch_slurm.py       |   2 +-
 services/nodemanager/tests/test_daemon.py          |  66 +++-
 23 files changed, 992 insertions(+), 214 deletions(-)
 create mode 100644 services/keepstore/azure_blob_volume.go
 create mode 100644 services/keepstore/azure_blob_volume_test.go

  discards  ca2421b9d76782171c17abb2583bead4b7b6b821 (commit)
       via  053de78cd0599647dfc40bff3252d2f17d959217 (commit)
       via  05b52b297b30d075ef2409a123f7d096c1156cf8 (commit)
       via  1f8d81d0eeda07c3cedcaad3e942ec8dedd461cb (commit)
       via  f81f84e19902e37c28fd1610999cfefa1c4a0b71 (commit)
       via  2c07efe6ac7455059f2fccd558ea796f9c315e19 (commit)
       via  d5f5f869d46f9096c7c680d608c1cc654d1d7fa0 (commit)
       via  11df73b96ae395fca11b4006253475046e3b74cc (commit)
       via  2e919859109fe27d552b81b13d47aed61e80eca6 (commit)
       via  be81c03a3c26f365eba35b91e4f0827244a02ef7 (commit)
       via  c0f33379c7fd062fc097ecef92808334e821cb6b (commit)
       via  f6aa7c0c8c84b85b550d73117c6fdbd663a38c4c (commit)
       via  560c318fdc49835b03f96af35774fbbfa7984fe7 (commit)
       via  46b11ba2ed71e2c074e9e6c8f5b9f7a003e7067f (commit)
       via  e9f437d9e590cc37ada8534401d254bd5e0a5e85 (commit)
       via  72e3566f2cdacd44f095183ebf88f7aab8b0d8dc (commit)
       via  852eadc79b7103b3889eed53a851a1c26c4daeab (commit)
       via  4eac79ebafa9b7979bbd295c2da85acbb3981bac (commit)
       via  b7f7878f8f0648ba5a53e24abb109ce9ad59bfc3 (commit)
       via  b2bcd45082d2df2b5a17645eb60473cc17c76e88 (commit)
       via  da74a60c2d276ed8612f138d73e73787f450ea2e (commit)
       via  96c3fcd2d013af7747f20fea55f460ca2d2dd637 (commit)
       via  109cd685ecbfb5b685347731340c6dd69e630617 (commit)
       via  8626abb0a44cfc303bef3552a7bc57163c79231a (commit)

This update added new revisions after undoing existing revisions.  That is
to say, the old revision is not a strict subset of the new revision.  This
situation occurs when you --force push a change and generate a repository
containing something like this:

 * -- * -- B -- O -- O -- O (ca2421b9d76782171c17abb2583bead4b7b6b821)
            \
             N -- N -- N (053de78cd0599647dfc40bff3252d2f17d959217)

When this happens, we assume that you have already received alert emails
for all of the O revisions, so this email reports only the revisions on
the N branch, starting from the common base, B.

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.


commit 053de78cd0599647dfc40bff3252d2f17d959217
Author: Brett Smith <brett at curoverse.com>
Date:   Fri Oct 2 11:07:27 2015 -0400

    7435: Node Manager stops trying to shut down delisted cloud nodes.
    
    If the underlying node is gone, trying to destroy it in the cloud will
    almost certainly fail.  It's hard to predict what will happen to
    related actions like draining the node in SLURM.  Just cancel the
    attempt, and trust other systems like SLURM and Crunch to deal with
    the disappearance on their own.

diff --git a/services/nodemanager/arvnodeman/daemon.py b/services/nodemanager/arvnodeman/daemon.py
index ddddd41..1d52073 100644
--- a/services/nodemanager/arvnodeman/daemon.py
+++ b/services/nodemanager/arvnodeman/daemon.py
@@ -182,9 +182,14 @@ class NodeManagerDaemonActor(actor_class):
                     self._pair_nodes(record, arv_rec.arvados_node)
                     break
         for key, record in self.cloud_nodes.orphans.iteritems():
+            if key in self.shutdowns:
+                try:
+                    self.shutdowns[key].stop().get()
+                except pykka.ActorDeadError:
+                    pass
+                del self.shutdowns[key]
             record.actor.stop()
             record.cloud_node = None
-            self.shutdowns.pop(key, None)
 
     def update_arvados_nodes(self, nodelist):
         self._update_poll_time('arvados_nodes')
diff --git a/services/nodemanager/tests/test_daemon.py b/services/nodemanager/tests/test_daemon.py
index 0206f4c..16f5604 100644
--- a/services/nodemanager/tests/test_daemon.py
+++ b/services/nodemanager/tests/test_daemon.py
@@ -505,3 +505,26 @@ class NodeManagerDaemonActorTestCase(testutil.ActorTestMixin,
         self.timer.deliver()
         self.stop_proxy(self.daemon)
         self.assertEqual(1, self.node_setup.start.call_count)
+
+    def test_shutdown_actor_stopped_when_cloud_node_delisted(self):
+        self.make_daemon(cloud_nodes=[testutil.cloud_node_mock()])
+        self.assertEqual(1, self.alive_monitor_count())
+        monitor = self.monitor_list()[0].proxy()
+        self.daemon.node_can_shutdown(monitor).get(self.TIMEOUT)
+        self.daemon.update_cloud_nodes([]).get(self.TIMEOUT)
+        self.stop_proxy(self.daemon)
+        self.assertEqual(
+            1, self.node_shutdown.start().proxy().stop().get.call_count)
+
+    def test_shutdown_actor_cleanup_copes_with_dead_actors(self):
+        self.make_daemon(cloud_nodes=[testutil.cloud_node_mock()])
+        self.assertEqual(1, self.alive_monitor_count())
+        monitor = self.monitor_list()[0].proxy()
+        self.daemon.node_can_shutdown(monitor).get(self.TIMEOUT)
+        # We're mainly testing that update_cloud_nodes catches and handles
+        # the ActorDeadError.
+        stop_method = self.node_shutdown.start().proxy().stop().get
+        stop_method.side_effect = pykka.ActorDeadError
+        self.daemon.update_cloud_nodes([]).get(self.TIMEOUT)
+        self.stop_proxy(self.daemon)
+        self.assertEqual(1, stop_method.call_count)

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list