[ARVADOS] created: fc6a133423d79b224adff0e666b9da594524b460

Git user git at public.curoverse.com
Thu Feb 25 09:08:09 EST 2016


        at  fc6a133423d79b224adff0e666b9da594524b460 (commit)


commit fc6a133423d79b224adff0e666b9da594524b460
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date:   Thu Feb 25 09:08:01 2016 -0500

    8437: Add FullStopActor which uses os.killpg() to terminate node manager on_failure.  Added test.

diff --git a/services/nodemanager/arvnodeman/config.py b/services/nodemanager/arvnodeman/config.py
index dd45165..dcfe1ce 100644
--- a/services/nodemanager/arvnodeman/config.py
+++ b/services/nodemanager/arvnodeman/config.py
@@ -12,13 +12,15 @@ import httplib2
 import pykka
 from apiclient import errors as apierror
 
+from .fullstopactor import FullStopActor
+
 # IOError is the base class for socket.error, ssl.SSLError, and friends.
 # It seems like it hits the sweet spot for operations we want to retry:
 # it's low-level, but unlikely to catch code bugs.
 NETWORK_ERRORS = (IOError,)
 ARVADOS_ERRORS = NETWORK_ERRORS + (apierror.Error,)
 
-actor_class = pykka.ThreadingActor
+actor_class = FullStopActor
 
 class NodeManagerConfig(ConfigParser.SafeConfigParser):
     """Node Manager Configuration class.
diff --git a/services/nodemanager/arvnodeman/fullstopactor.py b/services/nodemanager/arvnodeman/fullstopactor.py
new file mode 100644
index 0000000..07e0625
--- /dev/null
+++ b/services/nodemanager/arvnodeman/fullstopactor.py
@@ -0,0 +1,17 @@
+from __future__ import absolute_import, print_function
+
+import errno
+import logging
+import os
+import threading
+import traceback
+
+import pykka
+
+class FullStopActor(pykka.ThreadingActor):
+    def on_failure(self, exception_type, exception_value, tb):
+        lg = getattr(self, "_logger", logging)
+        if (exception_type in (threading.ThreadError, MemoryError) or
+            exception_type is OSError and exception_value.errno == errno.ENOMEM):
+            lg.critical("Unhandled exception is a fatal error, killing Node Manager")
+            os.killpg(os.getpgid(0), 9)

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list