[ARVADOS] created: fc6a133423d79b224adff0e666b9da594524b460
Git user
git at public.curoverse.com
Thu Feb 25 09:08:09 EST 2016
at fc6a133423d79b224adff0e666b9da594524b460 (commit)
commit fc6a133423d79b224adff0e666b9da594524b460
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date: Thu Feb 25 09:08:01 2016 -0500
8437: Add FullStopActor which uses os.killpg() to terminate node manager on_failure. Added test.
diff --git a/services/nodemanager/arvnodeman/config.py b/services/nodemanager/arvnodeman/config.py
index dd45165..dcfe1ce 100644
--- a/services/nodemanager/arvnodeman/config.py
+++ b/services/nodemanager/arvnodeman/config.py
@@ -12,13 +12,15 @@ import httplib2
import pykka
from apiclient import errors as apierror
+from .fullstopactor import FullStopActor
+
# IOError is the base class for socket.error, ssl.SSLError, and friends.
# It seems like it hits the sweet spot for operations we want to retry:
# it's low-level, but unlikely to catch code bugs.
NETWORK_ERRORS = (IOError,)
ARVADOS_ERRORS = NETWORK_ERRORS + (apierror.Error,)
-actor_class = pykka.ThreadingActor
+actor_class = FullStopActor
class NodeManagerConfig(ConfigParser.SafeConfigParser):
"""Node Manager Configuration class.
diff --git a/services/nodemanager/arvnodeman/fullstopactor.py b/services/nodemanager/arvnodeman/fullstopactor.py
new file mode 100644
index 0000000..07e0625
--- /dev/null
+++ b/services/nodemanager/arvnodeman/fullstopactor.py
@@ -0,0 +1,17 @@
+from __future__ import absolute_import, print_function
+
+import errno
+import logging
+import os
+import threading
+import traceback
+
+import pykka
+
+class FullStopActor(pykka.ThreadingActor):
+ def on_failure(self, exception_type, exception_value, tb):
+ lg = getattr(self, "_logger", logging)
+ if (exception_type in (threading.ThreadError, MemoryError) or
+ exception_type is OSError and exception_value.errno == errno.ENOMEM):
+ lg.critical("Unhandled exception is a fatal error, killing Node Manager")
+ os.killpg(os.getpgid(0), 9)
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits mailing list