[ARVADOS] updated: dca92e413653e0906179ec0ab929693c8514df23
git at public.curoverse.com
git at public.curoverse.com
Sun Aug 24 11:51:42 EDT 2014
Summary of changes:
sdk/python/arvados/keep.py | 58 +++++++++--------
sdk/python/arvados/retry.py | 45 ++++---------
sdk/python/tests/test_keep_client.py | 6 +-
sdk/python/tests/test_retry.py | 120 ++++++++++++++---------------------
4 files changed, 96 insertions(+), 133 deletions(-)
via dca92e413653e0906179ec0ab929693c8514df23 (commit)
via 4f749c94687df66b31bba57547eee3c674a021a7 (commit)
from 66eb1f645adc591318f1d857242730d48e5a1b3f (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
commit dca92e413653e0906179ec0ab929693c8514df23
Author: Brett Smith <brett at curoverse.com>
Date: Sun Aug 24 11:49:00 2014 -0400
3147: Fixup KeepClient's RetryLoop use.
Switch to RetryLoop, and use the loop end logic better.
diff --git a/sdk/python/arvados/keep.py b/sdk/python/arvados/keep.py
index 4a4998a..b9aa9db 100644
--- a/sdk/python/arvados/keep.py
+++ b/sdk/python/arvados/keep.py
@@ -514,7 +514,7 @@ class KeepClient(object):
finally:
self._cache_lock.release()
- def add_new_services(self, roots_map, md5_s, force_rebuild, **headers):
+ def map_new_services(self, roots_map, md5_s, force_rebuild, **headers):
# roots_map is a dictionary, mapping Keep service root strings
# to KeepService objects. Poll for Keep services, and add any
# new ones to roots_map. Return the current list of local
@@ -526,6 +526,24 @@ class KeepClient(object):
roots_map[root] = self.KeepService(root, **headers)
return local_roots
+ @staticmethod
+ def _check_loop_result(result):
+ # KeepClient RetryLoops should save results as a 2-tuple: the
+ # actual result of the request, and the number of servers that
+ # received the request.
+ # This method returns True if there's a real result, False if
+ # there are no more servers receiving the request, otherwise None.
+ if isinstance(result, Exception):
+ return None
+ result, tried_server_count = result
+ if (result is not None) and (result is not False):
+ return True
+ elif tried_server_count < 1:
+ _logger.info("No more Keep services to try; giving up")
+ return False
+ else:
+ return None
+
def get(self, loc_s, num_retries=0):
"""Get data from Keep.
@@ -565,34 +583,28 @@ class KeepClient(object):
# Map root URLs their KeepService objects.
roots_map = {root: self.KeepService(root) for root in hint_roots}
blob = None
- loop = retry.HTTPRetryLoop(num_retries,
- lambda r: None if blob is None else True)
+ loop = retry.RetryLoop(num_retries, self._check_loop_result,
+ backoff_start=2)
for tries_left in loop:
try:
- local_roots = self.add_new_services(
+ local_roots = self.map_new_services(
roots_map, expect_hash,
force_rebuild=(tries_left < num_retries))
except Exception as error:
loop.save_result(error)
continue
- # Build an ordered list of KeepService objects that haven't
- # returned permanent failure.
+ # Query KeepService objects that haven't returned
+ # permanent failure, in our specified shuffle order.
services_to_try = [roots_map[root]
for root in (local_roots + hint_roots)
if roots_map[root].usable()]
- if not services_to_try:
- _logger.info(
- "All Keep services for %s have permafailed; giving up",
- loc_s)
- break
-
http = httplib2.Http(timeout=self.timeout)
for keep_service in services_to_try:
blob = keep_service.get(http, locator)
if blob is not None:
- loop.save_result(blob)
break
+ loop.save_result((blob, len(services_to_try)))
# Always cache the result, then return it if we succeeded.
slot.set(blob)
@@ -605,7 +617,7 @@ class KeepClient(object):
# Not Found; otherwise a generic error.
not_founds = sum(1 for ks in roots_map.values()
if ks.last_status() in set([403, 404, 410]))
- if (float(not_founds) / len(roots_map)) >= .75:
+ if roots_map and ((float(not_founds) / len(roots_map)) >= .75):
raise arvados.errors.NotFoundError(loc_s)
else:
raise arvados.errors.KeepReadError(loc_s)
@@ -633,17 +645,15 @@ class KeepClient(object):
headers = {}
if self.using_proxy:
- # We're using a proxy, so tell the proxy how many copies we
- # want it to store
+ # Tell the proxy how many copies we want it to store
headers['X-Keep-Desired-Replication'] = str(copies)
roots_map = {}
thread_limiter = KeepClient.ThreadLimiter(copies)
- loop = retry.HTTPRetryLoop(
- num_retries,
- lambda r: True if (thread_limiter.done() >= copies) else None)
+ loop = retry.RetryLoop(num_retries, self._check_loop_result,
+ backoff_start=2)
for tries_left in loop:
try:
- local_roots = self.add_new_services(
+ local_roots = self.map_new_services(
roots_map, data_hash,
force_rebuild=(tries_left < num_retries), **headers)
except Exception as error:
@@ -663,15 +673,9 @@ class KeepClient(object):
timeout=self.timeout)
t.start()
threads.append(t)
-
- if not threads:
- _logger.info(
- "All Keep services for %s have finished; giving up",
- data_hash)
- break
for t in threads:
t.join()
- loop.save_result(None)
+ loop.save_result((thread_limiter.done() >= copies, len(threads)))
if loop.success():
return thread_limiter.response()
diff --git a/sdk/python/tests/test_keep_client.py b/sdk/python/tests/test_keep_client.py
index 3476069..4ac9df1 100644
--- a/sdk/python/tests/test_keep_client.py
+++ b/sdk/python/tests/test_keep_client.py
@@ -283,6 +283,7 @@ no_backoff = mock.patch('time.sleep', lambda n: None)
class KeepClientRetryGetTestCase(unittest.TestCase, KeepClientRetryTestMixin):
DEFAULT_EXPECT = KeepClientRetryTestMixin.TEST_DATA
DEFAULT_EXCEPTION = arvados.errors.KeepReadError
+ HINTED_LOCATOR = KeepClientRetryTestMixin.TEST_LOCATOR + '+K at xyzzy'
def run_method(self, locator=KeepClientRetryTestMixin.TEST_LOCATOR,
*args, **kwargs):
@@ -298,17 +299,16 @@ class KeepClientRetryGetTestCase(unittest.TestCase, KeepClientRetryTestMixin):
# This test rigs up 50/50 disagreement between two servers, and
# checks that it does not become a NotFoundError.
client = self.new_client()
- client.service_roots = [self.PROXY_ADDR, self.PROXY_ADDR]
with self.mock_responses(self.DEFAULT_EXPECT, 404, 500):
with self.assertRaises(arvados.errors.KeepReadError) as exc_check:
- client.get(self.TEST_LOCATOR)
+ client.get(self.HINTED_LOCATOR)
self.assertNotIsInstance(
exc_check.exception, arvados.errors.NotFoundError,
"mixed errors raised NotFoundError")
def test_hint_server_can_succeed_without_retries(self):
with self.mock_responses(self.DEFAULT_EXPECT, 404, 200, 500):
- self.check_success(locator=self.TEST_LOCATOR + '+K at xyzzy')
+ self.check_success(locator=self.HINTED_LOCATOR)
@no_backoff
commit 4f749c94687df66b31bba57547eee3c674a021a7
Author: Brett Smith <brett at curoverse.com>
Date: Sun Aug 24 11:25:29 2014 -0400
3147: Fixup RetryLoop.
Having HTTPRetryLoop separate is unnecessary.
diff --git a/sdk/python/arvados/retry.py b/sdk/python/arvados/retry.py
index f5edc75..5dc31ae 100644
--- a/sdk/python/arvados/retry.py
+++ b/sdk/python/arvados/retry.py
@@ -28,7 +28,7 @@ class RetryLoop(object):
return loop.last_result()
"""
def __init__(self, num_retries, success_check=lambda r: True,
- save_results=1):
+ backoff_start=0, backoff_growth=2, save_results=1):
"""Construct a new RetryLoop.
Arguments:
@@ -40,12 +40,19 @@ class RetryLoop(object):
represents a permanent failure state, and None if the loop
should continue. If no function is provided, the loop will
end as soon as it records any result.
+ * backoff_start: The number of seconds that must pass before the
+ loop's second iteration. Default 0, which disables all waiting.
+ * backoff_growth: The wait time multiplier after each iteration.
+ Default 2 (i.e., double the wait time each time).
* save_results: Specify a number to save the last N results
that the loop recorded. These records are available through
the results attribute, oldest first. Default 1.
"""
self.tries_left = num_retries + 1
self.check_result = success_check
+ self.backoff_wait = backoff_start
+ self.backoff_growth = backoff_growth
+ self.next_start_time = 0
self.results = deque(maxlen=save_results)
self._running = None
self._success = None
@@ -62,6 +69,11 @@ class RetryLoop(object):
if (self.tries_left < 1) or not self.running():
self._running = False
raise StopIteration
+ else:
+ wait_time = max(0, self.next_start_time - time.time())
+ time.sleep(wait_time)
+ self.backoff_wait *= self.backoff_growth
+ self.next_start_time = time.time() + self.backoff_wait
self.tries_left -= 1
return self.tries_left
@@ -126,34 +138,3 @@ def check_http_response_success(result):
return False
else:
return None # Get well soon, server.
-
-class HTTPRetryLoop(RetryLoop):
- """Coordinate limited retries of HTTP requests.
-
- This RetryLoop uses check_http_response_success as the default
- success check, and provides exponential backoff between
- iterations.
- """
- def __init__(self, num_retries, success_check=check_http_response_success,
- backoff_start=1, backoff_growth=2, save_results=1):
- """Construct an HTTPRetryLoop.
-
- New arguments (see RetryLoop for others):
- * backoff_start: The number of seconds that must pass before the
- loop's second iteration. Default 1.
- * backoff_growth: The wait time multiplier after each iteration.
- Default 2 (i.e., double the wait time each time).
- """
- self.backoff_wait = backoff_start
- self.backoff_growth = backoff_growth
- self.next_start_time = 0
- super(HTTPRetryLoop, self).__init__(num_retries, success_check,
- save_results)
-
- def next(self):
- if self.running() and (self.tries_left > 0):
- wait_time = max(0, self.next_start_time - time.time())
- time.sleep(wait_time)
- self.backoff_wait *= self.backoff_growth
- self.next_start_time = time.time() + self.backoff_wait
- return super(HTTPRetryLoop, self).next()
diff --git a/sdk/python/tests/test_retry.py b/sdk/python/tests/test_retry.py
index d6e3d62..ed0a406 100644
--- a/sdk/python/tests/test_retry.py
+++ b/sdk/python/tests/test_retry.py
@@ -9,7 +9,7 @@ import mock
from arvados_testutil import fake_httplib2_response
-class RetryLoopTestCase(unittest.TestCase):
+class RetryLoopTestMixin(object):
@staticmethod
def loop_success(result):
# During the tests, we use integers that look like HTTP status
@@ -23,9 +23,10 @@ class RetryLoopTestCase(unittest.TestCase):
else:
return None
- def run_loop(self, num_retries, *results):
+ def run_loop(self, num_retries, *results, **kwargs):
responses = itertools.chain(results, itertools.repeat(None))
- retrier = arv_retry.RetryLoop(num_retries, self.loop_success)
+ retrier = arv_retry.RetryLoop(num_retries, self.loop_success,
+ **kwargs)
for tries_left, response in itertools.izip(retrier, responses):
retrier.save_result(response)
return retrier
@@ -35,6 +36,8 @@ class RetryLoopTestCase(unittest.TestCase):
"loop success flag is incorrect")
self.assertEqual(last_code, retrier.last_result())
+
+class RetryLoopTestCase(unittest.TestCase, RetryLoopTestMixin):
def test_zero_retries_and_success(self):
retrier = self.run_loop(0, 200)
self.check_result(retrier, True, 200)
@@ -94,79 +97,54 @@ class RetryLoopTestCase(unittest.TestCase):
self.assertRaises(arv_error.AssertionError, retrier.save_result, 1)
+ at mock.patch('time.time', side_effect=itertools.count())
@mock.patch('time.sleep')
-class HTTPRetryLoopTestCase(unittest.TestCase):
- def run_loop(self, num_retries, *codes, **kwargs):
- responses = itertools.chain(
- ((fake_httplib2_response(c), str(c)) for c in codes),
- itertools.repeat((None, None)))
- retrier = arv_retry.HTTPRetryLoop(num_retries, **kwargs)
- for tries_left, response in itertools.izip(retrier, responses):
- retrier.save_result(response)
- return retrier
-
- def check_result(self, retrier, expect_success, last_status,
- sleep_mock, sleep_count):
- self.assertIs(retrier.success(), expect_success,
- "loop success flag is incorrect")
- self.assertEqual(str(last_status), retrier.last_result()[1],
- "wrong loop result")
- self.assertEqual(sleep_count, sleep_mock.call_count,
+class RetryLoopBackoffTestCase(unittest.TestCase, RetryLoopTestMixin):
+ def run_loop(self, num_retries, *results, **kwargs):
+ kwargs.setdefault('backoff_start', 8)
+ return super(RetryLoopBackoffTestCase, self).run_loop(
+ num_retries, *results, **kwargs)
+
+ def check_backoff(self, sleep_mock, sleep_count, multiplier=1):
+ # Figure out how much time we actually spent sleeping.
+ sleep_times = [arglist[0][0] for arglist in sleep_mock.call_args_list
+ if arglist[0][0] > 0]
+ self.assertEqual(sleep_count, len(sleep_times),
"loop did not back off correctly")
-
- def sleep_times(self, sleep_mock):
- return (arglist[0][0] for arglist in sleep_mock.call_args_list)
-
- def check_backoff_growth(self, sleep_mock, multiplier=1):
- check = (self.assertGreater if (multiplier == 1)
- else self.assertGreaterEqual)
- sleep_times = self.sleep_times(sleep_mock)
- last_wait = next(sleep_times)
+ last_wait = 0
for this_wait in sleep_times:
- check(this_wait, last_wait * multiplier,
- "loop did not grow backoff times correctly")
+ self.assertGreater(this_wait, last_wait * multiplier,
+ "loop did not grow backoff times correctly")
last_wait = this_wait
- def test_no_retries_and_success(self, sleep_mock):
- retrier = self.run_loop(0, 200)
- self.check_result(retrier, True, 200, sleep_mock, 0)
-
- def test_no_retries_and_tempfail(self, sleep_mock):
- retrier = self.run_loop(0, 500, 200)
- self.check_result(retrier, None, 500, sleep_mock, 0)
-
- def test_no_retries_and_permfail(self, sleep_mock):
- retrier = self.run_loop(0, 400, 200)
- self.check_result(retrier, False, 400, sleep_mock, 0)
-
- def test_retries_with_immediate_success(self, sleep_mock):
- retrier = self.run_loop(3, 200, 500, 500)
- self.check_result(retrier, True, 200, sleep_mock, 0)
-
- def test_retries_with_delayed_success(self, sleep_mock):
- retrier = self.run_loop(3, 500, 500, 200, 502)
- self.check_result(retrier, True, 200, sleep_mock, 2)
- self.check_backoff_growth(sleep_mock)
-
- def test_retries_then_permfail(self, sleep_mock):
- retrier = self.run_loop(3, 500, 404, 200, 200)
- self.check_result(retrier, False, 404, sleep_mock, 1)
-
- def test_retries_all_tempfail(self, sleep_mock):
- retrier = self.run_loop(3, 502, 502, 502, 500, 200)
- self.check_result(retrier, None, 500, sleep_mock, 3)
- self.check_backoff_growth(sleep_mock)
-
- def test_backoff_parameters(self, sleep_mock):
- with mock.patch('time.time', side_effects=itertools.count):
- self.run_loop(3, 500, 500, 500, 500,
- backoff_start=5, backoff_growth=10)
- self.check_backoff_growth(sleep_mock, 10)
-
- def test_custom_success_check(self, mock):
- retrier = self.run_loop(3, 200, 777, 201, 202, 203,
- success_check=lambda r: r[1] == '777' or None)
- self.check_result(retrier, True, 777, mock, 1)
+ def test_no_backoff_with_no_retries(self, sleep_mock, time_mock):
+ self.run_loop(0, 500, 201)
+ self.check_backoff(sleep_mock, 0)
+
+ def test_no_backoff_after_success(self, sleep_mock, time_mock):
+ self.run_loop(1, 200, 501)
+ self.check_backoff(sleep_mock, 0)
+
+ def test_no_backoff_after_permfail(self, sleep_mock, time_mock):
+ self.run_loop(1, 400, 201)
+ self.check_backoff(sleep_mock, 0)
+
+ def test_backoff_before_success(self, sleep_mock, time_mock):
+ self.run_loop(5, 500, 501, 502, 203, 504)
+ self.check_backoff(sleep_mock, 3)
+
+ def test_backoff_before_permfail(self, sleep_mock, time_mock):
+ self.run_loop(5, 500, 501, 502, 403, 504)
+ self.check_backoff(sleep_mock, 3)
+
+ def test_backoff_all_tempfail(self, sleep_mock, time_mock):
+ self.run_loop(3, 500, 501, 502, 503, 504)
+ self.check_backoff(sleep_mock, 3)
+
+ def test_backoff_multiplier(self, sleep_mock, time_mock):
+ self.run_loop(5, 500, 501, 502, 503, 504, 505,
+ backoff_start=5, backoff_growth=10)
+ self.check_backoff(sleep_mock, 5, 9)
class CheckHTTPResponseSuccessTestCase(unittest.TestCase):
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list