[ARVADOS] created: 9b4000637a9315fd15d8b506381a713243bd1699
git at public.curoverse.com
git at public.curoverse.com
Thu Jan 21 14:52:54 EST 2016
at 9b4000637a9315fd15d8b506381a713243bd1699 (commit)
commit 9b4000637a9315fd15d8b506381a713243bd1699
Author: Tom Clegg <tom at curoverse.com>
Date: Thu Jan 21 14:35:34 2016 -0500
8281: Fix KeepClient retry bugs.
get() and put() were both handling all Curl exceptions -- including
timeouts -- by marking the keep service as unusable. For example, if a
single proxy is the only service available, a single timeout was
fatal. This is fixed by setting the retry loop status to None instead
of False after curl exceptions.
put() was repeating its retry loop until it achieved the desired
number of replicas _in a single iteration_. For example, when trying
to store 2 replicas, 6 loop iterations with a single success in each
iteration would result in 6 copies being stored but put() declaring
failure. This is fixed by checking against a cumulative "done" counter
instead of the "copies done in this loop iteration" counter.
diff --git a/sdk/python/arvados/keep.py b/sdk/python/arvados/keep.py
index 4fa8a4f..6264b19 100644
--- a/sdk/python/arvados/keep.py
+++ b/sdk/python/arvados/keep.py
@@ -379,6 +379,7 @@ class KeepClient(object):
url = self.root + str(locator)
_logger.debug("Request: GET %s", url)
curl = self._get_user_agent()
+ ok = None
try:
with timer.Timer() as t:
self._headers = {}
@@ -410,7 +411,6 @@ class KeepClient(object):
self._result = {
'error': e,
}
- ok = False
self._usable = ok != False
if self._result.get('status_code', None):
# The client worked well enough to get an HTTP status
@@ -445,6 +445,7 @@ class KeepClient(object):
url = self.root + hash_s
_logger.debug("Request: PUT %s", url)
curl = self._get_user_agent()
+ ok = None
try:
with timer.Timer() as t:
self._headers = {}
@@ -486,7 +487,6 @@ class KeepClient(object):
self._result = {
'error': e,
}
- ok = False
self._usable = ok != False # still usable if ok is True or None
if self._result.get('status_code', None):
# Client is functional. See comment in get().
@@ -1018,6 +1018,7 @@ class KeepClient(object):
roots_map = {}
loop = retry.RetryLoop(num_retries, self._check_loop_result,
backoff_start=2)
+ done = 0
for tries_left in loop:
try:
sorted_roots = self.map_new_services(
@@ -1046,7 +1047,8 @@ class KeepClient(object):
threads.append(t)
for t in threads:
t.join()
- loop.save_result((thread_limiter.done() >= copies, len(threads)))
+ done += thread_limiter.done()
+ loop.save_result(done >= copies, len(threads)))
if loop.success():
return thread_limiter.response()
diff --git a/sdk/python/tests/test_keep_client.py b/sdk/python/tests/test_keep_client.py
index 2b380f0..9a0fe80 100644
--- a/sdk/python/tests/test_keep_client.py
+++ b/sdk/python/tests/test_keep_client.py
@@ -852,6 +852,10 @@ class KeepClientRetryTestMixin(object):
with self.TEST_PATCHER(self.DEFAULT_EXPECT, 500, 200):
self.check_success(num_retries=3)
+ def test_exception_then_success(self):
+ with self.TEST_PATCHER(self.DEFAULT_EXPECT, Exception('mock err'), 200):
+ self.check_success(num_retries=3)
+
def test_no_default_retry(self):
with self.TEST_PATCHER(self.DEFAULT_EXPECT, 500, 200):
self.check_exception()
@@ -928,3 +932,38 @@ class KeepClientRetryPutTestCase(KeepClientRetryTestMixin, unittest.TestCase):
def test_do_not_send_multiple_copies_to_same_server(self):
with tutil.mock_keep_responses(self.DEFAULT_EXPECT, 200):
self.check_exception(copies=2, num_retries=3)
+
+
+ at tutil.skip_sleep
+class RetryNeedsMultipleServices(unittest.TestCase, tutil.ApiClientMock):
+ # Test put()s that need two distinct servers to succeed, possibly
+ # requiring multiple passes through the retry loop.
+
+ def setUp(self):
+ self.api_client = self.mock_keep_services(count=2)
+ self.keep_client = arvados.KeepClient(api_client=self.api_client)
+
+ def test_success_after_exception(self):
+ with tutil.mock_keep_responses(
+ 'acbd18db4cc2f85cedef654fccc4a4d8+3',
+ Exception('mock err'), 200, 200) as req_mock:
+ self.keep_client.put('foo', num_retries=1, copies=2)
+ self.assertTrue(3, req_mock.call_count)
+
+ def test_success_after_retryable_error(self):
+ with tutil.mock_keep_responses(
+ 'acbd18db4cc2f85cedef654fccc4a4d8+3',
+ 500, 200, 200) as req_mock:
+ self.keep_client.put('foo', num_retries=1, copies=2)
+ self.assertTrue(3, req_mock.call_count)
+
+ def test_fail_after_final_error(self):
+ # First retry loop gets a 200 (can't achieve replication by
+ # storing again on that server) and a 400 (can't retry that
+ # server at all), so we shouldn't try a third request.
+ with tutil.mock_keep_responses(
+ 'acbd18db4cc2f85cedef654fccc4a4d8+3',
+ 200, 400, 200) as req_mock:
+ with self.assertRaises(arvados.errors.KeepWriteError):
+ self.keep_client.put('foo', num_retries=1, copies=2)
+ self.assertTrue(2, req_mock.call_count)
diff --git a/services/fuse/tests/test_retry.py b/services/fuse/tests/test_retry.py
index f6c0807..b46ba78 100644
--- a/services/fuse/tests/test_retry.py
+++ b/services/fuse/tests/test_retry.py
@@ -1,11 +1,17 @@
import arvados
import arvados_fuse.command
+import json
import mock
import os
+import pycurl
+import Queue
import run_test_server
import tempfile
import unittest
+from .integration_test import IntegrationTest
+
+
class KeepClientRetry(unittest.TestCase):
origKeepClient = arvados.keep.KeepClient
@@ -33,3 +39,22 @@ class KeepClientRetry(unittest.TestCase):
def test_no_retry(self):
self._test_retry(0, ['--retries=0'])
+
+class RetryPUT(IntegrationTest):
+ @mock.patch('time.sleep')
+ @IntegrationTest.mount(argv=['--read-write', '--mount-tmp=zzz'])
+ def test_retry_write(self, sleep):
+ mockedCurl = mock.Mock(spec=pycurl.Curl(), wraps=pycurl.Curl())
+ mockedCurl.perform.side_effect = Exception('mock error (ok)')
+ q = Queue.Queue()
+ q.put(mockedCurl)
+ q.put(pycurl.Curl())
+ q.put(pycurl.Curl())
+ with mock.patch('arvados.keep.KeepClient.KeepService._get_user_agent', side_effect=lambda: q.get(block=None)):
+ self.pool_test(os.path.join(self.mnt, 'zzz'))
+ self.assertTrue(mockedCurl.perform.called)
+ @staticmethod
+ def _test_retry_write(self, tmp):
+ with open(os.path.join(tmp, 'foo'), 'w') as f:
+ f.write('foo')
+ json.load(open(os.path.join(tmp, '.arvados#collection')))
commit 4ae033c520ef9d305ac6f1c633a8d98c9852d873
Author: Tom Clegg <tom at curoverse.com>
Date: Thu Jan 21 04:01:16 2016 -0500
8281: Fix arv-mount ignoring --retries argument when writing file data.
"num_retries" arguments get passed around extensively in arvfile.py
and collection.py in the Python SDK, but ultimately the writing of
file data is done by a _BlockManager which doesn't have any way to
accept that argument or pass it along to a KeepClient, so PUT requests
always use the CollectionWriter's KeepClient's default num_retries.
In arv-mount's case, we have been telling CollectionWriter the
num_retries we want. When CollectionWriter creates a KeepClient,
num_retries gets passed along -- normally this works around the fact
that num_retries gets lost by the _BlockManager layer. However, we
provided our own KeepClient to use instead of letting CollectionWriter
create one, and we forgot to set num_retries on our own KeepClient, so
we weren't retrying PUT requests.
diff --git a/services/fuse/arvados_fuse/command.py b/services/fuse/arvados_fuse/command.py
index cd81a53..71623a5 100644
--- a/services/fuse/arvados_fuse/command.py
+++ b/services/fuse/arvados_fuse/command.py
@@ -151,7 +151,8 @@ class Mount(object):
self.api = arvados.safeapi.ThreadSafeApiCache(
apiconfig=arvados.config.settings(),
keep_params={
- "block_cache": arvados.keep.KeepBlockCache(self.args.file_cache)
+ 'block_cache': arvados.keep.KeepBlockCache(self.args.file_cache),
+ 'num_retries': self.args.retries,
})
# Do a sanity check that we have a working arvados host + token.
self.api.users().current().execute()
diff --git a/services/fuse/arvados_fuse/fusedir.py b/services/fuse/arvados_fuse/fusedir.py
index a2135b3..7e627ef 100644
--- a/services/fuse/arvados_fuse/fusedir.py
+++ b/services/fuse/arvados_fuse/fusedir.py
@@ -493,7 +493,8 @@ class TmpCollectionDirectory(CollectionDirectoryBase):
def __init__(self, parent_inode, inodes, api_client, num_retries):
collection = self.UnsaveableCollection(
api_client=api_client,
- keep_client=api_client.keep)
+ keep_client=api_client.keep,
+ num_retries=num_retries)
super(TmpCollectionDirectory, self).__init__(
parent_inode, inodes, collection)
self.collection_record_file = None
diff --git a/services/fuse/tests/test_retry.py b/services/fuse/tests/test_retry.py
new file mode 100644
index 0000000..f6c0807
--- /dev/null
+++ b/services/fuse/tests/test_retry.py
@@ -0,0 +1,35 @@
+import arvados
+import arvados_fuse.command
+import mock
+import os
+import run_test_server
+import tempfile
+import unittest
+
+class KeepClientRetry(unittest.TestCase):
+ origKeepClient = arvados.keep.KeepClient
+
+ def setUp(self):
+ self.mnt = tempfile.mkdtemp()
+ run_test_server.authorize_with('active')
+
+ def tearDown(self):
+ os.rmdir(self.mnt)
+
+ @mock.patch('arvados_fuse.arvados.keep.KeepClient')
+ def _test_retry(self, num_retries, argv, kc):
+ kc.side_effect = lambda *args, **kw: self.origKeepClient(*args, **kw)
+ with arvados_fuse.command.Mount(
+ arvados_fuse.command.ArgumentParser().parse_args(
+ argv+[self.mnt])):
+ pass
+ self.assertEqual(num_retries, kc.call_args[1].get('num_retries'))
+
+ def test_default_retry_3(self):
+ self._test_retry(3, [])
+
+ def test_retry_2(self):
+ self._test_retry(2, ['--retries=2'])
+
+ def test_no_retry(self):
+ self._test_retry(0, ['--retries=0'])
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list