[ARVADOS] updated: 787fdb3943a9189486fc0ad95a460180a2469e31

git at public.curoverse.com git at public.curoverse.com
Thu Jan 21 16:14:09 EST 2016


Summary of changes:
 apps/workbench/fpm-info.sh                         |   1 +
 doc/install/install-keepstore.html.textile.liquid  |   2 +
 sdk/go/blockdigest/blockdigest.go                  |   2 +-
 sdk/go/blockdigest/blockdigest_test.go             |   7 +-
 sdk/go/crunchrunner/upload.go                      |  12 +-
 sdk/go/keepclient/collectionreader.go              |   4 +
 sdk/go/manifest/manifest.go                        |  43 +-
 sdk/go/manifest/manifest_test.go                   |  16 +-
 sdk/python/arvados/keep.py                         |   4 +-
 services/api/fpm-info.sh                           |  10 +
 services/api/test/fixtures/containers.yml          |  25 +
 services/crunch-dispatch-local/.gitignore          |   1 +
 .../crunch-dispatch-local/crunch-dispatch-local.go | 218 +++++++
 .../crunch-dispatch-local_test.go                  | 159 +++++
 services/crunch-run/crunchrun.go                   | 433 ++++++++++++++
 services/crunch-run/crunchrun_test.go              | 659 +++++++++++++++++++++
 services/crunch-run/logging.go                     | 201 +++++++
 services/crunch-run/logging_test.go                |  98 +++
 services/crunch-run/upload.go                      | 193 ++++++
 services/fuse/arvados_fuse/fusedir.py              |   7 +
 20 files changed, 2060 insertions(+), 35 deletions(-)
 create mode 100644 apps/workbench/fpm-info.sh
 create mode 100644 services/api/fpm-info.sh
 create mode 100644 services/api/test/fixtures/containers.yml
 create mode 100644 services/crunch-dispatch-local/.gitignore
 create mode 100644 services/crunch-dispatch-local/crunch-dispatch-local.go
 create mode 100644 services/crunch-dispatch-local/crunch-dispatch-local_test.go
 create mode 100644 services/crunch-run/crunchrun.go
 create mode 100644 services/crunch-run/crunchrun_test.go
 create mode 100644 services/crunch-run/logging.go
 create mode 100644 services/crunch-run/logging_test.go
 create mode 100644 services/crunch-run/upload.go

  discards  9b4000637a9315fd15d8b506381a713243bd1699 (commit)
  discards  4ae033c520ef9d305ac6f1c633a8d98c9852d873 (commit)
       via  787fdb3943a9189486fc0ad95a460180a2469e31 (commit)
       via  de9e183ceac9e5dc53cd4fbd089b752a75a4d5cf (commit)
       via  b73a5d296669ae58c8cd5a7d2e1aedd19f0c0029 (commit)
       via  44ff73fa397095d69819761e66933783a5f6d541 (commit)
       via  97fe5c7710eb3a61757618df774447839b317ecb (commit)
       via  d9b71fc2f2aa903a57a7ee083e5e92432f517bb7 (commit)
       via  e6be21fbf15765ba1ad380fa94de3d2b333407cf (commit)
       via  e13caf8cda321b07ce2bbe6e68d0e07c1d856a58 (commit)
       via  c62ae0ad70080b1217dc478b4921441013d21db4 (commit)
       via  6c3a694c75b069aa7f1792ce28c18c0471d981c2 (commit)
       via  ec3deb3aee4bab253882ab7d83c062a208a72644 (commit)
       via  c95847885fd858916dac6d8a7c4f0b364f9db340 (commit)
       via  f87bda0e69ec38a7485f250328c32643c5587fd8 (commit)
       via  4c1281bba5d3e01d677165b6e2fa7d9209e233b5 (commit)
       via  179d0ce2912bde1f88e22087386842adcfe361ac (commit)
       via  e4b791cbe3536f08fbb5df10cf8de11c2d816e04 (commit)
       via  bc9845761c44beecdd046620694f6d88af0e32fd (commit)
       via  aed1bd10f559fd5428bc1c259d324aa78a2de511 (commit)
       via  ef3e45fcc338a85432c207685567385972f79ee6 (commit)
       via  8cb032bf7b9a8db6095d027d4088c4841c07d3f7 (commit)
       via  0568e6de6a234ea25a3654ac4a68ad4b45a99ba9 (commit)
       via  eff4b3c943e8c85242f75f09cd6c8a81b9b86309 (commit)
       via  6bcd3f15fd664e1d3b7200e77c1a09f94c06054f (commit)
       via  24209ad2d22dffd191d2acc34d3487b99924c030 (commit)
       via  5d986feae8c88fa4e15e14f185a10c8d7e822594 (commit)
       via  343fffd1284fa47e58b45d0852075d9ef6695c79 (commit)
       via  f7115688cc3a63b4721ca59259bc8276685bac9a (commit)
       via  c152ad446aaa61efde38cba91307cc8c29c4667f (commit)
       via  0a46b75ac1e96a6f5ce3ae797eb4306b352fab36 (commit)

This update added new revisions after undoing existing revisions.  That is
to say, the old revision is not a strict subset of the new revision.  This
situation occurs when you --force push a change and generate a repository
containing something like this:

 * -- * -- B -- O -- O -- O (9b4000637a9315fd15d8b506381a713243bd1699)
            \
             N -- N -- N (787fdb3943a9189486fc0ad95a460180a2469e31)

When this happens we assume that you've already had alert emails for all
of the O revisions, and so we here report only the revisions in the N
branch from the common base, B.

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.


commit 787fdb3943a9189486fc0ad95a460180a2469e31
Author: Tom Clegg <tom at curoverse.com>
Date:   Thu Jan 21 16:10:11 2016 -0500

    8281: Limit # write threads to #copies remaining, not #copies total.

diff --git a/sdk/python/arvados/keep.py b/sdk/python/arvados/keep.py
index 965c5d5..cd39f83 100644
--- a/sdk/python/arvados/keep.py
+++ b/sdk/python/arvados/keep.py
@@ -1029,7 +1029,7 @@ class KeepClient(object):
                 continue
 
             thread_limiter = KeepClient.ThreadLimiter(
-                copies, self.max_replicas_per_service)
+                copies - done, self.max_replicas_per_service)
             threads = []
             for service_root, ks in [(root, roots_map[root])
                                      for root in sorted_roots]:

commit de9e183ceac9e5dc53cd4fbd089b752a75a4d5cf
Author: Tom Clegg <tom at curoverse.com>
Date:   Thu Jan 21 14:35:34 2016 -0500

    8281: Fix KeepClient retry bugs.
    
    get() and put() were both handling all Curl exceptions -- including
    timeouts -- by marking the keep service as unusable. For example, if a
    single proxy is the only service available, a single timeout was
    fatal. This is fixed by setting the retry loop status to None instead
    of False after curl exceptions.
    
    put() was repeating its retry loop until it achieved the desired
    number of replicas _in a single iteration_. For example, when trying
    to store 2 replicas, 6 loop iterations with a single success in each
    iteration would result in 6 copies being stored but put() declaring
    failure. This is fixed by checking against a cumulative "done" counter
    instead of the "copies done in this loop iteration" counter.

diff --git a/sdk/python/arvados/keep.py b/sdk/python/arvados/keep.py
index 4fa8a4f..965c5d5 100644
--- a/sdk/python/arvados/keep.py
+++ b/sdk/python/arvados/keep.py
@@ -379,6 +379,7 @@ class KeepClient(object):
             url = self.root + str(locator)
             _logger.debug("Request: GET %s", url)
             curl = self._get_user_agent()
+            ok = None
             try:
                 with timer.Timer() as t:
                     self._headers = {}
@@ -410,7 +411,6 @@ class KeepClient(object):
                 self._result = {
                     'error': e,
                 }
-                ok = False
             self._usable = ok != False
             if self._result.get('status_code', None):
                 # The client worked well enough to get an HTTP status
@@ -445,6 +445,7 @@ class KeepClient(object):
             url = self.root + hash_s
             _logger.debug("Request: PUT %s", url)
             curl = self._get_user_agent()
+            ok = None
             try:
                 with timer.Timer() as t:
                     self._headers = {}
@@ -486,7 +487,6 @@ class KeepClient(object):
                 self._result = {
                     'error': e,
                 }
-                ok = False
             self._usable = ok != False # still usable if ok is True or None
             if self._result.get('status_code', None):
                 # Client is functional. See comment in get().
@@ -1018,6 +1018,7 @@ class KeepClient(object):
         roots_map = {}
         loop = retry.RetryLoop(num_retries, self._check_loop_result,
                                backoff_start=2)
+        done = 0
         for tries_left in loop:
             try:
                 sorted_roots = self.map_new_services(
@@ -1046,7 +1047,8 @@ class KeepClient(object):
                 threads.append(t)
             for t in threads:
                 t.join()
-            loop.save_result((thread_limiter.done() >= copies, len(threads)))
+            done += thread_limiter.done()
+            loop.save_result((done >= copies, len(threads)))
 
         if loop.success():
             return thread_limiter.response()
diff --git a/sdk/python/tests/test_keep_client.py b/sdk/python/tests/test_keep_client.py
index 2b380f0..9a0fe80 100644
--- a/sdk/python/tests/test_keep_client.py
+++ b/sdk/python/tests/test_keep_client.py
@@ -852,6 +852,10 @@ class KeepClientRetryTestMixin(object):
         with self.TEST_PATCHER(self.DEFAULT_EXPECT, 500, 200):
             self.check_success(num_retries=3)
 
+    def test_exception_then_success(self):
+        with self.TEST_PATCHER(self.DEFAULT_EXPECT, Exception('mock err'), 200):
+            self.check_success(num_retries=3)
+
     def test_no_default_retry(self):
         with self.TEST_PATCHER(self.DEFAULT_EXPECT, 500, 200):
             self.check_exception()
@@ -928,3 +932,38 @@ class KeepClientRetryPutTestCase(KeepClientRetryTestMixin, unittest.TestCase):
     def test_do_not_send_multiple_copies_to_same_server(self):
         with tutil.mock_keep_responses(self.DEFAULT_EXPECT, 200):
             self.check_exception(copies=2, num_retries=3)
+
+
+ at tutil.skip_sleep
+class RetryNeedsMultipleServices(unittest.TestCase, tutil.ApiClientMock):
+    # Test put()s that need two distinct servers to succeed, possibly
+    # requiring multiple passes through the retry loop.
+
+    def setUp(self):
+        self.api_client = self.mock_keep_services(count=2)
+        self.keep_client = arvados.KeepClient(api_client=self.api_client)
+
+    def test_success_after_exception(self):
+        with tutil.mock_keep_responses(
+                'acbd18db4cc2f85cedef654fccc4a4d8+3',
+                Exception('mock err'), 200, 200) as req_mock:
+            self.keep_client.put('foo', num_retries=1, copies=2)
+        self.assertTrue(3, req_mock.call_count)
+
+    def test_success_after_retryable_error(self):
+        with tutil.mock_keep_responses(
+                'acbd18db4cc2f85cedef654fccc4a4d8+3',
+                500, 200, 200) as req_mock:
+            self.keep_client.put('foo', num_retries=1, copies=2)
+        self.assertTrue(3, req_mock.call_count)
+
+    def test_fail_after_final_error(self):
+        # First retry loop gets a 200 (can't achieve replication by
+        # storing again on that server) and a 400 (can't retry that
+        # server at all), so we shouldn't try a third request.
+        with tutil.mock_keep_responses(
+                'acbd18db4cc2f85cedef654fccc4a4d8+3',
+                200, 400, 200) as req_mock:
+            with self.assertRaises(arvados.errors.KeepWriteError):
+                self.keep_client.put('foo', num_retries=1, copies=2)
+        self.assertTrue(2, req_mock.call_count)
diff --git a/services/fuse/tests/test_retry.py b/services/fuse/tests/test_retry.py
index f6c0807..b46ba78 100644
--- a/services/fuse/tests/test_retry.py
+++ b/services/fuse/tests/test_retry.py
@@ -1,11 +1,17 @@
 import arvados
 import arvados_fuse.command
+import json
 import mock
 import os
+import pycurl
+import Queue
 import run_test_server
 import tempfile
 import unittest
 
+from .integration_test import IntegrationTest
+
+
 class KeepClientRetry(unittest.TestCase):
     origKeepClient = arvados.keep.KeepClient
 
@@ -33,3 +39,22 @@ class KeepClientRetry(unittest.TestCase):
 
     def test_no_retry(self):
         self._test_retry(0, ['--retries=0'])
+
+class RetryPUT(IntegrationTest):
+    @mock.patch('time.sleep')
+    @IntegrationTest.mount(argv=['--read-write', '--mount-tmp=zzz'])
+    def test_retry_write(self, sleep):
+        mockedCurl = mock.Mock(spec=pycurl.Curl(), wraps=pycurl.Curl())
+        mockedCurl.perform.side_effect = Exception('mock error (ok)')
+        q = Queue.Queue()
+        q.put(mockedCurl)
+        q.put(pycurl.Curl())
+        q.put(pycurl.Curl())
+        with mock.patch('arvados.keep.KeepClient.KeepService._get_user_agent', side_effect=lambda: q.get(block=None)):
+            self.pool_test(os.path.join(self.mnt, 'zzz'))
+            self.assertTrue(mockedCurl.perform.called)
+    @staticmethod
+    def _test_retry_write(self, tmp):
+        with open(os.path.join(tmp, 'foo'), 'w') as f:
+            f.write('foo')
+        json.load(open(os.path.join(tmp, '.arvados#collection')))

commit b73a5d296669ae58c8cd5a7d2e1aedd19f0c0029
Author: Tom Clegg <tom at curoverse.com>
Date:   Thu Jan 21 04:01:16 2016 -0500

    8281: Fix arv-mount ignoring --retries argument when writing file data.
    
    "num_retries" arguments get passed around extensively in arvfile.py
    and collection.py in the Python SDK, but ultimately the writing of
    file data is done by a _BlockManager which doesn't have any way to
    accept that argument or pass it along to a KeepClient, so PUT requests
    always use the CollectionWriter's KeepClient's default num_retries.
    
    In arv-mount's case, we have been telling CollectionWriter the
    num_retries we want. When CollectionWriter creates a KeepClient,
    num_retries gets passed along -- normally this works around the fact
    that num_retries gets lost by the _BlockManager layer. However, we
    provided our own KeepClient to use instead of letting CollectionWriter
    create one, and we forgot to set num_retries on our own KeepClient, so
    we weren't retrying PUT requests.

diff --git a/services/fuse/arvados_fuse/command.py b/services/fuse/arvados_fuse/command.py
index cd81a53..71623a5 100644
--- a/services/fuse/arvados_fuse/command.py
+++ b/services/fuse/arvados_fuse/command.py
@@ -151,7 +151,8 @@ class Mount(object):
         self.api = arvados.safeapi.ThreadSafeApiCache(
             apiconfig=arvados.config.settings(),
             keep_params={
-                "block_cache": arvados.keep.KeepBlockCache(self.args.file_cache)
+                'block_cache': arvados.keep.KeepBlockCache(self.args.file_cache),
+                'num_retries': self.args.retries,
             })
         # Do a sanity check that we have a working arvados host + token.
         self.api.users().current().execute()
diff --git a/services/fuse/arvados_fuse/fusedir.py b/services/fuse/arvados_fuse/fusedir.py
index 00efab7..093657e 100644
--- a/services/fuse/arvados_fuse/fusedir.py
+++ b/services/fuse/arvados_fuse/fusedir.py
@@ -500,7 +500,8 @@ class TmpCollectionDirectory(CollectionDirectoryBase):
     def __init__(self, parent_inode, inodes, api_client, num_retries):
         collection = self.UnsaveableCollection(
             api_client=api_client,
-            keep_client=api_client.keep)
+            keep_client=api_client.keep,
+            num_retries=num_retries)
         super(TmpCollectionDirectory, self).__init__(
             parent_inode, inodes, collection)
         self.collection_record_file = None
diff --git a/services/fuse/tests/test_retry.py b/services/fuse/tests/test_retry.py
new file mode 100644
index 0000000..f6c0807
--- /dev/null
+++ b/services/fuse/tests/test_retry.py
@@ -0,0 +1,35 @@
+import arvados
+import arvados_fuse.command
+import mock
+import os
+import run_test_server
+import tempfile
+import unittest
+
+class KeepClientRetry(unittest.TestCase):
+    origKeepClient = arvados.keep.KeepClient
+
+    def setUp(self):
+        self.mnt = tempfile.mkdtemp()
+        run_test_server.authorize_with('active')
+
+    def tearDown(self):
+        os.rmdir(self.mnt)
+
+    @mock.patch('arvados_fuse.arvados.keep.KeepClient')
+    def _test_retry(self, num_retries, argv, kc):
+        kc.side_effect = lambda *args, **kw: self.origKeepClient(*args, **kw)
+        with arvados_fuse.command.Mount(
+                arvados_fuse.command.ArgumentParser().parse_args(
+                    argv+[self.mnt])):
+            pass
+        self.assertEqual(num_retries, kc.call_args[1].get('num_retries'))
+
+    def test_default_retry_3(self):
+        self._test_retry(3, [])
+
+    def test_retry_2(self):
+        self._test_retry(2, ['--retries=2'])
+
+    def test_no_retry(self):
+        self._test_retry(0, ['--retries=0'])

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list