[arvados] created: 2.6.0-116-g7acf7acdd

git repository hosting git at public.arvados.org
Thu May 4 21:43:43 UTC 2023


        at  7acf7acdd72bc6fa1f466f3a7cbe036224e3ac33 (commit)


commit 7acf7acdd72bc6fa1f466f3a7cbe036224e3ac33
Author: Brett Smith <brett.smith at curii.com>
Date:   Thu May 4 17:41:23 2023 -0400

    12684: PySDK client retries specific 4xx errors
    
    The rationale for retrying these codes is the same as for retrying them
    in the retry module.
    
    Arvados-DCO-1.1-Signed-off-by: Brett Smith <brett.smith at curii.com>

diff --git a/sdk/python/arvados/api.py b/sdk/python/arvados/api.py
index 2e33e0f2c..2ddce8e4c 100644
--- a/sdk/python/arvados/api.py
+++ b/sdk/python/arvados/api.py
@@ -32,6 +32,7 @@ from apiclient import discovery as apiclient_discovery
 from apiclient import errors as apiclient_errors
 from . import config
 from . import errors
+from . import retry
 from . import util
 from . import cache
 
@@ -46,6 +47,10 @@ RETRY_DELAY_INITIAL = 0
 RETRY_DELAY_BACKOFF = 0
 RETRY_COUNT = 0
 
+# An unused HTTP 5xx status code to request a retry internally.
+# See _intercept_http_request. This should not be user-visible.
+_RETRY_4XX_STATUS = 545
+
 if sys.version_info >= (3,):
     httplib2.SSLHandshakeError = None
 
@@ -76,10 +81,15 @@ def _retry_request(http, num_retries, *args, **kwargs):
     except AttributeError:
         # `http` client object does not have a `num_retries` attribute.
         # It apparently hasn't gone through _patch_http_request, possibly
-        # because this isn't an Arvados API client. We need to continue on to
+        # because this isn't an Arvados API client. Pass through to
         # avoid interfering with other Google API clients.
-        pass
-    return _orig_retry_request(http, num_retries, *args, **kwargs)
+        return _orig_retry_request(http, num_retries, *args, **kwargs)
+    response, body = _orig_retry_request(http, num_retries, *args, **kwargs)
+    # If _intercept_http_request ran out of retries for a 4xx response,
+    # restore the original status code.
+    if response.status == _RETRY_4XX_STATUS:
+        response.status = int(response['status'])
+    return (response, body)
 apiclient.http._retry_request = _retry_request
 
 def _intercept_http_request(self, uri, method="GET", headers={}, **kwargs):
@@ -103,9 +113,15 @@ def _intercept_http_request(self, uri, method="GET", headers={}, **kwargs):
 
         self._last_request_time = time.time()
         try:
-            return self.orig_http_request(uri, method, headers=headers, **kwargs)
+            response, body = self.orig_http_request(uri, method, headers=headers, **kwargs)
         except ssl.SSLCertVerificationError as e:
             raise ssl.SSLCertVerificationError(e.args[0], "Could not connect to %s\n%s\nPossible causes: remote SSL/TLS certificate expired, or was issued by an untrusted certificate authority." % (uri, e)) from None
+        # googleapiclient only retries 403, 429, and 5xx status codes.
+        # If we got another 4xx status that we want to retry, convert it into
+        # 5xx so googleapiclient handles it the way we want.
+        if response.status in retry._HTTP_CAN_RETRY and response.status < 500:
+            response.status = _RETRY_4XX_STATUS
+        return (response, body)
     except Exception as e:
         # Prepend "[request_id] " to the error message, which we
         # assume is the first string argument passed to the exception
diff --git a/sdk/python/tests/arvados_testutil.py b/sdk/python/tests/arvados_testutil.py
index 003565979..35e85d119 100644
--- a/sdk/python/tests/arvados_testutil.py
+++ b/sdk/python/tests/arvados_testutil.py
@@ -60,10 +60,10 @@ def mock_responses(body, *codes, **headers):
     return mock.patch('httplib2.Http.request', side_effect=queue_with((
         (fake_httplib2_response(code, **headers), body) for code in codes)))
 
-def mock_api_responses(api_client, body, codes, headers={}):
+def mock_api_responses(api_client, body, codes, headers={}, method='request'):
     if not isinstance(body, bytes) and hasattr(body, 'encode'):
         body = body.encode()
-    return mock.patch.object(api_client._http, 'request', side_effect=queue_with((
+    return mock.patch.object(api_client._http, method, side_effect=queue_with((
         (fake_httplib2_response(code, **headers), body) for code in codes)))
 
 def str_keep_locator(s):
diff --git a/sdk/python/tests/test_api.py b/sdk/python/tests/test_api.py
index 15fead7ad..b5c20296b 100644
--- a/sdk/python/tests/test_api.py
+++ b/sdk/python/tests/test_api.py
@@ -30,7 +30,7 @@ from arvados.api import (
     api_kwargs_from_config,
     OrderedJsonModel,
 )
-from .arvados_testutil import fake_httplib2_response, queue_with
+from .arvados_testutil import fake_httplib2_response, mock_api_responses, queue_with
 
 if not mimetypes.inited:
     mimetypes.init()
@@ -38,6 +38,7 @@ if not mimetypes.inited:
 class ArvadosApiTest(run_test_server.TestCaseWithServers):
     MAIN_SERVER = {}
     ERROR_HEADERS = {'Content-Type': mimetypes.types_map['.json']}
+    RETRIED_4XX = frozenset([408, 409, 422, 423])
 
     def api_error_response(self, code, *errors):
         return (fake_httplib2_response(code, **self.ERROR_HEADERS),
@@ -149,6 +150,38 @@ class ArvadosApiTest(run_test_server.TestCaseWithServers):
         self.assertEqual(api._http.timeout, 1234,
             "Requested timeout value was 1234")
 
+    def test_4xx_retried(self):
+        client = arvados.api('v1')
+        for code in self.RETRIED_4XX:
+            name = f'retried #{code}'
+            with self.subTest(name), mock.patch('time.sleep'):
+                expected = {'username': name}
+                with mock_api_responses(
+                        client,
+                        json.dumps(expected),
+                        [code, code, 200],
+                        self.ERROR_HEADERS,
+                        'orig_http_request',
+                ):
+                    actual = client.users().current().execute()
+                self.assertEqual(actual, expected)
+
+    def test_4xx_raised_after_retry_exhaustion(self):
+        client = arvados.api('v1', num_retries=1)
+        for code in self.RETRIED_4XX:
+            with self.subTest(f'failed {code}'), mock.patch('time.sleep'):
+                with mock_api_responses(
+                        client,
+                        b'{}',
+                        [code, code, code, 200],
+                        self.ERROR_HEADERS,
+                        'orig_http_request',
+                ), self.assertRaises(arvados.errors.ApiError) as exc_check:
+                    client.users().current().execute()
+                response = exc_check.exception.args[0]
+                self.assertEqual(response.status, code)
+                self.assertEqual(response.get('status'), str(code))
+
     def test_ordered_json_model(self):
         mock_responses = {
             'arvados.humans.get': (

commit 6a9db2982b45d9dc38ae9251c1495e8f543487e7
Author: Brett Smith <brett.smith at curii.com>
Date:   Thu May 4 16:21:08 2023 -0400

    12684: Support num_retries in PySDK client constructors
    
    This lets users set their preferred retry strategy once, rather than in
    every call to execute(), which is error-prone. The default num_retries
    is 10 because we expect most users to care more about eventual success
    than responsiveness. See the added release notes for further discussion
    and rationale.
    
    Changes to the rest of the code are mostly about supporting this
    consistently. Tests that relied on the old no-default-num_retries
    behavior now specify that explicitly.
    
    Arvados-DCO-1.1-Signed-off-by: Brett Smith <brett.smith at curii.com>

diff --git a/doc/admin/upgrading.html.textile.liquid b/doc/admin/upgrading.html.textile.liquid
index 5c76f534a..6ff346983 100644
--- a/doc/admin/upgrading.html.textile.liquid
+++ b/doc/admin/upgrading.html.textile.liquid
@@ -32,6 +32,17 @@ h2(#main). development main (as of 2023-04-18)
 
 "previous: Upgrading to 2.6.1":#v2_6_1
 
+h3. Python SDK automatically retries failed requests much more
+
+The Python SDK has always provided functionality to retry API requests that fail due to temporary problems like network failures, by passing @num_retries=N@ to a request's @execute()@ method. In this release, API client constructor functions like @arvados.api@ also accept a @num_retries@ argument. This value is stored on the client object and used as a floor for all API requests made with this client. This allows developers to set their preferred retry strategy once, without having to pass it to each @execute()@ call.
+
+The default value for @num_retries@ in API constructor functions is 10. This means that an API request that repeatedly encounters temporary problems may spend up to about 35 minutes retrying in the worst case. We believe this is an appropriate default for most users, where eventual success is a much greater concern than responsiveness. If you have client applications where this is undesirable, update them to pass a lower @num_retries@ value to the constructor function. You can even pass @num_retries=0@ to have the API client act as it did before, like this:
+
+{% codeblock as python %}
+import arvados
+arv_client = arvados.api('v1', num_retries=0, ...)
+{% endcodeblock %}
+
 h3. UseAWSS3v2Driver option removed
 
 The old "v1" S3 driver for keepstore has been removed. The new "v2" implementation, which has been the default since Arvados 2.5.0, is always used. The @Volumes.*.DriverParameters.UseAWSS3v2Driver@ configuration key is no longer recognized. If your config file uses it, remove it to avoid warning messages at startup.
diff --git a/doc/sdk/python/api-client.html.textile.liquid b/doc/sdk/python/api-client.html.textile.liquid
index 020c0fc62..dabd2d37f 100644
--- a/doc/sdk/python/api-client.html.textile.liquid
+++ b/doc/sdk/python/api-client.html.textile.liquid
@@ -46,7 +46,7 @@ The API client has a method that corresponds to each "type of resource supported
 
 Each resource object has a method that corresponds to each API method supported by that resource type. You call these methods with the keyword arguments and values documented in the API reference. They return an API request object.
 
-Each API request object has an @execute()@ method. You may pass a @num_retries@ integer argument to retry the operation that many times, with exponential back-off, in case of temporary errors like network problems. If it ultimately succeeds, it returns the kind of object documented in the API reference for that method. Usually that's a dictionary with details about the object you requested. If there's a problem, it raises an exception.
+Each API request object has an @execute()@ method. If it succeeds, it returns the kind of object documented in the API reference for that method. Usually that's a dictionary with details about the object you requested. If there's a problem, it raises an exception.
 
 Putting it all together, basic API requests usually look like:
 
@@ -54,10 +54,19 @@ Putting it all together, basic API requests usually look like:
 arv_object = arv_client.resource_type().api_method(
     argument=...,
     other_argument=...,
-).execute(num_retries=3)
+).execute()
 {% endcodeblock %}
 
-The following sections detail how to call "common resource methods in the API":{{site.baseurl}}/api/methods.html with more concrete examples. Additional methods may be available on specific resource types.
+Later sections detail how to call "common resource methods in the API":{{site.baseurl}}/api/methods.html with more concrete examples. Additional methods may be available on specific resource types.
+
+h3. Retrying failed requests
+
+If you execute an API request and it fails because of a temporary error like a network problem, the SDK waits with randomized exponential back-off, then retries the request. You can specify the maximum number of retries by passing a @num_retries@ integer to either @arvados.api@ or the @execute()@ method; the SDK will use whichever number is greater. The default number of retries is 10, which means that an API request could take up to about 35 minutes if the temporary problem persists that long. To disable automatic retries, just pass @num_retries=0@ to @arvados.api@:
+
+{% codeblock as python %}
+import arvados
+arv_client = arvados.api('v1', num_retries=0, ...)
+{% endcodeblock %}
 
 h2. get method
 
diff --git a/sdk/cwl/arvados_cwl/__init__.py b/sdk/cwl/arvados_cwl/__init__.py
index fe27b91ab..472243397 100644
--- a/sdk/cwl/arvados_cwl/__init__.py
+++ b/sdk/cwl/arvados_cwl/__init__.py
@@ -68,7 +68,10 @@ def versionstring():
 
 
 def arg_parser():  # type: () -> argparse.ArgumentParser
-    parser = argparse.ArgumentParser(description='Arvados executor for Common Workflow Language')
+    parser = argparse.ArgumentParser(
+        description='Arvados executor for Common Workflow Language',
+        parents=[arv_cmd.retry_opt],
+    )
 
     parser.add_argument("--basedir",
                         help="Base directory used to resolve relative references in the input, default to directory of input object file or current directory (if inputs piped/provided on command line).")
@@ -333,8 +336,14 @@ def main(args=sys.argv[1:],
     try:
         if api_client is None:
             api_client = arvados.safeapi.ThreadSafeApiCache(
-                api_params={"model": OrderedJsonModel(), "timeout": arvargs.http_timeout},
-                keep_params={"num_retries": 4},
+                api_params={
+                    'model': OrderedJsonModel(),
+                    'num_retries': arvargs.retries,
+                    'timeout': arvargs.http_timeout,
+                },
+                keep_params={
+                    'num_retries': arvargs.retries,
+                },
                 version='v1',
             )
             keep_client = api_client.keep
@@ -342,8 +351,18 @@ def main(args=sys.argv[1:],
             api_client.users().current().execute()
         if keep_client is None:
             block_cache = arvados.keep.KeepBlockCache(disk_cache=True)
-            keep_client = arvados.keep.KeepClient(api_client=api_client, num_retries=4, block_cache=block_cache)
-        executor = ArvCwlExecutor(api_client, arvargs, keep_client=keep_client, num_retries=4, stdout=stdout)
+            keep_client = arvados.keep.KeepClient(
+                api_client=api_client,
+                block_cache=block_cache,
+                num_retries=arvargs.retries,
+            )
+        executor = ArvCwlExecutor(
+            api_client,
+            arvargs,
+            keep_client=keep_client,
+            num_retries=arvargs.retries,
+            stdout=stdout,
+        )
     except WorkflowException as e:
         logger.error(e, exc_info=(sys.exc_info()[1] if arvargs.debug else False))
         return 1
diff --git a/sdk/python/arvados/api.py b/sdk/python/arvados/api.py
index 537ad2082..2e33e0f2c 100644
--- a/sdk/python/arvados/api.py
+++ b/sdk/python/arvados/api.py
@@ -27,6 +27,7 @@ import time
 import types
 
 import apiclient
+import apiclient.http
 from apiclient import discovery as apiclient_discovery
 from apiclient import errors as apiclient_errors
 from . import config
@@ -68,6 +69,19 @@ class OrderedJsonModel(apiclient.model.JsonModel):
         return body
 
 
+_orig_retry_request = apiclient.http._retry_request
+def _retry_request(http, num_retries, *args, **kwargs):
+    try:
+        num_retries = max(num_retries, http.num_retries)
+    except AttributeError:
+        # `http` client object does not have a `num_retries` attribute.
+        # It apparently hasn't gone through _patch_http_request, possibly
+        # because this isn't an Arvados API client. We need to continue on to
+        # avoid interfering with other Google API clients.
+        pass
+    return _orig_retry_request(http, num_retries, *args, **kwargs)
+apiclient.http._retry_request = _retry_request
+
 def _intercept_http_request(self, uri, method="GET", headers={}, **kwargs):
     if not headers.get('X-Request-Id'):
         headers['X-Request-Id'] = self._request_id()
@@ -102,9 +116,10 @@ def _intercept_http_request(self, uri, method="GET", headers={}, **kwargs):
                 raise type(e)(*e.args)
         raise
 
-def _patch_http_request(http, api_token):
+def _patch_http_request(http, api_token, num_retries):
     http.arvados_api_token = api_token
     http.max_request_size = 0
+    http.num_retries = num_retries
     http.orig_http_request = http.request
     http.request = types.MethodType(_intercept_http_request, http)
     http._last_request_time = 0
@@ -157,6 +172,7 @@ def api_client(
         cache=True,
         http=None,
         insecure=False,
+        num_retries=10,
         request_id=None,
         timeout=5*60,
         **kwargs,
@@ -194,6 +210,10 @@ def api_client(
     insecure: bool
     : If true, ignore SSL certificate validation errors. Default `False`.
 
+    num_retries: int
+    : The number of times to retry each API request if it encounters a
+      temporary failure. Default 10.
+
     request_id: str | None
     : Default `X-Request-Id` header value for outgoing requests that
       don't already provide one. If `None` or omitted, generate a random
@@ -214,13 +234,14 @@ def api_client(
         )
     if http.timeout is None:
         http.timeout = timeout
-    http = _patch_http_request(http, token)
+    http = _patch_http_request(http, token, num_retries)
 
     svc = apiclient_discovery.build(
         'arvados', version,
         cache_discovery=False,
         discoveryServiceUrl=discoveryServiceUrl,
         http=http,
+        num_retries=num_retries,
         **kwargs,
     )
     svc.api_token = token
diff --git a/sdk/python/arvados/collection.py b/sdk/python/arvados/collection.py
index ebca15c54..23b4393a9 100644
--- a/sdk/python/arvados/collection.py
+++ b/sdk/python/arvados/collection.py
@@ -1256,7 +1256,7 @@ class Collection(RichCollectionBase):
     def __init__(self, manifest_locator_or_text=None,
                  api_client=None,
                  keep_client=None,
-                 num_retries=None,
+                 num_retries=10,
                  parent=None,
                  apiconfig=None,
                  block_manager=None,
@@ -1324,7 +1324,7 @@ class Collection(RichCollectionBase):
         else:
             self._config = config.settings()
 
-        self.num_retries = num_retries if num_retries is not None else 0
+        self.num_retries = num_retries
         self._manifest_locator = None
         self._manifest_text = None
         self._portable_data_hash = None
diff --git a/sdk/python/arvados/commands/_util.py b/sdk/python/arvados/commands/_util.py
index d10d38eb5..17454b7d1 100644
--- a/sdk/python/arvados/commands/_util.py
+++ b/sdk/python/arvados/commands/_util.py
@@ -17,9 +17,9 @@ def _pos_int(s):
     return num
 
 retry_opt = argparse.ArgumentParser(add_help=False)
-retry_opt.add_argument('--retries', type=_pos_int, default=3, help="""
+retry_opt.add_argument('--retries', type=_pos_int, default=10, help="""
 Maximum number of times to retry server requests that encounter temporary
-failures (e.g., server down).  Default 3.""")
+failures (e.g., server down).  Default 10.""")
 
 def _ignore_error(error):
     return None
diff --git a/sdk/python/arvados/commands/arv_copy.py b/sdk/python/arvados/commands/arv_copy.py
index 7951842ac..63c0cbea2 100755
--- a/sdk/python/arvados/commands/arv_copy.py
+++ b/sdk/python/arvados/commands/arv_copy.py
@@ -129,8 +129,8 @@ def main():
         args.source_arvados = args.object_uuid[:5]
 
     # Create API clients for the source and destination instances
-    src_arv = api_for_instance(args.source_arvados)
-    dst_arv = api_for_instance(args.destination_arvados)
+    src_arv = api_for_instance(args.source_arvados, args.retries)
+    dst_arv = api_for_instance(args.destination_arvados, args.retries)
 
     if not args.project_uuid:
         args.project_uuid = dst_arv.users().current().execute(num_retries=args.retries)["uuid"]
@@ -187,7 +187,7 @@ def set_src_owner_uuid(resource, uuid, args):
 #     Otherwise, it is presumed to be the name of a file in
 #     $HOME/.config/arvados/instance_name.conf
 #
-def api_for_instance(instance_name):
+def api_for_instance(instance_name, num_retries):
     if not instance_name:
         # Use environment
         return arvados.api('v1', model=OrderedJsonModel())
@@ -214,7 +214,9 @@ def api_for_instance(instance_name):
                              host=cfg['ARVADOS_API_HOST'],
                              token=cfg['ARVADOS_API_TOKEN'],
                              insecure=api_is_insecure,
-                             model=OrderedJsonModel())
+                             model=OrderedJsonModel(),
+                             num_retries=num_retries,
+                             )
     else:
         abort('need ARVADOS_API_HOST and ARVADOS_API_TOKEN for {}'.format(instance_name))
     return client
diff --git a/sdk/python/arvados/commands/federation_migrate.py b/sdk/python/arvados/commands/federation_migrate.py
index 5c1bb29e7..32b3211f1 100755
--- a/sdk/python/arvados/commands/federation_migrate.py
+++ b/sdk/python/arvados/commands/federation_migrate.py
@@ -24,6 +24,7 @@ import os
 import hashlib
 import re
 from arvados._version import __version__
+from . import _util as arv_cmd
 
 EMAIL=0
 USERNAME=1
@@ -43,10 +44,10 @@ def connect_clusters(args):
                 host = r[0]
                 token = r[1]
                 print("Contacting %s" % (host))
-                arv = arvados.api(host=host, token=token, cache=False)
+                arv = arvados.api(host=host, token=token, cache=False, num_retries=args.retries)
                 clusters[arv._rootDesc["uuidPrefix"]] = arv
     else:
-        arv = arvados.api(cache=False)
+        arv = arvados.api(cache=False, num_retries=args.retries)
         rh = arv._rootDesc["remoteHosts"]
         tok = arv.api_client_authorizations().current().execute()
         token = "v2/%s/%s" % (tok["uuid"], tok["api_token"])
@@ -326,7 +327,10 @@ def migrate_user(args, migratearv, email, new_user_uuid, old_user_uuid):
 
 
 def main():
-    parser = argparse.ArgumentParser(description='Migrate users to federated identity, see https://doc.arvados.org/admin/merge-remote-account.html')
+    parser = argparse.ArgumentParser(
+        description='Migrate users to federated identity, see https://doc.arvados.org/admin/merge-remote-account.html',
+        parents=[arv_cmd.retry_opt],
+    )
     parser.add_argument(
         '--version', action='version', version="%s %s" % (sys.argv[0], __version__),
         help='Print version and exit.')
diff --git a/sdk/python/arvados/commands/get.py b/sdk/python/arvados/commands/get.py
index bb421def6..89b333808 100755
--- a/sdk/python/arvados/commands/get.py
+++ b/sdk/python/arvados/commands/get.py
@@ -155,7 +155,7 @@ def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr):
     request_id = arvados.util.new_request_id()
     logger.info('X-Request-Id: '+request_id)
 
-    api_client = arvados.api('v1', request_id=request_id)
+    api_client = arvados.api('v1', request_id=request_id, num_retries=args.retries)
 
     r = re.search(r'^(.*?)(/.*)?$', args.locator)
     col_loc = r.group(1)
diff --git a/sdk/python/arvados/commands/keepdocker.py b/sdk/python/arvados/commands/keepdocker.py
index 2d5c0150c..922256a27 100644
--- a/sdk/python/arvados/commands/keepdocker.py
+++ b/sdk/python/arvados/commands/keepdocker.py
@@ -359,7 +359,7 @@ def _uuid2pdh(api, uuid):
 def main(arguments=None, stdout=sys.stdout, install_sig_handlers=True, api=None):
     args = arg_parser.parse_args(arguments)
     if api is None:
-        api = arvados.api('v1')
+        api = arvados.api('v1', num_retries=args.retries)
 
     if args.image is None or args.image == 'images':
         fmt = "{:30}  {:10}  {:12}  {:29}  {:20}\n"
diff --git a/sdk/python/arvados/commands/ls.py b/sdk/python/arvados/commands/ls.py
index 86e728ed4..ac038f504 100644
--- a/sdk/python/arvados/commands/ls.py
+++ b/sdk/python/arvados/commands/ls.py
@@ -43,7 +43,7 @@ def main(args, stdout, stderr, api_client=None, logger=None):
     args = parse_args(args)
 
     if api_client is None:
-        api_client = arvados.api('v1')
+        api_client = arvados.api('v1', num_retries=args.retries)
 
     if logger is None:
         logger = logging.getLogger('arvados.arv-ls')
diff --git a/sdk/python/arvados/commands/put.py b/sdk/python/arvados/commands/put.py
index be7cd629c..0e732eafd 100644
--- a/sdk/python/arvados/commands/put.py
+++ b/sdk/python/arvados/commands/put.py
@@ -1136,7 +1136,7 @@ def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr,
     logging.getLogger('arvados').handlers[0].setFormatter(formatter)
 
     if api_client is None:
-        api_client = arvados.api('v1', request_id=request_id)
+        api_client = arvados.api('v1', request_id=request_id, num_retries=args.retries)
 
     if install_sig_handlers:
         arv_cmd.install_signal_handlers()
diff --git a/sdk/python/arvados/commands/ws.py b/sdk/python/arvados/commands/ws.py
index 37dab55d6..04a90cf20 100644
--- a/sdk/python/arvados/commands/ws.py
+++ b/sdk/python/arvados/commands/ws.py
@@ -10,12 +10,13 @@ import arvados
 import json
 from arvados.events import subscribe
 from arvados._version import __version__
+from . import _util as arv_cmd
 import signal
 
 def main(arguments=None):
     logger = logging.getLogger('arvados.arv-ws')
 
-    parser = argparse.ArgumentParser()
+    parser = argparse.ArgumentParser(parents=[arv_cmd.retry_opt])
     parser.add_argument('--version', action='version',
                         version="%s %s" % (sys.argv[0], __version__),
                         help='Print version and exit.')
@@ -56,7 +57,7 @@ def main(arguments=None):
             filters = new_filters
             known_component_jobs = pipeline_jobs
 
-    api = arvados.api('v1')
+    api = arvados.api('v1', num_retries=args.retries)
 
     if args.uuid:
         filters += [ ['object_uuid', '=', args.uuid] ]
diff --git a/sdk/python/arvados/keep.py b/sdk/python/arvados/keep.py
index 8658774cb..a2c8fd249 100644
--- a/sdk/python/arvados/keep.py
+++ b/sdk/python/arvados/keep.py
@@ -835,7 +835,7 @@ class KeepClient(object):
     def __init__(self, api_client=None, proxy=None,
                  timeout=DEFAULT_TIMEOUT, proxy_timeout=DEFAULT_PROXY_TIMEOUT,
                  api_token=None, local_store=None, block_cache=None,
-                 num_retries=0, session=None):
+                 num_retries=10, session=None):
         """Initialize a new KeepClient.
 
         Arguments:
@@ -888,7 +888,7 @@ class KeepClient(object):
         :num_retries:
           The default number of times to retry failed requests.
           This will be used as the default num_retries value when get() and
-          put() are called.  Default 0.
+          put() are called.  Default 10.
         """
         self.lock = threading.Lock()
         if proxy is None:
diff --git a/sdk/python/arvados/stream.py b/sdk/python/arvados/stream.py
index edfb7711b..eadfbbec0 100644
--- a/sdk/python/arvados/stream.py
+++ b/sdk/python/arvados/stream.py
@@ -24,7 +24,7 @@ from ._normalize_stream import normalize_stream
 
 class StreamReader(object):
     def __init__(self, tokens, keep=None, debug=False, _empty=False,
-                 num_retries=0):
+                 num_retries=10):
         self._stream_name = None
         self._data_locators = []
         self._files = collections.OrderedDict()
diff --git a/sdk/python/tests/test_api.py b/sdk/python/tests/test_api.py
index 780ff07bf..15fead7ad 100644
--- a/sdk/python/tests/test_api.py
+++ b/sdk/python/tests/test_api.py
@@ -7,6 +7,7 @@ from builtins import str
 from builtins import range
 import arvados
 import collections
+import contextlib
 import httplib2
 import itertools
 import json
@@ -14,6 +15,7 @@ import mimetypes
 import os
 import socket
 import string
+import sys
 import unittest
 import urllib.parse as urlparse
 
@@ -338,6 +340,77 @@ class ArvadosApiTest(run_test_server.TestCaseWithServers):
                 api_client(*args, insecure=True)
 
 
+class ConstructNumRetriesTestCase(unittest.TestCase):
+    @staticmethod
+    def _fake_retry_request(http, num_retries, req_type, sleep, rand, uri, method, *args, **kwargs):
+        return http.request(uri, method, *args, **kwargs)
+
+    @contextlib.contextmanager
+    def patch_retry(self):
+        # We have this dedicated context manager that goes through `sys.modules`
+        # instead of just using `mock.patch` because of the unfortunate
+        # `arvados.api` name collision.
+        orig_func = sys.modules['arvados.api']._orig_retry_request
+        expect_name = 'googleapiclient.http._retry_request'
+        self.assertEqual(
+            '{0.__module__}.{0.__name__}'.format(orig_func), expect_name,
+            f"test setup problem: {expect_name} not at arvados.api._orig_retry_request",
+        )
+        retry_mock = mock.Mock(wraps=self._fake_retry_request)
+        sys.modules['arvados.api']._orig_retry_request = retry_mock
+        try:
+            yield retry_mock
+        finally:
+            sys.modules['arvados.api']._orig_retry_request = orig_func
+
+    def _iter_num_retries(self, retry_mock):
+        for call in retry_mock.call_args_list:
+            try:
+                yield call.args[1]
+            except IndexError:
+                yield call.kwargs['num_retries']
+
+    def test_default_num_retries(self):
+        with self.patch_retry() as retry_mock:
+            client = arvados.api('v1')
+        actual = set(self._iter_num_retries(retry_mock))
+        self.assertEqual(len(actual), 1)
+        self.assertTrue(actual.pop() > 6, "num_retries lower than expected")
+
+    def _test_calls(self, init_arg, call_args, expected):
+        with self.patch_retry() as retry_mock:
+            client = arvados.api('v1', num_retries=init_arg)
+            for num_retries in call_args:
+                client.users().current().execute(num_retries=num_retries)
+        actual = self._iter_num_retries(retry_mock)
+        # The constructor makes two requests with its num_retries argument:
+        # one for the discovery document, and one for the config.
+        self.assertEqual(next(actual, None), init_arg)
+        self.assertEqual(next(actual, None), init_arg)
+        self.assertEqual(list(actual), expected)
+
+    def test_discovery_num_retries(self):
+        for num_retries in [0, 5, 55]:
+            with self.subTest(f"num_retries={num_retries}"):
+                self._test_calls(num_retries, [], [])
+
+    def test_num_retries_called_le_init(self):
+        for n in [6, 10]:
+            with self.subTest(f"init_arg={n}"):
+                call_args = [n - 4, n - 2, n]
+                expected = [n] * 3
+                self._test_calls(n, call_args, expected)
+
+    def test_num_retries_called_ge_init(self):
+        for n in [0, 10]:
+            with self.subTest(f"init_arg={n}"):
+                call_args = [n, n + 4, n + 8]
+                self._test_calls(n, call_args, call_args)
+
+    def test_num_retries_called_mixed(self):
+        self._test_calls(5, [2, 6, 4, 8], [5, 6, 5, 8])
+
+
 class PreCloseSocketTestCase(unittest.TestCase):
     def setUp(self):
         self.api = arvados.api('v1')
diff --git a/sdk/python/tests/test_collections.py b/sdk/python/tests/test_collections.py
index 8986cf225..c79607fca 100644
--- a/sdk/python/tests/test_collections.py
+++ b/sdk/python/tests/test_collections.py
@@ -538,11 +538,11 @@ class CollectionReaderTestCase(unittest.TestCase, CollectionTestMixin):
         self.mock_get_collection(client, status, 'foo_file')
         return client
 
-    def test_init_no_default_retries(self):
+    def test_init_default_retries(self):
         client = self.api_client_mock(200)
         reader = arvados.CollectionReader(self.DEFAULT_UUID, api_client=client)
         reader.manifest_text()
-        client.collections().get().execute.assert_called_with(num_retries=0)
+        client.collections().get().execute.assert_called_with(num_retries=10)
 
     def test_uuid_init_success(self):
         client = self.api_client_mock(200)
diff --git a/sdk/python/tests/test_keep_client.py b/sdk/python/tests/test_keep_client.py
index 0fe396113..f472c0830 100644
--- a/sdk/python/tests/test_keep_client.py
+++ b/sdk/python/tests/test_keep_client.py
@@ -276,7 +276,7 @@ class KeepClientServiceTestCase(unittest.TestCase, tutil.ApiClientMock, DiskCach
         try:
             # this will fail, but it ensures we get the service
             # discovery response
-            keep_client.put('baz2')
+            keep_client.put('baz2', num_retries=0)
         except:
             pass
         self.assertTrue(keep_client.using_proxy)
@@ -338,7 +338,11 @@ class KeepClientServiceTestCase(unittest.TestCase, tutil.ApiClientMock, DiskCach
         api_client = self.mock_keep_services(count=1)
         force_timeout = socket.timeout("timed out")
         with tutil.mock_keep_responses(force_timeout, 0) as mock:
-            keep_client = arvados.KeepClient(api_client=api_client, block_cache=self.make_block_cache(self.disk_cache))
+            keep_client = arvados.KeepClient(
+                api_client=api_client,
+                block_cache=self.make_block_cache(self.disk_cache),
+                num_retries=0,
+            )
             with self.assertRaises(arvados.errors.KeepReadError):
                 keep_client.get('ffffffffffffffffffffffffffffffff')
             self.assertEqual(
@@ -355,7 +359,11 @@ class KeepClientServiceTestCase(unittest.TestCase, tutil.ApiClientMock, DiskCach
         api_client = self.mock_keep_services(count=1)
         force_timeout = socket.timeout("timed out")
         with tutil.mock_keep_responses(force_timeout, 0) as mock:
-            keep_client = arvados.KeepClient(api_client=api_client, block_cache=self.make_block_cache(self.disk_cache))
+            keep_client = arvados.KeepClient(
+                api_client=api_client,
+                block_cache=self.make_block_cache(self.disk_cache),
+                num_retries=0,
+            )
             with self.assertRaises(arvados.errors.KeepWriteError):
                 keep_client.put(b'foo')
             self.assertEqual(
@@ -372,7 +380,11 @@ class KeepClientServiceTestCase(unittest.TestCase, tutil.ApiClientMock, DiskCach
         api_client = self.mock_keep_services(count=1)
         force_timeout = socket.timeout("timed out")
         with tutil.mock_keep_responses(force_timeout, 0) as mock:
-            keep_client = arvados.KeepClient(api_client=api_client, block_cache=self.make_block_cache(self.disk_cache))
+            keep_client = arvados.KeepClient(
+                api_client=api_client,
+                block_cache=self.make_block_cache(self.disk_cache),
+                num_retries=0,
+            )
             with self.assertRaises(arvados.errors.KeepReadError):
                 keep_client.head('ffffffffffffffffffffffffffffffff')
             self.assertEqual(
@@ -389,7 +401,11 @@ class KeepClientServiceTestCase(unittest.TestCase, tutil.ApiClientMock, DiskCach
         api_client = self.mock_keep_services(service_type='proxy', count=1)
         force_timeout = socket.timeout("timed out")
         with tutil.mock_keep_responses(force_timeout, 0) as mock:
-            keep_client = arvados.KeepClient(api_client=api_client, block_cache=self.make_block_cache(self.disk_cache))
+            keep_client = arvados.KeepClient(
+                api_client=api_client,
+                block_cache=self.make_block_cache(self.disk_cache),
+                num_retries=0,
+            )
             with self.assertRaises(arvados.errors.KeepReadError):
                 keep_client.get('ffffffffffffffffffffffffffffffff')
             self.assertEqual(
@@ -406,7 +422,11 @@ class KeepClientServiceTestCase(unittest.TestCase, tutil.ApiClientMock, DiskCach
         api_client = self.mock_keep_services(service_type='proxy', count=1)
         force_timeout = socket.timeout("timed out")
         with tutil.mock_keep_responses(force_timeout, 0) as mock:
-            keep_client = arvados.KeepClient(api_client=api_client, block_cache=self.make_block_cache(self.disk_cache))
+            keep_client = arvados.KeepClient(
+                api_client=api_client,
+                block_cache=self.make_block_cache(self.disk_cache),
+                num_retries=0,
+            )
             with self.assertRaises(arvados.errors.KeepReadError):
                 keep_client.head('ffffffffffffffffffffffffffffffff')
             self.assertEqual(
@@ -424,7 +444,10 @@ class KeepClientServiceTestCase(unittest.TestCase, tutil.ApiClientMock, DiskCach
         api_client = self.mock_keep_services(service_type='proxy', count=1)
         force_timeout = socket.timeout("timed out")
         with tutil.mock_keep_responses(force_timeout, 0) as mock:
-            keep_client = arvados.KeepClient(api_client=api_client)
+            keep_client = arvados.KeepClient(
+                api_client=api_client,
+                num_retries=0,
+            )
             with self.assertRaises(arvados.errors.KeepWriteError):
                 keep_client.put('foo')
             self.assertEqual(
@@ -441,7 +464,11 @@ class KeepClientServiceTestCase(unittest.TestCase, tutil.ApiClientMock, DiskCach
         api_client = mock.MagicMock(name='api_client')
         api_client.keep_services().accessible().execute.side_effect = (
             arvados.errors.ApiError)
-        keep_client = arvados.KeepClient(api_client=api_client, block_cache=self.make_block_cache(self.disk_cache))
+        keep_client = arvados.KeepClient(
+            api_client=api_client,
+            block_cache=self.make_block_cache(self.disk_cache),
+            num_retries=0,
+        )
         with self.assertRaises(exc_class) as err_check:
             getattr(keep_client, verb)('d41d8cd98f00b204e9800998ecf8427e+0')
         self.assertEqual(0, len(err_check.exception.request_errors()))
@@ -461,7 +488,11 @@ class KeepClientServiceTestCase(unittest.TestCase, tutil.ApiClientMock, DiskCach
             "retry error reporting test", 500, 500, 500, 500, 500, 500, 502, 502)
         with req_mock, tutil.skip_sleep, \
                 self.assertRaises(exc_class) as err_check:
-            keep_client = arvados.KeepClient(api_client=api_client, block_cache=self.make_block_cache(self.disk_cache))
+            keep_client = arvados.KeepClient(
+                api_client=api_client,
+                block_cache=self.make_block_cache(self.disk_cache),
+                num_retries=0,
+            )
             getattr(keep_client, verb)('d41d8cd98f00b204e9800998ecf8427e+0',
                                        num_retries=3)
         self.assertEqual([502, 502], [
@@ -484,7 +515,11 @@ class KeepClientServiceTestCase(unittest.TestCase, tutil.ApiClientMock, DiskCach
         api_client = self.mock_keep_services(count=3)
         with tutil.mock_keep_responses(data_loc, 200, 500, 500) as req_mock, \
                 self.assertRaises(arvados.errors.KeepWriteError) as exc_check:
-            keep_client = arvados.KeepClient(api_client=api_client, block_cache=self.make_block_cache(self.disk_cache))
+            keep_client = arvados.KeepClient(
+                api_client=api_client,
+                block_cache=self.make_block_cache(self.disk_cache),
+                num_retries=0,
+            )
             keep_client.put(data)
         self.assertEqual(2, len(exc_check.exception.request_errors()))
 
@@ -494,8 +529,12 @@ class KeepClientServiceTestCase(unittest.TestCase, tutil.ApiClientMock, DiskCach
         api_client = self.mock_keep_services(service_type='proxy', read_only=True, count=1)
         with tutil.mock_keep_responses(data_loc, 200, 500, 500) as req_mock, \
                 self.assertRaises(arvados.errors.KeepWriteError) as exc_check:
-          keep_client = arvados.KeepClient(api_client=api_client, block_cache=self.make_block_cache(self.disk_cache))
-          keep_client.put(data)
+            keep_client = arvados.KeepClient(
+                api_client=api_client,
+                block_cache=self.make_block_cache(self.disk_cache),
+                num_retries=0,
+            )
+            keep_client.put(data)
         self.assertEqual(True, ("no Keep services available" in str(exc_check.exception)))
         self.assertEqual(0, len(exc_check.exception.request_errors()))
 
@@ -503,7 +542,11 @@ class KeepClientServiceTestCase(unittest.TestCase, tutil.ApiClientMock, DiskCach
         body = b'oddball service get'
         api_client = self.mock_keep_services(service_type='fancynewblobstore')
         with tutil.mock_keep_responses(body, 200):
-            keep_client = arvados.KeepClient(api_client=api_client, block_cache=self.make_block_cache(self.disk_cache))
+            keep_client = arvados.KeepClient(
+                api_client=api_client,
+                block_cache=self.make_block_cache(self.disk_cache),
+                num_retries=0,
+            )
             actual = keep_client.get(tutil.str_keep_locator(body))
         self.assertEqual(body, actual)
 
@@ -512,7 +555,11 @@ class KeepClientServiceTestCase(unittest.TestCase, tutil.ApiClientMock, DiskCach
         pdh = tutil.str_keep_locator(body)
         api_client = self.mock_keep_services(service_type='fancynewblobstore')
         with tutil.mock_keep_responses(pdh, 200):
-            keep_client = arvados.KeepClient(api_client=api_client, block_cache=self.make_block_cache(self.disk_cache))
+            keep_client = arvados.KeepClient(
+                api_client=api_client,
+                block_cache=self.make_block_cache(self.disk_cache),
+                num_retries=0,
+            )
             actual = keep_client.put(body, copies=1)
         self.assertEqual(pdh, actual)
 
@@ -524,7 +571,11 @@ class KeepClientServiceTestCase(unittest.TestCase, tutil.ApiClientMock, DiskCach
         headers = {'x-keep-replicas-stored': 3}
         with tutil.mock_keep_responses(pdh, 200, 418, 418, 418,
                                        **headers) as req_mock:
-            keep_client = arvados.KeepClient(api_client=api_client, block_cache=self.make_block_cache(self.disk_cache))
+            keep_client = arvados.KeepClient(
+                api_client=api_client,
+                block_cache=self.make_block_cache(self.disk_cache),
+                num_retries=0,
+            )
             actual = keep_client.put(body, copies=2)
         self.assertEqual(pdh, actual)
         self.assertEqual(1, req_mock.call_count)
@@ -638,7 +689,7 @@ class KeepStorageClassesTestCase(unittest.TestCase, tutil.ApiClientMock, DiskCac
             'x-keep-storage-classes-confirmed': 'foo=1'}
         with tutil.mock_keep_responses(self.locator, 200, 503, **headers) as mock:
             with self.assertRaises(arvados.errors.KeepWriteError):
-                self.keep_client.put(self.data, copies=1, classes=['foo', 'bar'])
+                self.keep_client.put(self.data, copies=1, classes=['foo', 'bar'], num_retries=0)
             # 1st request, both classes pending
             req1_headers = mock.responses[0].getopt(pycurl.HTTPHEADER)
             self.assertIn('X-Keep-Storage-Classes: bar, foo', req1_headers)
@@ -689,7 +740,7 @@ class KeepStorageClassesTestCase(unittest.TestCase, tutil.ApiClientMock, DiskCac
             with tutil.mock_keep_responses(self.locator, return_code, return_code, **headers):
                 case_desc = 'wanted_copies={}, wanted_classes="{}", confirmed_copies={}, confirmed_classes="{}"'.format(w_copies, ', '.join(w_classes), c_copies, c_classes)
                 with self.assertRaises(arvados.errors.KeepWriteError, msg=case_desc):
-                    self.keep_client.put(self.data, copies=w_copies, classes=w_classes)
+                    self.keep_client.put(self.data, copies=w_copies, classes=w_classes, num_retries=0)
 
 @tutil.skip_sleep
 @parameterized.parameterized_class([{"disk_cache": True}, {"disk_cache": False}])
@@ -1250,10 +1301,6 @@ class KeepClientRetryTestMixin(object):
         with self.TEST_PATCHER(self.DEFAULT_EXPECT, Exception('mock err'), 200):
             self.check_success(num_retries=3)
 
-    def test_no_default_retry(self):
-        with self.TEST_PATCHER(self.DEFAULT_EXPECT, 500, 200):
-            self.check_exception()
-
     def test_no_retry_after_permanent_error(self):
         with self.TEST_PATCHER(self.DEFAULT_EXPECT, 403, 200):
             self.check_exception(num_retries=3)
@@ -1293,7 +1340,7 @@ class KeepClientRetryGetTestCase(KeepClientRetryTestMixin, unittest.TestCase, Di
         # and a high threshold of servers report that it's not found.
         # This test rigs up 50/50 disagreement between two servers, and
         # checks that it does not become a NotFoundError.
-        client = self.new_client()
+        client = self.new_client(num_retries=0)
         with tutil.mock_keep_responses(self.DEFAULT_EXPECT, 404, 500):
             with self.assertRaises(arvados.errors.KeepReadError) as exc_check:
                 client.get(self.HINTED_LOCATOR)
@@ -1341,7 +1388,7 @@ class KeepClientRetryHeadTestCase(KeepClientRetryTestMixin, unittest.TestCase, D
         # and a high threshold of servers report that it's not found.
         # This test rigs up 50/50 disagreement between two servers, and
         # checks that it does not become a NotFoundError.
-        client = self.new_client()
+        client = self.new_client(num_retries=0)
         with tutil.mock_keep_responses(self.DEFAULT_EXPECT, 404, 500):
             with self.assertRaises(arvados.errors.KeepReadError) as exc_check:
                 client.head(self.HINTED_LOCATOR)
diff --git a/sdk/python/tests/test_retry_job_helpers.py b/sdk/python/tests/test_retry_job_helpers.py
index 76c62cb0c..9389b25c8 100644
--- a/sdk/python/tests/test_retry_job_helpers.py
+++ b/sdk/python/tests/test_retry_job_helpers.py
@@ -28,7 +28,7 @@ class ApiClientRetryTestMixin(object):
     def setUp(self):
         # Patch arvados.api() to return our mock API, so we can mock
         # its http requests.
-        self.api_client = arvados.api('v1', cache=False)
+        self.api_client = arvados.api('v1', cache=False, num_retries=0)
         self.api_patch = mock.patch('arvados.api', return_value=self.api_client)
         self.api_patch.start()
 
diff --git a/sdk/python/tests/test_stream.py b/sdk/python/tests/test_stream.py
index dc84a037f..12a3340ea 100644
--- a/sdk/python/tests/test_stream.py
+++ b/sdk/python/tests/test_stream.py
@@ -223,13 +223,6 @@ class StreamRetryTestMixin(object):
             reader = self.reader_for('bar_file')
             self.assertEqual(b'bar', self.read_for_test(reader, 3))
 
-    @tutil.skip_sleep
-    def test_read_no_default_retry(self):
-        with tutil.mock_keep_responses('', 500):
-            reader = self.reader_for('user_agreement')
-            with self.assertRaises(arvados.errors.KeepReadError):
-                self.read_for_test(reader, 10)
-
     @tutil.skip_sleep
     def test_read_with_instance_retries(self):
         with tutil.mock_keep_responses('foo', 500, 200):
diff --git a/services/fuse/arvados_fuse/command.py b/services/fuse/arvados_fuse/command.py
index e275825a6..95b9a9773 100644
--- a/services/fuse/arvados_fuse/command.py
+++ b/services/fuse/arvados_fuse/command.py
@@ -230,6 +230,9 @@ class Mount(object):
         try:
             self.api = arvados.safeapi.ThreadSafeApiCache(
                 apiconfig=arvados.config.settings(),
+                api_params={
+                    'num_retries': self.args.retries,
+                },
                 # default value of file_cache is 0, this tells KeepBlockCache to
                 # choose a default based on whether disk_cache is enabled or not.
                 keep_params={

commit 35895ee91c820680bb7df9696ab2e92525ead2ac
Author: Brett Smith <brett.smith at curii.com>
Date:   Wed May 3 14:19:06 2023 -0400

    12684: Remove custom retry logic from PySDK
    
    This logic traces its roots back to
    `5722c604c6f5dc1553674d179ec016ec12e2b090`. The goal of that commit was to
    work around a bug in httplib, which we no longer use as a client
    library. `31eb1bdc31e1d030844a6fdc7f4ba4286ec79d4f` made an analogous
    change for httplib2.
    
    `8a0eb69984a93852ec888cd3e02b778b0be758ed` made three major changes:
    
    1. Proactively close sockets if they seem likely to be stale
    2. Wrap the retry logic in a loop
    3. Generalize catching `httplib.BadStatusLine` to `httplib.HTTPException`
       (which covers all kinds of malformed HTTP responses)
    
    However, #1 functionally obsoletes the exception handlers added in the
    earlier commits. Preemptively closing the sockets prevents httplib/2
    from trying to reuse stale ones. So these exception handlers, along with
    their retry loops, no longer serve their original purpose.
    
    Remove this logic in favor of using the retry logic built into
    googleapiclient. That logic is easier to configure and more refined.
    
    Arvados-DCO-1.1-Signed-off-by: Brett Smith <brett.smith at curii.com>

diff --git a/sdk/python/arvados/api.py b/sdk/python/arvados/api.py
index 19154f3e8..537ad2082 100644
--- a/sdk/python/arvados/api.py
+++ b/sdk/python/arvados/api.py
@@ -37,9 +37,13 @@ from . import cache
 _logger = logging.getLogger('arvados.api')
 
 MAX_IDLE_CONNECTION_DURATION = 30
-RETRY_DELAY_INITIAL = 2
-RETRY_DELAY_BACKOFF = 2
-RETRY_COUNT = 2
+
+# These constants supported our own retry logic that we've since removed in
+# favor of using googleapiclient's num_retries. They're kept here purely for
+# API compatibility, but set to 0 to indicate no retries happen.
+RETRY_DELAY_INITIAL = 0
+RETRY_DELAY_BACKOFF = 0
+RETRY_COUNT = 0
 
 if sys.version_info >= (3,):
     httplib2.SSLHandshakeError = None
@@ -75,12 +79,7 @@ def _intercept_http_request(self, uri, method="GET", headers={}, **kwargs):
 
         headers['Authorization'] = 'OAuth2 %s' % self.arvados_api_token
 
-        retryable = method in [
-            'DELETE', 'GET', 'HEAD', 'OPTIONS', 'PUT']
-        retry_count = self._retry_count if retryable else 0
-
-        if (not retryable and
-            time.time() - self._last_request_time > self._max_keepalive_idle):
+        if (time.time() - self._last_request_time) > self._max_keepalive_idle:
             # High probability of failure due to connection atrophy. Make
             # sure this request [re]opens a new connection by closing and
             # forgetting all cached connections first.
@@ -88,32 +87,11 @@ def _intercept_http_request(self, uri, method="GET", headers={}, **kwargs):
                 conn.close()
             self.connections.clear()
 
-        delay = self._retry_delay_initial
-        for _ in range(retry_count):
-            self._last_request_time = time.time()
-            try:
-                return self.orig_http_request(uri, method, headers=headers, **kwargs)
-            except http.client.HTTPException:
-                _logger.debug("[%s] Retrying API request in %d s after HTTP error",
-                              headers['X-Request-Id'], delay, exc_info=True)
-            except ssl.SSLCertVerificationError as e:
-                raise ssl.SSLCertVerificationError(e.args[0], "Could not connect to %s\n%s\nPossible causes: remote SSL/TLS certificate expired, or was issued by an untrusted certificate authority." % (uri, e)) from None
-            except socket.error:
-                # This is the one case where httplib2 doesn't close the
-                # underlying connection first.  Close all open
-                # connections, expecting this object only has the one
-                # connection to the API server.  This is safe because
-                # httplib2 reopens connections when needed.
-                _logger.debug("[%s] Retrying API request in %d s after socket error",
-                              headers['X-Request-Id'], delay, exc_info=True)
-                for conn in self.connections.values():
-                    conn.close()
-
-            time.sleep(delay)
-            delay = delay * self._retry_delay_backoff
-
         self._last_request_time = time.time()
-        return self.orig_http_request(uri, method, headers=headers, **kwargs)
+        try:
+            return self.orig_http_request(uri, method, headers=headers, **kwargs)
+        except ssl.SSLCertVerificationError as e:
+            raise ssl.SSLCertVerificationError(e.args[0], "Could not connect to %s\n%s\nPossible causes: remote SSL/TLS certificate expired, or was issued by an untrusted certificate authority." % (uri, e)) from None
     except Exception as e:
         # Prepend "[request_id] " to the error message, which we
         # assume is the first string argument passed to the exception
@@ -131,9 +109,6 @@ def _patch_http_request(http, api_token):
     http.request = types.MethodType(_intercept_http_request, http)
     http._last_request_time = 0
     http._max_keepalive_idle = MAX_IDLE_CONNECTION_DURATION
-    http._retry_delay_initial = RETRY_DELAY_INITIAL
-    http._retry_delay_backoff = RETRY_DELAY_BACKOFF
-    http._retry_count = RETRY_COUNT
     http._request_id = util.new_request_id
     return http
 
diff --git a/sdk/python/tests/test_api.py b/sdk/python/tests/test_api.py
index 20c4f346a..780ff07bf 100644
--- a/sdk/python/tests/test_api.py
+++ b/sdk/python/tests/test_api.py
@@ -27,9 +27,6 @@ from arvados.api import (
     normalize_api_kwargs,
     api_kwargs_from_config,
     OrderedJsonModel,
-    RETRY_DELAY_INITIAL,
-    RETRY_DELAY_BACKOFF,
-    RETRY_COUNT,
 )
 from .arvados_testutil import fake_httplib2_response, queue_with
 
@@ -341,7 +338,7 @@ class ArvadosApiTest(run_test_server.TestCaseWithServers):
                 api_client(*args, insecure=True)
 
 
-class RetryREST(unittest.TestCase):
+class PreCloseSocketTestCase(unittest.TestCase):
     def setUp(self):
         self.api = arvados.api('v1')
         self.assertTrue(hasattr(self.api._http, 'orig_http_request'),
@@ -353,59 +350,6 @@ class RetryREST(unittest.TestCase):
         # All requests succeed by default. Tests override as needed.
         self.api._http.orig_http_request.return_value = self.request_success
 
-    @mock.patch('time.sleep')
-    def test_socket_error_retry_get(self, sleep):
-        self.api._http.orig_http_request.side_effect = (
-            socket.error('mock error'),
-            self.request_success,
-        )
-        self.assertEqual(self.api.users().current().execute(),
-                         self.mock_response)
-        self.assertGreater(self.api._http.orig_http_request.call_count, 1,
-                           "client got the right response without retrying")
-        self.assertEqual(sleep.call_args_list,
-                         [mock.call(RETRY_DELAY_INITIAL)])
-
-    @mock.patch('time.sleep')
-    def test_same_automatic_request_id_on_retry(self, sleep):
-        self.api._http.orig_http_request.side_effect = (
-            socket.error('mock error'),
-            self.request_success,
-        )
-        self.api.users().current().execute()
-        calls = self.api._http.orig_http_request.call_args_list
-        self.assertEqual(len(calls), 2)
-        self.assertEqual(
-            calls[0][1]['headers']['X-Request-Id'],
-            calls[1][1]['headers']['X-Request-Id'])
-        self.assertRegex(calls[0][1]['headers']['X-Request-Id'], r'^req-[a-z0-9]{20}$')
-
-    @mock.patch('time.sleep')
-    def test_provided_request_id_on_retry(self, sleep):
-        self.api.request_id='fake-request-id'
-        self.api._http.orig_http_request.side_effect = (
-            socket.error('mock error'),
-            self.request_success,
-        )
-        self.api.users().current().execute()
-        calls = self.api._http.orig_http_request.call_args_list
-        self.assertEqual(len(calls), 2)
-        for call in calls:
-            self.assertEqual(call[1]['headers']['X-Request-Id'], 'fake-request-id')
-
-    @mock.patch('time.sleep')
-    def test_socket_error_retry_delay(self, sleep):
-        self.api._http.orig_http_request.side_effect = socket.error('mock')
-        self.api._http._retry_count = 3
-        with self.assertRaises(socket.error):
-            self.api.users().current().execute()
-        self.assertEqual(self.api._http.orig_http_request.call_count, 4)
-        self.assertEqual(sleep.call_args_list, [
-            mock.call(RETRY_DELAY_INITIAL),
-            mock.call(RETRY_DELAY_INITIAL * RETRY_DELAY_BACKOFF),
-            mock.call(RETRY_DELAY_INITIAL * RETRY_DELAY_BACKOFF**2),
-        ])
-
     @mock.patch('time.time', side_effect=[i*2**20 for i in range(99)])
     def test_close_old_connections_non_retryable(self, sleep):
         self._test_connection_close(expect=1)
@@ -429,18 +373,6 @@ class RetryREST(unittest.TestCase):
         for c in mock_conns.values():
             self.assertEqual(c.close.call_count, expect)
 
-    @mock.patch('time.sleep')
-    def test_socket_error_no_retry_post(self, sleep):
-        self.api._http.orig_http_request.side_effect = (
-            socket.error('mock error'),
-            self.request_success,
-        )
-        with self.assertRaises(socket.error):
-            self.api.users().create(body={}).execute()
-        self.assertEqual(self.api._http.orig_http_request.call_count, 1,
-                         "client should try non-retryable method exactly once")
-        self.assertEqual(sleep.call_args_list, [])
-
 
 if __name__ == '__main__':
     unittest.main()

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list