[arvados] updated: 2.6.2-19-g434336ff5

git repository hosting git at public.arvados.org
Wed Jun 7 20:33:53 UTC 2023


Summary of changes:
 doc/admin/upgrading.html.textile.liquid |  4 ++-
 sdk/cwl/arvados_cwl/__init__.py         | 10 ++++--
 sdk/cwl/arvados_cwl/arvcontainer.py     |  1 +
 sdk/python/arvados/__init__.py          | 17 +++++-----
 sdk/python/arvados/api.py               | 47 +++++++++++++++++++++++----
 sdk/python/arvados/logging.py           | 31 ++++++++++++++++++
 sdk/python/tests/test_api.py            | 43 +++++++++++++++++++++++++
 tools/salt-install/installer.sh         | 57 +++++++++++++++++----------------
 8 files changed, 163 insertions(+), 47 deletions(-)
 create mode 100644 sdk/python/arvados/logging.py

       via  434336ff51b9e016a87f3005c4022c826d2dde58 (commit)
       via  3cd9690e6ee44ee92ef65c9548389a79547bdff0 (commit)
       via  dcc59708da5c60cacb6e64553d0a8901c37591ee (commit)
      from  dd0ceb3261e2f28552f11df5fbedb5333a86b522 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.


commit 434336ff51b9e016a87f3005c4022c826d2dde58
Author: Brett Smith <brett.smith at curii.com>
Date:   Wed Jun 7 16:28:53 2023 -0400

    Merge branch '20613-googleapiclient-init-logs'
    
    Closes #20613.
    
    Arvados-DCO-1.1-Signed-off-by: Brett Smith <brett.smith at curii.com>

diff --git a/doc/admin/upgrading.html.textile.liquid b/doc/admin/upgrading.html.textile.liquid
index 22bc65c40..113b867b0 100644
--- a/doc/admin/upgrading.html.textile.liquid
+++ b/doc/admin/upgrading.html.textile.liquid
@@ -28,7 +28,7 @@ TODO: extract this information based on git commit messages and generate changel
 <div class="releasenotes">
 </notextile>
 
-h2(#v2_6_3). v2.6.3 (2023-06-06)
+h2(#v2_6_3). v2.6.3 (2023-06-08)
 
 "previous: Upgrading to 2.6.2":#v2_6_2
 
@@ -43,6 +43,8 @@ import arvados
 arv_client = arvados.api('v1', num_retries=0, ...)
 {% endcodeblock %}
 
+The first time the Python SDK fetches an Arvados API discovery document, it will ensure that @googleapiclient.http@ logs are handled so you have a way to know about early problems that are being retried. If you prefer to handle these logs your own way, just ensure that the @googleapiclient.http@ logger (or a parent logger) has a handler installed before you call any Arvados API client constructor.
+
 h2(#v2_6_2). v2.6.2 (2023-05-22)
 
 "previous: Upgrading to 2.6.1":#v2_6_1
diff --git a/sdk/cwl/arvados_cwl/__init__.py b/sdk/cwl/arvados_cwl/__init__.py
index 6a8e320c6..8108934aa 100644
--- a/sdk/cwl/arvados_cwl/__init__.py
+++ b/sdk/cwl/arvados_cwl/__init__.py
@@ -28,6 +28,7 @@ from cwltool.utils import adjustFileObjs, adjustDirObjs, get_listing
 
 import arvados
 import arvados.config
+import arvados.logging
 from arvados.keep import KeepClient
 from arvados.errors import ApiError
 import arvados.commands._util as arv_cmd
@@ -377,9 +378,11 @@ def main(args=sys.argv[1:],
     logger.setLevel(logging.INFO)
     logging.getLogger('arvados').setLevel(logging.INFO)
     logging.getLogger('arvados.keep').setLevel(logging.WARNING)
-    # API retries are at WARNING level and can be noisy, but as long as
+    # API retries are filtered to the INFO level and can be noisy, but as long as
     # they succeed we don't need to see warnings about it.
-    logging.getLogger('googleapiclient').setLevel(logging.ERROR)
+    googleapiclient_http_logger = logging.getLogger('googleapiclient.http')
+    googleapiclient_http_logger.addFilter(arvados.logging.GoogleHTTPClientFilter())
+    googleapiclient_http_logger.setLevel(logging.WARNING)
 
     if arvargs.debug:
         logger.setLevel(logging.DEBUG)
@@ -387,7 +390,8 @@ def main(args=sys.argv[1:],
         # In debug mode show logs about retries, but we arn't
         # debugging the google client so we don't need to see
         # everything.
-        logging.getLogger('googleapiclient').setLevel(logging.WARNING)
+        googleapiclient_http_logger.setLevel(logging.NOTSET)
+        logging.getLogger('googleapiclient').setLevel(logging.INFO)
 
     if arvargs.quiet:
         logger.setLevel(logging.WARN)
diff --git a/sdk/python/arvados/__init__.py b/sdk/python/arvados/__init__.py
index c8c702980..39fdb1100 100644
--- a/sdk/python/arvados/__init__.py
+++ b/sdk/python/arvados/__init__.py
@@ -13,7 +13,7 @@ import hashlib
 import http.client
 import httplib2
 import json
-import logging
+import logging as stdliblog
 import os
 import pprint
 import re
@@ -33,20 +33,21 @@ from .collection import CollectionReader, CollectionWriter, ResumableCollectionW
 from arvados.keep import *
 from arvados.stream import *
 from .arvfile import StreamFileReader
+from .logging import log_format, log_date_format, log_handler
 from .retry import RetryLoop
 import arvados.errors as errors
 import arvados.util as util
 
+# Override logging module pulled in via `from ... import *`
+# so users can `import arvados.logging`.
+logging = sys.modules['arvados.logging']
+
 # Set up Arvados logging based on the user's configuration.
 # All Arvados code should log under the arvados hierarchy.
-log_format = '%(asctime)s %(name)s[%(process)d] %(levelname)s: %(message)s'
-log_date_format = '%Y-%m-%d %H:%M:%S'
-log_handler = logging.StreamHandler()
-log_handler.setFormatter(logging.Formatter(log_format, log_date_format))
-logger = logging.getLogger('arvados')
+logger = stdliblog.getLogger('arvados')
 logger.addHandler(log_handler)
-logger.setLevel(logging.DEBUG if config.get('ARVADOS_DEBUG')
-                else logging.WARNING)
+logger.setLevel(stdliblog.DEBUG if config.get('ARVADOS_DEBUG')
+                else stdliblog.WARNING)
 
 def task_set_output(self, s, num_retries=5):
     for tries_left in RetryLoop(num_retries=num_retries, backoff_start=0):
diff --git a/sdk/python/arvados/api.py b/sdk/python/arvados/api.py
index 2ddce8e4c..a7f383759 100644
--- a/sdk/python/arvados/api.py
+++ b/sdk/python/arvados/api.py
@@ -23,6 +23,7 @@ import re
 import socket
 import ssl
 import sys
+import threading
 import time
 import types
 
@@ -35,8 +36,10 @@ from . import errors
 from . import retry
 from . import util
 from . import cache
+from .logging import GoogleHTTPClientFilter, log_handler
 
 _logger = logging.getLogger('arvados.api')
+_googleapiclient_log_lock = threading.Lock()
 
 MAX_IDLE_CONNECTION_DURATION = 30
 
@@ -252,14 +255,44 @@ def api_client(
         http.timeout = timeout
     http = _patch_http_request(http, token, num_retries)
 
-    svc = apiclient_discovery.build(
-        'arvados', version,
-        cache_discovery=False,
-        discoveryServiceUrl=discoveryServiceUrl,
-        http=http,
-        num_retries=num_retries,
-        **kwargs,
+    # The first time a client is instantiated, temporarily route
+    # googleapiclient.http retry logs if they're not already. These are
+    # important because temporary problems fetching the discovery document
+    # can cause clients to appear to hang early. This can be removed after
+    # we have a more general story for handling googleapiclient logs (#20521).
+    client_logger = logging.getLogger('googleapiclient.http')
+    # "first time a client is instantiated" = thread that acquires this lock
+    # It is never released.
+    # googleapiclient sets up its own NullHandler so we detect if logging is
+    # configured by looking for a real handler anywhere in the hierarchy.
+    client_logger_unconfigured = _googleapiclient_log_lock.acquire(blocking=False) and all(
+        isinstance(handler, logging.NullHandler)
+        for logger_name in ['', 'googleapiclient', 'googleapiclient.http']
+        for handler in logging.getLogger(logger_name).handlers
     )
+    if client_logger_unconfigured:
+        client_level = client_logger.level
+        client_filter = GoogleHTTPClientFilter()
+        client_logger.addFilter(client_filter)
+        client_logger.addHandler(log_handler)
+        if logging.NOTSET < client_level < client_filter.retry_levelno:
+            client_logger.setLevel(client_level)
+        else:
+            client_logger.setLevel(client_filter.retry_levelno)
+    try:
+        svc = apiclient_discovery.build(
+            'arvados', version,
+            cache_discovery=False,
+            discoveryServiceUrl=discoveryServiceUrl,
+            http=http,
+            num_retries=num_retries,
+            **kwargs,
+        )
+    finally:
+        if client_logger_unconfigured:
+            client_logger.removeHandler(log_handler)
+            client_logger.removeFilter(client_filter)
+            client_logger.setLevel(client_level)
     svc.api_token = token
     svc.insecure = insecure
     svc.request_id = request_id
diff --git a/sdk/python/arvados/logging.py b/sdk/python/arvados/logging.py
new file mode 100644
index 000000000..c6371f41b
--- /dev/null
+++ b/sdk/python/arvados/logging.py
@@ -0,0 +1,31 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+"""Logging utilities for Arvados clients"""
+
+import logging
+
+log_format = '%(asctime)s %(name)s[%(process)d] %(levelname)s: %(message)s'
+log_date_format = '%Y-%m-%d %H:%M:%S'
+log_handler = logging.StreamHandler()
+log_handler.setFormatter(logging.Formatter(log_format, log_date_format))
+
+class GoogleHTTPClientFilter:
+    """Common googleapiclient.http log filters for Arvados clients
+
+    This filter makes `googleapiclient.http` log messages more useful for
+    typical Arvados applications. Currently it only changes the level of
+    retry messages (to INFO by default), but its functionality may be
+    extended in the future. Typical usage looks like:
+
+        logging.getLogger('googleapiclient.http').addFilter(GoogleHTTPClientFilter())
+    """
+    def __init__(self, *, retry_level='INFO'):
+        self.retry_levelname = retry_level
+        self.retry_levelno = getattr(logging, retry_level)
+
+    def filter(self, record):
+        if record.msg.startswith(('Sleeping ', 'Retry ')):
+            record.levelname = self.retry_levelname
+            record.levelno = self.retry_levelno
+        return True
diff --git a/sdk/python/tests/test_api.py b/sdk/python/tests/test_api.py
index 8667d5160..0f85e5520 100644
--- a/sdk/python/tests/test_api.py
+++ b/sdk/python/tests/test_api.py
@@ -11,6 +11,7 @@ import contextlib
 import httplib2
 import itertools
 import json
+import logging
 import mimetypes
 import os
 import socket
@@ -29,8 +30,10 @@ from arvados.api import (
     normalize_api_kwargs,
     api_kwargs_from_config,
     OrderedJsonModel,
+    _googleapiclient_log_lock,
 )
 from .arvados_testutil import fake_httplib2_response, mock_api_responses, queue_with
+import httplib2.error
 
 if not mimetypes.inited:
     mimetypes.init()
@@ -391,6 +394,46 @@ class ArvadosApiTest(run_test_server.TestCaseWithServers):
                 args[arg_index] = arg_value
                 api_client(*args, insecure=True)
 
+    def test_initial_retry_logs(self):
+        try:
+            _googleapiclient_log_lock.release()
+        except RuntimeError:
+            # Lock was never acquired - that's the state we want anyway
+            pass
+        real_logger = logging.getLogger('googleapiclient.http')
+        mock_logger = mock.Mock(wraps=real_logger)
+        mock_logger.handlers = logging.getLogger('googleapiclient').handlers
+        mock_logger.level = logging.NOTSET
+        with mock.patch('logging.getLogger', return_value=mock_logger), \
+             mock.patch('time.sleep'), \
+             self.assertLogs(real_logger, 'INFO') as actual_logs:
+            try:
+                api_client('v1', 'https://test.invalid/', 'NoToken', num_retries=1)
+            except httplib2.error.ServerNotFoundError:
+                pass
+        mock_logger.addFilter.assert_called()
+        mock_logger.addHandler.assert_called()
+        mock_logger.setLevel.assert_called()
+        mock_logger.removeHandler.assert_called()
+        mock_logger.removeFilter.assert_called()
+        self.assertRegex(actual_logs.output[0], r'^INFO:googleapiclient\.http:Sleeping \d')
+
+    def test_configured_logger_untouched(self):
+        real_logger = logging.getLogger('googleapiclient.http')
+        mock_logger = mock.Mock(wraps=real_logger)
+        mock_logger.handlers = logging.getLogger().handlers
+        with mock.patch('logging.getLogger', return_value=mock_logger), \
+             mock.patch('time.sleep'):
+            try:
+                api_client('v1', 'https://test.invalid/', 'NoToken', num_retries=1)
+            except httplib2.error.ServerNotFoundError:
+                pass
+        mock_logger.addFilter.assert_not_called()
+        mock_logger.addHandler.assert_not_called()
+        mock_logger.setLevel.assert_not_called()
+        mock_logger.removeHandler.assert_not_called()
+        mock_logger.removeFilter.assert_not_called()
+
 
 class ConstructNumRetriesTestCase(unittest.TestCase):
     @staticmethod

commit 3cd9690e6ee44ee92ef65c9548389a79547bdff0
Author: Peter Amstutz <peter.amstutz at curii.com>
Date:   Wed Jun 7 13:51:21 2023 -0400

    Merge branch '20548-installer-deploy' refs #20548
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>

diff --git a/tools/salt-install/installer.sh b/tools/salt-install/installer.sh
index e5aff213e..56fd061a4 100755
--- a/tools/salt-install/installer.sh
+++ b/tools/salt-install/installer.sh
@@ -60,6 +60,13 @@ checktools() {
     fi
 }
 
+cleanup() {
+    local NODE=$1
+    local SSH=`ssh_cmd "$NODE"`
+    # Delete the old repository
+    $SSH $DEPLOY_USER@$NODE rm -rf ${GITTARGET}.git ${GITTARGET}
+}
+
 sync() {
     local NODE=$1
     local BRANCH=$2
@@ -68,33 +75,26 @@ sync() {
     # each node, pushing our branch, and updating the checkout.
 
     if [[ "$NODE" != localhost ]] ; then
-		SSH=`ssh_cmd "$NODE"`
-		GIT="eval `git_cmd $NODE`"
-		if ! $SSH $DEPLOY_USER@$NODE test -d ${GITTARGET}.git ; then
-
-			# Initialize the git repository (1st time case).  We're
-			# actually going to make two repositories here because git
-			# will complain if you try to push to a repository with a
-			# checkout. So we're going to create a "bare" repository
-			# and then clone a regular repository (with a checkout)
-			# from that.
-
-			$SSH $DEPLOY_USER@$NODE git init --bare --shared=0600 ${GITTARGET}.git
-			if ! $GIT remote add $NODE $DEPLOY_USER@$NODE:${GITTARGET}.git ; then
-				$GIT remote set-url $NODE $DEPLOY_USER@$NODE:${GITTARGET}.git
-			fi
-			$GIT push $NODE $BRANCH
-			$SSH $DEPLOY_USER@$NODE "umask 0077 && git clone ${GITTARGET}.git ${GITTARGET}"
-		fi
+	SSH=`ssh_cmd "$NODE"`
+	GIT="eval `git_cmd $NODE`"
 
-		# The update case.
-		#
-		# Push to the bare repository on the remote node, then in the
-		# remote node repository with the checkout, pull the branch
-		# from the bare repository.
+	cleanup $NODE
 
-		$GIT push $NODE $BRANCH
-		$SSH $DEPLOY_USER@$NODE "git -C ${GITTARGET} checkout ${BRANCH} && git -C ${GITTARGET} pull"
+	# Update the git remote for the remote repository.
+	if ! $GIT remote add $NODE $DEPLOY_USER@$NODE:${GITTARGET}.git ; then
+	    $GIT remote set-url $NODE $DEPLOY_USER@$NODE:${GITTARGET}.git
+	fi
+
+	# Initialize the git repository.  We're
+	# actually going to make two repositories here because git
+	# will complain if you try to push to a repository with a
+	# checkout. So we're going to create a "bare" repository
+	# and then clone a regular repository (with a checkout)
+	# from that.
+
+	$SSH $DEPLOY_USER@$NODE git init --bare --shared=0600 ${GITTARGET}.git
+	$GIT push $NODE $BRANCH
+	$SSH $DEPLOY_USER@$NODE "umask 0077 && git clone -s ${GITTARGET}.git ${GITTARGET} && git -C ${GITTARGET} checkout ${BRANCH}"
     fi
 }
 
@@ -112,7 +112,7 @@ deploynode() {
     fi
 
     logfile=deploy-${NODE}-$(date -Iseconds).log
-	SSH=`ssh_cmd "$NODE"`
+    SSH=`ssh_cmd "$NODE"`
 
     if [[ "$NODE" = localhost ]] ; then
 	    SUDO=''
@@ -120,8 +120,9 @@ deploynode() {
 			SUDO=sudo
 		fi
 		$SUDO ./provision.sh --config ${CONFIG_FILE} ${ROLES} 2>&1 | tee $logfile
-	else
-		$SSH $DEPLOY_USER@$NODE "cd ${GITTARGET} && sudo ./provision.sh --config ${CONFIG_FILE} ${ROLES}" 2>&1 | tee $logfile
+    else
+	    $SSH $DEPLOY_USER@$NODE "cd ${GITTARGET} && git log -n1 HEAD && sudo ./provision.sh --config ${CONFIG_FILE} ${ROLES}" 2>&1 | tee $logfile
+	    cleanup $NODE
     fi
 }
 

commit dcc59708da5c60cacb6e64553d0a8901c37591ee
Author: Peter Amstutz <peter.amstutz at curii.com>
Date:   Mon Jun 5 18:10:07 2023 -0400

    Initialize rcode variable, refs #20611
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>

diff --git a/sdk/cwl/arvados_cwl/arvcontainer.py b/sdk/cwl/arvados_cwl/arvcontainer.py
index 6180a4c62..a94fdac52 100644
--- a/sdk/cwl/arvados_cwl/arvcontainer.py
+++ b/sdk/cwl/arvados_cwl/arvcontainer.py
@@ -459,6 +459,7 @@ class ArvadosContainer(JobBase):
     def done(self, record):
         outputs = {}
         retried = False
+        rcode = None
         try:
             container = self.arvrunner.api.containers().get(
                 uuid=record["container_uuid"]

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list