[arvados] updated: 2.6.2-19-g434336ff5
git repository hosting
git at public.arvados.org
Wed Jun 7 20:33:53 UTC 2023
Summary of changes:
doc/admin/upgrading.html.textile.liquid | 4 ++-
sdk/cwl/arvados_cwl/__init__.py | 10 ++++--
sdk/cwl/arvados_cwl/arvcontainer.py | 1 +
sdk/python/arvados/__init__.py | 17 +++++-----
sdk/python/arvados/api.py | 47 +++++++++++++++++++++++----
sdk/python/arvados/logging.py | 31 ++++++++++++++++++
sdk/python/tests/test_api.py | 43 +++++++++++++++++++++++++
tools/salt-install/installer.sh | 57 +++++++++++++++++----------------
8 files changed, 163 insertions(+), 47 deletions(-)
create mode 100644 sdk/python/arvados/logging.py
via 434336ff51b9e016a87f3005c4022c826d2dde58 (commit)
via 3cd9690e6ee44ee92ef65c9548389a79547bdff0 (commit)
via dcc59708da5c60cacb6e64553d0a8901c37591ee (commit)
from dd0ceb3261e2f28552f11df5fbedb5333a86b522 (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
commit 434336ff51b9e016a87f3005c4022c826d2dde58
Author: Brett Smith <brett.smith at curii.com>
Date: Wed Jun 7 16:28:53 2023 -0400
Merge branch '20613-googleapiclient-init-logs'
Closes #20613.
Arvados-DCO-1.1-Signed-off-by: Brett Smith <brett.smith at curii.com>
diff --git a/doc/admin/upgrading.html.textile.liquid b/doc/admin/upgrading.html.textile.liquid
index 22bc65c40..113b867b0 100644
--- a/doc/admin/upgrading.html.textile.liquid
+++ b/doc/admin/upgrading.html.textile.liquid
@@ -28,7 +28,7 @@ TODO: extract this information based on git commit messages and generate changel
<div class="releasenotes">
</notextile>
-h2(#v2_6_3). v2.6.3 (2023-06-06)
+h2(#v2_6_3). v2.6.3 (2023-06-08)
"previous: Upgrading to 2.6.2":#v2_6_2
@@ -43,6 +43,8 @@ import arvados
arv_client = arvados.api('v1', num_retries=0, ...)
{% endcodeblock %}
+The first time the Python SDK fetches an Arvados API discovery document, it will ensure that @googleapiclient.http@ logs are handled so you have a way to know about early problems that are being retried. If you prefer to handle these logs your own way, just ensure that the @googleapiclient.http@ logger (or a parent logger) has a handler installed before you call any Arvados API client constructor.
+
h2(#v2_6_2). v2.6.2 (2023-05-22)
"previous: Upgrading to 2.6.1":#v2_6_1
diff --git a/sdk/cwl/arvados_cwl/__init__.py b/sdk/cwl/arvados_cwl/__init__.py
index 6a8e320c6..8108934aa 100644
--- a/sdk/cwl/arvados_cwl/__init__.py
+++ b/sdk/cwl/arvados_cwl/__init__.py
@@ -28,6 +28,7 @@ from cwltool.utils import adjustFileObjs, adjustDirObjs, get_listing
import arvados
import arvados.config
+import arvados.logging
from arvados.keep import KeepClient
from arvados.errors import ApiError
import arvados.commands._util as arv_cmd
@@ -377,9 +378,11 @@ def main(args=sys.argv[1:],
logger.setLevel(logging.INFO)
logging.getLogger('arvados').setLevel(logging.INFO)
logging.getLogger('arvados.keep').setLevel(logging.WARNING)
- # API retries are at WARNING level and can be noisy, but as long as
+ # API retries are filtered to the INFO level and can be noisy, but as long as
# they succeed we don't need to see warnings about it.
- logging.getLogger('googleapiclient').setLevel(logging.ERROR)
+ googleapiclient_http_logger = logging.getLogger('googleapiclient.http')
+ googleapiclient_http_logger.addFilter(arvados.logging.GoogleHTTPClientFilter())
+ googleapiclient_http_logger.setLevel(logging.WARNING)
if arvargs.debug:
logger.setLevel(logging.DEBUG)
@@ -387,7 +390,8 @@ def main(args=sys.argv[1:],
# In debug mode show logs about retries, but we arn't
# debugging the google client so we don't need to see
# everything.
- logging.getLogger('googleapiclient').setLevel(logging.WARNING)
+ googleapiclient_http_logger.setLevel(logging.NOTSET)
+ logging.getLogger('googleapiclient').setLevel(logging.INFO)
if arvargs.quiet:
logger.setLevel(logging.WARN)
diff --git a/sdk/python/arvados/__init__.py b/sdk/python/arvados/__init__.py
index c8c702980..39fdb1100 100644
--- a/sdk/python/arvados/__init__.py
+++ b/sdk/python/arvados/__init__.py
@@ -13,7 +13,7 @@ import hashlib
import http.client
import httplib2
import json
-import logging
+import logging as stdliblog
import os
import pprint
import re
@@ -33,20 +33,21 @@ from .collection import CollectionReader, CollectionWriter, ResumableCollectionW
from arvados.keep import *
from arvados.stream import *
from .arvfile import StreamFileReader
+from .logging import log_format, log_date_format, log_handler
from .retry import RetryLoop
import arvados.errors as errors
import arvados.util as util
+# Override logging module pulled in via `from ... import *`
+# so users can `import arvados.logging`.
+logging = sys.modules['arvados.logging']
+
# Set up Arvados logging based on the user's configuration.
# All Arvados code should log under the arvados hierarchy.
-log_format = '%(asctime)s %(name)s[%(process)d] %(levelname)s: %(message)s'
-log_date_format = '%Y-%m-%d %H:%M:%S'
-log_handler = logging.StreamHandler()
-log_handler.setFormatter(logging.Formatter(log_format, log_date_format))
-logger = logging.getLogger('arvados')
+logger = stdliblog.getLogger('arvados')
logger.addHandler(log_handler)
-logger.setLevel(logging.DEBUG if config.get('ARVADOS_DEBUG')
- else logging.WARNING)
+logger.setLevel(stdliblog.DEBUG if config.get('ARVADOS_DEBUG')
+ else stdliblog.WARNING)
def task_set_output(self, s, num_retries=5):
for tries_left in RetryLoop(num_retries=num_retries, backoff_start=0):
diff --git a/sdk/python/arvados/api.py b/sdk/python/arvados/api.py
index 2ddce8e4c..a7f383759 100644
--- a/sdk/python/arvados/api.py
+++ b/sdk/python/arvados/api.py
@@ -23,6 +23,7 @@ import re
import socket
import ssl
import sys
+import threading
import time
import types
@@ -35,8 +36,10 @@ from . import errors
from . import retry
from . import util
from . import cache
+from .logging import GoogleHTTPClientFilter, log_handler
_logger = logging.getLogger('arvados.api')
+_googleapiclient_log_lock = threading.Lock()
MAX_IDLE_CONNECTION_DURATION = 30
@@ -252,14 +255,44 @@ def api_client(
http.timeout = timeout
http = _patch_http_request(http, token, num_retries)
- svc = apiclient_discovery.build(
- 'arvados', version,
- cache_discovery=False,
- discoveryServiceUrl=discoveryServiceUrl,
- http=http,
- num_retries=num_retries,
- **kwargs,
+ # The first time a client is instantiated, temporarily route
+ # googleapiclient.http retry logs if they're not already. These are
+ # important because temporary problems fetching the discovery document
+ # can cause clients to appear to hang early. This can be removed after
+ # we have a more general story for handling googleapiclient logs (#20521).
+ client_logger = logging.getLogger('googleapiclient.http')
+ # "first time a client is instantiated" = thread that acquires this lock
+ # It is never released.
+ # googleapiclient sets up its own NullHandler so we detect if logging is
+ # configured by looking for a real handler anywhere in the hierarchy.
+ client_logger_unconfigured = _googleapiclient_log_lock.acquire(blocking=False) and all(
+ isinstance(handler, logging.NullHandler)
+ for logger_name in ['', 'googleapiclient', 'googleapiclient.http']
+ for handler in logging.getLogger(logger_name).handlers
)
+ if client_logger_unconfigured:
+ client_level = client_logger.level
+ client_filter = GoogleHTTPClientFilter()
+ client_logger.addFilter(client_filter)
+ client_logger.addHandler(log_handler)
+ if logging.NOTSET < client_level < client_filter.retry_levelno:
+ client_logger.setLevel(client_level)
+ else:
+ client_logger.setLevel(client_filter.retry_levelno)
+ try:
+ svc = apiclient_discovery.build(
+ 'arvados', version,
+ cache_discovery=False,
+ discoveryServiceUrl=discoveryServiceUrl,
+ http=http,
+ num_retries=num_retries,
+ **kwargs,
+ )
+ finally:
+ if client_logger_unconfigured:
+ client_logger.removeHandler(log_handler)
+ client_logger.removeFilter(client_filter)
+ client_logger.setLevel(client_level)
svc.api_token = token
svc.insecure = insecure
svc.request_id = request_id
diff --git a/sdk/python/arvados/logging.py b/sdk/python/arvados/logging.py
new file mode 100644
index 000000000..c6371f41b
--- /dev/null
+++ b/sdk/python/arvados/logging.py
@@ -0,0 +1,31 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+"""Logging utilities for Arvados clients"""
+
+import logging
+
+log_format = '%(asctime)s %(name)s[%(process)d] %(levelname)s: %(message)s'
+log_date_format = '%Y-%m-%d %H:%M:%S'
+log_handler = logging.StreamHandler()
+log_handler.setFormatter(logging.Formatter(log_format, log_date_format))
+
+class GoogleHTTPClientFilter:
+ """Common googleapiclient.http log filters for Arvados clients
+
+ This filter makes `googleapiclient.http` log messages more useful for
+ typical Arvados applications. Currently it only changes the level of
+ retry messages (to INFO by default), but its functionality may be
+ extended in the future. Typical usage looks like:
+
+ logging.getLogger('googleapiclient.http').addFilter(GoogleHTTPClientFilter())
+ """
+ def __init__(self, *, retry_level='INFO'):
+ self.retry_levelname = retry_level
+ self.retry_levelno = getattr(logging, retry_level)
+
+ def filter(self, record):
+ if record.msg.startswith(('Sleeping ', 'Retry ')):
+ record.levelname = self.retry_levelname
+ record.levelno = self.retry_levelno
+ return True
diff --git a/sdk/python/tests/test_api.py b/sdk/python/tests/test_api.py
index 8667d5160..0f85e5520 100644
--- a/sdk/python/tests/test_api.py
+++ b/sdk/python/tests/test_api.py
@@ -11,6 +11,7 @@ import contextlib
import httplib2
import itertools
import json
+import logging
import mimetypes
import os
import socket
@@ -29,8 +30,10 @@ from arvados.api import (
normalize_api_kwargs,
api_kwargs_from_config,
OrderedJsonModel,
+ _googleapiclient_log_lock,
)
from .arvados_testutil import fake_httplib2_response, mock_api_responses, queue_with
+import httplib2.error
if not mimetypes.inited:
mimetypes.init()
@@ -391,6 +394,46 @@ class ArvadosApiTest(run_test_server.TestCaseWithServers):
args[arg_index] = arg_value
api_client(*args, insecure=True)
+ def test_initial_retry_logs(self):
+ try:
+ _googleapiclient_log_lock.release()
+ except RuntimeError:
+ # Lock was never acquired - that's the state we want anyway
+ pass
+ real_logger = logging.getLogger('googleapiclient.http')
+ mock_logger = mock.Mock(wraps=real_logger)
+ mock_logger.handlers = logging.getLogger('googleapiclient').handlers
+ mock_logger.level = logging.NOTSET
+ with mock.patch('logging.getLogger', return_value=mock_logger), \
+ mock.patch('time.sleep'), \
+ self.assertLogs(real_logger, 'INFO') as actual_logs:
+ try:
+ api_client('v1', 'https://test.invalid/', 'NoToken', num_retries=1)
+ except httplib2.error.ServerNotFoundError:
+ pass
+ mock_logger.addFilter.assert_called()
+ mock_logger.addHandler.assert_called()
+ mock_logger.setLevel.assert_called()
+ mock_logger.removeHandler.assert_called()
+ mock_logger.removeFilter.assert_called()
+ self.assertRegex(actual_logs.output[0], r'^INFO:googleapiclient\.http:Sleeping \d')
+
+ def test_configured_logger_untouched(self):
+ real_logger = logging.getLogger('googleapiclient.http')
+ mock_logger = mock.Mock(wraps=real_logger)
+ mock_logger.handlers = logging.getLogger().handlers
+ with mock.patch('logging.getLogger', return_value=mock_logger), \
+ mock.patch('time.sleep'):
+ try:
+ api_client('v1', 'https://test.invalid/', 'NoToken', num_retries=1)
+ except httplib2.error.ServerNotFoundError:
+ pass
+ mock_logger.addFilter.assert_not_called()
+ mock_logger.addHandler.assert_not_called()
+ mock_logger.setLevel.assert_not_called()
+ mock_logger.removeHandler.assert_not_called()
+ mock_logger.removeFilter.assert_not_called()
+
class ConstructNumRetriesTestCase(unittest.TestCase):
@staticmethod
commit 3cd9690e6ee44ee92ef65c9548389a79547bdff0
Author: Peter Amstutz <peter.amstutz at curii.com>
Date: Wed Jun 7 13:51:21 2023 -0400
Merge branch '20548-installer-deploy' refs #20548
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>
diff --git a/tools/salt-install/installer.sh b/tools/salt-install/installer.sh
index e5aff213e..56fd061a4 100755
--- a/tools/salt-install/installer.sh
+++ b/tools/salt-install/installer.sh
@@ -60,6 +60,13 @@ checktools() {
fi
}
+cleanup() {
+ local NODE=$1
+ local SSH=`ssh_cmd "$NODE"`
+ # Delete the old repository
+ $SSH $DEPLOY_USER@$NODE rm -rf ${GITTARGET}.git ${GITTARGET}
+}
+
sync() {
local NODE=$1
local BRANCH=$2
@@ -68,33 +75,26 @@ sync() {
# each node, pushing our branch, and updating the checkout.
if [[ "$NODE" != localhost ]] ; then
- SSH=`ssh_cmd "$NODE"`
- GIT="eval `git_cmd $NODE`"
- if ! $SSH $DEPLOY_USER@$NODE test -d ${GITTARGET}.git ; then
-
- # Initialize the git repository (1st time case). We're
- # actually going to make two repositories here because git
- # will complain if you try to push to a repository with a
- # checkout. So we're going to create a "bare" repository
- # and then clone a regular repository (with a checkout)
- # from that.
-
- $SSH $DEPLOY_USER@$NODE git init --bare --shared=0600 ${GITTARGET}.git
- if ! $GIT remote add $NODE $DEPLOY_USER@$NODE:${GITTARGET}.git ; then
- $GIT remote set-url $NODE $DEPLOY_USER@$NODE:${GITTARGET}.git
- fi
- $GIT push $NODE $BRANCH
- $SSH $DEPLOY_USER@$NODE "umask 0077 && git clone ${GITTARGET}.git ${GITTARGET}"
- fi
+ SSH=`ssh_cmd "$NODE"`
+ GIT="eval `git_cmd $NODE`"
- # The update case.
- #
- # Push to the bare repository on the remote node, then in the
- # remote node repository with the checkout, pull the branch
- # from the bare repository.
+ cleanup $NODE
- $GIT push $NODE $BRANCH
- $SSH $DEPLOY_USER@$NODE "git -C ${GITTARGET} checkout ${BRANCH} && git -C ${GITTARGET} pull"
+ # Update the git remote for the remote repository.
+ if ! $GIT remote add $NODE $DEPLOY_USER@$NODE:${GITTARGET}.git ; then
+ $GIT remote set-url $NODE $DEPLOY_USER@$NODE:${GITTARGET}.git
+ fi
+
+ # Initialize the git repository. We're
+ # actually going to make two repositories here because git
+ # will complain if you try to push to a repository with a
+ # checkout. So we're going to create a "bare" repository
+ # and then clone a regular repository (with a checkout)
+ # from that.
+
+ $SSH $DEPLOY_USER@$NODE git init --bare --shared=0600 ${GITTARGET}.git
+ $GIT push $NODE $BRANCH
+ $SSH $DEPLOY_USER@$NODE "umask 0077 && git clone -s ${GITTARGET}.git ${GITTARGET} && git -C ${GITTARGET} checkout ${BRANCH}"
fi
}
@@ -112,7 +112,7 @@ deploynode() {
fi
logfile=deploy-${NODE}-$(date -Iseconds).log
- SSH=`ssh_cmd "$NODE"`
+ SSH=`ssh_cmd "$NODE"`
if [[ "$NODE" = localhost ]] ; then
SUDO=''
@@ -120,8 +120,9 @@ deploynode() {
SUDO=sudo
fi
$SUDO ./provision.sh --config ${CONFIG_FILE} ${ROLES} 2>&1 | tee $logfile
- else
- $SSH $DEPLOY_USER@$NODE "cd ${GITTARGET} && sudo ./provision.sh --config ${CONFIG_FILE} ${ROLES}" 2>&1 | tee $logfile
+ else
+ $SSH $DEPLOY_USER@$NODE "cd ${GITTARGET} && git log -n1 HEAD && sudo ./provision.sh --config ${CONFIG_FILE} ${ROLES}" 2>&1 | tee $logfile
+ cleanup $NODE
fi
}
commit dcc59708da5c60cacb6e64553d0a8901c37591ee
Author: Peter Amstutz <peter.amstutz at curii.com>
Date: Mon Jun 5 18:10:07 2023 -0400
Initialize rcode variable, refs #20611
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>
diff --git a/sdk/cwl/arvados_cwl/arvcontainer.py b/sdk/cwl/arvados_cwl/arvcontainer.py
index 6180a4c62..a94fdac52 100644
--- a/sdk/cwl/arvados_cwl/arvcontainer.py
+++ b/sdk/cwl/arvados_cwl/arvcontainer.py
@@ -459,6 +459,7 @@ class ArvadosContainer(JobBase):
def done(self, record):
outputs = {}
retried = False
+ rcode = None
try:
container = self.arvrunner.api.containers().get(
uuid=record["container_uuid"]
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list