Closes #20613.
Arvados-DCO-1.1-Signed-off-by: Brett Smith <brett.smith@curii.com>
<div class="releasenotes">
</notextile>
-h2(#v2_6_3). v2.6.3 (2023-06-06)
+h2(#v2_6_3). v2.6.3 (2023-06-08)
"previous: Upgrading to 2.6.2":#v2_6_2
arv_client = arvados.api('v1', num_retries=0, ...)
{% endcodeblock %}
+The first time the Python SDK fetches an Arvados API discovery document, it will ensure that @googleapiclient.http@ logs are handled so you have a way to know about early problems that are being retried. If you prefer to handle these logs your own way, just ensure that the @googleapiclient.http@ logger (or a parent logger) has a handler installed before you call any Arvados API client constructor.
+
h2(#v2_6_2). v2.6.2 (2023-05-22)
"previous: Upgrading to 2.6.1":#v2_6_1
import arvados
import arvados.config
+import arvados.logging
from arvados.keep import KeepClient
from arvados.errors import ApiError
import arvados.commands._util as arv_cmd
logger.setLevel(logging.INFO)
logging.getLogger('arvados').setLevel(logging.INFO)
logging.getLogger('arvados.keep').setLevel(logging.WARNING)
- # API retries are at WARNING level and can be noisy, but as long as
+ # API retries are filtered to the INFO level and can be noisy, but as long as
# they succeed we don't need to see warnings about it.
- logging.getLogger('googleapiclient').setLevel(logging.ERROR)
+ googleapiclient_http_logger = logging.getLogger('googleapiclient.http')
+ googleapiclient_http_logger.addFilter(arvados.logging.GoogleHTTPClientFilter())
+ googleapiclient_http_logger.setLevel(logging.WARNING)
if arvargs.debug:
logger.setLevel(logging.DEBUG)
# In debug mode show logs about retries, but we arn't
# debugging the google client so we don't need to see
# everything.
- logging.getLogger('googleapiclient').setLevel(logging.WARNING)
+ googleapiclient_http_logger.setLevel(logging.NOTSET)
+ logging.getLogger('googleapiclient').setLevel(logging.INFO)
if arvargs.quiet:
logger.setLevel(logging.WARN)
import http.client
import httplib2
import json
-import logging
+import logging as stdliblog
import os
import pprint
import re
from arvados.keep import *
from arvados.stream import *
from .arvfile import StreamFileReader
+from .logging import log_format, log_date_format, log_handler
from .retry import RetryLoop
import arvados.errors as errors
import arvados.util as util
+# Override logging module pulled in via `from ... import *`
+# so users can `import arvados.logging`.
+logging = sys.modules['arvados.logging']
+
# Set up Arvados logging based on the user's configuration.
# All Arvados code should log under the arvados hierarchy.
-log_format = '%(asctime)s %(name)s[%(process)d] %(levelname)s: %(message)s'
-log_date_format = '%Y-%m-%d %H:%M:%S'
-log_handler = logging.StreamHandler()
-log_handler.setFormatter(logging.Formatter(log_format, log_date_format))
-logger = logging.getLogger('arvados')
+logger = stdliblog.getLogger('arvados')
logger.addHandler(log_handler)
-logger.setLevel(logging.DEBUG if config.get('ARVADOS_DEBUG')
- else logging.WARNING)
+logger.setLevel(stdliblog.DEBUG if config.get('ARVADOS_DEBUG')
+ else stdliblog.WARNING)
def task_set_output(self, s, num_retries=5):
for tries_left in RetryLoop(num_retries=num_retries, backoff_start=0):
import socket
import ssl
import sys
+import threading
import time
import types
from . import retry
from . import util
from . import cache
+from .logging import GoogleHTTPClientFilter, log_handler
_logger = logging.getLogger('arvados.api')
+_googleapiclient_log_lock = threading.Lock()
MAX_IDLE_CONNECTION_DURATION = 30
http.timeout = timeout
http = _patch_http_request(http, token, num_retries)
- svc = apiclient_discovery.build(
- 'arvados', version,
- cache_discovery=False,
- discoveryServiceUrl=discoveryServiceUrl,
- http=http,
- num_retries=num_retries,
- **kwargs,
+ # The first time a client is instantiated, temporarily route
+ # googleapiclient.http retry logs if they're not already. These are
+ # important because temporary problems fetching the discovery document
+ # can cause clients to appear to hang early. This can be removed after
+ # we have a more general story for handling googleapiclient logs (#20521).
+ client_logger = logging.getLogger('googleapiclient.http')
+ # "first time a client is instantiated" = thread that acquires this lock
+ # It is never released.
+ # googleapiclient sets up its own NullHandler so we detect if logging is
+ # configured by looking for a real handler anywhere in the hierarchy.
+ client_logger_unconfigured = _googleapiclient_log_lock.acquire(blocking=False) and all(
+ isinstance(handler, logging.NullHandler)
+ for logger_name in ['', 'googleapiclient', 'googleapiclient.http']
+ for handler in logging.getLogger(logger_name).handlers
)
+ if client_logger_unconfigured:
+ client_level = client_logger.level
+ client_filter = GoogleHTTPClientFilter()
+ client_logger.addFilter(client_filter)
+ client_logger.addHandler(log_handler)
+ if logging.NOTSET < client_level < client_filter.retry_levelno:
+ client_logger.setLevel(client_level)
+ else:
+ client_logger.setLevel(client_filter.retry_levelno)
+ try:
+ svc = apiclient_discovery.build(
+ 'arvados', version,
+ cache_discovery=False,
+ discoveryServiceUrl=discoveryServiceUrl,
+ http=http,
+ num_retries=num_retries,
+ **kwargs,
+ )
+ finally:
+ if client_logger_unconfigured:
+ client_logger.removeHandler(log_handler)
+ client_logger.removeFilter(client_filter)
+ client_logger.setLevel(client_level)
svc.api_token = token
svc.insecure = insecure
svc.request_id = request_id
--- /dev/null
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+"""Logging utilities for Arvados clients"""
+
+import logging
+
+log_format = '%(asctime)s %(name)s[%(process)d] %(levelname)s: %(message)s'
+log_date_format = '%Y-%m-%d %H:%M:%S'
+log_handler = logging.StreamHandler()
+log_handler.setFormatter(logging.Formatter(log_format, log_date_format))
+
+class GoogleHTTPClientFilter:
+ """Common googleapiclient.http log filters for Arvados clients
+
+ This filter makes `googleapiclient.http` log messages more useful for
+ typical Arvados applications. Currently it only changes the level of
+ retry messages (to INFO by default), but its functionality may be
+ extended in the future. Typical usage looks like:
+
+ logging.getLogger('googleapiclient.http').addFilter(GoogleHTTPClientFilter())
+ """
+ def __init__(self, *, retry_level='INFO'):
+ self.retry_levelname = retry_level
+ self.retry_levelno = getattr(logging, retry_level)
+
+ def filter(self, record):
+ if record.msg.startswith(('Sleeping ', 'Retry ')):
+ record.levelname = self.retry_levelname
+ record.levelno = self.retry_levelno
+ return True
import httplib2
import itertools
import json
+import logging
import mimetypes
import os
import socket
normalize_api_kwargs,
api_kwargs_from_config,
OrderedJsonModel,
+ _googleapiclient_log_lock,
)
from .arvados_testutil import fake_httplib2_response, mock_api_responses, queue_with
+import httplib2.error
if not mimetypes.inited:
mimetypes.init()
args[arg_index] = arg_value
api_client(*args, insecure=True)
+ def test_initial_retry_logs(self):
+ try:
+ _googleapiclient_log_lock.release()
+ except RuntimeError:
+ # Lock was never acquired - that's the state we want anyway
+ pass
+ real_logger = logging.getLogger('googleapiclient.http')
+ mock_logger = mock.Mock(wraps=real_logger)
+ mock_logger.handlers = logging.getLogger('googleapiclient').handlers
+ mock_logger.level = logging.NOTSET
+ with mock.patch('logging.getLogger', return_value=mock_logger), \
+ mock.patch('time.sleep'), \
+ self.assertLogs(real_logger, 'INFO') as actual_logs:
+ try:
+ api_client('v1', 'https://test.invalid/', 'NoToken', num_retries=1)
+ except httplib2.error.ServerNotFoundError:
+ pass
+ mock_logger.addFilter.assert_called()
+ mock_logger.addHandler.assert_called()
+ mock_logger.setLevel.assert_called()
+ mock_logger.removeHandler.assert_called()
+ mock_logger.removeFilter.assert_called()
+ self.assertRegex(actual_logs.output[0], r'^INFO:googleapiclient\.http:Sleeping \d')
+
+ def test_configured_logger_untouched(self):
+ real_logger = logging.getLogger('googleapiclient.http')
+ mock_logger = mock.Mock(wraps=real_logger)
+ mock_logger.handlers = logging.getLogger().handlers
+ with mock.patch('logging.getLogger', return_value=mock_logger), \
+ mock.patch('time.sleep'):
+ try:
+ api_client('v1', 'https://test.invalid/', 'NoToken', num_retries=1)
+ except httplib2.error.ServerNotFoundError:
+ pass
+ mock_logger.addFilter.assert_not_called()
+ mock_logger.addHandler.assert_not_called()
+ mock_logger.setLevel.assert_not_called()
+ mock_logger.removeHandler.assert_not_called()
+ mock_logger.removeFilter.assert_not_called()
+
class ConstructNumRetriesTestCase(unittest.TestCase):
@staticmethod