From 98a7cfb8f90ad1c62057a09024a78a3e7b7834f7 Mon Sep 17 00:00:00 2001 From: helix4u <4317663+helix4u@users.noreply.github.com> Date: Sun, 28 Jun 2026 15:10:35 -0600 Subject: [PATCH] fix(logging): suppress Windows lock timeout tracebacks --- hermes_logging.py | 36 ++++++++++++++++ tests/test_hermes_logging.py | 79 ++++++++++++++++++++++++++++++++++++ 2 files changed, 115 insertions(+) diff --git a/hermes_logging.py b/hermes_logging.py index 40d7fb44fda..247a6f62334 100644 --- a/hermes_logging.py +++ b/hermes_logging.py @@ -115,6 +115,26 @@ def _safe_stderr(): # type: ignore[return] # Best-effort: if wrapping fails, return the original stream. return stream + +_CONCURRENT_LOG_LOCK_TIMEOUT = "Cannot acquire lock after 20 attempts" + + +def _is_windows_concurrent_log_lock_timeout(exc: BaseException | None) -> bool: + """Return True for concurrent-log-handler's Windows lock timeout. + + On Windows Desktop, slash-command workers and the gateway can all write to + the same rotating log files. ``concurrent-log-handler`` serializes rollover + with a cross-process lock, but when another process holds that lock too + long it raises this RuntimeError. Logging failures should not escape into + Desktop chat output. + """ + return ( + sys.platform == "win32" + and isinstance(exc, RuntimeError) + and _CONCURRENT_LOG_LOCK_TIMEOUT in str(exc) + ) + + # Third-party loggers that are noisy at DEBUG/INFO level. _NOISY_LOGGERS = ( "openai", @@ -494,6 +514,22 @@ class _ManagedRotatingFileHandler(RotatingFileHandler): self._reopen_if_externally_rotated() super().emit(record) + def handleError(self, record: logging.LogRecord) -> None: + """Suppress the known Windows ``concurrent-log-handler`` lock timeout + instead of printing a traceback. + + CLH's own ``emit()`` wraps its body in ``try/except Exception: + self.handleError(record)``, so the ``"Cannot acquire lock after N + attempts"`` RuntimeError raised in ``_do_lock()`` is caught inside CLH + and routed here — it never propagates out of ``super().emit()``. This + override is the single point where that timeout can be silenced before + the stdlib handler prints it to stderr (which, under the Desktop + slash-worker, is captured and surfaced into chat output).""" + exc = sys.exc_info()[1] + if _is_windows_concurrent_log_lock_timeout(exc): + return + super().handleError(record) + def _open(self): stream = super()._open() self._chmod_if_managed() diff --git a/tests/test_hermes_logging.py b/tests/test_hermes_logging.py index e9cc6052500..ca0b13ff5da 100644 --- a/tests/test_hermes_logging.py +++ b/tests/test_hermes_logging.py @@ -818,6 +818,85 @@ class TestAddRotatingHandler: h.close() +class TestWindowsConcurrentLogLockTimeout: + """Windows concurrent-log-handler lock timeouts stay inside logging.""" + + def _make_logger_and_handler(self, log_path: Path): + logger = logging.getLogger(f"_test_concurrent_lock_timeout_{log_path.stem}") + logger.handlers.clear() + logger.propagate = False + logger.setLevel(logging.INFO) + + handler = hermes_logging._ManagedRotatingFileHandler( + str(log_path), maxBytes=1, backupCount=1, encoding="utf-8", + ) + handler.setFormatter(logging.Formatter("%(message)s")) + logger.addHandler(handler) + return logger, handler + + def test_helper_only_matches_windows_concurrent_lock_timeout(self): + with patch.object(hermes_logging.sys, "platform", "win32"): + assert hermes_logging._is_windows_concurrent_log_lock_timeout( + RuntimeError("Cannot acquire lock after 20 attempts") + ) + assert not hermes_logging._is_windows_concurrent_log_lock_timeout( + RuntimeError("some other logging failure") + ) + + with patch.object(hermes_logging.sys, "platform", "linux"): + assert not hermes_logging._is_windows_concurrent_log_lock_timeout( + RuntimeError("Cannot acquire lock after 20 attempts") + ) + + def test_lock_timeout_routed_to_handle_error_is_suppressed(self, tmp_path, capsys): + """Mirror CLH's real control flow. + + ``concurrent-log-handler``'s ``emit()`` wraps its whole body in + ``try/except Exception: self.handleError(record)``, so the lock + RuntimeError raised in ``_do_lock()`` is caught *inside* CLH and routed + to ``handleError`` with the exception live in ``sys.exc_info()``. We + invoke ``handleError`` the same way CLH would and assert no traceback + reaches stderr (the slash-worker surface).""" + logger, handler = self._make_logger_and_handler(tmp_path / "agent.log") + record = logger.makeRecord( + logger.name, logging.INFO, __file__, 0, "force rollover", (), None, + ) + try: + with patch.object(hermes_logging.sys, "platform", "win32"): + try: + raise RuntimeError("Cannot acquire lock after 20 attempts") + except RuntimeError: + handler.handleError(record) + + captured = capsys.readouterr() + assert "Cannot acquire lock after 20 attempts" not in captured.err + assert "--- Logging error ---" not in captured.err + finally: + logger.removeHandler(handler) + handler.close() + + def test_other_errors_routed_to_handle_error_still_print(self, tmp_path, capsys): + """An unrelated failure routed through ``handleError`` must still emit the + normal stdlib logging-error output — only the known CLH timeout is silent.""" + logger, handler = self._make_logger_and_handler(tmp_path / "agent.log") + record = logger.makeRecord( + logger.name, logging.INFO, __file__, 0, "force rollover", (), None, + ) + try: + with patch.object(hermes_logging.sys, "platform", "win32"): + try: + raise RuntimeError("unexpected logging failure") + except RuntimeError: + handler.handleError(record) + + captured = capsys.readouterr() + assert "unexpected logging failure" in captured.err + assert "--- Logging error ---" in captured.err + finally: + logger.removeHandler(handler) + handler.close() + + class TestReadLoggingConfig: """_read_logging_config() reads from config.yaml."""