fix(gateway): tolerate Unicode in stderr log handlers on Windows

On Windows with non-UTF-8 console encodings (e.g. cp949, cp1252),
StreamHandler.emit() raises UnicodeEncodeError when log messages contain
characters outside the console codepage — such as the em-dash (U+2014)
in the session hygiene message.

This crashed the gateway process silently, leaving no diagnostic output.

Fix: add _safe_stderr() helper that wraps sys.stderr in a TextIOWrapper
with encoding='utf-8' and errors='replace' when the console encoding
is not UTF-8.  Applied to both:
- hermes_logging.py setup_verbose_logging() stderr handler
- gateway/run.py optional stderr handler

The wrapper ensures log lines are never lost — un-encodable characters
are replaced with '?' instead of crashing the process.

Fixes #40432
This commit is contained in:
annguyenNous 2026-06-06 19:54:58 +07:00 committed by Teknium
parent fc086da8bd
commit b08662b782
3 changed files with 111 additions and 3 deletions

View file

@ -19706,7 +19706,7 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool =
# Centralized logging — agent.log (INFO+), errors.log (WARNING+),
# and gateway.log (INFO+, gateway-component records only).
# Idempotent, so repeated calls from AIAgent.__init__ won't duplicate.
from hermes_logging import setup_logging
from hermes_logging import setup_logging, _safe_stderr
setup_logging(hermes_home=_hermes_home, mode="gateway")
# Optional stderr handler — level driven by -v/-q flags on the CLI.
@ -19718,7 +19718,7 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool =
from agent.redact import RedactingFormatter
_stderr_level = {0: logging.WARNING, 1: logging.INFO}.get(verbosity, logging.DEBUG)
_stderr_handler = logging.StreamHandler()
_stderr_handler = logging.StreamHandler(_safe_stderr())
_stderr_handler.setLevel(_stderr_level)
_stderr_handler.setFormatter(RedactingFormatter('%(levelname)s %(name)s: %(message)s'))
logging.getLogger().addHandler(_stderr_handler)

View file

@ -27,8 +27,10 @@ Session context:
that thread will include ``[session_id]`` for filtering/correlation.
"""
import io
import logging
import os
import sys
import threading
from logging.handlers import RotatingFileHandler
from pathlib import Path
@ -50,6 +52,40 @@ _session_context = threading.local()
_LOG_FORMAT = "%(asctime)s %(levelname)s%(session_tag)s %(name)s: %(message)s"
_LOG_FORMAT_VERBOSE = "%(asctime)s - %(name)s - %(levelname)s%(session_tag)s - %(message)s"
def _safe_stderr():  # type: ignore[return]
    """Return a stderr stream that tolerates Unicode on all platforms.

    On Windows the console encoding is often a legacy MBCS codec
    (cp949, cp1252, ...) that raises ``UnicodeEncodeError`` for characters
    like the em-dash (U+2014).  When ``sys.stderr`` reports a non-UTF-8
    encoding and exposes a binary ``buffer``, that buffer is wrapped in a
    line-buffered ``io.TextIOWrapper`` created with ``encoding='utf-8'``
    and ``errors='replace'``, so writing a log line never raises an
    encoding error: anything the codec cannot encode is replaced with
    ``?`` instead.

    Returns:
        ``sys.stderr`` itself when its encoding already resolves to UTF-8
        (any spelling the codec registry accepts, e.g. ``utf8`` or
        ``UTF_8``), when it reports no encoding, when it has no
        ``buffer`` attribute, or when wrapping fails; otherwise the new
        wrapper.  Calling ``close()`` on the wrapper is a no-op, so the
        buffer it shares with ``sys.stderr`` stays open.
    """
    # Function-scope import so the helper does not depend on the module's
    # top-level import block.
    import codecs

    stream = sys.stderr
    encoding = getattr(stream, "encoding", None) or "utf-8"

    # Normalise through the codec registry instead of comparing strings by
    # hand, so every UTF-8 alias is recognised.  Unknown or non-string
    # encodings are treated as "not UTF-8" and fall through to wrapping.
    try:
        already_utf8 = codecs.lookup(encoding).name == "utf-8"
    except (LookupError, TypeError):
        already_utf8 = False
    if already_utf8:
        return stream

    buf = getattr(stream, "buffer", None)
    if buf is None:
        # Nothing to wrap (no binary layer): hand back the original stream.
        return stream

    try:
        wrapped = io.TextIOWrapper(
            buf,
            encoding="utf-8",
            errors="replace",
            line_buffering=True,
        )
        # Prevent the wrapper from closing the underlying buffer
        # when it is garbage-collected.
        wrapped.close = lambda: None  # type: ignore[assignment]
    except Exception:
        # Deliberate best-effort: logging setup must never fail because the
        # stream could not be wrapped, so fall back to the original stream.
        return stream
    return wrapped
# Third-party loggers that are noisy at DEBUG/INFO level.
_NOISY_LOGGERS = (
"openai",
@ -298,7 +334,7 @@ def setup_verbose_logging() -> None:
if getattr(h, "_hermes_verbose", False):
return
handler = logging.StreamHandler()
handler = logging.StreamHandler(_safe_stderr())
handler.setLevel(logging.DEBUG)
handler.setFormatter(RedactingFormatter(_LOG_FORMAT_VERBOSE, datefmt="%H:%M:%S"))
handler._hermes_verbose = True # type: ignore[attr-defined]

View file

@ -3,6 +3,7 @@
import logging
import os
import stat
import sys
import threading
from logging.handlers import RotatingFileHandler
from pathlib import Path
@ -997,3 +998,74 @@ class TestExternalRotationRecovery:
assert gw_path.exists(), "gateway.log was never recreated"
assert "AFTER rotation" in gw_path.read_text()
assert "AFTER rotation" not in rotated.read_text()
class TestSafeStderr:
    """Tests for _safe_stderr() — Unicode tolerance on Windows console."""

    @staticmethod
    def _fake_stderr(encoding):
        """Build a minimal stderr stand-in that reports *encoding*.

        Each instance owns a fresh ``BytesIO`` as its ``buffer`` so tests
        cannot leak bytes into one another.
        """
        import io

        class FakeStderr:
            """Simulates a text stderr layered over a binary buffer."""

            def __init__(self):
                self.encoding = encoding
                self.buffer = io.BytesIO()

            def write(self, s):
                pass

            def flush(self):
                pass

        return FakeStderr()

    def test_returns_stderr_on_utf8_system(self, monkeypatch):
        """On UTF-8 systems, _safe_stderr() returns sys.stderr unchanged."""
        import io

        fake = self._fake_stderr("utf-8")
        monkeypatch.setattr(sys, "stderr", fake)
        assert hermes_logging._safe_stderr() is fake

        # A stream that reports no encoding (StringIO.encoding is None) is
        # also handed back as-is.
        plain = io.StringIO()
        monkeypatch.setattr(sys, "stderr", plain)
        assert hermes_logging._safe_stderr() is plain

    def test_wraps_non_utf8_stderr(self, monkeypatch):
        """On non-UTF-8 systems (e.g. Windows cp949), wraps stderr with UTF-8."""
        import io

        fake = self._fake_stderr("cp949")
        monkeypatch.setattr(sys, "stderr", fake)

        result = hermes_logging._safe_stderr()

        # Should be a TextIOWrapper, not the original fake stream.
        assert isinstance(result, io.TextIOWrapper)
        assert result.encoding == "utf-8"
        assert result.errors == "replace"

    def test_handler_emits_unicode_without_crash(self, monkeypatch):
        """StreamHandler built on _safe_stderr() writes Unicode messages."""
        fake = self._fake_stderr("ascii")
        monkeypatch.setattr(sys, "stderr", fake)

        handler = logging.StreamHandler(hermes_logging._safe_stderr())
        handler.setFormatter(logging.Formatter("%(message)s"))
        logger = logging.getLogger("_test_unicode")
        logger.addHandler(handler)
        logger.setLevel(logging.DEBUG)

        # Em-dash U+2014 — the exact character from the bug report
        message = "Session hygiene: 400 messages — auto-compressing"
        try:
            logger.info(message)
        finally:
            logger.removeHandler(handler)

        # logging swallows emit() errors, so "did not raise" proves nothing;
        # check that the line actually reached the underlying buffer.
        assert message in fake.buffer.getvalue().decode("utf-8")