mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-09 08:21:50 +00:00
fix(gateway): tolerate Unicode in stderr log handlers on Windows
On Windows with non-UTF-8 console encodings (e.g. cp949, cp1252), StreamHandler.emit() raises UnicodeEncodeError when log messages contain characters outside the console codepage — such as the em-dash (U+2014) in the session hygiene message. This crashed the gateway process silently, leaving no diagnostic output. Fix: add a _safe_stderr() helper that wraps sys.stderr in a TextIOWrapper with encoding='utf-8' and errors='replace' when the console encoding is not UTF-8. Applied to both: (1) the hermes_logging.py setup_verbose_logging() stderr handler, and (2) the gateway/run.py optional stderr handler. The wrapper ensures log lines are never lost — un-encodable characters are replaced with '?' instead of crashing the process. Fixes #40432
This commit is contained in:
parent
fc086da8bd
commit
b08662b782
3 changed files with 111 additions and 3 deletions
|
|
@ -19706,7 +19706,7 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool =
|
|||
# Centralized logging — agent.log (INFO+), errors.log (WARNING+),
|
||||
# and gateway.log (INFO+, gateway-component records only).
|
||||
# Idempotent, so repeated calls from AIAgent.__init__ won't duplicate.
|
||||
from hermes_logging import setup_logging
|
||||
from hermes_logging import setup_logging, _safe_stderr
|
||||
setup_logging(hermes_home=_hermes_home, mode="gateway")
|
||||
|
||||
# Optional stderr handler — level driven by -v/-q flags on the CLI.
|
||||
|
|
@ -19718,7 +19718,7 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool =
|
|||
from agent.redact import RedactingFormatter
|
||||
|
||||
_stderr_level = {0: logging.WARNING, 1: logging.INFO}.get(verbosity, logging.DEBUG)
|
||||
_stderr_handler = logging.StreamHandler()
|
||||
_stderr_handler = logging.StreamHandler(_safe_stderr())
|
||||
_stderr_handler.setLevel(_stderr_level)
|
||||
_stderr_handler.setFormatter(RedactingFormatter('%(levelname)s %(name)s: %(message)s'))
|
||||
logging.getLogger().addHandler(_stderr_handler)
|
||||
|
|
|
|||
|
|
@ -27,8 +27,10 @@ Session context:
|
|||
that thread will include ``[session_id]`` for filtering/correlation.
|
||||
"""
|
||||
|
||||
import io
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
import threading
|
||||
from logging.handlers import RotatingFileHandler
|
||||
from pathlib import Path
|
||||
|
|
@ -50,6 +52,40 @@ _session_context = threading.local()
|
|||
_LOG_FORMAT = "%(asctime)s %(levelname)s%(session_tag)s %(name)s: %(message)s"
|
||||
_LOG_FORMAT_VERBOSE = "%(asctime)s - %(name)s - %(levelname)s%(session_tag)s - %(message)s"
|
||||
|
||||
|
||||
def _safe_stderr(): # type: ignore[return]
|
||||
"""Return a stderr stream that tolerates Unicode on all platforms.
|
||||
|
||||
On Windows the console encoding is often a legacy MBCS codec
|
||||
(cp949, cp1252, …) that raises ``UnicodeEncodeError`` for characters
|
||||
like the em-dash (U+2014). We wrap ``sys.stderr`` in a
|
||||
``TextIOWrapper`` with ``errors='replace'`` so log lines are never
|
||||
lost — un-encodable characters are replaced with ``?`` instead of
|
||||
crashing the process.
|
||||
"""
|
||||
stream = sys.stderr
|
||||
encoding = getattr(stream, "encoding", None) or "utf-8"
|
||||
# Already UTF-8 or surrogate-aware — no wrapping needed.
|
||||
if encoding.lower().replace("-", "") in ("utf8", "utf8surrogateescape"):
|
||||
return stream
|
||||
try:
|
||||
buf = getattr(stream, "buffer", None)
|
||||
if buf is not None:
|
||||
wrapped = io.TextIOWrapper(
|
||||
buf,
|
||||
encoding="utf-8",
|
||||
errors="replace",
|
||||
line_buffering=True,
|
||||
)
|
||||
# Prevent the wrapper from closing the underlying buffer
|
||||
# when it is garbage-collected.
|
||||
wrapped.close = lambda: None # type: ignore[assignment]
|
||||
return wrapped
|
||||
except Exception:
|
||||
pass
|
||||
# Best-effort: if wrapping fails, return the original stream.
|
||||
return stream
|
||||
|
||||
# Third-party loggers that are noisy at DEBUG/INFO level.
|
||||
_NOISY_LOGGERS = (
|
||||
"openai",
|
||||
|
|
@ -298,7 +334,7 @@ def setup_verbose_logging() -> None:
|
|||
if getattr(h, "_hermes_verbose", False):
|
||||
return
|
||||
|
||||
handler = logging.StreamHandler()
|
||||
handler = logging.StreamHandler(_safe_stderr())
|
||||
handler.setLevel(logging.DEBUG)
|
||||
handler.setFormatter(RedactingFormatter(_LOG_FORMAT_VERBOSE, datefmt="%H:%M:%S"))
|
||||
handler._hermes_verbose = True # type: ignore[attr-defined]
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@
|
|||
import logging
|
||||
import os
|
||||
import stat
|
||||
import sys
|
||||
import threading
|
||||
from logging.handlers import RotatingFileHandler
|
||||
from pathlib import Path
|
||||
|
|
@ -997,3 +998,74 @@ class TestExternalRotationRecovery:
|
|||
assert gw_path.exists(), "gateway.log was never recreated"
|
||||
assert "AFTER rotation" in gw_path.read_text()
|
||||
assert "AFTER rotation" not in rotated.read_text()
|
||||
|
||||
|
||||
class TestSafeStderr:
    """Tests for _safe_stderr() — Unicode tolerance on Windows console."""

    def test_returns_stderr_on_utf8_system(self, monkeypatch):
        """A stream reporting no legacy encoding is returned unchanged."""
        import io

        fake_stderr = io.StringIO()
        monkeypatch.setattr(sys, "stderr", fake_stderr)
        # StringIO.encoding is None, which _safe_stderr() treats as UTF-8,
        # so the very same object must come back.
        assert hermes_logging._safe_stderr() is fake_stderr

    def test_wraps_non_utf8_stderr(self, monkeypatch):
        """On non-UTF-8 systems (e.g. Windows cp949), wraps stderr with UTF-8."""
        import io

        raw = io.BytesIO()

        class FakeStderr:
            """Simulates a Windows stderr with legacy encoding."""

            encoding = "cp949"
            buffer = raw

            def write(self, s):
                pass

            def flush(self):
                pass

        fake = FakeStderr()
        monkeypatch.setattr(sys, "stderr", fake)
        result = hermes_logging._safe_stderr()

        # Should be a TextIOWrapper, not the original FakeStderr.
        assert result is not fake
        assert isinstance(result, io.TextIOWrapper)
        assert result.encoding == "utf-8"
        assert result.errors == "replace"

        # Writes must land in the original stream's binary buffer as UTF-8.
        result.write("em\u2014dash\n")
        result.flush()
        assert "em\u2014dash" in raw.getvalue().decode("utf-8")

    def test_handler_emits_unicode_without_crash(self, monkeypatch):
        """StreamHandler with _safe_stderr can emit Unicode messages."""
        import io

        raw = io.BytesIO()

        class AsciiStream:
            """Simulates a stderr whose text layer can only encode ASCII."""

            encoding = "ascii"
            buffer = raw

            def write(self, s):
                # Strict encoding: an em-dash written here would raise.
                raw.write(s.encode("ascii"))

            def flush(self):
                pass

        monkeypatch.setattr(sys, "stderr", AsciiStream())

        # Build the handler through the helper under test so the message
        # goes through the UTF-8 wrapper instead of AsciiStream.write().
        handler = logging.StreamHandler(hermes_logging._safe_stderr())
        handler.setFormatter(logging.Formatter("%(message)s"))
        logger = logging.getLogger("_test_unicode")
        logger.addHandler(handler)
        logger.setLevel(logging.DEBUG)
        # Em-dash U+2014 — the exact character from the bug report.
        message = "Session hygiene: 400 messages — auto-compressing"
        try:
            logger.info(message)
        finally:
            logger.removeHandler(handler)

        # logging swallows emit() errors, so verify the output itself.
        assert message in raw.getvalue().decode("utf-8")
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue