fix: guard init-time stdio writes

This commit is contained in:
teknium1 2026-03-14 02:19:46 -07:00
parent 163fa4a9d1
commit 936040d8f7
2 changed files with 56 additions and 12 deletions

View file

@ -110,18 +110,17 @@ HONCHO_TOOL_NAMES = {
class _SafeWriter: class _SafeWriter:
"""Transparent stdout wrapper that catches OSError from broken pipes. """Transparent stdio wrapper that catches OSError from broken pipes.
When hermes-agent runs as a systemd service, Docker container, or headless When hermes-agent runs as a systemd service, Docker container, or headless
daemon, the stdout pipe can become unavailable (idle timeout, buffer daemon, the stdout/stderr pipe can become unavailable (idle timeout, buffer
exhaustion, socket reset). Any print() call then raises exhaustion, socket reset). Any print() call then raises
``OSError: [Errno 5] Input/output error``, which can crash ``OSError: [Errno 5] Input/output error``, which can crash agent setup or
run_conversation() especially via double-fault when the except handler run_conversation() especially via double-fault when an except handler
also tries to print. also tries to print.
This wrapper delegates all writes to the underlying stream and silently This wrapper delegates all writes to the underlying stream and silently
catches OSError. It is installed once at the start of run_conversation() catches OSError. It is transparent when the wrapped stream is healthy.
and is transparent when stdout is healthy (zero overhead on the happy path).
""" """
__slots__ = ("_inner",) __slots__ = ("_inner",)
@ -154,6 +153,14 @@ class _SafeWriter:
return getattr(self._inner, name) return getattr(self._inner, name)
def _install_safe_stdio() -> None:
    """Ensure ``sys.stdout`` and ``sys.stderr`` are wrapped in ``_SafeWriter``.

    A stream that is missing/``None`` or that is already a ``_SafeWriter``
    is left untouched, so calling this function repeatedly never
    double-wraps a stream.
    """
    for attr in ("stdout", "stderr"):
        current = getattr(sys, attr, None)
        # Nothing to wrap, or already guarded: leave the stream alone.
        if current is None or isinstance(current, _SafeWriter):
            continue
        setattr(sys, attr, _SafeWriter(current))
class IterationBudget: class IterationBudget:
"""Thread-safe shared iteration counter for parent and child agents. """Thread-safe shared iteration counter for parent and child agents.
@ -324,6 +331,8 @@ class AIAgent:
honcho_manager: Optional shared HonchoSessionManager owned by the caller. honcho_manager: Optional shared HonchoSessionManager owned by the caller.
honcho_config: Optional HonchoClientConfig corresponding to honcho_manager. honcho_config: Optional HonchoClientConfig corresponding to honcho_manager.
""" """
_install_safe_stdio()
self.model = model self.model = model
self.max_iterations = max_iterations self.max_iterations = max_iterations
# Shared iteration budget — parent creates, children inherit. # Shared iteration budget — parent creates, children inherit.
@ -3868,10 +3877,9 @@ class AIAgent:
Returns: Returns:
Dict: Complete conversation result with final response and message history Dict: Complete conversation result with final response and message history
""" """
# Guard stdout against OSError from broken pipes (systemd/headless/daemon). # Guard stdio against OSError from broken pipes (systemd/headless/daemon).
# Installed once, transparent when stdout is healthy, prevents crash on write. # Installed once, transparent when streams are healthy, prevents crash on write.
if not isinstance(sys.stdout, _SafeWriter): _install_safe_stdio()
sys.stdout = _SafeWriter(sys.stdout)
# Generate unique task_id if not provided to isolate VMs between concurrent tasks # Generate unique task_id if not provided to isolate VMs between concurrent tasks
effective_task_id = task_id or str(uuid.uuid4()) effective_task_id = task_id or str(uuid.uuid4())

View file

@ -1800,12 +1800,13 @@ class TestSafeWriter:
sys.stdout = original sys.stdout = original
def test_installed_in_run_conversation(self, agent): def test_installed_in_run_conversation(self, agent):
"""run_conversation installs _SafeWriter on sys.stdout.""" """run_conversation installs _SafeWriter on stdio."""
import sys import sys
from run_agent import _SafeWriter from run_agent import _SafeWriter
resp = _mock_response(content="Done", finish_reason="stop") resp = _mock_response(content="Done", finish_reason="stop")
agent.client.chat.completions.create.return_value = resp agent.client.chat.completions.create.return_value = resp
original = sys.stdout original_stdout = sys.stdout
original_stderr = sys.stderr
try: try:
with ( with (
patch.object(agent, "_persist_session"), patch.object(agent, "_persist_session"),
@ -1814,6 +1815,41 @@ class TestSafeWriter:
): ):
agent.run_conversation("test") agent.run_conversation("test")
assert isinstance(sys.stdout, _SafeWriter) assert isinstance(sys.stdout, _SafeWriter)
assert isinstance(sys.stderr, _SafeWriter)
finally:
sys.stdout = original_stdout
sys.stderr = original_stderr
def test_installed_before_init_time_honcho_error_prints(self):
    """AIAgent.__init__ wraps stdout before Honcho fallback prints can fire.

    A stdout whose ``write``/``flush`` raise ``OSError`` is installed, and
    Honcho client creation is forced to fail with ``RuntimeError``.
    Construction must still succeed, leave ``sys.stdout`` wrapped in
    ``_SafeWriter`` and leave ``agent._honcho`` as ``None``.
    """
    import sys
    from run_agent import _SafeWriter

    broken = MagicMock()
    broken.write.side_effect = OSError(5, "Input/output error")
    broken.flush.side_effect = OSError(5, "Input/output error")
    original_stdout = sys.stdout
    # AIAgent.__init__ installs the wrapper on stderr as well, so stderr
    # must be saved and restored too or the wrapper leaks into later tests.
    original_stderr = sys.stderr
    sys.stdout = broken
    try:
        hcfg = HonchoClientConfig(enabled=True, api_key="test-honcho-key")
        with (
            patch("run_agent.get_tool_definitions", return_value=_make_tool_defs("web_search")),
            patch("run_agent.check_toolset_requirements", return_value={}),
            patch("run_agent.OpenAI"),
            patch("hermes_cli.config.load_config", return_value={"memory": {}}),
            patch("honcho_integration.client.HonchoClientConfig.from_global_config", return_value=hcfg),
            patch("honcho_integration.client.get_honcho_client", side_effect=RuntimeError("boom")),
        ):
            agent = AIAgent(
                api_key="test-k...7890",
                quiet_mode=True,
                skip_context_files=True,
                skip_memory=False,
            )
        assert isinstance(sys.stdout, _SafeWriter)
        assert agent._honcho is None
    finally:
        sys.stdout = original_stdout
        sys.stderr = original_stderr