mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-18 04:41:56 +00:00
Subagent stream drops were spamming the parent terminal with two lines
per blip ('Connection dropped...' + 'Reconnected...') while leaving zero
breadcrumb in agent.log to debug them.
Two underlying bugs, fixed together:
1. quiet_mode raised the run_agent/tools/etc. loggers to ERROR, which
filters records before root-logger file handlers see them. The comment
claimed 'File handlers still capture everything' — that was wrong.
Removed in both run_agent.py and cli.py; console quietness already
comes from hermes_logging not installing a console StreamHandler in
non-verbose mode.
2. The stream-retry blocks emitted two _emit_status calls per drop
('⚠️ Connection dropped... Reconnecting...' + '🔄 Reconnected —
resuming…') with no provider name, so multi-provider sessions had to
dig through agent.log to attribute a drop. Replaced both call sites
with a single _emit_stream_drop helper that emits ONE line naming the
provider and error class, and always writes a structured WARNING to
agent.log with subagent_id, depth, provider, base_url, error_type.
Net UX change: 6 lines per triple-subagent drop → 3 lines, each
naming the provider. agent.log now has a structured breadcrumb per
retry that didn't exist before.
Tests: 6 new tests in tests/run_agent/test_stream_drop_logging.py
covering the logger-level guard, structured WARNING content, single
status line per drop (no Reconnected follow-up), and provider naming.
160 lines
5.8 KiB
Python
160 lines
5.8 KiB
Python
"""Tests for the structured stream-drop log + clearer single-line status.
|
|
|
|
Regression coverage for the change that:
|
|
|
|
1. Removed ``logger.setLevel(ERROR)`` on tools/run_agent/etc. in quiet mode.
|
|
It was clobbering the root logger's file handlers (agent.log/errors.log)
|
|
because Python checks logger-level before handler propagation, so
|
|
subagent stream-drop diagnostics were never written to disk.
|
|
2. Replaced the two ``⚠️ Connection dropped …`` + ``🔄 Reconnected …``
|
|
``_emit_status`` calls with a single ``_emit_stream_drop`` helper that:
|
|
- Always writes a structured WARNING to ``agent.log``.
|
|
- Always emits exactly ONE user-visible status line per drop (no
|
|
follow-up "Reconnected" line) that names the provider and error
|
|
class so multi-provider sessions can attribute it cleanly.
|
|
- Subagent lines get the ``[subagent-N]`` ``log_prefix`` automatically
|
|
via ``_emit_status`` → ``_vprint``.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
from unittest.mock import patch
|
|
|
|
import pytest
|
|
|
|
import run_agent
|
|
from run_agent import AIAgent
|
|
|
|
|
|
def _make_agent() -> AIAgent:
    """Construct the ``AIAgent`` instance shared by the tests in this module.

    The agent is created in quiet mode with a dummy API key and the
    OpenRouter base URL.  ``skip_context_files`` and ``skip_memory`` are
    both enabled — presumably to keep construction free of filesystem and
    memory side effects; confirm against ``AIAgent.__init__``.

    Returns:
        A freshly constructed ``AIAgent``.
    """
    agent_kwargs = dict(
        api_key="test-key",
        base_url="https://openrouter.ai/api/v1",
        quiet_mode=True,
        skip_context_files=True,
        skip_memory=True,
    )
    return AIAgent(**agent_kwargs)
|
|
|
|
|
|
def test_quiet_mode_does_not_clobber_runagent_logger_level():
    """Constructing a quiet-mode agent must leave key loggers at <= WARNING.

    A logger whose own level is raised to ``ERROR`` discards WARNING
    records before they are handed to any handler, including the root
    logger's file handlers (agent.log/errors.log).  Stream-drop diagnostics
    are emitted at WARNING, so each logger checked here must still have an
    effective level that lets WARNING through after the agent is built.
    """
    # Keep a reference for the duration of the checks, as the agent's
    # constructor is what would (incorrectly) adjust the logger levels.
    _agent = _make_agent()

    guarded_loggers = (
        "run_agent",
        "tools",
        "trajectory_compressor",
        "cron",
        "hermes_cli",
    )
    for name in guarded_loggers:
        effective = logging.getLogger(name).getEffectiveLevel()
        assert effective <= logging.WARNING, (
            f"{name} logger blocked at level {effective} — "
            f"file handlers will lose WARNING records"
        )
|
|
|
|
|
|
def test_log_stream_retry_writes_structured_warning(caplog):
    """``_log_stream_retry`` logs a WARNING carrying every structured field.

    The agent is configured as a depth-1 subagent on the ``openrouter``
    provider; the captured record must mention the subagent id, depth,
    provider, base URL and the exception class name.
    """
    agent = _make_agent()
    agent._delegate_depth = 1
    agent._subagent_id = "sa-7-cafef00d"
    agent.provider = "openrouter"

    with caplog.at_level(logging.WARNING, logger="run_agent"):
        agent._log_stream_retry(
            kind="drop mid tool-call",
            error=ConnectionError("Network connection lost."),
            attempt=2,
            max_attempts=3,
            mid_tool_call=True,
        )

    # Render every captured record once so the failure message and the
    # filter below work from the same list.
    messages = [record.getMessage() for record in caplog.records]
    hits = [text for text in messages if "Stream drop mid tool-call" in text]
    assert hits, f"no stream-drop WARNING captured; got {messages}"

    first = hits[0]
    expected_fields = (
        "subagent_id=sa-7-cafef00d",
        "depth=1",
        "provider=openrouter",
        "base_url=https://openrouter.ai/api/v1",
        "error_type=ConnectionError",
    )
    for field in expected_fields:
        assert field in first
|
|
|
|
|
|
@pytest.mark.parametrize("depth", [0, 1])
def test_emit_stream_drop_emits_status_line(depth):
    """A stream drop yields exactly one ``_emit_status`` call at any depth.

    Runs once as a top-level agent (``depth=0``) and once as a subagent
    (``depth=1``, with a subagent id assigned).  ``_emit_status`` is
    replaced by a mock, so only the call count and the text of its first
    positional argument are verified here — any prefixing applied further
    down the output path is outside this test's scope.  There must be no
    separate "Reconnected" follow-up call.
    """
    agent = _make_agent()
    agent._delegate_depth = depth
    if depth > 0:
        agent._subagent_id = "sa-2-cafe"
    agent.provider = "openrouter"

    with patch.object(agent, "_emit_status") as status_mock:
        agent._emit_stream_drop(
            error=ConnectionError("boom"),
            attempt=2,
            max_attempts=3,
            mid_tool_call=True,
        )

    assert status_mock.call_count == 1, (
        f"expected exactly one _emit_status call (no Reconnected follow-up), "
        f"got {status_mock.call_count}"
    )

    # call_args[0] is the positional-args tuple of the (single) call.
    status_line = status_mock.call_args[0][0]
    assert "openrouter" in status_line, (
        f"provider name missing from status: {status_line}"
    )
    for fragment in ("stream drop", "ConnectionError", "retry 2/3"):
        assert fragment in status_line
    # Only one line is emitted, so that line itself has to tell the user
    # that recovery is in progress.
    assert "reconnect" in status_line.lower()
|
|
|
|
|
|
@pytest.mark.parametrize("mid_tool_call", [True, False])
def test_emit_stream_drop_always_writes_to_log(caplog, mid_tool_call):
    """``_emit_stream_drop`` logs a "Stream drop" WARNING in both scenarios.

    The single ``mid_tool_call`` parameter drives two things at once: when
    true the agent is set up as a depth-1 subagent (with a subagent id) and
    the drop is reported as mid tool-call; when false it is a top-level
    agent and the drop is not mid tool-call.  In both cases a record on the
    ``run_agent`` logger must name the error type and provider.
    """
    agent = _make_agent()
    agent._delegate_depth = 1 if mid_tool_call else 0
    agent.provider = "openrouter"
    if mid_tool_call:
        agent._subagent_id = "sa-99-feed"

    with caplog.at_level(logging.WARNING, logger="run_agent"):
        agent._emit_stream_drop(
            error=TimeoutError("read timeout"),
            attempt=2,
            max_attempts=3,
            mid_tool_call=mid_tool_call,
        )

    rendered = (record.getMessage() for record in caplog.records)
    drops = [text for text in rendered if text.startswith("Stream drop")]
    assert drops, "expected at least one Stream drop WARNING record"

    first = drops[0]
    assert "error_type=TimeoutError" in first
    assert "provider=openrouter" in first
|
|
|
|
|
|
def test_emit_stream_drop_provider_named_when_multi_provider():
    """The user-visible status line must name the agent's provider.

    In multi-provider sessions the status line is how a user tells which
    provider a subagent was using when it dropped (the original two-line
    message only said 'provider', forcing a log dive).  The agent here is a
    depth-1 subagent on ``anthropic`` and the drop happens on the final
    attempt (3/3), outside a tool call.
    """
    agent = _make_agent()
    agent._delegate_depth = 1
    agent._subagent_id = "sa-1"
    agent.provider = "anthropic"

    with patch.object(agent, "_emit_status") as mock_emit:
        agent._emit_stream_drop(
            error=ConnectionError("x"),
            attempt=3,
            max_attempts=3,
            mid_tool_call=False,
        )

    # ``call_args`` is None on a mock that was never called; check that
    # first so a regression fails with a readable assertion rather than an
    # AttributeError on ``None.args``.
    assert mock_emit.called, "_emit_stream_drop did not call _emit_status"

    msg = mock_emit.call_args.args[0]
    assert "anthropic" in msg, f"provider name missing from status: {msg}"
|