mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-29 06:31:32 +00:00
feat(agent): buffer retry/fallback status, surface only on terminal failure (#33816)
Users report that the CLI/gateway floods them with confusing retry chatter
during transient failures: a single 429 can produce 10+ "Provider/Endpoint/
Retrying in 5s..." lines before the request eventually succeeds. The same
firehose hits Telegram, Discord, Slack, etc. via _emit_status.
This patch defers all retry/fallback/compression status messages until we
know the outcome:
- if the turn ultimately succeeds (any path: primary recovers, fallback
activates, compression unsticks the request), the buffer is silently
dropped — the user sees nothing.
- if every retry and fallback exhausts and the turn fails, the buffer
is flushed at the terminal-failure return so the user sees the full
retry trace alongside the final error.
Backend logging (agent.log) is unchanged — every emission site still
writes to logger.warning/info, so post-mortem diagnosis is intact.
## What changed
run_agent.py: four new methods on AIAgent:
_buffer_status(msg) — defer an _emit_status call
_buffer_vprint(msg) — defer a _vprint(force=True) line
_clear_status_buffer() — drop pending messages on success
_flush_status_buffer() — replay pending messages on terminal failure
agent/conversation_loop.py:
- converted ~30 mid-process emit/vprint sites in the retry, fallback,
compression, empty-response, and stream-watchdog paths to the buffered
helpers
- added _flush_status_buffer() at every terminal-failure return so users
still see the trace when it actually matters
- added _clear_status_buffer() at the "non-empty assistant content"
point (NOT at "API call returned bytes" — empty responses still loop
through the empty-retry path and would otherwise lose their trace
between iterations)
- silenced the two "(´;ω;`) oops, retrying..." / "(╥_╥) error,
retrying..." spinner final-frame messages — the spinner now stops
cleanly so retries leave no visible residue
agent/chat_completion_helpers.py: same conversion for codex TTFB / stale-
stream / fallback-activation status messages.
agent/stream_diag.py: _emit_stream_drop now buffers instead of emitting
directly.
## Tests
tests/run_agent/test_retry_status_buffer.py: 7 unit tests covering
accumulate→flush, clear-on-success, mixed kinds, empty-buffer no-op,
re-buffer after flush, exception swallowing.
Updated existing tests in 3 files that mocked _emit_status to also mock (or use)
_buffer_status:
- tests/run_agent/test_run_agent.py::test_empty_response_emits_status_for_gateway
- tests/run_agent/test_stream_drop_logging.py (2 tests)
- tests/agent/test_codex_ttfb_watchdog.py (TTFB hint test)
## Validation
Live test: hermes chat -q against an unreachable endpoint with no fallback
exhausts retries and prints the full trace at the end. Same flow against
a working endpoint prints zero retry chatter.
This commit is contained in:
parent
e0572a6def
commit
67011cc0d7
7 changed files with 354 additions and 96 deletions
77
run_agent.py
77
run_agent.py
|
|
@ -801,6 +801,83 @@ class AIAgent:
|
|||
except Exception:
|
||||
logger.debug("status_callback error in _emit_warning", exc_info=True)
|
||||
|
||||
# ── Buffered retry/fallback status ────────────────────────────────────
|
||||
# Retry and fallback chains were flooding the CLI/gateway with status
|
||||
# noise that users found confusing: a single transient 429 could produce
|
||||
# 10+ "Provider/Endpoint/Retrying in 5s..." lines before the request
|
||||
# eventually succeeded. The buffered helpers below capture these
|
||||
# status messages instead of emitting them immediately. They are
|
||||
# flushed (shown to the user) ONLY when every retry and fallback has
|
||||
# been exhausted; on success they are silently dropped. Backend logs
|
||||
# (agent.log) are unaffected — every individual emission site still
|
||||
# writes to ``logger.warning`` / ``logger.info`` for diagnosis.
|
||||
|
||||
def _buffer_status(self, message: str) -> None:
|
||||
"""Buffer a retry/fallback status message.
|
||||
|
||||
Stored as a (kind, text) tuple where ``kind`` is one of:
|
||||
- ``"status"`` -> replays via ``_emit_status``
|
||||
- ``"vprint"`` -> replays via ``_vprint(force=True)``
|
||||
- ``"warn"`` -> replays via ``_emit_warning``
|
||||
Used to defer noisy retry chatter until we know whether the
|
||||
turn ultimately recovered or failed.
|
||||
"""
|
||||
try:
|
||||
buf = getattr(self, "_retry_status_buffer", None)
|
||||
if buf is None:
|
||||
buf = []
|
||||
self._retry_status_buffer = buf
|
||||
buf.append(("status", message))
|
||||
except Exception:
|
||||
# Never break the retry loop on a buffer hiccup.
|
||||
pass
|
||||
|
||||
def _buffer_vprint(self, message: str) -> None:
|
||||
"""Buffer a vprint(force=True) retry/fallback line."""
|
||||
try:
|
||||
buf = getattr(self, "_retry_status_buffer", None)
|
||||
if buf is None:
|
||||
buf = []
|
||||
self._retry_status_buffer = buf
|
||||
buf.append(("vprint", message))
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def _clear_status_buffer(self) -> None:
|
||||
"""Drop buffered retry messages — call on successful recovery."""
|
||||
try:
|
||||
buf = getattr(self, "_retry_status_buffer", None)
|
||||
if buf:
|
||||
buf.clear()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def _flush_status_buffer(self) -> None:
|
||||
"""Emit buffered retry messages — call on terminal failure.
|
||||
|
||||
Surfaces the full retry/fallback trace so the user can see what
|
||||
was tried before the turn gave up.
|
||||
"""
|
||||
try:
|
||||
buf = getattr(self, "_retry_status_buffer", None)
|
||||
if not buf:
|
||||
return
|
||||
# Drain first so a callback exception doesn't double-emit.
|
||||
messages = list(buf)
|
||||
buf.clear()
|
||||
for kind, msg in messages:
|
||||
try:
|
||||
if kind == "status":
|
||||
self._emit_status(msg)
|
||||
elif kind == "warn":
|
||||
self._emit_warning(msg)
|
||||
else:
|
||||
self._vprint(f"{self.log_prefix}{msg}", force=True)
|
||||
except Exception:
|
||||
pass
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def _disable_codex_reasoning_replay(
|
||||
self,
|
||||
messages: Optional[List[Dict[str, Any]]] = None,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue