mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-30 01:41:43 +00:00
fix(compression): notify users when configured aux model fails even if main-model fallback recovers (#16775)
A misconfigured auxiliary.compression.model is a user-fixable problem that silent recovery would hide. The previous retry-on-main logic transparently swallowed aux-model failures whenever the fallback succeeded, leaving the user's broken config in place and racking up future failures.
Track the aux-model failure on the compressor alongside the existing fallback-placeholder fields:
- _last_aux_model_failure_model: str | None
- _last_aux_model_failure_error: str | None
Both are set at the moment the aux model errors (captured before summary_model is cleared for retry), regardless of whether the retry succeeds. Both are cleared at the start of compress() and in on_session_reset() so a clean run doesn't leak stale warnings.
Surface the failure in three places:
- gateway hygiene auto-compress: ℹ note to the platform adapter (thread_id preserved)
- gateway /compress command: ℹ line appended to the reply
- CLI via _emit_warning: deduped on (model, error) so repeat compactions don't spam
Distinct from the existing ⚠️ dropped-turns warning — different severity, different emoji, explicit 'context is intact' reassurance.
This commit is contained in:
parent
c3e3a9c184
commit
6ea5699e3f
6 changed files with 367 additions and 1 deletion
|
|
@@ -181,3 +181,65 @@ async def test_compress_command_appends_warning_when_summary_generation_fails():
|
|||
assert "historical message(s) were removed" in result
|
||||
agent_instance.shutdown_memory_provider.assert_called_once()
|
||||
agent_instance.close.assert_called_once()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_compress_command_surfaces_aux_model_failure_even_when_recovered():
    """/compress reports a failed aux compression model even after recovery.

    Scenario: the compressor records that the configured
    ``auxiliary.compression.model`` failed, yet no fallback placeholder was
    used and no turns were dropped (the retry produced a real summary).
    The reply must still carry an informational note naming the model, the
    error and the config key, without the dropped-turns warning emoji.
    """
    history = _make_history()
    # What a normal, successful compression would leave behind: first and
    # last messages kept, everything in between replaced by a real summary.
    compressed = [
        history[0],
        {"role": "assistant", "content": "summary via main model"},
        history[-1],
    ]
    runner = _make_runner(history)

    agent_instance = MagicMock()
    agent_instance.shutdown_memory_provider = MagicMock()
    agent_instance.close = MagicMock()

    compressor = agent_instance.context_compressor
    compressor.has_content_to_compress.return_value = True
    # Recovery worked, so none of the fallback-placeholder fields are set.
    compressor._last_summary_fallback_used = False
    compressor._last_summary_dropped_count = 0
    compressor._last_summary_error = None
    # ...yet the configured aux model failed before the retry succeeded.
    compressor._last_aux_model_failure_model = "gemini-3-flash-preview"
    compressor._last_aux_model_failure_error = (
        "404 model not found: gemini-3-flash-preview"
    )

    agent_instance.session_id = "sess-1"
    agent_instance._compress_context.return_value = (compressed, "")

    def _rough_tokens(messages):
        # Only the two transcripts this test knows about may be estimated.
        for transcript, tokens in ((history, 100), (compressed, 60)):
            if messages == transcript:
                return tokens
        raise AssertionError(f"unexpected transcript: {messages!r}")

    with (
        patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "***"}),
        patch("gateway.run._resolve_gateway_model", return_value="test-model"),
        patch("run_agent.AIAgent", return_value=agent_instance),
        patch("agent.model_metadata.estimate_messages_tokens_rough", side_effect=_rough_tokens),
    ):
        result = await runner._handle_compress_command(_make_event())

    # The compression itself is reported as successful.
    assert "Compressed:" in result
    # The warning emoji belongs to the dropped-turns case and must be absent.
    assert "⚠️" not in result
    # The info note names the broken model, the error and the config key,
    # and reassures the user that their context is intact.
    for fragment in (
        "ℹ️",
        "gemini-3-flash-preview",
        "404",
        "auxiliary.compression.model",
        "intact",
    ):
        assert fragment in result

    agent_instance.shutdown_memory_provider.assert_called_once()
    agent_instance.close.assert_called_once()
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue