mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-17 04:31:55 +00:00
fix(compression): notify users when configured aux model fails even if main-model fallback recovers (#16775)
A misconfigured auxiliary.compression.model is a user-fixable problem that silent recovery would hide. The previous retry-on-main logic transparently swallowed aux-model failures whenever the fallback succeeded, leaving the user's broken config in place and racking up future failures.
Track the aux-model failure on the compressor alongside the existing fallback-placeholder fields:
- _last_aux_model_failure_model: str | None
- _last_aux_model_failure_error: str | None
Both are set at the moment the aux model errors (captured before summary_model is cleared for retry), regardless of whether the retry succeeds. Cleared at compress() start and on on_session_reset() so a clean run doesn't leak stale warnings.
Surface at three places:
- gateway hygiene auto-compress: ℹ note to the platform adapter (thread_id preserved)
- gateway /compress command: ℹ line appended to the reply
- CLI via _emit_warning: deduped on (model, error) so repeat compactions don't spam
Distinct from the existing ⚠️ dropped-turns warning — different severity, different emoji, explicit 'context is intact' reassurance.
This commit is contained in:
parent
c3e3a9c184
commit
6ea5699e3f
6 changed files with 367 additions and 1 deletions
|
|
@ -285,6 +285,12 @@ class TestSummaryFallbackToMainModel:
|
|||
assert "model" not in mock_call.call_args_list[1].kwargs
|
||||
assert result is not None
|
||||
assert "summary via main model" in result
|
||||
# Aux-model failure is recorded even though retry succeeded — this is
|
||||
# how callers (gateway /compress, CLI warning) know to tell the user
|
||||
# their auxiliary.compression.model setting is broken.
|
||||
assert c._last_aux_model_failure_model == "broken-aux-model"
|
||||
assert c._last_aux_model_failure_error is not None
|
||||
assert "404" in c._last_aux_model_failure_error
|
||||
|
||||
def test_unknown_error_falls_back_to_main_and_succeeds(self):
|
||||
"""Errors that don't match the 404/503/model_not_found fast-path
|
||||
|
|
@ -317,6 +323,10 @@ class TestSummaryFallbackToMainModel:
|
|||
assert "model" not in mock_call.call_args_list[1].kwargs
|
||||
assert result is not None
|
||||
assert "summary via main model" in result
|
||||
# Aux-model failure recorded despite successful recovery
|
||||
assert c._last_aux_model_failure_model == "broken-aux-model"
|
||||
assert c._last_aux_model_failure_error is not None
|
||||
assert "400" in c._last_aux_model_failure_error
|
||||
|
||||
def test_no_fallback_when_summary_model_equals_main_model(self):
|
||||
"""If the aux model IS the main model, there's nowhere to fall back
|
||||
|
|
@ -367,6 +377,97 @@ class TestSummaryFallbackToMainModel:
|
|||
assert c._summary_model_fallen_back is True
|
||||
|
||||
|
||||
class TestAuxModelFallbackSurfacedToCallers:
|
||||
"""When summary_model fails but retry-on-main succeeds, compress() must
|
||||
expose the aux-model failure via _last_aux_model_failure_{model,error}
|
||||
so gateway /compress and CLI callers can warn the user about their
|
||||
broken auxiliary.compression.model config — silent recovery would hide
|
||||
a misconfiguration only the user can fix."""
|
||||
|
||||
def _make_msgs(self):
|
||||
return [
|
||||
{"role": "system", "content": "sys"},
|
||||
{"role": "user", "content": "msg 1"},
|
||||
{"role": "assistant", "content": "msg 2"},
|
||||
{"role": "user", "content": "msg 3"},
|
||||
{"role": "assistant", "content": "msg 4"},
|
||||
{"role": "user", "content": "msg 5"},
|
||||
{"role": "assistant", "content": "msg 6"},
|
||||
{"role": "user", "content": "msg 7"},
|
||||
]
|
||||
|
||||
def test_compress_exposes_aux_failure_fields_after_successful_fallback(self):
|
||||
mock_ok = MagicMock()
|
||||
mock_ok.choices = [MagicMock()]
|
||||
mock_ok.choices[0].message.content = "summary via main"
|
||||
err_400 = Exception("400 provider rejected configured model")
|
||||
err_400.status_code = 400
|
||||
|
||||
with patch("agent.context_compressor.get_model_context_length", return_value=100000):
|
||||
c = ContextCompressor(
|
||||
model="main-model",
|
||||
summary_model_override="broken-aux-model",
|
||||
quiet_mode=True,
|
||||
protect_first_n=2,
|
||||
protect_last_n=2,
|
||||
)
|
||||
|
||||
with patch(
|
||||
"agent.context_compressor.call_llm",
|
||||
side_effect=[err_400, mock_ok],
|
||||
):
|
||||
result = c.compress(self._make_msgs())
|
||||
|
||||
# Recovery succeeded → no fallback placeholder
|
||||
assert c._last_summary_fallback_used is False
|
||||
# But aux-model failure IS recorded for the gateway/CLI warning
|
||||
assert c._last_aux_model_failure_model == "broken-aux-model"
|
||||
assert c._last_aux_model_failure_error is not None
|
||||
assert "400" in c._last_aux_model_failure_error
|
||||
# Result is well-formed with a real summary, not a placeholder
|
||||
assert any(
|
||||
isinstance(m.get("content"), str) and "summary via main" in m["content"]
|
||||
for m in result
|
||||
)
|
||||
|
||||
def test_compress_clears_aux_failure_fields_at_start_of_next_call(self):
|
||||
"""A subsequent successful compression must clear the aux-failure
|
||||
fields so the warning doesn't persist forever."""
|
||||
mock_ok = MagicMock()
|
||||
mock_ok.choices = [MagicMock()]
|
||||
mock_ok.choices[0].message.content = "summary via main"
|
||||
err_400 = Exception("400 aux model busted")
|
||||
err_400.status_code = 400
|
||||
|
||||
with patch("agent.context_compressor.get_model_context_length", return_value=100000):
|
||||
c = ContextCompressor(
|
||||
model="main-model",
|
||||
summary_model_override="broken-aux-model",
|
||||
quiet_mode=True,
|
||||
protect_first_n=2,
|
||||
protect_last_n=2,
|
||||
)
|
||||
|
||||
# Call 1: aux fails, retry-on-main succeeds
|
||||
with patch(
|
||||
"agent.context_compressor.call_llm",
|
||||
side_effect=[err_400, mock_ok],
|
||||
):
|
||||
c.compress(self._make_msgs())
|
||||
assert c._last_aux_model_failure_model == "broken-aux-model"
|
||||
|
||||
# Call 2: clean run on main (summary_model was cleared to "" after
|
||||
# first fallback). Aux-failure fields MUST reset at compress() start
|
||||
# so the old warning state doesn't leak into this call.
|
||||
with patch(
|
||||
"agent.context_compressor.call_llm",
|
||||
return_value=mock_ok,
|
||||
):
|
||||
c.compress(self._make_msgs())
|
||||
assert c._last_aux_model_failure_model is None
|
||||
assert c._last_aux_model_failure_error is None
|
||||
|
||||
|
||||
class TestSummaryFailureTrackingForGatewayWarning:
|
||||
"""When summary generation fails, the compressor must record dropped count
|
||||
+ fallback flag so gateway hygiene & /compress can surface a visible
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue