mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-30 01:41:43 +00:00
fix(compression): notify gateway users when summary generation fails
When auxiliary compression's summary LLM call fails (e.g. model 404,
auxiliary model misconfigured), the compressor still drops the selected
turns and inserts a static fallback placeholder — the dropped context
is unrecoverable.
Previously the only signal of this was a WARNING in agent.log. Gateway
users (Telegram/Discord/etc.) had no way to know context was lost
because the existing _emit_warning path requires a status_callback,
and the gateway hygiene path uses a temporary _hyg_agent with
quiet_mode=True and no callback wired up.
Changes:
- ContextCompressor: track _last_summary_fallback_used and
_last_summary_dropped_count on each compress() call. Cleared at the
start of compress() and on session reset.
- gateway/run.py hygiene: after auto-compress, inspect the temp
agent's compressor; if fallback was used, send a visible ⚠️ warning
to the user via the platform adapter (TG/Discord/etc.) including
dropped count and the underlying error.
- gateway/run.py /compress: append the same warning to the manual
compress reply so users running /compress see the failure too.
Acceptance:
- Summary success: no user-visible warning (unchanged).
- Summary failure on gateway hygiene: user receives a TG/Discord
message with dropped count + error + remediation hint.
- Summary failure on /compress: warning appended to the command reply.
- CLI status_callback / _emit_warning path is untouched.
- Test coverage: two new tests verify the tracking fields are set on
failure and cleared on subsequent success.
This commit is contained in:
parent
f40b20d13c
commit
dfdc4276e8
3 changed files with 120 additions and 0 deletions
|
|
@ -242,6 +242,72 @@ class TestSummaryFailureCooldown:
|
|||
assert mock_call.call_count == 1
|
||||
|
||||
|
||||
class TestSummaryFailureTrackingForGatewayWarning:
|
||||
"""When summary generation fails, the compressor must record dropped count
|
||||
+ fallback flag so gateway hygiene & /compress can surface a visible
|
||||
warning instead of silently dropping context."""
|
||||
|
||||
def test_compress_records_fallback_and_dropped_count_on_summary_failure(self):
|
||||
with patch("agent.context_compressor.get_model_context_length", return_value=100000):
|
||||
c = ContextCompressor(model="test", quiet_mode=True, protect_first_n=2, protect_last_n=2)
|
||||
|
||||
msgs = [
|
||||
{"role": "system", "content": "sys"},
|
||||
{"role": "user", "content": "msg 1"},
|
||||
{"role": "assistant", "content": "msg 2"},
|
||||
{"role": "user", "content": "msg 3"},
|
||||
{"role": "assistant", "content": "msg 4"},
|
||||
{"role": "user", "content": "msg 5"},
|
||||
{"role": "assistant", "content": "msg 6"},
|
||||
{"role": "user", "content": "msg 7"},
|
||||
]
|
||||
|
||||
# Simulate summary LLM call failing — covers the 404 / model-not-found
|
||||
# case from issue (auxiliary compression model misconfigured).
|
||||
with patch("agent.context_compressor.call_llm", side_effect=Exception("404 model not found")):
|
||||
result = c.compress(msgs)
|
||||
|
||||
assert c._last_summary_fallback_used is True
|
||||
assert c._last_summary_dropped_count > 0
|
||||
assert c._last_summary_error is not None
|
||||
# Result must still be well-formed (fallback summary present).
|
||||
assert any(
|
||||
isinstance(m.get("content"), str) and "Summary generation was unavailable" in m["content"]
|
||||
for m in result
|
||||
)
|
||||
|
||||
def test_compress_clears_fallback_flag_on_subsequent_success(self):
|
||||
mock_response = MagicMock()
|
||||
mock_response.choices = [MagicMock()]
|
||||
mock_response.choices[0].message.content = "summary text"
|
||||
|
||||
with patch("agent.context_compressor.get_model_context_length", return_value=100000):
|
||||
c = ContextCompressor(model="test", quiet_mode=True, protect_first_n=2, protect_last_n=2)
|
||||
|
||||
msgs = [
|
||||
{"role": "system", "content": "sys"},
|
||||
{"role": "user", "content": "msg 1"},
|
||||
{"role": "assistant", "content": "msg 2"},
|
||||
{"role": "user", "content": "msg 3"},
|
||||
{"role": "assistant", "content": "msg 4"},
|
||||
{"role": "user", "content": "msg 5"},
|
||||
{"role": "assistant", "content": "msg 6"},
|
||||
{"role": "user", "content": "msg 7"},
|
||||
]
|
||||
|
||||
# First call fails, second succeeds — flag must reset on second compress.
|
||||
with patch("agent.context_compressor.call_llm", side_effect=Exception("boom")):
|
||||
c.compress(msgs)
|
||||
assert c._last_summary_fallback_used is True
|
||||
|
||||
# Reset cooldown to allow retry on second compress
|
||||
c._summary_failure_cooldown_until = 0.0
|
||||
with patch("agent.context_compressor.call_llm", return_value=mock_response):
|
||||
c.compress(msgs)
|
||||
assert c._last_summary_fallback_used is False
|
||||
assert c._last_summary_dropped_count == 0
|
||||
|
||||
|
||||
class TestSummaryPrefixNormalization:
|
||||
def test_legacy_prefix_is_replaced(self):
|
||||
summary = ContextCompressor._with_summary_prefix("[CONTEXT SUMMARY]: did work")
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue