mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
fix(agent): preserve messages when compression summary generation fails
When _generate_summary() fails (timeout, rate limit, no provider, HTTP 529), compress() previously destroyed all middle conversation turns and replaced them with a static placeholder string. This caused permanent, unrecoverable data loss — the model lost all context of earlier work. The fix is simple: when summary generation returns None, return the original messages unchanged instead of proceeding with the destruction. The conversation stays intact and compression can be retried on the next trigger. Fixes #10719, #12131, #11914, #11585
This commit is contained in:
parent
470389e6a3
commit
7039020c9f
2 changed files with 158 additions and 20 deletions
|
|
@ -817,8 +817,8 @@ The user has requested that this compaction PRIORITISE preserving all informatio
|
|||
# No provider configured — long cooldown, unlikely to self-resolve
|
||||
self._summary_failure_cooldown_until = time.monotonic() + _SUMMARY_FAILURE_COOLDOWN_SECONDS
|
||||
logging.warning("Context compression: no provider available for "
|
||||
"summary. Middle turns will be dropped without summary "
|
||||
"for %d seconds.",
|
||||
"summary. Preserving original messages and pausing "
|
||||
"summary attempts for %d seconds.",
|
||||
_SUMMARY_FAILURE_COOLDOWN_SECONDS)
|
||||
return None
|
||||
except Exception as e:
|
||||
|
|
@ -1135,6 +1135,13 @@ The user has requested that this compaction PRIORITISE preserving all informatio
|
|||
|
||||
display_tokens = current_tokens if current_tokens else self.last_prompt_tokens or estimate_messages_tokens_rough(messages)
|
||||
|
||||
# Snapshot the original messages before any lossy modifications.
|
||||
# _prune_old_tool_results() returns a NEW list (via [m.copy() for m in messages])
|
||||
# so rebinding 'messages' below does not affect original_messages.
|
||||
# If summary generation fails later, we return this exact list so
|
||||
# the caller's conversation history is completely unmodified.
|
||||
original_messages = messages
|
||||
|
||||
# Phase 1: Prune old tool results (cheap, no LLM call)
|
||||
messages, pruned_count = self._prune_old_tool_results(
|
||||
messages, protect_tail_count=self.protect_last_n,
|
||||
|
|
@ -1151,7 +1158,7 @@ The user has requested that this compaction PRIORITISE preserving all informatio
|
|||
compress_end = self._find_tail_cut_by_tokens(messages, compress_start)
|
||||
|
||||
if compress_start >= compress_end:
|
||||
return messages
|
||||
return original_messages
|
||||
|
||||
turns_to_summarize = messages[compress_start:compress_end]
|
||||
|
||||
|
|
@ -1194,19 +1201,18 @@ The user has requested that this compaction PRIORITISE preserving all informatio
|
|||
)
|
||||
compressed.append(msg)
|
||||
|
||||
# If LLM summary failed, insert a static fallback so the model
|
||||
# knows context was lost rather than silently dropping everything.
|
||||
# If LLM summary failed, preserve the original messages rather than
|
||||
# destroying conversation history. Dropping turns without a summary
|
||||
# causes permanent data loss — the model loses context of earlier work
|
||||
# with no way to recover it. (#10719, #12131, #11914, #11585)
|
||||
if not summary:
|
||||
if not self.quiet_mode:
|
||||
logger.warning("Summary generation failed — inserting static fallback context marker")
|
||||
n_dropped = compress_end - compress_start
|
||||
summary = (
|
||||
f"{SUMMARY_PREFIX}\n"
|
||||
f"Summary generation was unavailable. {n_dropped} conversation turns were "
|
||||
f"removed to free context space but could not be summarized. The removed "
|
||||
f"turns contained earlier work in this session. Continue based on the "
|
||||
f"recent messages below and the current state of any files or resources."
|
||||
)
|
||||
logger.warning(
|
||||
"Summary generation failed — preserving original messages "
|
||||
"to prevent data loss (%d middle turns kept intact)",
|
||||
compress_end - compress_start,
|
||||
)
|
||||
return original_messages
|
||||
|
||||
_merge_summary_into_tail = False
|
||||
last_head_role = messages[compress_start - 1].get("role", "user") if compress_start > 0 else "user"
|
||||
|
|
|
|||
|
|
@ -65,9 +65,10 @@ class TestCompress:
|
|||
assert result == msgs
|
||||
|
||||
def test_truncation_fallback_no_client(self, compressor):
|
||||
# compressor has client=None, so should use truncation fallback
|
||||
# With a mocked summary, compression should reduce message count
|
||||
msgs = [{"role": "system", "content": "System prompt"}] + self._make_messages(10)
|
||||
result = compressor.compress(msgs)
|
||||
with patch.object(compressor, "_generate_summary", return_value=f"{SUMMARY_PREFIX}\nSummary"):
|
||||
result = compressor.compress(msgs)
|
||||
assert len(result) < len(msgs)
|
||||
# Should keep system message and last N
|
||||
assert result[0]["role"] == "system"
|
||||
|
|
@ -75,14 +76,17 @@ class TestCompress:
|
|||
|
||||
def test_compression_increments_count(self, compressor):
|
||||
msgs = self._make_messages(10)
|
||||
compressor.compress(msgs)
|
||||
with patch.object(compressor, "_generate_summary", return_value=f"{SUMMARY_PREFIX}\nSummary"):
|
||||
compressor.compress(msgs)
|
||||
assert compressor.compression_count == 1
|
||||
compressor.compress(msgs)
|
||||
with patch.object(compressor, "_generate_summary", return_value=f"{SUMMARY_PREFIX}\nSummary 2"):
|
||||
compressor.compress(msgs)
|
||||
assert compressor.compression_count == 2
|
||||
|
||||
def test_protects_first_and_last(self, compressor):
|
||||
msgs = self._make_messages(10)
|
||||
result = compressor.compress(msgs)
|
||||
with patch.object(compressor, "_generate_summary", return_value=f"{SUMMARY_PREFIX}\nSummary"):
|
||||
result = compressor.compress(msgs)
|
||||
# First 2 messages should be preserved (protect_first_n=2)
|
||||
# Last 2 messages should be preserved (protect_last_n=2)
|
||||
assert result[-1]["content"] == msgs[-1]["content"]
|
||||
|
|
@ -93,6 +97,133 @@ class TestCompress:
|
|||
assert msgs[-2]["content"] in result[-2]["content"]
|
||||
|
||||
|
||||
class TestPreserveOnSummaryFailure:
    """Regression tests: when summary generation fails, compress() must return
    the messages unchanged instead of destroying the middle turns.
    (#10719, #12131, #11914, #11585)
    """

    @staticmethod
    def _build_compressor():
        # Mock a large context window so the token-limit check never gets in
        # the way of these tests; only the constructor needs the patch active.
        with patch("agent.context_compressor.get_model_context_length", return_value=100000):
            return ContextCompressor(model="test", quiet_mode=True, protect_first_n=2, protect_last_n=2)

    @staticmethod
    def _alternating_messages():
        # Ten alternating user/assistant turns: "msg 0" .. "msg 9".
        convo = []
        for i in range(10):
            speaker = "assistant" if i % 2 else "user"
            convo.append({"role": speaker, "content": f"msg {i}"})
        return convo

    def test_summary_failure_preserves_all_messages(self):
        """When _generate_summary returns None, no messages should be dropped."""
        c = self._build_compressor()
        msgs = self._alternating_messages()

        with patch.object(c, "_generate_summary", return_value=None):
            result = c.compress(msgs)

        assert len(result) == len(msgs), (
            f"Messages were dropped despite summary failure: {len(msgs)} -> {len(result)}"
        )

    def test_summary_failure_preserves_content(self):
        """Every original message should be present after a failed compression."""
        c = self._build_compressor()
        msgs = self._alternating_messages()
        original_contents = [m["content"] for m in msgs]

        with patch.object(c, "_generate_summary", return_value=None):
            result = c.compress(msgs)

        result_contents = [m["content"] for m in result]
        for content in original_contents:
            assert content in result_contents, f"Message '{content}' was lost"

    def test_summary_failure_does_not_increment_compression_count(self):
        """A failed compression should not count as a successful one."""
        c = self._build_compressor()
        msgs = self._alternating_messages()

        with patch.object(c, "_generate_summary", return_value=None):
            c.compress(msgs)

        assert c.compression_count == 0

    def test_successful_summary_still_compresses(self):
        """Verify the normal path still works — compression happens when summary succeeds."""
        c = self._build_compressor()
        msgs = self._alternating_messages()

        with patch.object(c, "_generate_summary", return_value=f"{SUMMARY_PREFIX}\nSummary of work"):
            result = c.compress(msgs)

        assert len(result) < len(msgs)
        assert c.compression_count == 1

    def test_transient_llm_error_preserves_messages(self):
        """Simulate a real transient error (timeout/rate limit) through the full
        _generate_summary path — messages should be preserved."""
        c = self._build_compressor()
        msgs = self._alternating_messages()

        with patch("agent.context_compressor.call_llm", side_effect=Exception("Connection timeout")):
            result = c.compress(msgs)

        assert len(result) == len(msgs), "Transient LLM error should not cause data loss"

    def test_runtime_error_no_provider_preserves_messages(self):
        """When no compression provider is configured (RuntimeError), messages
        should be preserved rather than destroyed."""
        c = self._build_compressor()
        msgs = self._alternating_messages()

        with patch("agent.context_compressor.call_llm", side_effect=RuntimeError("No provider")):
            result = c.compress(msgs)

        assert len(result) == len(msgs), "Missing provider should not cause data loss"

    def test_summary_failure_preserves_messages_exactly_after_tool_pruning_would_run(self):
        """On summary failure, compress() must return the exact original message
        list — including any large tool outputs that _prune_old_tool_results
        would normally replace with 1-line summaries.

        This is the key regression test: the pre-summary pruning pass is lossy
        (it replaces tool outputs with summaries), so returning the post-pruning
        list on summary failure silently mutates the caller's conversation.
        The fix saves original_messages before pruning and returns that instead.
        """
        c = self._build_compressor()

        # Unique 500-char tool output that pruning would replace with a summary.
        unique_tool_output = "UNIQUE_MARKER_" + ("x" * 486)
        tool_call_turn = {
            "role": "assistant",
            "content": None,
            "tool_calls": [
                {"id": "c1", "type": "function", "function": {"name": "terminal", "arguments": '{"command": "ls"}'}},
            ],
        }
        msgs = [
            {"role": "user", "content": "msg 0"},
            {"role": "assistant", "content": "msg 1"},
            {"role": "user", "content": "msg 2"},
            tool_call_turn,
            {"role": "tool", "content": unique_tool_output, "tool_call_id": "c1"},
            {"role": "user", "content": "msg 5"},
            {"role": "assistant", "content": "msg 6"},
            {"role": "user", "content": "msg 7"},
            {"role": "assistant", "content": "msg 8"},
            {"role": "user", "content": "msg 9"},
        ]

        with patch.object(c, "_generate_summary", return_value=None):
            result = c.compress(msgs)

        # Exact preservation — not just same length, same object.
        assert result == msgs, (
            "Summary failure must return the exact original messages, "
            "not the post-pruning version"
        )
        assert result is msgs, (
            "Summary failure should return the same list object"
        )
|
||||
|
||||
|
||||
class TestGenerateSummaryNoneContent:
|
||||
"""Regression: content=None (from tool-call-only assistant messages) must not crash."""
|
||||
|
||||
|
|
@ -128,7 +259,8 @@ class TestGenerateSummaryNoneContent:
|
|||
{"role": "user" if i % 2 == 0 else "assistant", "content": f"msg {i}"}
|
||||
for i in range(10)
|
||||
]
|
||||
result = c.compress(msgs)
|
||||
with patch.object(c, "_generate_summary", return_value=f"{SUMMARY_PREFIX}\nSummary"):
|
||||
result = c.compress(msgs)
|
||||
assert len(result) < len(msgs)
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue