mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
fix: clear conversation_history after mid-loop compression to prevent empty sessions (#7001)
After mid-loop compression (triggered by 413, context_overflow, or Anthropic
long-context tier errors), _compress_context() creates a new session in SQLite
and resets _last_flushed_db_idx=0. However, conversation_history was not cleared,
so _flush_messages_to_session_db() computed:
flush_from = max(len(conversation_history), _last_flushed_db_idx) = max(200, 0) = 200
messages[200:] → empty (the compressed message list has fewer than 200 entries)
This resulted in zero messages being written to the new session's SQLite store.
On resume, the user would see 'Session found but has no messages.'
The preflight compression path (line 7311) already had the fix:
conversation_history = None
This commit adds the same clearing to the three mid-loop compression sites:
- Anthropic long-context tier overflow
- HTTP 413 payload too large
- Generic context_overflow error
Reported by Aaryan (Nous community).
This commit is contained in:
parent
13d7ff3420
commit
871313ae2d
2 changed files with 93 additions and 0 deletions
|
|
@ -172,6 +172,87 @@ class TestHTTP413Compression:
|
|||
mock_compress.assert_called_once()
|
||||
assert result["completed"] is True
|
||||
|
||||
def test_413_clears_conversation_history_on_persist(self, agent):
    """Every _persist_session call after a 413-triggered compression gets None history.

    Regression guard: _compress_context() starts a new session and resets
    _last_flushed_db_idx to 0. If conversation_history still refers to the
    original (pre-compression) list, _flush_messages_to_session_db computes
    flush_from = max(len(history), 0), which is larger than
    len(compressed_messages). messages[flush_from:] is then empty, nothing is
    written to the new session, and resuming reports
    "Session found but has no messages".
    """
    # The first API call raises the 413; the retry after compression succeeds.
    agent.client.chat.completions.create.side_effect = [
        _make_413_error(),
        _mock_response(content="OK", finish_reason="stop"),
    ]

    # 200 alternating user/assistant turns: far longer than the one-message
    # compressed result returned by the patched _compress_context below.
    roles = ("user", "assistant")
    big_history = [
        {"role": roles[idx % 2], "content": f"msg {idx}"} for idx in range(200)
    ]

    recorded_histories = []

    def _record_history(msgs, hist):
        # Capture only the history argument handed to _persist_session.
        recorded_histories.append(hist)

    compressed_result = (
        [{"role": "user", "content": "summary"}],
        "compressed prompt",
    )

    with (
        patch.object(agent, "_compress_context", return_value=compressed_result),
        patch.object(agent, "_persist_session", side_effect=_record_history),
        patch.object(agent, "_save_trajectory"),
        patch.object(agent, "_cleanup_task_resources"),
    ):
        agent.run_conversation("hello", conversation_history=big_history)

    assert recorded_histories, "Expected at least one _persist_session call"
    for hist in recorded_histories:
        assert hist is None, (
            f"conversation_history should be None after mid-loop compression, "
            f"got list with {len(hist)} items"
        )
|
||||
|
||||
def test_context_overflow_clears_conversation_history_on_persist(self, agent):
    """Every _persist_session call after context-overflow compression gets None history.

    The first API call raises a 400 whose message reports an exceeded
    maximum context length; the second call returns a normal "stop"
    response. _compress_context is patched to return a one-message summary.
    """
    overflow_error = Exception(
        "Error code: 400 - This endpoint's maximum context length is 128000 tokens. "
        "However, you requested about 270460 tokens."
    )
    overflow_error.status_code = 400
    success_response = _mock_response(content="OK", finish_reason="stop")
    agent.client.chat.completions.create.side_effect = [
        overflow_error,
        success_response,
    ]

    # 200 alternating user/assistant turns, much longer than the compressed result.
    big_history = []
    for turn in range(200):
        speaker = "assistant" if turn % 2 else "user"
        big_history.append({"role": speaker, "content": f"msg {turn}"})

    recorded_histories = []

    def _record_history(msgs, hist):
        # Capture only the history argument handed to _persist_session.
        recorded_histories.append(hist)

    compressed_result = (
        [{"role": "user", "content": "summary"}],
        "compressed prompt",
    )

    with (
        patch.object(agent, "_compress_context", return_value=compressed_result),
        patch.object(agent, "_persist_session", side_effect=_record_history),
        patch.object(agent, "_save_trajectory"),
        patch.object(agent, "_cleanup_task_resources"),
    ):
        agent.run_conversation("hello", conversation_history=big_history)

    assert recorded_histories
    for hist in recorded_histories:
        assert hist is None, (
            f"conversation_history should be None after context-overflow compression, "
            f"got list with {len(hist)} items"
        )
|
||||
|
||||
def test_400_context_length_triggers_compression(self, agent):
|
||||
"""A 400 with 'maximum context length' should trigger compression, not abort as generic 4xx.
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue