mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-28 01:21:43 +00:00
refactor(memory): remove flush_memories entirely (#15696)
The AIAgent.flush_memories pre-compression save, the gateway _flush_memories_for_session, and everything feeding them are obsolete now that the background memory/skill review handles persistent memory extraction. Problems with flush_memories: - Pre-dates the background review loop. It was the only memory-save path when introduced; the background review now fires every 10 user turns on CLI and gateway alike, which is far more frequent than compression or session reset ever triggered flush. - Blocking and synchronous. Pre-compression flush ran on the live agent before compression, blocking the user-visible response. - Cache-breaking. Flush built a temporary conversation prefix (system prompt + memory-only tool list) that diverged from the live conversation's cached prefix, invalidating prompt caching. The gateway variant spawned a fresh AIAgent with its own clean prompt for each finalized session — still cache-breaking, just in a different process. - Redundant. Background review runs in the live conversation's session context, gets the same content, writes to the same memory store, and doesn't break the cache. Everything flush_memories claimed to preserve is already covered. 
What this removes: - AIAgent.flush_memories() method (~248 LOC in run_agent.py) - Pre-compression flush call in _compress_context - flush_memories call sites in cli.py (/new + exit) - GatewayRunner._flush_memories_for_session + _async_flush_memories (and the 3 call sites: session expiry watcher, /new, /resume) - 'flush_memories' entry from DEFAULT_CONFIG auxiliary tasks, hermes tools UI task list, auxiliary_client docstrings - _memory_flush_min_turns config + init - #15631's headroom-deduction math in _check_compression_model_feasibility (headroom was only needed because flush dragged the full main-agent system prompt along; the compression summariser sends a single user-role prompt so new_threshold = aux_context is safe again) - The dedicated test files and assertions that exercised flush-specific paths What this renames (with read-time backcompat on sessions.json): - SessionEntry.memory_flushed -> SessionEntry.expiry_finalized. The session-expiry watcher still uses the flag to avoid re-running finalize/eviction on the same expired session; the new name reflects what it now actually gates. from_dict() reads 'expiry_finalized' first, falls back to the legacy 'memory_flushed' key so existing sessions.json files upgrade seamlessly. Supersedes #15631 and #15638. Tested: 383 targeted tests pass across run_agent/, agent/, cli/, and gateway/ session-boundary suites. No behavior regressions — background memory review continues to handle persistent memory extraction on both CLI and gateway.
This commit is contained in:
parent
d635e2df3f
commit
ea01bdcebe
23 changed files with 78 additions and 1567 deletions
|
|
@ -41,8 +41,6 @@ def _make_agent(
|
|||
agent.tool_progress_callback = None
|
||||
agent._compression_warning = None
|
||||
agent._aux_compression_context_length_config = None
|
||||
# Tools feed into the headroom calculation in _check_compression_model_feasibility.
|
||||
# Tests that want to assert specific threshold values can override this.
|
||||
agent.tools = []
|
||||
|
||||
compressor = MagicMock(spec=ContextCompressor)
|
||||
|
|
@ -85,9 +83,8 @@ def test_auto_corrects_threshold_when_aux_context_below_threshold(mock_get_clien
|
|||
assert "threshold:" in messages[0]
|
||||
# Warning stored for gateway replay
|
||||
assert agent._compression_warning is not None
|
||||
# Threshold on the live compressor was actually lowered, accounting for
|
||||
# the request-overhead headroom (empty tools list → ~12K headroom only).
|
||||
assert agent.context_compressor.threshold_tokens == 68_000
|
||||
# Threshold on the live compressor was actually lowered to aux_context.
|
||||
assert agent.context_compressor.threshold_tokens == 80_000
|
||||
|
||||
|
||||
@patch("agent.model_metadata.get_model_context_length", return_value=32_768)
|
||||
|
|
@ -346,93 +343,7 @@ def test_just_below_threshold_auto_corrects(mock_get_client, mock_ctx_len):
|
|||
assert len(messages) == 1
|
||||
assert "small-model" in messages[0]
|
||||
assert "Auto-lowered" in messages[0]
|
||||
assert agent.context_compressor.threshold_tokens == 87_999
|
||||
|
||||
|
||||
# ── Headroom for system prompt + tool schemas ────────────────────────
|
||||
|
||||
|
||||
@patch("agent.model_metadata.get_model_context_length", return_value=128_000)
@patch("agent.auxiliary_client.get_text_auxiliary_client")
def test_auto_lowered_threshold_reserves_headroom_for_tools_and_system(mock_get_client, mock_ctx_len):
    """When aux context binds the threshold, new_threshold must leave room
    for the system prompt and tool schemas that auxiliary callers
    (compression summariser, flush_memories) prepend to the message list.

    Without headroom, a full-budget message window + ~25K system/tool
    overhead overflows the aux model with HTTP 400. Regression guard for
    the flush_memories-on-busy-toolset overflow path.
    """
    # Main context 200K, threshold 70% = 140K. Aux pins at 128K (below
    # threshold → triggers auto-correct).
    agent = _make_agent(main_context=200_000, threshold_percent=0.70)

    # Build a realistic tool schema load: 50 tools with non-trivial
    # descriptions and parameter docs, assembled imperatively.
    schemas = []
    for idx in range(50):
        schemas.append(
            {
                "type": "function",
                "function": {
                    "name": f"tool_{idx}",
                    "description": "x" * 200,
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "arg": {"type": "string", "description": "y" * 120},
                        },
                    },
                },
            }
        )
    agent.tools = schemas

    fake_client = MagicMock()
    fake_client.base_url = "https://openrouter.ai/api/v1"
    fake_client.api_key = "sk-aux"
    mock_get_client.return_value = (fake_client, "model-with-128k")

    # Silence status output, then run the feasibility check under test.
    agent._emit_status = lambda msg: None
    agent._check_compression_model_feasibility()

    lowered = agent.context_compressor.threshold_tokens

    # Must have strictly reserved headroom: new threshold < aux_context.
    assert lowered < 128_000, (
        f"threshold {lowered} did not reserve headroom below aux=128,000 "
        f"— system prompt + tools would overflow the aux model"
    )
    # Must respect the 64K hard floor.
    from agent.model_metadata import MINIMUM_CONTEXT_LENGTH
    assert lowered >= MINIMUM_CONTEXT_LENGTH
|
||||
|
||||
|
||||
@patch("agent.model_metadata.get_model_context_length", return_value=80_000)
@patch("agent.auxiliary_client.get_text_auxiliary_client")
def test_headroom_floors_at_minimum_context(mock_get_client, mock_ctx_len):
    """If headroom subtraction would push below 64K floor, clamp to 64K
    rather than refusing the session — the aux is still workable for a
    smaller message window.
    """
    # Aux at 80K, with enough tools to push headroom > 16K → naive subtract
    # would land at < 64K. The max(..., MINIMUM_CONTEXT_LENGTH) clamp must
    # keep the session running.
    agent = _make_agent(main_context=200_000, threshold_percent=0.50)

    # 30 tools with deliberately fat descriptions to inflate the headroom.
    agent.tools = [
        {
            "type": "function",
            "function": {
                "name": f"tool_{i}",
                "description": "z" * 2_000,  # fat descriptions
                "parameters": {},
            },
        }
        for i in range(30)
    ]

    stub_client = MagicMock()
    stub_client.base_url = "https://openrouter.ai/api/v1"
    stub_client.api_key = "sk-aux"
    mock_get_client.return_value = (stub_client, "small-aux-model")

    # Silence status output, then run the feasibility check under test.
    agent._emit_status = lambda msg: None
    agent._check_compression_model_feasibility()

    from agent.model_metadata import MINIMUM_CONTEXT_LENGTH
    # NOTE(review): the next two assertions contradict each other unless
    # MINIMUM_CONTEXT_LENGTH == 99_999 — this looks like a removed/added
    # diff pair fused together by extraction. Confirm against the repo
    # which assertion is current before relying on this test.
    assert agent.context_compressor.threshold_tokens == MINIMUM_CONTEXT_LENGTH
    assert agent.context_compressor.threshold_tokens == 99_999
|
||||
|
||||
|
||||
# ── Two-phase: __init__ + run_conversation replay ───────────────────
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue