mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-21 10:22:18 +00:00
fix(compression): preserve goal, platform, and session indexing across rotation
Three state-loss bugs at the compression rotation boundary, fixed together because they all live in the same ~80-line rotation block: - #33618: a persistent /goal did not follow the rotation. load_goal does a flat per-session lookup with no lineage walk, so a goal silently died when compression minted a fresh child id. Added migrate_goal_to_session() and call it after the child session is created (move-not-copy: the parent row is archived as cleared so exactly one active goal row exists). - #33906/#33907: if the child create_session raised (FK constraint, contended write), the outer handler only warned and let the agent continue on the NEW id — which has no row in state.db — producing an orphan session. Now the rotation rolls agent.session_id back to the still-indexed parent (reopening it) instead of stranding the conversation on a phantom id. - #27633: the compaction-boundary on_session_start notification omitted the platform kwarg, so context-engine plugins saw source=unknown for every message after the boundary. Forward platform (matching the initial session-start call in agent_init.py). Co-authored-by: denisqq <21260182+denisqq@users.noreply.github.com> Co-authored-by: zccyman <16263913+zccyman@users.noreply.github.com> Co-authored-by: liuhao1024 <sunsky.lau@gmail.com>
This commit is contained in:
parent
b4b512c507
commit
7ace96ba40
4 changed files with 277 additions and 8 deletions
|
|
@ -592,14 +592,62 @@ def compress_context(
|
|||
except Exception:
|
||||
pass
|
||||
agent._session_db_created = False
|
||||
agent._session_db.create_session(
|
||||
session_id=agent.session_id,
|
||||
source=agent.platform or os.environ.get("HERMES_SESSION_SOURCE", "cli"),
|
||||
model=agent.model,
|
||||
model_config=agent._session_init_model_config,
|
||||
parent_session_id=old_session_id,
|
||||
)
|
||||
try:
|
||||
agent._session_db.create_session(
|
||||
session_id=agent.session_id,
|
||||
source=agent.platform or os.environ.get("HERMES_SESSION_SOURCE", "cli"),
|
||||
model=agent.model,
|
||||
model_config=agent._session_init_model_config,
|
||||
parent_session_id=old_session_id,
|
||||
)
|
||||
except Exception as _cs_err:
|
||||
# The child row could not be created (e.g. FK constraint,
|
||||
# contended write). Previously the outer handler simply
|
||||
# warned and let the agent continue on the NEW id — which
|
||||
# has no row in state.db, producing an orphan: the parent
|
||||
# is ended, the child is never indexed, and every
|
||||
# subsequent message is attributed to a session that
|
||||
# doesn't exist (#33906/#33907). Roll the live id back to
|
||||
# the parent so the conversation stays attached to a real,
|
||||
# indexed session instead of a phantom.
|
||||
logger.warning(
|
||||
"Compression child session create failed (%s) — "
|
||||
"rolling back to parent session %s to avoid an orphan.",
|
||||
_cs_err, old_session_id,
|
||||
)
|
||||
agent.session_id = old_session_id
|
||||
try:
|
||||
from gateway.session_context import set_current_session_id
|
||||
set_current_session_id(agent.session_id)
|
||||
except Exception:
|
||||
os.environ["HERMES_SESSION_ID"] = agent.session_id
|
||||
try:
|
||||
from hermes_logging import set_session_context
|
||||
set_session_context(agent.session_id)
|
||||
except Exception:
|
||||
pass
|
||||
# Re-open the parent: it was ended above, but we're
|
||||
# continuing on it, so it must not stay closed.
|
||||
try:
|
||||
agent._session_db.reopen_session(old_session_id)
|
||||
except Exception:
|
||||
pass
|
||||
old_session_id = None # no rotation happened
|
||||
# The parent row already exists in state.db, so mark the
|
||||
# session as created — _ensure_db_session would otherwise
|
||||
# retry a (harmless INSERT OR IGNORE) create next turn.
|
||||
agent._session_db_created = True
|
||||
raise
|
||||
agent._session_db_created = True
|
||||
# Carry a persistent /goal onto the continuation session.
|
||||
# Compression mints a fresh child id; load_goal does a flat
|
||||
# per-session lookup with no parent walk, so without this an
|
||||
# active goal silently dies at the boundary (#33618).
|
||||
try:
|
||||
from hermes_cli.goals import migrate_goal_to_session
|
||||
migrate_goal_to_session(old_session_id, agent.session_id, reason="compression")
|
||||
except Exception as _goal_err:
|
||||
logger.debug("Could not migrate goal on compression: %s", _goal_err)
|
||||
# Auto-number the title for the continuation session
|
||||
if old_title:
|
||||
try:
|
||||
|
|
@ -615,7 +663,18 @@ def compress_context(
|
|||
agent._session_db.update_system_prompt(agent.session_id, new_system_prompt)
|
||||
agent._last_flushed_db_idx = 0
|
||||
except Exception as e:
|
||||
logger.warning("Session DB compression split failed — new session will NOT be indexed: %s", e)
|
||||
# If the rotation rolled back to the parent (orphan-avoidance
|
||||
# above), agent.session_id is the still-indexed parent and
|
||||
# old_session_id was cleared — so this is recovery, not an
|
||||
# un-indexed orphan. Otherwise an earlier step failed before the
|
||||
# child was created and the warning's original meaning holds.
|
||||
if locals().get("old_session_id") is None and not in_place:
|
||||
logger.warning(
|
||||
"Compression rotation aborted and rolled back to the "
|
||||
"parent session (%s): %s", agent.session_id or "?", e,
|
||||
)
|
||||
else:
|
||||
logger.warning("Session DB compression split failed — new session will NOT be indexed: %s", e)
|
||||
|
||||
# Compaction-boundary bookkeeping, computed once. `old_session_id` is only
|
||||
# bound in the rotation branch; in-place leaves it unset. `_boundary_parent`
|
||||
|
|
@ -637,6 +696,7 @@ def compress_context(
|
|||
agent.session_id or "",
|
||||
boundary_reason="compression",
|
||||
old_session_id=_boundary_parent,
|
||||
platform=getattr(agent, "platform", None) or "cli",
|
||||
conversation_id=getattr(agent, "_gateway_session_key", None),
|
||||
)
|
||||
except Exception as _ce_err:
|
||||
|
|
|
|||
|
|
@ -279,6 +279,44 @@ def clear_goal(session_id: str) -> None:
|
|||
save_goal(session_id, state)
|
||||
|
||||
|
||||
def migrate_goal_to_session(old_session_id: str, new_session_id: str, *, reason: str = "") -> bool:
|
||||
"""Carry a persistent /goal from a parent session to its continuation.
|
||||
|
||||
Context compression rotates ``session_id`` to a fresh child session,
|
||||
but ``load_goal`` does a flat ``goal:<session_id>`` lookup with no
|
||||
parent-lineage walk — so an active goal silently dies at the
|
||||
compaction boundary (#33618). Copy the goal onto the new session and
|
||||
archive the old row as ``cleared`` so exactly one active goal row
|
||||
exists per logical conversation (avoids the "two active goals"
|
||||
hazard of a pure copy).
|
||||
|
||||
Returns True when a goal was migrated, False when there was nothing
|
||||
to migrate or the DB was unavailable. Best-effort and never raises —
|
||||
a failure here must not block compression.
|
||||
"""
|
||||
if not old_session_id or not new_session_id or old_session_id == new_session_id:
|
||||
return False
|
||||
try:
|
||||
state = load_goal(old_session_id)
|
||||
if state is None or getattr(state, "status", None) == "cleared":
|
||||
return False
|
||||
# Don't clobber a goal already set on the child (e.g. a resumed
|
||||
# lineage that re-established its own goal).
|
||||
if load_goal(new_session_id) is not None:
|
||||
return False
|
||||
save_goal(new_session_id, state)
|
||||
# Archive the parent's row so it isn't double-counted as active.
|
||||
clear_goal(old_session_id)
|
||||
logger.debug(
|
||||
"GoalManager: migrated goal %s -> %s (%s)",
|
||||
old_session_id, new_session_id, reason or "rotation",
|
||||
)
|
||||
return True
|
||||
except Exception as exc: # pragma: no cover - defensive
|
||||
logger.debug("GoalManager: goal migration failed: %s", exc)
|
||||
return False
|
||||
|
||||
|
||||
# ──────────────────────────────────────────────────────────────────────
|
||||
# Judge
|
||||
# ──────────────────────────────────────────────────────────────────────
|
||||
|
|
@ -907,6 +945,7 @@ __all__ = [
|
|||
"load_goal",
|
||||
"save_goal",
|
||||
"clear_goal",
|
||||
"migrate_goal_to_session",
|
||||
"judge_goal",
|
||||
"run_kanban_goal_loop",
|
||||
]
|
||||
|
|
|
|||
129
tests/agent/test_compression_rotation_state.py
Normal file
129
tests/agent/test_compression_rotation_state.py
Normal file
|
|
@ -0,0 +1,129 @@
|
|||
"""Compression rotation hardening — state-loss fixes at the compaction boundary.
|
||||
|
||||
When auto-compression rotates ``agent.session_id`` to a continuation child,
|
||||
three pieces of state used to be lost or corrupted:
|
||||
|
||||
* #33618 — a persistent ``/goal`` did not follow the rotation (``load_goal``
|
||||
is a flat per-session lookup with no lineage walk), so it silently died.
|
||||
* #33906/#33907 — if the child ``create_session`` raised, the outer handler
|
||||
only warned and let the agent continue on the NEW (un-indexed) id,
|
||||
producing an orphan session missing from state.db.
|
||||
* #27633 — the compaction-boundary ``on_session_start`` notification omitted
|
||||
the ``platform`` kwarg, so context-engine plugins saw ``source=unknown``
|
||||
for every message after the boundary.
|
||||
|
||||
These tests drive the real ``compress_context`` path against a real SessionDB.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
from hermes_state import SessionDB
|
||||
|
||||
|
||||
def _build_agent_with_db(db: SessionDB, session_id: str, platform: str = "telegram"):
|
||||
with patch.dict(os.environ, {"OPENROUTER_API_KEY": "test-key"}):
|
||||
from run_agent import AIAgent
|
||||
|
||||
agent = AIAgent(
|
||||
api_key="test-key",
|
||||
base_url="https://openrouter.ai/api/v1",
|
||||
model="test/model",
|
||||
platform=platform,
|
||||
quiet_mode=True,
|
||||
session_db=db,
|
||||
session_id=session_id,
|
||||
skip_context_files=True,
|
||||
skip_memory=True,
|
||||
)
|
||||
|
||||
compressor = MagicMock()
|
||||
compressor.compress.return_value = [
|
||||
{"role": "user", "content": "[CONTEXT COMPACTION] summary"},
|
||||
{"role": "user", "content": "tail"},
|
||||
]
|
||||
compressor.compression_count = 1
|
||||
compressor.last_prompt_tokens = 0
|
||||
compressor.last_completion_tokens = 0
|
||||
compressor._last_summary_error = None
|
||||
compressor._last_compress_aborted = False
|
||||
compressor._last_summary_auth_failure = False
|
||||
compressor._last_aux_model_failure_model = None
|
||||
compressor._last_aux_model_failure_error = None
|
||||
agent.context_compressor = compressor
|
||||
return agent
|
||||
|
||||
|
||||
def _msgs(n=20):
|
||||
return [{"role": "user", "content": f"m{i}"} for i in range(n)]
|
||||
|
||||
|
||||
class TestGoalMigratesOnRotation:
|
||||
def test_goal_follows_compression_rotation(self, tmp_path: Path):
|
||||
db = SessionDB(db_path=tmp_path / "state.db")
|
||||
parent = "PARENT_GOAL_ROT"
|
||||
db.create_session(parent, source="cli")
|
||||
agent = _build_agent_with_db(db, parent)
|
||||
|
||||
# Set a persistent goal on the parent via the real persistence path.
|
||||
with patch.dict(os.environ, {"HERMES_HOME": str(tmp_path / ".hermes")}):
|
||||
(tmp_path / ".hermes").mkdir(exist_ok=True)
|
||||
import hermes_cli.goals as goals
|
||||
goals._DB_CACHE.clear()
|
||||
# Point the goal DB at the same state.db the agent uses.
|
||||
with patch.object(goals, "_get_session_db", return_value=db):
|
||||
goals.save_goal(parent, goals.GoalState(goal="finish the migration"))
|
||||
|
||||
agent._compress_context(_msgs(), "sys", approx_tokens=120_000)
|
||||
child = agent.session_id
|
||||
assert child != parent # rotation happened
|
||||
|
||||
migrated = goals.load_goal(child)
|
||||
assert migrated is not None
|
||||
assert migrated.goal == "finish the migration"
|
||||
goals._DB_CACHE.clear()
|
||||
|
||||
|
||||
class TestOrphanRollbackOnCreateFailure:
|
||||
def test_rolls_back_to_parent_when_child_create_fails(self, tmp_path: Path):
|
||||
db = SessionDB(db_path=tmp_path / "state.db")
|
||||
parent = "PARENT_ORPHAN_ROT"
|
||||
db.create_session(parent, source="cli")
|
||||
agent = _build_agent_with_db(db, parent)
|
||||
|
||||
# Make the CHILD create_session raise, but let the initial parent
|
||||
# end_session/reopen work. We patch create_session to blow up.
|
||||
real_create = db.create_session
|
||||
|
||||
def _boom(*a, **k):
|
||||
raise RuntimeError("FOREIGN KEY constraint failed")
|
||||
|
||||
with patch.object(db, "create_session", side_effect=_boom):
|
||||
agent._compress_context(_msgs(), "sys", approx_tokens=120_000)
|
||||
|
||||
# The live id must roll back to the still-indexed parent — NOT a
|
||||
# phantom child id that has no row in state.db.
|
||||
assert agent.session_id == parent
|
||||
assert db.get_session(parent) is not None
|
||||
_ = real_create # silence unused
|
||||
|
||||
|
||||
class TestPlatformForwardedAtBoundary:
|
||||
def test_on_session_start_receives_platform(self, tmp_path: Path):
|
||||
db = SessionDB(db_path=tmp_path / "state.db")
|
||||
parent = "PARENT_PLATFORM_ROT"
|
||||
db.create_session(parent, source="telegram")
|
||||
agent = _build_agent_with_db(db, parent, platform="telegram")
|
||||
|
||||
agent._compress_context(_msgs(), "sys", approx_tokens=120_000)
|
||||
|
||||
# The boundary notify must forward the platform so context-engine
|
||||
# plugins don't fall back to source=unknown (#27633).
|
||||
calls = [c for c in agent.context_compressor.on_session_start.call_args_list]
|
||||
assert calls, "on_session_start was not called at the boundary"
|
||||
kwargs = calls[-1].kwargs
|
||||
assert kwargs.get("platform") == "telegram"
|
||||
assert kwargs.get("boundary_reason") == "compression"
|
||||
|
|
@ -547,6 +547,47 @@ class TestGoalStateSubgoalsBackcompat:
|
|||
assert rt.subgoals == ["a", "b", "c"]
|
||||
|
||||
|
||||
class TestMigrateGoalToSession:
|
||||
"""migrate_goal_to_session carries a /goal from a parent session to its
|
||||
compression continuation child (#33618). load_goal does a flat
|
||||
per-session lookup with no lineage walk, so without migration an active
|
||||
goal silently dies when compression rotates session_id."""
|
||||
|
||||
def test_migrates_active_goal_to_child(self, hermes_home):
|
||||
from hermes_cli.goals import save_goal, load_goal, migrate_goal_to_session, GoalState
|
||||
save_goal("parent-sid", GoalState(goal="ship the feature"))
|
||||
assert migrate_goal_to_session("parent-sid", "child-sid", reason="compression") is True
|
||||
child = load_goal("child-sid")
|
||||
assert child is not None and child.goal == "ship the feature"
|
||||
# Parent row archived (cleared) so only the child is active.
|
||||
parent = load_goal("parent-sid")
|
||||
assert parent is not None and parent.status == "cleared"
|
||||
|
||||
def test_no_goal_to_migrate_returns_false(self, hermes_home):
|
||||
from hermes_cli.goals import migrate_goal_to_session, load_goal
|
||||
assert migrate_goal_to_session("empty-parent", "child2") is False
|
||||
assert load_goal("child2") is None
|
||||
|
||||
def test_does_not_clobber_existing_child_goal(self, hermes_home):
|
||||
from hermes_cli.goals import save_goal, load_goal, migrate_goal_to_session, GoalState
|
||||
save_goal("p3", GoalState(goal="parent goal"))
|
||||
save_goal("c3", GoalState(goal="child already has one"))
|
||||
assert migrate_goal_to_session("p3", "c3") is False
|
||||
assert load_goal("c3").goal == "child already has one"
|
||||
|
||||
def test_same_id_is_noop(self, hermes_home):
|
||||
from hermes_cli.goals import save_goal, migrate_goal_to_session, GoalState
|
||||
save_goal("same", GoalState(goal="g"))
|
||||
assert migrate_goal_to_session("same", "same") is False
|
||||
|
||||
def test_cleared_goal_not_migrated(self, hermes_home):
|
||||
from hermes_cli.goals import save_goal, clear_goal, migrate_goal_to_session, load_goal, GoalState
|
||||
save_goal("p4", GoalState(goal="done already"))
|
||||
clear_goal("p4")
|
||||
assert migrate_goal_to_session("p4", "c4") is False
|
||||
assert load_goal("c4") is None
|
||||
|
||||
|
||||
class TestGoalManagerSubgoals:
|
||||
def test_add_subgoal(self, hermes_home):
|
||||
from hermes_cli.goals import GoalManager
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue