fix(gateway): resume follows the compression tip so post-compression replies render

Auto-compression ends the live session and forks a continuation child
(linked via parent_session_id). A long-lived parent keeps its own flushed
message rows, so resolve_resume_session_id()'s empty-head walk never
redirected it — resuming the parent id reloaded the pre-compression
transcript and dropped every turn generated after compression, including
the assistant's response. On the desktop this is the recurring "I sent a
message, came back, and the reply isn't there" report on large sessions:
the chat's routed id is the pre-rotation id, and both the gateway
session.resume RPC and the REST /messages read anchored on it.

Fix the resolver at the chokepoint: resolve_resume_session_id() now
follows the compression-continuation chain forward via get_compression_tip()
before its existing empty-head descendant walk. get_compression_tip() only
follows children whose parent ended with end_reason='compression' (created
after the parent was ended), so delegation/branch children never hijack a
resume. This fixes every resume caller at once (REST /messages, CLI
--resume, gateway /resume).

session.resume in tui_gateway was the one resume path that never called the
resolver — it used the raw target id directly. Route it through
resolve_resume_session_id() too (non-lazy only; lazy watch windows must
stay on their exact child branch). Resolving up front also re-anchors the
live-session fast path so a still-live rotated session is reused by its new
key instead of rebuilding a duplicate agent on the stale parent.

Tests:
- resolve_resume_session_id follows the tip even when the parent retains
  messages, and is not confused by a delegation child.
- session.resume binds the agent to the continuation tip and returns the
  post-compression reply.
This commit is contained in:
Brooklyn Nicholson 2026-06-18 15:56:39 -05:00
parent 3042045540
commit 49596b70cb
4 changed files with 138 additions and 0 deletions

View file

@ -2820,6 +2820,24 @@ class SessionDB:
if not session_id:
return session_id
# Follow the compression-continuation chain forward to the live tip
# FIRST. Auto-compression ends the current session and forks a
# continuation child, but a long-lived parent keeps its own flushed
# message rows — so the empty-head walk below never redirects it, and
# resuming the parent id reloads the pre-compression transcript while
# the turns generated *after* compression (and their responses) sit in
# the continuation. ``get_compression_tip`` is lineage-aware: it only
# follows children whose parent ended with ``end_reason='compression'``
# (created after the parent was ended), so delegation / branch children
# never hijack the resume. This is the fix for the desktop "I came back
# and the reply isn't there" report on large sessions.
try:
tip = self.get_compression_tip(session_id)
except Exception:
tip = session_id
if tip and tip != session_id:
session_id = tip
with self._lock:
# If this session already has messages, nothing to redirect.
try:

View file

@ -83,6 +83,46 @@ def test_walks_from_middle_of_chain(db):
assert db.resolve_resume_session_id("c") == "d"
def test_follows_compression_tip_when_parent_retains_messages(db):
    """Resolving a compressed parent that still has messages yields its continuation.

    Regression for the desktop "I came back and the reply isn't there" report
    on large sessions. Auto-compression ends the live session and forks a
    continuation child, yet a long parent keeps its own flushed message rows.
    The resolver's empty-head walk never redirects a non-empty head, so
    resuming the parent id used to reload the pre-compression transcript and
    miss the response generated *after* compression (which lives in the
    continuation). ``resolve_resume_session_id`` must therefore follow the
    compression-tip chain forward even when the parent still has messages.
    """
    root_started = int(time.time()) - 10_000
    root_ended = root_started + 50
    cont_started = root_started + 100

    # Parent: one flushed turn, then ended by compression.
    db.create_session("root", source="cli")
    db.append_message("root", role="user", content="pre-compression turn")
    db.end_session("root", "compression")

    # Continuation child holding the post-compression reply.
    db.create_session("cont", source="cli", parent_session_id="root")
    db.append_message("cont", role="assistant", content="post-compression reply")

    # Pin the timestamps so the continuation's started_at is unambiguously
    # at/after the parent's ended_at (the get_compression_tip discriminator).
    conn = db._conn
    conn.execute(
        "UPDATE sessions SET started_at = ?, ended_at = ? WHERE id = 'root'",
        (root_started, root_ended),
    )
    conn.execute(
        "UPDATE sessions SET started_at = ? WHERE id = 'cont'",
        (cont_started,),
    )
    conn.commit()

    resolved = db.resolve_resume_session_id("root")
    assert resolved == "cont"
def test_compression_tip_not_confused_with_delegation_child(db):
    """Resolving a still-live parent must not jump into a delegation child.

    A delegation/branch child is created while its parent is still live (the
    parent is NOT ended with ``end_reason='compression'``), so resuming the
    parent has to stay on the parent rather than being hijacked into the
    subagent branch.
    """
    parent_started = int(time.time()) - 10_000
    child_started = parent_started + 100

    # Parent is never ended here; the child merely points at it.
    db.create_session("conv", source="cli")
    db.append_message("conv", role="user", content="parent turn")
    db.create_session("subagent", source="cli", parent_session_id="conv")
    db.append_message("subagent", role="assistant", content="delegated work")

    # Deterministic ordering: the child starts after the parent.
    conn = db._conn
    conn.execute(
        "UPDATE sessions SET started_at = ? WHERE id = 'conv'",
        (parent_started,),
    )
    conn.execute(
        "UPDATE sessions SET started_at = ? WHERE id = 'subagent'",
        (child_started,),
    )
    conn.commit()

    resolved = db.resolve_resume_session_id("conv")
    assert resolved == "conv"
def test_prefers_most_recent_child_when_fork_exists(db):
# If a session was somehow forked (two children), pick the latest one.
# In practice, compression only produces single-chain shape, but the helper

View file

@ -954,6 +954,65 @@ def test_session_resume_uses_parent_lineage_for_display(monkeypatch):
assert captured["history_calls"] == [("tip", False), ("tip", True)]
def test_session_resume_follows_compression_tip(monkeypatch, tmp_path):
    """``session.resume`` on a rotated-out parent id loads the continuation.

    Regression for the desktop "I came back and the reply isn't there" report.
    Auto-compression ends the live session and forks a continuation child, so
    a resume on the parent id (the desktop's routed id when the chat was
    opened before it rotated) used to reload the pre-compression transcript
    and drop the response generated after compression. ``session.resume``
    must follow the compression tip via ``resolve_resume_session_id``.
    """
    from hermes_state import SessionDB

    store = SessionDB(db_path=tmp_path / "state.db")
    parent_started = int(time.time()) - 10_000
    parent_ended = parent_started + 50
    tip_started = parent_started + 100

    # Parent with one pre-compression turn, ended by compression.
    store.create_session("parent_root", source="tui")
    store.append_message("parent_root", role="user", content="pre-compression turn")
    store.end_session("parent_root", "compression")

    # Continuation child that alone holds the post-compression reply.
    store.create_session("cont_tip", source="tui", parent_session_id="parent_root")
    store.append_message("cont_tip", role="assistant", content="post-compression reply")

    # Pin timestamps so the child clearly starts after the parent ended.
    conn = store._conn
    conn.execute(
        "UPDATE sessions SET started_at = ?, ended_at = ? WHERE id = 'parent_root'",
        (parent_started, parent_ended),
    )
    conn.execute(
        "UPDATE sessions SET started_at = ? WHERE id = 'cont_tip'",
        (tip_started,),
    )
    conn.commit()

    seen = {}

    def fake_make_agent(sid, key, session_id=None, session_db=None, **kwargs):
        # Record which session id the gateway binds the agent to.
        seen["agent_session_id"] = session_id
        return types.SimpleNamespace(model="test", provider="test")

    # Replace the gateway's collaborators with inert stubs; only the real
    # SessionDB stays in play.
    stubs = {
        "_get_db": lambda: store,
        "_enable_gateway_prompts": lambda: None,
        "_set_session_context": lambda target: [],
        "_clear_session_context": lambda tokens: None,
        "_make_agent": fake_make_agent,
        "_session_info": lambda agent, *a: {"model": "test", "tools": {}, "skills": {}},
        "_init_session": lambda sid, key, agent, history, cols=80, **_kwargs: None,
    }
    for attr_name, stub in stubs.items():
        monkeypatch.setattr(server, attr_name, stub)

    request = {
        "id": "1",
        "method": "session.resume",
        "params": {"session_id": "parent_root"},
    }
    try:
        resp = server.handle_request(request)
    finally:
        store.close()

    # The agent must bind to the continuation tip, and the returned transcript
    # must include the post-compression reply (which lives only in the tip).
    result = resp["result"]
    assert result["session_key"] == "cont_tip"
    assert seen["agent_session_id"] == "cont_tip"
    texts = [message.get("text") for message in result["messages"]]
    assert "post-compression reply" in texts
def test_session_resume_passes_stored_runtime_to_agent(monkeypatch):
captured = {}

View file

@ -4419,6 +4419,27 @@ def _(rid, params: dict) -> dict:
found = {}
else:
return _err(rid, 4007, "session not found")
# Follow the compression-continuation chain to the live tip so a resume on
# a rotated-out parent id binds to the descendant that actually holds the
# post-compression turns. Auto-compression ends the session and forks a
# continuation child; without this, resuming the original id (the desktop's
# routed id when the chat was opened before it rotated) reloads the parent
# transcript and the response generated after compression is missing — the
# "I came back and the reply isn't there" bug on large sessions. Resolving
# here also re-anchors the fast path below so a still-live rotated session
# is reused (by its new key) instead of rebuilding a duplicate agent on the
# stale parent. Skipped for lazy watch windows, which intentionally attach
# to the exact child branch they were opened on.
if found and not is_truthy_value(params.get("lazy", False)):
try:
tip = db.resolve_resume_session_id(target)
except Exception:
tip = target
if tip and tip != target:
target = tip
found = db.get_session(target) or found
profile_resume_cwd = str(found.get("cwd") or "").strip() or _profile_configured_cwd(
profile_home
)