From 49596b70cb2d0d328d68645905febb074e494e77 Mon Sep 17 00:00:00 2001 From: Brooklyn Nicholson Date: Thu, 18 Jun 2026 15:56:39 -0500 Subject: [PATCH] fix(gateway): resume follows the compression tip so post-compression replies render MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Auto-compression ends the live session and forks a continuation child (linked via parent_session_id). A long-lived parent keeps its own flushed message rows, so resolve_resume_session_id()'s empty-head walk never redirected it — resuming the parent id reloaded the pre-compression transcript and dropped every turn generated after compression, including the assistant's response. On the desktop this is the recurring "I sent a message, came back, and the reply isn't there" report on large sessions: the chat's routed id is the pre-rotation id, and both the gateway session.resume RPC and the REST /messages read anchored on it. Fix the resolver at the chokepoint: resolve_resume_session_id() now follows the compression-continuation chain forward via get_compression_tip() before its existing empty-head descendant walk. get_compression_tip() only follows children whose parent ended with end_reason='compression' (created after the parent was ended), so delegation/branch children never hijack a resume. This fixes every resume caller at once (REST /messages, CLI --resume, gateway /resume). session.resume in tui_gateway was the one resume path that never called the resolver — it used the raw target id directly. Route it through resolve_resume_session_id() too (non-lazy only; lazy watch windows must stay on their exact child branch). Resolving up front also re-anchors the live-session fast path so a still-live rotated session is reused by its new key instead of rebuilding a duplicate agent on the stale parent. Tests: - resolve_resume_session_id follows the tip even when the parent retains messages, and is not confused by a delegation child. 
- session.resume binds the agent to the continuation tip and returns the post-compression reply. --- hermes_state.py | 18 ++++++ .../test_resolve_resume_session_id.py | 40 +++++++++++++ tests/test_tui_gateway_server.py | 59 +++++++++++++++++++ tui_gateway/server.py | 21 +++++++ 4 files changed, 138 insertions(+) diff --git a/hermes_state.py b/hermes_state.py index 9653eae017f..19c6a269b99 100644 --- a/hermes_state.py +++ b/hermes_state.py @@ -2820,6 +2820,24 @@ class SessionDB: if not session_id: return session_id + # Follow the compression-continuation chain forward to the live tip + # FIRST. Auto-compression ends the current session and forks a + # continuation child, but a long-lived parent keeps its own flushed + # message rows — so the empty-head walk below never redirects it, and + # resuming the parent id reloads the pre-compression transcript while + # the turns generated *after* compression (and their responses) sit in + # the continuation. ``get_compression_tip`` is lineage-aware: it only + # follows children whose parent ended with ``end_reason='compression'`` + # (created after the parent was ended), so delegation / branch children + # never hijack the resume. This is the fix for the desktop "I came back + # and the reply isn't there" report on large sessions. + try: + tip = self.get_compression_tip(session_id) + except Exception: + tip = session_id + if tip and tip != session_id: + session_id = tip + with self._lock: # If this session already has messages, nothing to redirect. 
try: diff --git a/tests/hermes_state/test_resolve_resume_session_id.py b/tests/hermes_state/test_resolve_resume_session_id.py index ec637c6d205..b4dd8717a2e 100644 --- a/tests/hermes_state/test_resolve_resume_session_id.py +++ b/tests/hermes_state/test_resolve_resume_session_id.py @@ -83,6 +83,46 @@ def test_walks_from_middle_of_chain(db): assert db.resolve_resume_session_id("c") == "d" +def test_follows_compression_tip_when_parent_retains_messages(db): + # The bug behind the desktop "I came back and the reply isn't there" report + # on large sessions: auto-compression ends the live session and forks a + # continuation child, but a long parent keeps its own flushed message rows. + # The resolver's empty-head walk never redirects a non-empty head, so resuming + # the parent id reloaded the pre-compression transcript and the response + # generated *after* compression (which lives in the continuation) was + # missing. resolve_resume_session_id must follow the compression-tip chain + # forward even when the parent still has messages. + base = int(time.time()) - 10_000 + db.create_session("root", source="cli") + db.append_message("root", role="user", content="pre-compression turn") + db.end_session("root", "compression") + db.create_session("cont", source="cli", parent_session_id="root") + db.append_message("cont", role="assistant", content="post-compression reply") + # Force deterministic ordering so the continuation's started_at is clearly + # at/after the parent's ended_at (the get_compression_tip discriminator). + db._conn.execute("UPDATE sessions SET started_at = ?, ended_at = ? WHERE id = 'root'", (base, base + 50)) + db._conn.execute("UPDATE sessions SET started_at = ? 
WHERE id = 'cont'", (base + 100,)) + db._conn.commit() + + assert db.resolve_resume_session_id("root") == "cont" + + +def test_compression_tip_not_confused_with_delegation_child(db): + # A delegation/branch child is created while the parent is still live (the + # parent is NOT ended with end_reason='compression'), so resuming the + # parent must stay on the parent, not get hijacked into the subagent branch. + base = int(time.time()) - 10_000 + db.create_session("conv", source="cli") + db.append_message("conv", role="user", content="parent turn") + db.create_session("subagent", source="cli", parent_session_id="conv") + db.append_message("subagent", role="assistant", content="delegated work") + db._conn.execute("UPDATE sessions SET started_at = ? WHERE id = 'conv'", (base,)) + db._conn.execute("UPDATE sessions SET started_at = ? WHERE id = 'subagent'", (base + 100,)) + db._conn.commit() + + assert db.resolve_resume_session_id("conv") == "conv" + + def test_prefers_most_recent_child_when_fork_exists(db): # If a session was somehow forked (two children), pick the latest one. # In practice, compression only produces single-chain shape, but the helper diff --git a/tests/test_tui_gateway_server.py b/tests/test_tui_gateway_server.py index e04d07756a3..6159dab0c16 100644 --- a/tests/test_tui_gateway_server.py +++ b/tests/test_tui_gateway_server.py @@ -954,6 +954,65 @@ def test_session_resume_uses_parent_lineage_for_display(monkeypatch): assert captured["history_calls"] == [("tip", False), ("tip", True)] +def test_session_resume_follows_compression_tip(monkeypatch, tmp_path): + """Resuming a rotated-out parent id must load the continuation's messages. 
+ + Regression for the desktop "I came back and the reply isn't there" report: + auto-compression ends the live session and forks a continuation child, so a + resume on the parent id (the desktop's routed id when the chat was opened + before it rotated) used to reload the pre-compression transcript and drop + the response generated after compression. session.resume must follow the + compression tip via resolve_resume_session_id. + """ + from hermes_state import SessionDB + + db = SessionDB(db_path=tmp_path / "state.db") + base = int(time.time()) - 10_000 + db.create_session("parent_root", source="tui") + db.append_message("parent_root", role="user", content="pre-compression turn") + db.end_session("parent_root", "compression") + db.create_session("cont_tip", source="tui", parent_session_id="parent_root") + db.append_message("cont_tip", role="assistant", content="post-compression reply") + db._conn.execute( + "UPDATE sessions SET started_at = ?, ended_at = ? WHERE id = 'parent_root'", + (base, base + 50), + ) + db._conn.execute("UPDATE sessions SET started_at = ? 
WHERE id = 'cont_tip'", (base + 100,)) + db._conn.commit() + + captured = {} + + def fake_make_agent(sid, key, session_id=None, session_db=None, **kwargs): + captured["agent_session_id"] = session_id + return types.SimpleNamespace(model="test", provider="test") + + monkeypatch.setattr(server, "_get_db", lambda: db) + monkeypatch.setattr(server, "_enable_gateway_prompts", lambda: None) + monkeypatch.setattr(server, "_set_session_context", lambda target: []) + monkeypatch.setattr(server, "_clear_session_context", lambda tokens: None) + monkeypatch.setattr(server, "_make_agent", fake_make_agent) + monkeypatch.setattr( + server, "_session_info", lambda agent, *a: {"model": "test", "tools": {}, "skills": {}} + ) + monkeypatch.setattr( + server, "_init_session", lambda sid, key, agent, history, cols=80, **_kwargs: None + ) + + try: + resp = server.handle_request( + {"id": "1", "method": "session.resume", "params": {"session_id": "parent_root"}} + ) + finally: + db.close() + + # The agent must bind to the continuation tip, and the returned transcript + # must include the post-compression reply (which lives only in the tip). + assert resp["result"]["session_key"] == "cont_tip" + assert captured["agent_session_id"] == "cont_tip" + texts = [m.get("text") for m in resp["result"]["messages"]] + assert "post-compression reply" in texts + + def test_session_resume_passes_stored_runtime_to_agent(monkeypatch): captured = {} diff --git a/tui_gateway/server.py b/tui_gateway/server.py index f13a4c5c760..294e543c230 100644 --- a/tui_gateway/server.py +++ b/tui_gateway/server.py @@ -4419,6 +4419,27 @@ def _(rid, params: dict) -> dict: found = {} else: return _err(rid, 4007, "session not found") + + # Follow the compression-continuation chain to the live tip so a resume on + # a rotated-out parent id binds to the descendant that actually holds the + # post-compression turns. 
Auto-compression ends the session and forks a + # continuation child; without this, resuming the original id (the desktop's + # routed id when the chat was opened before it rotated) reloads the parent + # transcript and the response generated after compression is missing — the + # "I came back and the reply isn't there" bug on large sessions. Resolving + # here also re-anchors the fast path below so a still-live rotated session + # is reused (by its new key) instead of rebuilding a duplicate agent on the + # stale parent. Skipped for lazy watch windows, which intentionally attach + # to the exact child branch they were opened on. + if found and not is_truthy_value(params.get("lazy", False)): + try: + tip = db.resolve_resume_session_id(target) + except Exception: + tip = target + if tip and tip != target: + target = tip + found = db.get_session(target) or found + profile_resume_cwd = str(found.get("cwd") or "").strip() or _profile_configured_cwd( profile_home )