fix(gateway): keep Telegram topic bindings aligned with compression children (#34409)

Telegram DM topic bindings persist (chat_id, thread_id) -> session_id in
SQLite so reopening a topic resumes the right Hermes session. When
compression rotated session_entry.session_id mid-turn, the binding row
stayed pointed at the pre-compression parent. On the next inbound
message in that topic the gateway reloaded the oversized parent
transcript, retriggering preflight compression — sometimes in a loop.

Two-pronged fix:

1. `_sync_telegram_topic_binding(source, entry, *, reason)` helper
   called immediately after each of the three session_id rotation sites
   in _handle_message_with_agent (hygiene compression, agent-result
   compression rotation, /compress command). Keeps future bindings
   fresh.

2. Read-path self-heal: when resolving an existing topic binding, walk
   SessionDB.get_compression_tip() forward and switch_session to the
   descendant instead of the stored parent. Rewrites the binding row to
   the tip so subsequent messages skip the walk. Heals existing stale
   state on the next user message without requiring a gateway restart.

Skipped from competing PRs as not load-bearing for the bug:
- advance_session_after_compression SessionStore primitive (#26204/
  #28870/#33416) — preserves end_reason='compression' analytics nicety
  but doesn't affect routing correctness.
- Cached-agent eviction on session_id mismatch — _compress_context()
  already mutates tmp_agent.session_id on the cached object so the
  in-memory agent self-corrects.
- Startup repair pass (#33416) — redundant once the read path heals on
  the next message; a one-line CLI follow-up can repair bindings for
  topics that users never reopen.

Closes #20470, #29712, #33414. Acknowledges work in #23195
(@litvinovvo), #26204 (@bizyumov), #28870 (@donrhmexe), #29713
(@hehehe0803), #29945 (@eugeneb1ack), #33416 (@bizyumov).
This commit is contained in:
Teknium 2026-05-28 23:25:52 -07:00 committed by GitHub
parent ec7736f8a7
commit db96fc60d0
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 150 additions and 0 deletions

View file

@ -448,6 +448,89 @@ async def test_new_inside_telegram_topic_rewrites_binding_to_new_session(tmp_pat
assert binding["session_id"] == "new-topic-session"
@pytest.mark.asyncio
async def test_topic_binding_follows_compression_tip_on_read(tmp_path, monkeypatch):
    """A stale topic binding is healed to the compression child on read.

    Regression for #20470 / #29712 / #33414. Compression rotates the
    session_id, yet the stored binding row kept referencing the parent, so
    the next inbound message in the topic reloaded the oversized parent
    transcript and re-ran preflight compression, sometimes in a loop.

    The read path is now expected to follow
    ``SessionDB.get_compression_tip()`` and to rewrite the binding row so
    it references the descendant session.
    """
    import gateway.run as gateway_run

    chat_id = user_id = "208214988"
    thread_id = "17585"
    parent_id = "parent-session"
    child_id = "child-session"

    db = SessionDB(db_path=tmp_path / "state.db")
    db.enable_telegram_topic_mode(chat_id=chat_id, user_id=user_id)

    # Lay down the parent -> compression-child chain. Order matters: the
    # parent is ended (ended_at set) before the child is created
    # (started_at = "now"), so on a real clock the child's started_at is
    # never earlier than the parent's ended_at.
    db.create_session(session_id=parent_id, source="telegram", user_id=user_id)
    db.end_session(parent_id, end_reason="compression")
    db.create_session(
        session_id=child_id,
        source="telegram",
        user_id=user_id,
        parent_session_id=parent_id,
    )

    source = _make_source(thread_id=thread_id)
    session_key = build_session_key(source)

    # Reproduce the pre-fix state: the topic is still bound to the
    # pre-compression parent session.
    db.bind_telegram_topic(
        chat_id=chat_id,
        thread_id=thread_id,
        user_id=user_id,
        session_key=session_key,
        session_id=parent_id,
    )

    runner = _make_runner(session_db=db)

    # Record every session id the gateway asks switch_session() for; the
    # stub answers with a SessionEntry carrying exactly that id.
    requested_ids: list = []

    def fake_switch(_key, new_session_id):
        requested_ids.append(new_session_id)
        return SessionEntry(
            session_key=session_key,
            session_id=new_session_id,
            created_at=datetime.now(),
            updated_at=datetime.now(),
            platform=Platform.TELEGRAM,
            chat_type="dm",
            origin=source,
        )

    def fake_runtime_agent_kwargs():
        return {"api_key": "***"}

    agent_result = {
        "success": True,
        "final_response": "ok",
        "session_id": child_id,
        "messages": [],
    }

    runner.session_store.switch_session = MagicMock(side_effect=fake_switch)
    runner._run_agent = AsyncMock(return_value=agent_result)
    monkeypatch.setattr(
        gateway_run, "_resolve_runtime_agent_kwargs", fake_runtime_agent_kwargs
    )

    inbound = _make_event("follow up after compression", thread_id=thread_id)
    await runner._handle_message(inbound)

    # The most recent switch must target the compression tip rather than
    # the stale parent (an empty list means no switch happened at all).
    assert requested_ids[-1:] == [child_id]

    # The persisted binding must now reference the descendant, so later
    # inbound messages resolve directly without walking the chain again.
    refreshed = db.get_telegram_topic_binding(chat_id=chat_id, thread_id=thread_id)
    assert refreshed is not None
    assert refreshed["session_id"] == child_id
@pytest.mark.asyncio
async def test_topic_root_command_explicitly_migrates_and_enables_topic_mode(tmp_path, monkeypatch):
import gateway.run as gateway_run