fix(gateway): keep /model + /reasoning overrides on topic recovery & compression splits

Session-scoped /model and /reasoning overrides were silently lost on Telegram DM/forum topics and after compression session splits (#30479). Root cause: _handle_message_with_agent rewrites source.thread_id via _recover_telegram_topic_thread_id (lobby/stripped reply -> the user's bound topic) before deriving the session key. The /model and /reasoning handlers derived their override key from the raw inbound event.source, skipping that recovery, so the override was stored under one key and the next message turn read a different key. Fix: add _normalize_source_for_session_key (applies the same recovery a message turn does) and use it in both handlers before deriving the key. session_id rotation on compression was never the cause — overrides are keyed by the durable session_key; the split path preserves it. Author: teknium1 <127238744+teknium1@users.noreply.github.com>
2026-07-26 17:38:36 +00:00 · 2026-06-07 19:50:07 -07:00 · 2026-06-07 19:50:07 -07:00 · 2a10da3a16
commit 2a10da3a16
parent b8469a81e3
2 changed files with 148 additions and 1 deletions
--- a/gateway/run.py
+++ b/gateway/run.py
@ -2618,6 +2618,34 @@ class GatewayRunner:
                return None
        return None

+    def _normalize_source_for_session_key(
+        self,
+        source: SessionSource,
+    ) -> SessionSource:
+        """Apply Telegram DM topic recovery to a source for session-key purposes.
+
+        ``_handle_message_with_agent`` rewrites ``source.thread_id`` via
+        ``_recover_telegram_topic_thread_id`` *before* deriving the session
+        key for a normal message turn (a lobby/stripped reply gets pinned to
+        the user's last-active topic).  Session-scoped command handlers like
+        ``/model`` and ``/reasoning`` derive their override key from the raw
+        inbound ``event.source``, which skips that recovery — so the override
+        is stored under a different key than the next message turn reads,
+        and the override is silently dropped on Telegram forum topics and
+        after compression session splits (#30479).
+
+        Returns a recovery-normalized copy when a rewrite applies, otherwise
+        the original source unchanged.  Always derive the override storage key
+        from the result so storage and read use an identical key.
+        """
+        try:
+            recovered = self._recover_telegram_topic_thread_id(source)
+        except Exception:
+            return source
+        if recovered is None:
+            return source
+        return dataclasses.replace(source, thread_id=recovered)
+
    def _resolve_session_agent_runtime(
        self,
        *,
@ -11175,6 +11203,11 @@ class GatewayRunner:

        # Check for session override
        source = event.source
+        # Normalize the source the same way a normal message turn does
+        # (Telegram DM topic recovery) before deriving the override key, so
+        # the override is stored under the key the next message turn reads
+        # (#30479).
+        source = self._normalize_source_for_session_key(source)
        session_key = self._session_key_for_source(source)
        override = self._session_model_overrides.get(session_key, {})
        if override:
@ -12923,7 +12956,11 @@ class GatewayRunner:
        raw_args = event.get_command_args().strip()
        args, persist_global = self._parse_reasoning_command_args(raw_args)
        config_path = _hermes_home / "config.yaml"
-        session_key = self._session_key_for_source(event.source)
+        # Normalize the source (Telegram DM topic recovery) before deriving
+        # the override key so storage matches the key the next message turn
+        # reads — same fix as /model (#30479).
+        _reasoning_source = self._normalize_source_for_session_key(event.source)
+        session_key = self._session_key_for_source(_reasoning_source)
        self._show_reasoning = self._load_show_reasoning()
        self._reasoning_config = self._resolve_session_reasoning_config(
            source=event.source,
--- a/tests/gateway/test_session_override_thread_recovery.py
+++ b/tests/gateway/test_session_override_thread_recovery.py
@ -0,0 +1,110 @@
+"""Regression tests for #30479 — session-scoped /model and /reasoning overrides
+silently lost on Telegram forum/DM topics and after compression session splits.
+
+Root cause: ``_handle_message_with_agent`` rewrites ``source.thread_id`` via
+``_recover_telegram_topic_thread_id`` (lobby/stripped reply -> the user's
+last-active bound topic) *before* deriving the session key for a message turn.
+The ``/model`` and ``/reasoning`` command handlers derived their override key
+from the raw inbound ``event.source``, skipping that recovery — so the override
+was stored under one key and the next message turn read a different key, and the
+override was dropped.
+
+Fix: both command handlers normalize the source via
+``_normalize_source_for_session_key`` before deriving the override key, so
+storage and read keys are identical.
+"""
+
+import threading
+from unittest.mock import MagicMock
+
+import gateway.run as gateway_run
+from gateway.config import Platform
+from gateway.session import SessionSource, build_session_key
+
+
+def _make_runner(recovered_thread_id=None):
+    runner = object.__new__(gateway_run.GatewayRunner)
+    runner.config = None
+    runner.session_store = None
+    runner._session_db = None
+    runner._session_model_overrides = {}
+    runner._session_reasoning_overrides = {}
+    runner._agent_cache = {}
+    runner._agent_cache_lock = threading.Lock()
+    # Stub topic recovery: returns the bound topic id for a lobby message,
+    # None otherwise (the real method's contract).
+    runner._recover_telegram_topic_thread_id = MagicMock(return_value=recovered_thread_id)
+    return runner
+
+
+def _topic_dm_source(thread_id):
+    """A Telegram DM in topic mode. thread_id="" / "1" == General/lobby."""
+    return SessionSource(
+        platform=Platform.TELEGRAM,
+        chat_id="555",
+        chat_name="Forum DM",
+        chat_type="dm",
+        user_id="user-1",
+        thread_id=thread_id,
+    )
+
+
+def test_normalize_rewrites_lobby_thread_to_bound_topic():
+    """A lobby (stripped) reply gets pinned to the user's bound topic id."""
+    runner = _make_runner(recovered_thread_id="42")
+    src = _topic_dm_source(thread_id="")  # lobby/General — no message_thread_id
+
+    normalized = runner._normalize_source_for_session_key(src)
+
+    assert normalized.thread_id == "42"
+    # Original source is left untouched (we return a copy).
+    assert src.thread_id == ""
+
+
+def test_normalize_passthrough_when_no_recovery():
+    """No recovery -> source returned unchanged (identity)."""
+    runner = _make_runner(recovered_thread_id=None)
+    src = _topic_dm_source(thread_id="42")
+
+    normalized = runner._normalize_source_for_session_key(src)
+
+    assert normalized is src
+
+
+def test_normalize_swallows_recovery_exceptions():
+    """Recovery raising must not break the command — return the raw source."""
+    runner = _make_runner()
+    runner._recover_telegram_topic_thread_id = MagicMock(side_effect=RuntimeError("boom"))
+    src = _topic_dm_source(thread_id="")
+
+    normalized = runner._normalize_source_for_session_key(src)
+
+    assert normalized is src
+
+
+def test_override_key_matches_message_turn_key_after_recovery():
+    """The bug, end to end at the key level.
+
+    /model arrives as a lobby reply (thread_id="").  The next message turn
+    runs recovery and lands on the bound topic ("42").  After the fix, the
+    key the command stores under must equal the key the message turn reads.
+    """
+    runner = _make_runner(recovered_thread_id="42")
+
+    # --- /model command path (raw inbound is a lobby reply) ---
+    command_source = _topic_dm_source(thread_id="")
+    normalized_command_source = runner._normalize_source_for_session_key(command_source)
+    # _session_key_for_source falls back to build_session_key when there is no
+    # session_store; emulate that resolution here directly.
+    command_key = build_session_key(normalized_command_source)
+
+    # --- next message turn path (recovery already applied to source) ---
+    message_turn_source = _topic_dm_source(thread_id="42")
+    message_turn_key = build_session_key(message_turn_source)
+
+    assert command_key == message_turn_key
+
+    # And the orphaning the bug caused: storing under the RAW (pre-recovery)
+    # key would NOT be found by the message turn.
+    raw_key = build_session_key(command_source)
+    assert raw_key != message_turn_key