fix(gateway): stop per-turn agent-cache eviction from model + message_id signature churn

Two independent bugs evicted the cached gateway AIAgent on every turn, preventing the prompt cache from ever warming: 1. Model normalization mismatch: the post-run fallback-eviction check compared _agent.model (stripped in AIAgent.__init__) against the raw _resolve_gateway_model() config string. For vendor-prefixed config on native providers (e.g. 'deepseek/deepseek-v4-pro' vs 'deepseek-v4-pro') this was always unequal, so the agent was evicted after every successful run. Normalize _cfg_model the same way (skip aggregators). 2. Discord triggering message_id leaked into the cached system prompt via build_session_context_prompt()'s Discord IDs block. message_id changes every turn, so the agent-cache signature (computed from the ephemeral prompt) changed every Discord turn -> rebuild every message. The id is now injected per-turn into the user message (where per-turn content belongs and does not touch the cache signature); the cached IDs block carries a static pointer to it, preserving reply/react/pin via the discord tools. Adapted from #28846. Bug #1 fix is the contributor's; bug #2 reworked to be non-destructive (keeps the triggering-id capability instead of deleting it). Redundant auto-reset eviction (already on main via #9893/#48031) and the wrong-premise reset_context_note plumbing from the original PR were dropped. Co-authored-by: Hermes Agent <hermes@nousresearch.com>
2026-07-01 12:02:05 +00:00 · 2026-06-30 03:41:23 -07:00 · 2026-06-30 03:41:23 -07:00 · d6c53dcdcb
commit d6c53dcdcb
parent e7ca53e6b8
4 changed files with 91 additions and 1 deletions
--- a/gateway/run.py
+++ b/gateway/run.py
@ -9558,6 +9558,25 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
                context_note = _build_document_context_note(display_name, agent_path, mtype)
                message_text = f"{context_note}\n\n{message_text}"

+        # Discord: surface the triggering message id per-turn on the user
+        # message rather than in the cached system prompt. message_id changes
+        # every turn, so baking it into build_session_context_prompt() would
+        # bust the agent-cache signature and rebuild the AIAgent every message
+        # (destroying prompt caching). The static IDs block points the agent
+        # here; the volatile id rides the per-turn user content.
+        if (
+            source is not None
+            and getattr(source, "platform", None) == Platform.DISCORD
+            and getattr(event, "message_id", None)
+        ):
+            from gateway.session import _discord_tools_loaded as _disc_tools_loaded
+            if _disc_tools_loaded():
+                message_text = (
+                    f"[Triggering message id: `{event.message_id}` — use as "
+                    f"`message_id` for reply/react/pin via the discord tools.]\n\n"
+                    f"{message_text}"
+                )
+
        if getattr(event, "reply_to_text", None) and event.reply_to_message_id:
            # Always inject the reply-to pointer — even when the quoted text
            # already appears in history. The prefix isn't deduplication, it's
@ -17745,6 +17764,24 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
            _run_failed = _result_for_fb.get("failed") if _result_for_fb else False
            if _agent is not None and hasattr(_agent, 'model') and not _run_failed:
                _cfg_model = _resolve_gateway_model()
+                # Normalize _cfg_model the same way AIAgent.__init__ does, so a
+                # vendor-prefixed config value (e.g. "deepseek/deepseek-v4-pro")
+                # matches the agent's stripped model ("deepseek-v4-pro") on
+                # native providers. Without this, _agent.model != _cfg_model is
+                # always true for vendor-prefixed config and the cached agent is
+                # evicted on every successful turn — destroying prompt caching.
+                # Aggregators (openrouter, etc.) keep the vendor/model slug, so
+                # they're left untouched.
+                try:
+                    from hermes_cli.model_normalize import (
+                        _AGGREGATOR_PROVIDERS,
+                        normalize_model_for_provider,
+                    )
+                    _agent_provider = getattr(_agent, 'provider', '') or ''
+                    if _agent_provider and _agent_provider not in _AGGREGATOR_PROVIDERS:
+                        _cfg_model = normalize_model_for_provider(_cfg_model, _agent_provider)
+                except Exception:
+                    pass
                if _agent.model != _cfg_model and not self._is_intentional_model_switch(session_key, _agent.model):
                    # Fallback activated on a successful run — evict cached
                    # agent so the next message retries the primary model.
--- a/gateway/session.py
+++ b/gateway/session.py
@ -451,7 +451,17 @@ def build_session_context_prompt(
            else:
                id_lines.append(f"  - Channel: `{src.chat_id}`")
            if src.message_id:
-                id_lines.append(f"  - Triggering message: `{src.message_id}`")
+                # The triggering message id is volatile (changes every turn).
+                # Keep it OUT of this cached system-prompt block — including it
+                # here changes build_session_context_prompt() output per turn,
+                # which busts the gateway agent-cache signature and forces an
+                # AIAgent rebuild on every Discord message. The actual id is
+                # injected per-turn into the user message instead (see the
+                # "Triggering message id" note in run.py).
+                id_lines.append(
+                    "  - Triggering message: provided per-turn in the incoming "
+                    "user message (use it as `message_id` for reply/react/pin)"
+                )
            lines.extend(id_lines)
        else:
            lines.append("")
--- a/scripts/release.py
+++ b/scripts/release.py
@ -47,6 +47,7 @@ ACP_REGISTRY_MANIFEST = REPO_ROOT / "acp_registry" / "agent.json"
 AUTHOR_MAP = {
    "193368749+jimmyjohansson84@users.noreply.github.com": "jimmyjohansson84",  # PR #27123 salvage (Kanban unknown-skill warn-instead-of-crash; #27136)
    "gxalong@gmail.com": "Jeffgithub0029",  # PR #28558 salvage (chunk Telegram text *after* MarkdownV2/HTML formatting so escaping inflation can't push a send over the 4096 UTF-16 limit; #28557)
+    "273238055+fayenix@users.noreply.github.com": "fayenix",  # PR #28846 salvage (normalize _cfg_model in gateway fallback-eviction so vendor-prefixed config matches stripped agent.model on native providers)
    "phanvanhoa@gmail.com": "theAgenticBuilder",  # PR #14180 salvage (route delegate_task progress lines through _safe_print so ACP stdio JSON-RPC frames stay clean)
    "huangxudong663@gmail.com": "huangxudong663-sys",  # PR #15157 salvage (isinstance(dict) guard on tool-call model_extra; NVIDIA NIM non-dict crash)
    "39369769+jasonQin6@users.noreply.github.com": "jasonQin6",  # PR #15093 salvage (session staleness guard on stream consumer run() loop; #11016 follow-up)
--- a/tests/gateway/test_session.py
+++ b/tests/gateway/test_session.py
@ -235,6 +235,48 @@ class TestBuildSessionContextPrompt:
        assert "Discord" in prompt
        assert "cannot search" in prompt.lower() or "do not have access" in prompt.lower()

+    def test_discord_prompt_stable_across_message_id(self):
+        """The cached system prompt must NOT vary with the triggering message_id.
+
+        message_id changes every turn; baking it into the Discord IDs block
+        busts the gateway agent-cache signature and rebuilds the AIAgent on
+        every message (destroying prompt caching). The volatile id is injected
+        per-turn into the user message instead — the cached block only carries
+        a static pointer.
+        """
+        from unittest.mock import patch
+        import gateway.session as _gs
+
+        config = GatewayConfig(
+            platforms={
+                Platform.DISCORD: PlatformConfig(enabled=True, token="fake-d...oken"),
+            },
+        )
+
+        def _prompt_for(msg_id):
+            source = SessionSource(
+                platform=Platform.DISCORD,
+                chat_id="chan-1",
+                chat_name="Server",
+                chat_type="group",
+                user_name="alice",
+                guild_id="guild-123",
+                message_id=msg_id,
+            )
+            ctx = build_session_context(source, config)
+            return build_session_context_prompt(ctx)
+
+        # Force the Discord IDs block on (it only emits when discord tools load).
+        with patch.object(_gs, "_discord_tools_loaded", return_value=True):
+            p1 = _prompt_for("1001")
+            p2 = _prompt_for("2002")
+            p3 = _prompt_for("3003")
+
+        assert p1 == p2 == p3, "system prompt must be stable across message_id"
+        assert "1001" not in p1 and "2002" not in p2 and "3003" not in p3
+        # Static pointer tells the agent where the volatile id actually lives.
+        assert "provided per-turn in the incoming user message" in p1
+
    def test_slack_prompt_includes_platform_notes(self):
        config = GatewayConfig(
            platforms={