From d6c53dcdcb49694b382000b53f194b99d97aa72d Mon Sep 17 00:00:00 2001
From: fayenix <273238055+fayenix@users.noreply.github.com>
Date: Tue, 30 Jun 2026 03:41:23 -0700
Subject: [PATCH] fix(gateway): stop per-turn agent-cache eviction from model +
 message_id signature churn

Two independent bugs evicted the cached gateway AIAgent on every turn,
preventing the prompt cache from ever warming:

1. Model normalization mismatch: the post-run fallback-eviction check
   compared _agent.model (stripped in AIAgent.__init__) against the raw
   _resolve_gateway_model() config string. For vendor-prefixed config on
   native providers (e.g. 'deepseek/deepseek-v4-pro' vs 'deepseek-v4-pro')
   this was always unequal, so the agent was evicted after every
   successful run. Normalize _cfg_model the same way (skip aggregators).

2. Discord triggering message_id leaked into the cached system prompt via
   build_session_context_prompt()'s Discord IDs block. message_id changes
   every turn, so the agent-cache signature (computed from the ephemeral
   prompt) changed every Discord turn -> rebuild every message. The id is
   now injected per-turn into the user message (where per-turn content
   belongs and does not touch the cache signature); the cached IDs block
   carries a static pointer to it, preserving reply/react/pin via the
   discord tools.

Adapted from #28846. Bug #1 fix is the contributor's; bug #2 reworked to
be non-destructive (keeps the triggering-id capability instead of deleting
it). Redundant auto-reset eviction (already on main via #9893/#48031) and
the wrong-premise reset_context_note plumbing from the original PR were
dropped.

Co-authored-by: Hermes Agent <hermes@nousresearch.com>
---
 gateway/run.py                | 37 ++++++++++++++++++++++++++++++
 gateway/session.py            | 12 +++++++++-
 scripts/release.py            |  1 +
 tests/gateway/test_session.py | 42 +++++++++++++++++++++++++++++++++++
 4 files changed, 91 insertions(+), 1 deletion(-)

diff --git a/gateway/run.py b/gateway/run.py
index 92ae75a4e06..90622ef8d56 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -9558,6 +9558,25 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
                 context_note = _build_document_context_note(display_name, agent_path, mtype)
                 message_text = f"{context_note}\n\n{message_text}"
 
+        # Discord: surface the triggering message id per-turn on the user
+        # message rather than in the cached system prompt. message_id changes
+        # every turn, so baking it into build_session_context_prompt() would
+        # bust the agent-cache signature and rebuild the AIAgent every message
+        # (destroying prompt caching). The static IDs block points the agent
+        # here; the volatile id rides the per-turn user content.
+        if (
+            source is not None
+            and getattr(source, "platform", None) == Platform.DISCORD
+            and getattr(event, "message_id", None)
+        ):
+            from gateway.session import _discord_tools_loaded as _disc_tools_loaded
+            if _disc_tools_loaded():
+                message_text = (
+                    f"[Triggering message id: `{event.message_id}` — use as "
+                    f"`message_id` for reply/react/pin via the discord tools.]\n\n"
+                    f"{message_text}"
+                )
+
         if getattr(event, "reply_to_text", None) and event.reply_to_message_id:
             # Always inject the reply-to pointer — even when the quoted text
             # already appears in history. The prefix isn't deduplication, it's
@@ -17745,6 +17764,24 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
             _run_failed = _result_for_fb.get("failed") if _result_for_fb else False
             if _agent is not None and hasattr(_agent, 'model') and not _run_failed:
                 _cfg_model = _resolve_gateway_model()
+                # Normalize _cfg_model the same way AIAgent.__init__ does, so a
+                # vendor-prefixed config value (e.g. "deepseek/deepseek-v4-pro")
+                # matches the agent's stripped model ("deepseek-v4-pro") on
+                # native providers. Without this, _agent.model != _cfg_model is
+                # always true for vendor-prefixed config and the cached agent is
+                # evicted on every successful turn — destroying prompt caching.
+                # Aggregators (openrouter, etc.) keep the vendor/model slug, so
+                # they're left untouched.
+                try:
+                    from hermes_cli.model_normalize import (
+                        _AGGREGATOR_PROVIDERS,
+                        normalize_model_for_provider,
+                    )
+                    _agent_provider = getattr(_agent, 'provider', '') or ''
+                    if _agent_provider and _agent_provider not in _AGGREGATOR_PROVIDERS:
+                        _cfg_model = normalize_model_for_provider(_cfg_model, _agent_provider)
+                except Exception:
+                    pass
                 if _agent.model != _cfg_model and not self._is_intentional_model_switch(session_key, _agent.model):
                     # Fallback activated on a successful run — evict cached
                     # agent so the next message retries the primary model.
diff --git a/gateway/session.py b/gateway/session.py
index d2f5b500750..110e7827a26 100644
--- a/gateway/session.py
+++ b/gateway/session.py
@@ -451,7 +451,17 @@ def build_session_context_prompt(
             else:
                 id_lines.append(f"  - Channel: `{src.chat_id}`")
             if src.message_id:
-                id_lines.append(f"  - Triggering message: `{src.message_id}`")
+                # The triggering message id is volatile (changes every turn).
+                # Keep it OUT of this cached system-prompt block — including it
+                # here changes build_session_context_prompt() output per turn,
+                # which busts the gateway agent-cache signature and forces an
+                # AIAgent rebuild on every Discord message. The actual id is
+                # injected per-turn into the user message instead (see the
+                # "Triggering message id" note in run.py).
+                id_lines.append(
+                    "  - Triggering message: provided per-turn in the incoming "
+                    "user message (use it as `message_id` for reply/react/pin)"
+                )
             lines.extend(id_lines)
         else:
             lines.append("")
diff --git a/scripts/release.py b/scripts/release.py
index 1651d8d3a1e..d6337149b22 100755
--- a/scripts/release.py
+++ b/scripts/release.py
@@ -47,6 +47,7 @@ ACP_REGISTRY_MANIFEST = REPO_ROOT / "acp_registry" / "agent.json"
 AUTHOR_MAP = {
     "193368749+jimmyjohansson84@users.noreply.github.com": "jimmyjohansson84",  # PR #27123 salvage (Kanban unknown-skill warn-instead-of-crash; #27136)
     "gxalong@gmail.com": "Jeffgithub0029",  # PR #28558 salvage (chunk Telegram text *after* MarkdownV2/HTML formatting so escaping inflation can't push a send over the 4096 UTF-16 limit; #28557)
+    "273238055+fayenix@users.noreply.github.com": "fayenix",  # PR #28846 salvage (normalize _cfg_model in gateway fallback-eviction so vendor-prefixed config matches stripped agent.model on native providers)
     "phanvanhoa@gmail.com": "theAgenticBuilder",  # PR #14180 salvage (route delegate_task progress lines through _safe_print so ACP stdio JSON-RPC frames stay clean)
     "huangxudong663@gmail.com": "huangxudong663-sys",  # PR #15157 salvage (isinstance(dict) guard on tool-call model_extra; NVIDIA NIM non-dict crash)
     "39369769+jasonQin6@users.noreply.github.com": "jasonQin6",  # PR #15093 salvage (session staleness guard on stream consumer run() loop; #11016 follow-up)
diff --git a/tests/gateway/test_session.py b/tests/gateway/test_session.py
index 8b8c38a54d7..9ec0860a5d0 100644
--- a/tests/gateway/test_session.py
+++ b/tests/gateway/test_session.py
@@ -235,6 +235,48 @@ class TestBuildSessionContextPrompt:
         assert "Discord" in prompt
         assert "cannot search" in prompt.lower() or "do not have access" in prompt.lower()
 
+    def test_discord_prompt_stable_across_message_id(self):
+        """The cached system prompt must NOT vary with the triggering message_id.
+
+        message_id changes every turn; baking it into the Discord IDs block
+        busts the gateway agent-cache signature and rebuilds the AIAgent on
+        every message (destroying prompt caching). The volatile id is injected
+        per-turn into the user message instead — the cached block only carries
+        a static pointer.
+        """
+        from unittest.mock import patch
+        import gateway.session as _gs
+
+        config = GatewayConfig(
+            platforms={
+                Platform.DISCORD: PlatformConfig(enabled=True, token="fake-d...oken"),
+            },
+        )
+
+        def _prompt_for(msg_id):
+            source = SessionSource(
+                platform=Platform.DISCORD,
+                chat_id="chan-1",
+                chat_name="Server",
+                chat_type="group",
+                user_name="alice",
+                guild_id="guild-123",
+                message_id=msg_id,
+            )
+            ctx = build_session_context(source, config)
+            return build_session_context_prompt(ctx)
+
+        # Force the Discord IDs block on (it only emits when discord tools load).
+        with patch.object(_gs, "_discord_tools_loaded", return_value=True):
+            p1 = _prompt_for("1001")
+            p2 = _prompt_for("2002")
+            p3 = _prompt_for("3003")
+
+        assert p1 == p2 == p3, "system prompt must be stable across message_id"
+        assert "1001" not in p1 and "2002" not in p2 and "3003" not in p3
+        # Static pointer tells the agent where the volatile id actually lives.
+        assert "provided per-turn in the incoming user message" in p1
+
     def test_slack_prompt_includes_platform_notes(self):
         config = GatewayConfig(
             platforms={