From d6c53dcdcb49694b382000b53f194b99d97aa72d Mon Sep 17 00:00:00 2001
From: fayenix <273238055+fayenix@users.noreply.github.com>
Date: Tue, 30 Jun 2026 03:41:23 -0700
Subject: [PATCH] fix(gateway): stop per-turn agent-cache eviction from model + message_id signature churn

Two independent bugs evicted the cached gateway AIAgent on every turn,
preventing the prompt cache from ever warming:

1. Model normalization mismatch: the post-run fallback-eviction check
   compared _agent.model (stripped in AIAgent.__init__) against the raw
   _resolve_gateway_model() config string. For vendor-prefixed config on
   native providers (e.g. 'deepseek/deepseek-v4-pro' vs 'deepseek-v4-pro')
   this was always unequal, so the agent was evicted after every
   successful run. Normalize _cfg_model the same way (skip aggregators).

2. Discord triggering message_id leaked into the cached system prompt via
   build_session_context_prompt()'s Discord IDs block. message_id changes
   every turn, so the agent-cache signature (computed from the ephemeral
   prompt) changed every Discord turn -> rebuild every message. The id is
   now injected per-turn into the user message (where per-turn content
   belongs and does not touch the cache signature); the cached IDs block
   carries a static pointer to it, preserving reply/react/pin via the
   discord tools.

Adapted from #28846. Bug #1 fix is the contributor's; bug #2 reworked to
be non-destructive (keeps the triggering-id capability instead of deleting
it). Redundant auto-reset eviction (already on main via #9893/#48031) and
the wrong-premise reset_context_note plumbing from the original PR were
dropped.

Co-authored-by: Hermes Agent --- gateway/run.py | 37 ++++++++++++++++++++++++++++++ gateway/session.py | 12 +++++++++- scripts/release.py | 1 + tests/gateway/test_session.py | 42 +++++++++++++++++++++++++++++++++++ 4 files changed, 91 insertions(+), 1 deletion(-) diff --git a/gateway/run.py b/gateway/run.py index 92ae75a4e06..90622ef8d56 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -9558,6 +9558,25 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew context_note = _build_document_context_note(display_name, agent_path, mtype) message_text = f"{context_note}\n\n{message_text}" + # Discord: surface the triggering message id per-turn on the user + # message rather than in the cached system prompt. message_id changes + # every turn, so baking it into build_session_context_prompt() would + # bust the agent-cache signature and rebuild the AIAgent every message + # (destroying prompt caching). The static IDs block points the agent + # here; the volatile id rides the per-turn user content. + if ( + source is not None + and getattr(source, "platform", None) == Platform.DISCORD + and getattr(event, "message_id", None) + ): + from gateway.session import _discord_tools_loaded as _disc_tools_loaded + if _disc_tools_loaded(): + message_text = ( + f"[Triggering message id: `{event.message_id}` — use as " + f"`message_id` for reply/react/pin via the discord tools.]\n\n" + f"{message_text}" + ) + if getattr(event, "reply_to_text", None) and event.reply_to_message_id: # Always inject the reply-to pointer — even when the quoted text # already appears in history. 
The prefix isn't deduplication, it's @@ -17745,6 +17764,24 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew _run_failed = _result_for_fb.get("failed") if _result_for_fb else False if _agent is not None and hasattr(_agent, 'model') and not _run_failed: _cfg_model = _resolve_gateway_model() + # Normalize _cfg_model the same way AIAgent.__init__ does, so a + # vendor-prefixed config value (e.g. "deepseek/deepseek-v4-pro") + # matches the agent's stripped model ("deepseek-v4-pro") on + # native providers. Without this, _agent.model != _cfg_model is + # always true for vendor-prefixed config and the cached agent is + # evicted on every successful turn — destroying prompt caching. + # Aggregators (openrouter, etc.) keep the vendor/model slug, so + # they're left untouched. + try: + from hermes_cli.model_normalize import ( + _AGGREGATOR_PROVIDERS, + normalize_model_for_provider, + ) + _agent_provider = getattr(_agent, 'provider', '') or '' + if _agent_provider and _agent_provider not in _AGGREGATOR_PROVIDERS: + _cfg_model = normalize_model_for_provider(_cfg_model, _agent_provider) + except Exception: + pass if _agent.model != _cfg_model and not self._is_intentional_model_switch(session_key, _agent.model): # Fallback activated on a successful run — evict cached # agent so the next message retries the primary model. diff --git a/gateway/session.py b/gateway/session.py index d2f5b500750..110e7827a26 100644 --- a/gateway/session.py +++ b/gateway/session.py @@ -451,7 +451,17 @@ def build_session_context_prompt( else: id_lines.append(f" - Channel: `{src.chat_id}`") if src.message_id: - id_lines.append(f" - Triggering message: `{src.message_id}`") + # The triggering message id is volatile (changes every turn). 
+ # Keep it OUT of this cached system-prompt block — including it + # here changes build_session_context_prompt() output per turn, + # which busts the gateway agent-cache signature and forces an + # AIAgent rebuild on every Discord message. The actual id is + # injected per-turn into the user message instead (see the + # "Triggering message id" note in run.py). + id_lines.append( + " - Triggering message: provided per-turn in the incoming " + "user message (use it as `message_id` for reply/react/pin)" + ) lines.extend(id_lines) else: lines.append("") diff --git a/scripts/release.py b/scripts/release.py index 1651d8d3a1e..d6337149b22 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -47,6 +47,7 @@ ACP_REGISTRY_MANIFEST = REPO_ROOT / "acp_registry" / "agent.json" AUTHOR_MAP = { "193368749+jimmyjohansson84@users.noreply.github.com": "jimmyjohansson84", # PR #27123 salvage (Kanban unknown-skill warn-instead-of-crash; #27136) "gxalong@gmail.com": "Jeffgithub0029", # PR #28558 salvage (chunk Telegram text *after* MarkdownV2/HTML formatting so escaping inflation can't push a send over the 4096 UTF-16 limit; #28557) + "273238055+fayenix@users.noreply.github.com": "fayenix", # PR #28846 salvage (normalize _cfg_model in gateway fallback-eviction so vendor-prefixed config matches stripped agent.model on native providers) "phanvanhoa@gmail.com": "theAgenticBuilder", # PR #14180 salvage (route delegate_task progress lines through _safe_print so ACP stdio JSON-RPC frames stay clean) "huangxudong663@gmail.com": "huangxudong663-sys", # PR #15157 salvage (isinstance(dict) guard on tool-call model_extra; NVIDIA NIM non-dict crash) "39369769+jasonQin6@users.noreply.github.com": "jasonQin6", # PR #15093 salvage (session staleness guard on stream consumer run() loop; #11016 follow-up) diff --git a/tests/gateway/test_session.py b/tests/gateway/test_session.py index 8b8c38a54d7..9ec0860a5d0 100644 --- a/tests/gateway/test_session.py +++ b/tests/gateway/test_session.py @@ -235,6 
+235,48 @@ class TestBuildSessionContextPrompt: assert "Discord" in prompt assert "cannot search" in prompt.lower() or "do not have access" in prompt.lower() + def test_discord_prompt_stable_across_message_id(self): + """The cached system prompt must NOT vary with the triggering message_id. + + message_id changes every turn; baking it into the Discord IDs block + busts the gateway agent-cache signature and rebuilds the AIAgent on + every message (destroying prompt caching). The volatile id is injected + per-turn into the user message instead — the cached block only carries + a static pointer. + """ + from unittest.mock import patch + import gateway.session as _gs + + config = GatewayConfig( + platforms={ + Platform.DISCORD: PlatformConfig(enabled=True, token="fake-d...oken"), + }, + ) + + def _prompt_for(msg_id): + source = SessionSource( + platform=Platform.DISCORD, + chat_id="chan-1", + chat_name="Server", + chat_type="group", + user_name="alice", + guild_id="guild-123", + message_id=msg_id, + ) + ctx = build_session_context(source, config) + return build_session_context_prompt(ctx) + + # Force the Discord IDs block on (it only emits when discord tools load). + with patch.object(_gs, "_discord_tools_loaded", return_value=True): + p1 = _prompt_for("1001") + p2 = _prompt_for("2002") + p3 = _prompt_for("3003") + + assert p1 == p2 == p3, "system prompt must be stable across message_id" + assert "1001" not in p1 and "2002" not in p2 and "3003" not in p3 + # Static pointer tells the agent where the volatile id actually lives. + assert "provided per-turn in the incoming user message" in p1 + def test_slack_prompt_includes_platform_notes(self): config = GatewayConfig( platforms={