fix(gateway): stop per-turn agent-cache eviction from model + message_id signature churn

Two independent bugs evicted the cached gateway AIAgent on every turn,
preventing the prompt cache from ever warming:

1. Model normalization mismatch: the post-run fallback-eviction check
   compared _agent.model (stripped in AIAgent.__init__) against the raw
   _resolve_gateway_model() config string. For vendor-prefixed config on
   native providers (e.g. 'deepseek/deepseek-v4-pro' vs 'deepseek-v4-pro')
   this was always unequal, so the agent was evicted after every
   successful run. The fix normalizes _cfg_model the same way, except
   for aggregator providers, which keep the vendor/model slug.

2. Discord triggering message_id leaked into the cached system prompt via
   build_session_context_prompt()'s Discord IDs block. message_id changes
   every turn, so the agent-cache signature (computed from the ephemeral
   prompt) changed on every Discord turn and the agent was rebuilt for
   every message. The id is now injected per-turn into the user message
   (where per-turn content belongs, and where it does not touch the cache
   signature); the cached IDs block carries a static pointer to it,
   preserving reply/react/pin via the discord tools.

Adapted from #28846. Bug #1 fix is the contributor's; bug #2 reworked to
be non-destructive (keeps the triggering-id capability instead of deleting
it). Redundant auto-reset eviction (already on main via #9893/#48031) and
the wrong-premise reset_context_note plumbing from the original PR were
dropped.

Co-authored-by: Hermes Agent <hermes@nousresearch.com>
This commit is contained in:
fayenix 2026-06-30 03:41:23 -07:00 committed by Teknium
parent e7ca53e6b8
commit d6c53dcdcb
4 changed files with 91 additions and 1 deletion

View file

@ -9558,6 +9558,25 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
context_note = _build_document_context_note(display_name, agent_path, mtype)
message_text = f"{context_note}\n\n{message_text}"
# Discord: surface the triggering message id per-turn on the user
# message rather than in the cached system prompt. message_id changes
# every turn, so baking it into build_session_context_prompt() would
# bust the agent-cache signature and rebuild the AIAgent every message
# (destroying prompt caching). The static IDs block points the agent
# here; the volatile id rides the per-turn user content.
if (
source is not None
and getattr(source, "platform", None) == Platform.DISCORD
and getattr(event, "message_id", None)
):
from gateway.session import _discord_tools_loaded as _disc_tools_loaded
if _disc_tools_loaded():
message_text = (
f"[Triggering message id: `{event.message_id}` — use as "
f"`message_id` for reply/react/pin via the discord tools.]\n\n"
f"{message_text}"
)
if getattr(event, "reply_to_text", None) and event.reply_to_message_id:
# Always inject the reply-to pointer — even when the quoted text
# already appears in history. The prefix isn't deduplication, it's
@ -17745,6 +17764,24 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
_run_failed = _result_for_fb.get("failed") if _result_for_fb else False
if _agent is not None and hasattr(_agent, 'model') and not _run_failed:
_cfg_model = _resolve_gateway_model()
# Normalize _cfg_model the same way AIAgent.__init__ does, so a
# vendor-prefixed config value (e.g. "deepseek/deepseek-v4-pro")
# matches the agent's stripped model ("deepseek-v4-pro") on
# native providers. Without this, _agent.model != _cfg_model is
# always true for vendor-prefixed config and the cached agent is
# evicted on every successful turn — destroying prompt caching.
# Aggregators (openrouter, etc.) keep the vendor/model slug, so
# they're left untouched.
try:
from hermes_cli.model_normalize import (
_AGGREGATOR_PROVIDERS,
normalize_model_for_provider,
)
_agent_provider = getattr(_agent, 'provider', '') or ''
if _agent_provider and _agent_provider not in _AGGREGATOR_PROVIDERS:
_cfg_model = normalize_model_for_provider(_cfg_model, _agent_provider)
except Exception:
pass
if _agent.model != _cfg_model and not self._is_intentional_model_switch(session_key, _agent.model):
# Fallback activated on a successful run — evict cached
# agent so the next message retries the primary model.

View file

@ -451,7 +451,17 @@ def build_session_context_prompt(
else:
id_lines.append(f" - Channel: `{src.chat_id}`")
if src.message_id:
id_lines.append(f" - Triggering message: `{src.message_id}`")
# The triggering message id is volatile (changes every turn).
# Keep it OUT of this cached system-prompt block — including it
# here changes build_session_context_prompt() output per turn,
# which busts the gateway agent-cache signature and forces an
# AIAgent rebuild on every Discord message. The actual id is
# injected per-turn into the user message instead (see the
# "Triggering message id" note in run.py).
id_lines.append(
" - Triggering message: provided per-turn in the incoming "
"user message (use it as `message_id` for reply/react/pin)"
)
lines.extend(id_lines)
else:
lines.append("")

View file

@ -47,6 +47,7 @@ ACP_REGISTRY_MANIFEST = REPO_ROOT / "acp_registry" / "agent.json"
AUTHOR_MAP = {
"193368749+jimmyjohansson84@users.noreply.github.com": "jimmyjohansson84", # PR #27123 salvage (Kanban unknown-skill warn-instead-of-crash; #27136)
"gxalong@gmail.com": "Jeffgithub0029", # PR #28558 salvage (chunk Telegram text *after* MarkdownV2/HTML formatting so escaping inflation can't push a send over the 4096 UTF-16 limit; #28557)
"273238055+fayenix@users.noreply.github.com": "fayenix", # PR #28846 salvage (normalize _cfg_model in gateway fallback-eviction so vendor-prefixed config matches stripped agent.model on native providers)
"phanvanhoa@gmail.com": "theAgenticBuilder", # PR #14180 salvage (route delegate_task progress lines through _safe_print so ACP stdio JSON-RPC frames stay clean)
"huangxudong663@gmail.com": "huangxudong663-sys", # PR #15157 salvage (isinstance(dict) guard on tool-call model_extra; NVIDIA NIM non-dict crash)
"39369769+jasonQin6@users.noreply.github.com": "jasonQin6", # PR #15093 salvage (session staleness guard on stream consumer run() loop; #11016 follow-up)

View file

@ -235,6 +235,48 @@ class TestBuildSessionContextPrompt:
assert "Discord" in prompt
assert "cannot search" in prompt.lower() or "do not have access" in prompt.lower()
def test_discord_prompt_stable_across_message_id(self):
"""The cached system prompt must NOT vary with the triggering message_id.

message_id changes every turn; baking it into the Discord IDs block
busts the gateway agent-cache signature and rebuilds the AIAgent on
every message (destroying prompt caching). The volatile id is injected
per-turn into the user message instead; the cached block only carries
a static pointer.
"""
from unittest.mock import patch
import gateway.session as _gs
config = GatewayConfig(
platforms={
Platform.DISCORD: PlatformConfig(enabled=True, token="fake-d...oken"),
},
)
# Build the session-context prompt for a Discord source whose only
# varying field is message_id; every other field is held constant.
def _prompt_for(msg_id):
source = SessionSource(
platform=Platform.DISCORD,
chat_id="chan-1",
chat_name="Server",
chat_type="group",
user_name="alice",
guild_id="guild-123",
message_id=msg_id,
)
ctx = build_session_context(source, config)
return build_session_context_prompt(ctx)
# Force the Discord IDs block on (it only emits when discord tools load).
with patch.object(_gs, "_discord_tools_loaded", return_value=True):
p1 = _prompt_for("1001")
p2 = _prompt_for("2002")
p3 = _prompt_for("3003")
assert p1 == p2 == p3, "system prompt must be stable across message_id"
assert "1001" not in p1 and "2002" not in p2 and "3003" not in p3
# Static pointer tells the agent where the volatile id actually lives.
assert "provided per-turn in the incoming user message" in p1
def test_slack_prompt_includes_platform_notes(self):
config = GatewayConfig(
platforms={