mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-07-01 12:02:05 +00:00
fix(gateway): stop per-turn agent-cache eviction from model + message_id signature churn
Two independent bugs evicted the cached gateway AIAgent on every turn, preventing the prompt cache from ever warming:

1. Model normalization mismatch: the post-run fallback-eviction check compared _agent.model (stripped in AIAgent.__init__) against the raw _resolve_gateway_model() config string. For vendor-prefixed config on native providers (e.g. 'deepseek/deepseek-v4-pro' vs 'deepseek-v4-pro') the two were always unequal, so the agent was evicted after every successful run. The fix normalizes _cfg_model the same way (aggregators are skipped, since they keep the vendor/model slug).

2. The Discord triggering message_id leaked into the cached system prompt via build_session_context_prompt()'s Discord IDs block. message_id changes every turn, so the agent-cache signature (computed from the ephemeral prompt) changed on every Discord turn, forcing an agent rebuild on every message. The id is now injected per-turn into the user message (where per-turn content belongs and where it does not touch the cache signature); the cached IDs block carries a static pointer to it, preserving reply/react/pin via the discord tools.

Adapted from #28846. The bug #1 fix is the contributor's; the bug #2 fix was reworked to be non-destructive (it keeps the triggering-id capability instead of deleting it). The redundant auto-reset eviction (already on main via #9893/#48031) and the wrong-premise reset_context_note plumbing from the original PR were dropped.

Co-authored-by: Hermes Agent <hermes@nousresearch.com>
This commit is contained in:
parent
e7ca53e6b8
commit
d6c53dcdcb
4 changed files with 91 additions and 1 deletion
|
|
@ -9558,6 +9558,25 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
|
|||
context_note = _build_document_context_note(display_name, agent_path, mtype)
|
||||
message_text = f"{context_note}\n\n{message_text}"
|
||||
|
||||
# Discord: surface the triggering message id per-turn on the user
|
||||
# message rather than in the cached system prompt. message_id changes
|
||||
# every turn, so baking it into build_session_context_prompt() would
|
||||
# bust the agent-cache signature and rebuild the AIAgent every message
|
||||
# (destroying prompt caching). The static IDs block points the agent
|
||||
# here; the volatile id rides the per-turn user content.
|
||||
if (
|
||||
source is not None
|
||||
and getattr(source, "platform", None) == Platform.DISCORD
|
||||
and getattr(event, "message_id", None)
|
||||
):
|
||||
from gateway.session import _discord_tools_loaded as _disc_tools_loaded
|
||||
if _disc_tools_loaded():
|
||||
message_text = (
|
||||
f"[Triggering message id: `{event.message_id}` — use as "
|
||||
f"`message_id` for reply/react/pin via the discord tools.]\n\n"
|
||||
f"{message_text}"
|
||||
)
|
||||
|
||||
if getattr(event, "reply_to_text", None) and event.reply_to_message_id:
|
||||
# Always inject the reply-to pointer — even when the quoted text
|
||||
# already appears in history. The prefix isn't deduplication, it's
|
||||
|
|
@ -17745,6 +17764,24 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
|
|||
_run_failed = _result_for_fb.get("failed") if _result_for_fb else False
|
||||
if _agent is not None and hasattr(_agent, 'model') and not _run_failed:
|
||||
_cfg_model = _resolve_gateway_model()
|
||||
# Normalize _cfg_model the same way AIAgent.__init__ does, so a
|
||||
# vendor-prefixed config value (e.g. "deepseek/deepseek-v4-pro")
|
||||
# matches the agent's stripped model ("deepseek-v4-pro") on
|
||||
# native providers. Without this, _agent.model != _cfg_model is
|
||||
# always true for vendor-prefixed config and the cached agent is
|
||||
# evicted on every successful turn — destroying prompt caching.
|
||||
# Aggregators (openrouter, etc.) keep the vendor/model slug, so
|
||||
# they're left untouched.
|
||||
try:
|
||||
from hermes_cli.model_normalize import (
|
||||
_AGGREGATOR_PROVIDERS,
|
||||
normalize_model_for_provider,
|
||||
)
|
||||
_agent_provider = getattr(_agent, 'provider', '') or ''
|
||||
if _agent_provider and _agent_provider not in _AGGREGATOR_PROVIDERS:
|
||||
_cfg_model = normalize_model_for_provider(_cfg_model, _agent_provider)
|
||||
except Exception:
|
||||
pass
|
||||
if _agent.model != _cfg_model and not self._is_intentional_model_switch(session_key, _agent.model):
|
||||
# Fallback activated on a successful run — evict cached
|
||||
# agent so the next message retries the primary model.
|
||||
|
|
|
|||
|
|
@ -451,7 +451,17 @@ def build_session_context_prompt(
|
|||
else:
|
||||
id_lines.append(f" - Channel: `{src.chat_id}`")
|
||||
if src.message_id:
|
||||
id_lines.append(f" - Triggering message: `{src.message_id}`")
|
||||
# The triggering message id is volatile (changes every turn).
|
||||
# Keep it OUT of this cached system-prompt block — including it
|
||||
# here changes build_session_context_prompt() output per turn,
|
||||
# which busts the gateway agent-cache signature and forces an
|
||||
# AIAgent rebuild on every Discord message. The actual id is
|
||||
# injected per-turn into the user message instead (see the
|
||||
# "Triggering message id" note in run.py).
|
||||
id_lines.append(
|
||||
" - Triggering message: provided per-turn in the incoming "
|
||||
"user message (use it as `message_id` for reply/react/pin)"
|
||||
)
|
||||
lines.extend(id_lines)
|
||||
else:
|
||||
lines.append("")
|
||||
|
|
|
|||
|
|
@ -47,6 +47,7 @@ ACP_REGISTRY_MANIFEST = REPO_ROOT / "acp_registry" / "agent.json"
|
|||
AUTHOR_MAP = {
|
||||
"193368749+jimmyjohansson84@users.noreply.github.com": "jimmyjohansson84", # PR #27123 salvage (Kanban unknown-skill warn-instead-of-crash; #27136)
|
||||
"gxalong@gmail.com": "Jeffgithub0029", # PR #28558 salvage (chunk Telegram text *after* MarkdownV2/HTML formatting so escaping inflation can't push a send over the 4096 UTF-16 limit; #28557)
|
||||
"273238055+fayenix@users.noreply.github.com": "fayenix", # PR #28846 salvage (normalize _cfg_model in gateway fallback-eviction so vendor-prefixed config matches stripped agent.model on native providers)
|
||||
"phanvanhoa@gmail.com": "theAgenticBuilder", # PR #14180 salvage (route delegate_task progress lines through _safe_print so ACP stdio JSON-RPC frames stay clean)
|
||||
"huangxudong663@gmail.com": "huangxudong663-sys", # PR #15157 salvage (isinstance(dict) guard on tool-call model_extra; NVIDIA NIM non-dict crash)
|
||||
"39369769+jasonQin6@users.noreply.github.com": "jasonQin6", # PR #15093 salvage (session staleness guard on stream consumer run() loop; #11016 follow-up)
|
||||
|
|
|
|||
|
|
@ -235,6 +235,48 @@ class TestBuildSessionContextPrompt:
|
|||
assert "Discord" in prompt
|
||||
assert "cannot search" in prompt.lower() or "do not have access" in prompt.lower()
|
||||
|
||||
def test_discord_prompt_stable_across_message_id(self):
    """Rendering the Discord system prompt must ignore the triggering message_id.

    The test builds the session-context prompt three times, changing only
    ``message_id`` on the source, and requires the three results to be
    identical, to contain none of the ids, and to carry the static pointer
    text that tells the agent the id arrives in the per-turn user message.
    """
    from unittest.mock import patch

    import gateway.session as _gs

    discord_config = GatewayConfig(
        platforms={
            Platform.DISCORD: PlatformConfig(enabled=True, token="fake-d...oken"),
        },
    )

    def _render(message_id):
        # Only message_id differs between calls; every other field is fixed.
        discord_source = SessionSource(
            platform=Platform.DISCORD,
            chat_id="chan-1",
            chat_name="Server",
            chat_type="group",
            user_name="alice",
            guild_id="guild-123",
            message_id=message_id,
        )
        return build_session_context_prompt(
            build_session_context(discord_source, discord_config)
        )

    message_ids = ("1001", "2002", "3003")

    # Patch the tools-loaded probe to True so the Discord IDs block is emitted.
    with patch.object(_gs, "_discord_tools_loaded", return_value=True):
        prompts = [_render(message_id) for message_id in message_ids]

    first, second, third = prompts
    assert first == second == third, "system prompt must be stable across message_id"
    # No rendered prompt may contain the id it was built from.
    assert all(
        message_id not in prompt
        for message_id, prompt in zip(message_ids, prompts)
    )
    # The static pointer text must be present in the cached block.
    assert "provided per-turn in the incoming user message" in first
|
||||
|
||||
def test_slack_prompt_includes_platform_notes(self):
|
||||
config = GatewayConfig(
|
||||
platforms={
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue