fix(honcho): include user_id in agent cache signature to prevent shared-thread peer contamination

PR #27371 introduced a per-user-peer resolver in HonchoSessionManager,
but the resolved runtime identity is frozen into the manager at
first-message init.  When the gateway session_key intentionally omits
the participant ID (the default for threads via
thread_sessions_per_user=False), a cached AIAgent created by user A is
reused for user B's messages, attributing B's writes to A's resolved
Honcho peer and breaking #27371's per-user-peer contract.

Fix by including user_id and user_id_alt in _agent_config_signature so
the cache key distinguishes participants in shared threads.  Each user
in a shared thread now triggers a fresh AIAgent build (trading prompt-
cache warmth for memory-attribution correctness — the right tradeoff
for an external-memory backend where misattribution is unrecoverable).

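In the default-None case, omitting user_id/user_id_alt yields the same
signature as passing None explicitly.  Note that both fields are always
appended to the hashed payload (as empty strings when unset), so
signatures are not byte-identical to those computed before this change.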
This commit is contained in:
erosika 2026-05-21 22:18:06 +00:00 committed by kshitij
parent 00e6830204
commit c03960decd
2 changed files with 92 additions and 0 deletions

View file

@ -15080,6 +15080,8 @@ class GatewayRunner:
enabled_toolsets: list,
ephemeral_prompt: str,
cache_keys: dict | None = None,
user_id: str | None = None,
user_id_alt: str | None = None,
) -> str:
"""Compute a stable string key from agent config values.
@ -15093,6 +15095,20 @@ class GatewayRunner:
the output of ``_extract_cache_busting_config(user_config)`` so
edits to model.context_length / compression.* in config.yaml are
picked up on the next gateway message without a manual restart.
``user_id`` and ``user_id_alt`` are the runtime user identities
carried by the current message's gateway source. They participate
in the cache key because the Honcho memory provider freezes them
into ``HonchoSessionManager`` at first-message init (see
``plugins/memory/honcho/__init__.py::_do_session_init``). Without
them in the signature, a shared-thread session_key (one in which
``build_session_key`` intentionally omits the participant ID,
e.g. ``thread_sessions_per_user=False``) would reuse the cached
AIAgent across distinct users, causing the second user's messages
to be attributed to the first user's resolved Honcho peer. This
broke #27371's per-user-peer contract in multi-user gateways.
Per-user agent rebuilds in shared threads trade prompt-cache
warmth for correct memory attribution.
"""
import hashlib, json as _j
@ -15117,6 +15133,8 @@ class GatewayRunner:
# cached agent and doesn't affect system prompt or tools.
ephemeral_prompt or "",
_cache_keys_sorted,
str(user_id or ""),
str(user_id_alt or ""),
],
sort_keys=True,
default=str,
@ -16658,6 +16676,8 @@ class GatewayRunner:
enabled_toolsets,
combined_ephemeral,
cache_keys=self._extract_cache_busting_config(user_config),
user_id=getattr(source, "user_id", None),
user_id_alt=getattr(source, "user_id_alt", None),
)
agent = None
_cache_lock = getattr(self, "_agent_cache_lock", None)

View file

@ -1344,3 +1344,75 @@ class TestCachedAgentInactivityReset:
f"Watchdog would see {idle_secs:.0f}s idle, expected ~{STUCK_FOR}s. "
"Inactivity timeout could not fire for a stuck interrupted turn."
)
class TestAgentConfigSignatureUserId:
    """Regression: shared-thread cache must not reuse an agent across users.

    PR #27371 introduces a deterministic per-user-peer resolver in
    HonchoSessionManager, but Honcho's resolved runtime user identity is
    frozen into the manager at first-message init. When the gateway
    session_key intentionally omits the participant ID (the default for
    threads via thread_sessions_per_user=False), a cached AIAgent created
    by user A is reused for user B's messages, attributing B's writes to
    A's resolved Honcho peer. The signature must therefore include
    user_id and user_id_alt so per-user agents are built in shared
    threads, restoring #27371's per-user-peer contract.

    Cost: in a multi-user shared thread, each user triggers a fresh
    AIAgent build, which means a cold prompt cache for that user's first
    turn. The correctness gain is judged to outweigh the per-user cache
    warmup.
    """

    @staticmethod
    def _signature(**identity):
        """Return the agent config signature for a fixed baseline config.

        Every test in this class holds model, runtime, toolsets and
        ephemeral prompt constant and varies only the identity kwargs
        (``user_id`` / ``user_id_alt``), so any difference between two
        signatures is attributable to those kwargs alone.
        """
        # Imported inside the helper (as the individual tests did before)
        # so the gateway module is only loaded when a test actually runs.
        from gateway.run import GatewayRunner

        runtime = {
            "provider": "anthropic",
            "api_key": "k",
            "base_url": "",
            "api_mode": "chat_completions",
        }
        return GatewayRunner._agent_config_signature(
            "claude-sonnet-4", runtime, ["hermes-telegram"], "", **identity
        )

    def test_signature_changes_with_user_id(self):
        """Two different user_ids must not share a cache key."""
        sig_a = self._signature(user_id="86701400")
        sig_b = self._signature(user_id="491827364")
        assert sig_a != sig_b

    def test_signature_stable_with_same_user_id(self):
        """The same user_id must map to the same cache key on every call."""
        sig_1 = self._signature(user_id="86701400")
        sig_2 = self._signature(user_id="86701400")
        assert sig_1 == sig_2

    def test_signature_changes_with_user_id_alt(self):
        """user_id_alt alone must be enough to distinguish two cache keys."""
        sig_a = self._signature(user_id="86701400", user_id_alt="@igor_tg")
        sig_b = self._signature(user_id="86701400", user_id_alt="@erosika_tg")
        assert sig_a != sig_b

    def test_signature_omits_user_id_when_absent(self):
        """Omitting the identity kwargs must equal passing None explicitly.

        Callers that predate the user_id parameters pass neither kwarg;
        they must land on the same cache key as a caller that forwards
        None for a source carrying no user identity.

        This compares two calls against the current implementation only.
        It does not compare against a signature produced by the
        pre-change code, so it cannot show that existing cache keys
        survive the change.
        NOTE(review): the identity fields look to be appended to the
        hashed payload unconditionally, in which case pre-change keys
        would differ -- confirm against the implementation.
        """
        sig_implicit = self._signature()
        sig_explicit_none = self._signature(user_id=None, user_id_alt=None)
        assert sig_implicit == sig_explicit_none