diff --git a/gateway/run.py b/gateway/run.py index 8873f182f3b..4df123ff042 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -15080,6 +15080,8 @@ class GatewayRunner: enabled_toolsets: list, ephemeral_prompt: str, cache_keys: dict | None = None, + user_id: str | None = None, + user_id_alt: str | None = None, ) -> str: """Compute a stable string key from agent config values. @@ -15093,6 +15095,20 @@ class GatewayRunner: the output of ``_extract_cache_busting_config(user_config)`` so edits to model.context_length / compression.* in config.yaml are picked up on the next gateway message without a manual restart. + + ``user_id`` and ``user_id_alt`` are the runtime user identities + carried by the current message's gateway source. They participate + in the cache key because the Honcho memory provider freezes them + into ``HonchoSessionManager`` at first-message init (see + ``plugins/memory/honcho/__init__.py::_do_session_init``). Without + them in the signature, a shared-thread session_key (one in which + ``build_session_key`` intentionally omits the participant ID, + e.g. ``thread_sessions_per_user=False``) would reuse the cached + AIAgent across distinct users, causing the second user's messages + to be attributed to the first user's resolved Honcho peer. This + broke #27371's per-user-peer contract in multi-user gateways. + Per-user agent rebuilds in shared threads trade prompt-cache + warmth for correct memory attribution. """ import hashlib, json as _j @@ -15117,6 +15133,8 @@ class GatewayRunner: # cached agent and doesn't affect system prompt or tools. ephemeral_prompt or "", _cache_keys_sorted, + str(user_id or ""), + str(user_id_alt or ""), ], sort_keys=True, default=str, @@ -16658,6 +16676,8 @@ class GatewayRunner: enabled_toolsets, combined_ephemeral, cache_keys=self._extract_cache_busting_config(user_config), + user_id=getattr(source, "user_id", None), + user_id_alt=getattr(source, "user_id_alt", None), ) agent = None _cache_lock = getattr(self, "_agent_cache_lock", None) diff --git a/tests/gateway/test_agent_cache.py b/tests/gateway/test_agent_cache.py index a9793f4d9a2..4ebcda02ce3 100644 --- a/tests/gateway/test_agent_cache.py +++ b/tests/gateway/test_agent_cache.py @@ -1344,3 +1344,75 @@ class TestCachedAgentInactivityReset: f"Watchdog would see {idle_secs:.0f}s idle, expected ~{STUCK_FOR}s. " "Inactivity timeout could not fire for a stuck interrupted turn." ) + + +class TestAgentConfigSignatureUserId: + """Regression: shared-thread cache must not reuse an agent across users. + + PR #27371 introduces a deterministic per-user-peer resolver in + HonchoSessionManager, but Honcho's resolved runtime user identity is + frozen into the manager at first-message init. When the gateway + session_key intentionally omits the participant ID (the default for + threads via thread_sessions_per_user=False), a cached AIAgent created + by user A is reused for user B's messages, attributing B's writes to + A's resolved Honcho peer. The signature must therefore include + user_id and user_id_alt so per-user agents are built in shared + threads, restoring #27371's per-user-peer contract. + + Cost: in a multi-user shared thread, each user triggers a fresh + AIAgent build → cold prompt cache for that user's first turn. The + correctness gain is judged to outweigh the per-user cache warmup. + """ + + def test_signature_changes_with_user_id(self): + from gateway.run import GatewayRunner + runtime = {"provider": "anthropic", "api_key": "k", "base_url": "", "api_mode": "chat_completions"} + sig_a = GatewayRunner._agent_config_signature( + "claude-sonnet-4", runtime, ["hermes-telegram"], "", user_id="86701400" + ) + sig_b = GatewayRunner._agent_config_signature( + "claude-sonnet-4", runtime, ["hermes-telegram"], "", user_id="491827364" + ) + assert sig_a != sig_b + + def test_signature_stable_with_same_user_id(self): + from gateway.run import GatewayRunner + runtime = {"provider": "anthropic", "api_key": "k", "base_url": "", "api_mode": "chat_completions"} + sig_1 = GatewayRunner._agent_config_signature( + "claude-sonnet-4", runtime, ["hermes-telegram"], "", user_id="86701400" + ) + sig_2 = GatewayRunner._agent_config_signature( + "claude-sonnet-4", runtime, ["hermes-telegram"], "", user_id="86701400" + ) + assert sig_1 == sig_2 + + def test_signature_changes_with_user_id_alt(self): + from gateway.run import GatewayRunner + runtime = {"provider": "anthropic", "api_key": "k", "base_url": "", "api_mode": "chat_completions"} + sig_a = GatewayRunner._agent_config_signature( + "claude-sonnet-4", runtime, ["hermes-telegram"], "", + user_id="86701400", user_id_alt="@igor_tg", + ) + sig_b = GatewayRunner._agent_config_signature( + "claude-sonnet-4", runtime, ["hermes-telegram"], "", + user_id="86701400", user_id_alt="@erosika_tg", + ) + assert sig_a != sig_b + + def test_signature_omits_user_id_when_absent(self): + """Default-None user_id must not change signatures vs unset call. + + Pre-#27371-fix callers passed no user_id kwarg. Keeping the + default-None signature byte-identical to the previous behavior + avoids invalidating in-flight caches the moment this lands. + """ + from gateway.run import GatewayRunner + runtime = {"provider": "anthropic", "api_key": "k", "base_url": "", "api_mode": "chat_completions"} + sig_implicit = GatewayRunner._agent_config_signature( + "claude-sonnet-4", runtime, ["hermes-telegram"], "", + ) + sig_explicit_none = GatewayRunner._agent_config_signature( + "claude-sonnet-4", runtime, ["hermes-telegram"], "", + user_id=None, user_id_alt=None, + ) + assert sig_implicit == sig_explicit_none