fix: move pre_llm_call plugin context to user message, preserve prompt cache (#5146)

Plugin context from pre_llm_call hooks was injected into the system
prompt, breaking the prompt cache prefix every turn when content
changed (typical for memory plugins). Now all plugin context goes
into the current turn's user message — the system prompt stays
identical across turns, preserving cached tokens.

The system prompt is reserved for Hermes internals. Plugins
contribute context alongside the user's input.

Also adds comprehensive documentation for all 6 plugin hooks:
pre_tool_call, post_tool_call, pre_llm_call, post_llm_call,
on_session_start, on_session_end — each with full callback
signatures, parameter tables, firing conditions, and examples.

Supersedes #5138 which identified the same cache-busting bug
and proposed an uncached system suffix approach. This fix goes
further by removing system prompt injection entirely.

Co-identified-by: OutThisLife (PR #5138)
This commit is contained in:
Teknium 2026-04-04 16:55:44 -07:00 committed by GitHub
parent 96e96a79ad
commit 5879b3ef82
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 653 additions and 57 deletions

View file

@@ -403,6 +403,131 @@ class TestPluginManagerList:
class TestPreLlmCallTargetRouting:
    """Exercise the pre_llm_call hook's return format and target-aware routing.

    The routing itself lives in run_agent.py; invoke_hook() only collects the
    per-plugin return values. These tests confirm that collection works and
    that downstream code can branch on the 'target' key.
    """

    def _make_pre_llm_plugin(self, plugins_dir, name, return_expr):
        """Write a plugin whose pre_llm_call hook evaluates *return_expr*."""
        hook_line = f'ctx.register_hook("pre_llm_call", lambda **kw: {return_expr})'
        _make_plugin_dir(plugins_dir, name, register_body=hook_line)

    def _load_and_invoke(self, tmp_path, monkeypatch):
        """Point HERMES_HOME at the sandbox, load plugins, fire pre_llm_call."""
        monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes_test"))
        manager = PluginManager()
        manager.discover_and_load()
        return manager.invoke_hook(
            "pre_llm_call", session_id="s1", user_message="hi",
            conversation_history=[], is_first_turn=True, model="test",
        )

    def test_context_dict_returned(self, tmp_path, monkeypatch):
        """A context dict returned by a plugin is collected by invoke_hook."""
        plugins_dir = tmp_path / "hermes_test" / "plugins"
        self._make_pre_llm_plugin(
            plugins_dir, "basic_plugin",
            '{"context": "basic context"}',
        )

        collected = self._load_and_invoke(tmp_path, monkeypatch)

        assert len(collected) == 1
        assert collected[0]["context"] == "basic context"
        assert "target" not in collected[0]

    def test_plain_string_return(self, tmp_path, monkeypatch):
        """Plain string returns are collected as-is (routing treats them as user_message)."""
        plugins_dir = tmp_path / "hermes_test" / "plugins"
        self._make_pre_llm_plugin(
            plugins_dir, "str_plugin",
            '"plain string context"',
        )

        collected = self._load_and_invoke(tmp_path, monkeypatch)

        assert len(collected) == 1
        assert collected[0] == "plain string context"

    def test_multiple_plugins_context_collected(self, tmp_path, monkeypatch):
        """Context from every registered plugin shows up in the results."""
        plugins_dir = tmp_path / "hermes_test" / "plugins"
        self._make_pre_llm_plugin(
            plugins_dir, "aaa_memory",
            '{"context": "memory context"}',
        )
        self._make_pre_llm_plugin(
            plugins_dir, "bbb_guardrail",
            '{"context": "guardrail text"}',
        )

        collected = self._load_and_invoke(tmp_path, monkeypatch)

        assert len(collected) == 2
        contexts = [entry["context"] for entry in collected]
        assert "memory context" in contexts
        assert "guardrail text" in contexts

    def test_routing_logic_all_to_user_message(self, tmp_path, monkeypatch):
        """Simulate the routing logic from run_agent.py.

        All plugin context dicts and plain strings end up in a single
        user-message context string. There is no system_prompt target.
        """
        plugins_dir = tmp_path / "hermes_test" / "plugins"
        self._make_pre_llm_plugin(
            plugins_dir, "aaa_mem",
            '{"context": "memory A"}',
        )
        self._make_pre_llm_plugin(
            plugins_dir, "bbb_guard",
            '{"context": "rule B"}',
        )
        self._make_pre_llm_plugin(
            plugins_dir, "ccc_plain",
            '"plain text C"',
        )

        collected = self._load_and_invoke(tmp_path, monkeypatch)

        # Replicate run_agent.py routing logic — everything goes to user msg
        def _route(item):
            """Return the user-message context for one hook result, or None."""
            if isinstance(item, dict) and item.get("context"):
                return str(item["context"])
            if isinstance(item, str) and item.strip():
                return item
            return None

        parts = [piece for piece in map(_route, collected) if piece is not None]
        assert parts == ["memory A", "rule B", "plain text C"]

        merged = "\n\n".join(parts)
        assert "memory A" in merged
        assert "rule B" in merged
        assert "plain text C" in merged
# NOTE: TestPluginCommands removed — register_command() was never implemented
# in PluginContext (hermes_cli/plugins.py). The tests referenced _plugin_commands,
# commands_registered, get_plugin_command_handler, and GATEWAY_KNOWN_COMMANDS