fix: move pre_llm_call plugin context to user message, preserve prompt cache (#5146)

Plugin context from pre_llm_call hooks was injected into the system
prompt, breaking the prompt cache prefix every turn when content
changed (typical for memory plugins). Now all plugin context goes
into the current turn's user message — the system prompt stays
identical across turns, preserving cached tokens.

The system prompt is reserved for Hermes internals. Plugins
contribute context alongside the user's input.

Also adds comprehensive documentation for all 6 plugin hooks:
pre_tool_call, post_tool_call, pre_llm_call, post_llm_call,
on_session_start, on_session_end — each with full callback
signatures, parameter tables, firing conditions, and examples.

Supersedes #5138 which identified the same cache-busting bug
and proposed an uncached system suffix approach. This fix goes
further by removing system prompt injection entirely.

Co-identified-by: OutThisLife (PR #5138)
This commit is contained in:
Teknium 2026-04-04 16:55:44 -07:00 committed by GitHub
parent 96e96a79ad
commit 5879b3ef82
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 653 additions and 57 deletions

View file

@@ -403,6 +403,131 @@ class TestPluginManagerList:
class TestPreLlmCallTargetRouting:
    """Exercise the pre_llm_call hook's return format and target-aware routing.

    The routing itself lives in run_agent.py; invoke_hook() only collects the
    per-plugin return values. These tests confirm that collection works and
    that downstream code can branch on the 'target' key.
    """

    def _make_pre_llm_plugin(self, plugins_dir, name, return_expr):
        """Write a plugin whose pre_llm_call hook evaluates *return_expr*."""
        hook_line = f'ctx.register_hook("pre_llm_call", lambda **kw: {return_expr})'
        _make_plugin_dir(plugins_dir, name, register_body=hook_line)

    def _load_and_invoke(self, tmp_path, monkeypatch):
        """Point HERMES_HOME at the sandbox, load plugins, fire pre_llm_call."""
        monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes_test"))
        manager = PluginManager()
        manager.discover_and_load()
        return manager.invoke_hook(
            "pre_llm_call", session_id="s1", user_message="hi",
            conversation_history=[], is_first_turn=True, model="test",
        )

    def test_context_dict_returned(self, tmp_path, monkeypatch):
        """A context dict returned by a plugin is collected by invoke_hook."""
        plugins_dir = tmp_path / "hermes_test" / "plugins"
        self._make_pre_llm_plugin(
            plugins_dir, "basic_plugin",
            '{"context": "basic context"}',
        )

        collected = self._load_and_invoke(tmp_path, monkeypatch)

        assert len(collected) == 1
        assert collected[0]["context"] == "basic context"
        assert "target" not in collected[0]

    def test_plain_string_return(self, tmp_path, monkeypatch):
        """Plain string returns are collected as-is (routing treats them as user_message)."""
        plugins_dir = tmp_path / "hermes_test" / "plugins"
        self._make_pre_llm_plugin(
            plugins_dir, "str_plugin",
            '"plain string context"',
        )

        collected = self._load_and_invoke(tmp_path, monkeypatch)

        assert len(collected) == 1
        assert collected[0] == "plain string context"

    def test_multiple_plugins_context_collected(self, tmp_path, monkeypatch):
        """Context from every registered plugin shows up in the results."""
        plugins_dir = tmp_path / "hermes_test" / "plugins"
        self._make_pre_llm_plugin(
            plugins_dir, "aaa_memory",
            '{"context": "memory context"}',
        )
        self._make_pre_llm_plugin(
            plugins_dir, "bbb_guardrail",
            '{"context": "guardrail text"}',
        )

        collected = self._load_and_invoke(tmp_path, monkeypatch)

        assert len(collected) == 2
        contexts = [entry["context"] for entry in collected]
        assert "memory context" in contexts
        assert "guardrail text" in contexts

    def test_routing_logic_all_to_user_message(self, tmp_path, monkeypatch):
        """Simulate the routing logic from run_agent.py.

        All plugin context dicts and plain strings end up in a single
        user-message context string. There is no system_prompt target.
        """
        plugins_dir = tmp_path / "hermes_test" / "plugins"
        self._make_pre_llm_plugin(
            plugins_dir, "aaa_mem",
            '{"context": "memory A"}',
        )
        self._make_pre_llm_plugin(
            plugins_dir, "bbb_guard",
            '{"context": "rule B"}',
        )
        self._make_pre_llm_plugin(
            plugins_dir, "ccc_plain",
            '"plain text C"',
        )

        collected = self._load_and_invoke(tmp_path, monkeypatch)

        # Replicate run_agent.py routing logic — everything goes to user msg
        def _route(item):
            """Return the user-message context for one hook result, or None."""
            if isinstance(item, dict) and item.get("context"):
                return str(item["context"])
            if isinstance(item, str) and item.strip():
                return item
            return None

        parts = [piece for piece in map(_route, collected) if piece is not None]
        assert parts == ["memory A", "rule B", "plain text C"]

        merged = "\n\n".join(parts)
        assert "memory A" in merged
        assert "rule B" in merged
        assert "plain text C" in merged
# NOTE: TestPluginCommands removed — register_command() was never implemented
# in PluginContext (hermes_cli/plugins.py). The tests referenced _plugin_commands,
# commands_registered, get_plugin_command_handler, and GATEWAY_KNOWN_COMMANDS