From d99e2a29d66135675cf4a237b0131fbdf0927118 Mon Sep 17 00:00:00 2001 From: waxinz Date: Sat, 11 Apr 2026 16:29:02 -0700 Subject: [PATCH] feat: standardize message whitespace and JSON formatting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Normalize api_messages before each API call for consistent prefix matching across turns: 1. Strip leading/trailing whitespace from system prompt parts 2. Strip leading/trailing whitespace from message content strings 3. Normalize tool-call arguments to compact sorted JSON This enables KV cache reuse on local inference servers (llama.cpp, vLLM, Ollama) and improves cache hit rates for cloud providers. All normalization operates on the api_messages copy — the original conversation history in messages is never mutated. Tool-call JSON normalization creates new dicts via dict unpacking to avoid the shallow-copy mutation bug in the original PR. Salvaged from PR #7875 by @waxinz with mutation fix. --- run_agent.py | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/run_agent.py b/run_agent.py index 7b97c0ded22..ecaa92b41c9 100644 --- a/run_agent.py +++ b/run_agent.py @@ -3212,7 +3212,7 @@ class AIAgent: if platform_key in PLATFORM_HINTS: prompt_parts.append(PLATFORM_HINTS[platform_key]) - return "\n\n".join(prompt_parts) + return "\n\n".join(p.strip() for p in prompt_parts if p.strip()) # ========================================================================= # Pre/post-call guardrails (inspired by PR #1321 — @alireza78a) @@ -8047,6 +8047,36 @@ class AIAgent: # manual message manipulation are always caught. api_messages = self._sanitize_api_messages(api_messages) + # Normalize message whitespace and tool-call JSON for consistent + # prefix matching. Ensures bit-perfect prefixes across turns, + # which enables KV cache reuse on local inference servers + # (llama.cpp, vLLM, Ollama) and improves cache hit rates for + # cloud providers. 
Operates on api_messages (the API copy) so + # the original conversation history in `messages` is untouched. + for am in api_messages: + if isinstance(am.get("content"), str): + am["content"] = am["content"].strip() + for am in api_messages: + tcs = am.get("tool_calls") + if not tcs: + continue + new_tcs = [] + for tc in tcs: + if isinstance(tc, dict) and "function" in tc: + try: + args_obj = json.loads(tc["function"]["arguments"]) + tc = {**tc, "function": { + **tc["function"], + "arguments": json.dumps( + args_obj, separators=(",", ":"), + sort_keys=True, + ), + }} + except Exception: + pass + new_tcs.append(tc) + am["tool_calls"] = new_tcs + # Calculate approximate request size for logging total_chars = sum(len(str(msg)) for msg in api_messages) approx_tokens = estimate_messages_tokens_rough(api_messages)