From 92385679b64ef0f34aca2ec1b1031c4e639622bb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=98=BF=E6=B3=A5=E8=B1=86?= <1243352777@qq.com>
Date: Tue, 14 Apr 2026 16:58:37 +0800
Subject: [PATCH] fix: reset retry counters after compression and stop
 poisoning conversation history

Three bugfixes in the agent loop:

1. Reset retry counters after context compression. Without this,
   pre-compression retry counts carry over, causing the model to
   hit empty-response recovery immediately after a compression-
   induced context loss, wasting API calls on a now-valid context.

2. Unmute output in the final-response (no-tool-call) branch.
   _mute_post_response could be left True from a prior housekeeping
   turn, silently suppressing empty-response warnings and recovery
   status that the user should see.

3. Stop injecting 'Calling the X tools...' into assistant message
   content when falling back to prior-turn content. This mutated
   conversation history with synthetic text that the model never
   produced, poisoning subsequent turns.
---
 run_agent.py | 30 ++++++++++++++++++++----------
 1 file changed, 20 insertions(+), 10 deletions(-)

diff --git a/run_agent.py b/run_agent.py
index 0d6be24d0..0814a8b49 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -8012,6 +8012,15 @@ class AIAgent:
                     # skipping them because conversation_history is still the
                     # pre-compression length.
                     conversation_history = None
+                    # Fix: reset retry counters after compression so the model
+                    # gets a fresh budget on the compressed context.  Without
+                    # this, pre-compression retries carry over and the model
+                    # hits "(empty)" immediately after compression-induced
+                    # context loss.
+                    self._empty_content_retries = 0
+                    self._thinking_prefill_retries = 0
+                    self._last_content_with_tools = None
+                    self._mute_post_response = False
                     # Re-estimate after compression
                     _preflight_tokens = estimate_request_tokens_rough(
                         messages,
@@ -10202,6 +10211,13 @@ class AIAgent:
                     # No tool calls - this is the final response
                     final_response = assistant_message.content or ""
                     
+                    # Fix: unmute output when entering the no-tool-call branch
+                    # so the user can see empty-response warnings and recovery
+                    # status messages.  _mute_post_response was set during a
+                    # prior housekeeping tool turn and should not silence the
+                    # final response path.
+                    self._mute_post_response = False
+                    
                     # Check if response only has think block with no actual content after it
                     if not self._has_content_after_think_block(final_response):
                         # ── Partial stream recovery ─────────────────────
@@ -10239,16 +10255,10 @@ class AIAgent:
                             self._emit_status("↻ Empty response after tool calls — using earlier content as final answer")
                             self._last_content_with_tools = None
                             self._empty_content_retries = 0
-                            for i in range(len(messages) - 1, -1, -1):
-                                msg = messages[i]
-                                if msg.get("role") == "assistant" and msg.get("tool_calls"):
-                                    tool_names = []
-                                    for tc in msg["tool_calls"]:
-                                        if not tc or not isinstance(tc, dict): continue
-                                        fn = tc.get("function", {})
-                                        tool_names.append(fn.get("name", "unknown"))
-                                    msg["content"] = f"Calling the {', '.join(tool_names)} tool{'s' if len(tool_names) > 1 else ''}..."
-                                    break
+                            # Do NOT modify the assistant message content — the
+                            # old code injected "Calling the X tools..." which
+                            # poisoned the conversation history.  Just use the
+                            # fallback text as the final response and break.
                             final_response = self._strip_think_blocks(fallback).strip()
                             self._response_was_previewed = True
                             break