diff --git a/run_agent.py b/run_agent.py index 0d6be24d0..0814a8b49 100644 --- a/run_agent.py +++ b/run_agent.py @@ -8012,6 +8012,15 @@ class AIAgent: # skipping them because conversation_history is still the # pre-compression length. conversation_history = None + # Reset retry counters and stale response state after + # compression so the model gets a fresh budget on the + # compressed context. Without this, pre-compression retries + # carry over and the model hits "(empty)" immediately after + # compression-induced context loss. + self._empty_content_retries = 0 + self._thinking_prefill_retries = 0 + self._last_content_with_tools = None + self._mute_post_response = False # Re-estimate after compression _preflight_tokens = estimate_request_tokens_rough( messages, @@ -10202,6 +10211,13 @@ class AIAgent: # No tool calls - this is the final response final_response = assistant_message.content or "" + # Unmute output when entering the no-tool-call branch + # so the user can see empty-response warnings and recovery + # status messages. _mute_post_response may have been set + # during a prior housekeeping tool turn and should not + # silence the final response path. + self._mute_post_response = False + # Check if response only has think block with no actual content after it if not self._has_content_after_think_block(final_response): # ── Partial stream recovery ───────────────────── @@ -10239,16 +10255,10 @@ class AIAgent: self._emit_status("↻ Empty response after tool calls — using earlier content as final answer") self._last_content_with_tools = None self._empty_content_retries = 0 - for i in range(len(messages) - 1, -1, -1): - msg = messages[i] - if msg.get("role") == "assistant" and msg.get("tool_calls"): - tool_names = [] - for tc in msg["tool_calls"]: - if not tc or not isinstance(tc, dict): continue - fn = tc.get("function", {}) - tool_names.append(fn.get("name", "unknown")) - msg["content"] = f"Calling the {', '.join(tool_names)} tool{'s' if len(tool_names) > 1 else ''}..." 
- break + # Do NOT modify the assistant message content: rewriting + # it to a synthetic "Calling the X tool(s)..." string + # poisons the conversation history. Just use the + # fallback text as the final response and break. final_response = self._strip_think_blocks(fallback).strip() self._response_was_previewed = True break