diff --git a/run_agent.py b/run_agent.py index 7aa4a33f46c..8cea7ee211d 100644 --- a/run_agent.py +++ b/run_agent.py @@ -8620,140 +8620,24 @@ class AIAgent: self._response_was_previewed = True break - # No fallback available — classify the empty response before - # blindly spending retries. Some local/custom backends surface - # implicit context pressure as reasoning-only output rather than - # an explicit overflow error. - if not hasattr(self, '_empty_content_retries'): - self._empty_content_retries = 0 - self._empty_content_retries += 1 + # Reasoning-only response: the model produced thinking + # but no visible content. This is a valid response — + # keep reasoning in its own field and set content to + # "(empty)" so every provider accepts the message. + # No retries needed. + reasoning_text = self._extract_reasoning(assistant_message) + assistant_msg = self._build_assistant_message(assistant_message, finish_reason) + assistant_msg["content"] = "(empty)" + messages.append(assistant_msg) - empty_response_info = self._classify_empty_content_response( - assistant_message, - finish_reason=finish_reason, - approx_tokens=approx_tokens, - api_messages=api_messages, - conversation_history=conversation_history, - ) - reasoning_text = empty_response_info["reasoning_text"] - self._vprint(f"{self.log_prefix}⚠️ Response only contains think block with no content after it") if reasoning_text: reasoning_preview = reasoning_text[:500] + "..." if len(reasoning_text) > 500 else reasoning_text - self._vprint(f"{self.log_prefix} Reasoning: {reasoning_preview}") + self._vprint(f"{self.log_prefix}ℹ️ Reasoning-only response (no visible content). Reasoning: {reasoning_preview}") else: - content_preview = final_response[:80] + "..." if len(final_response) > 80 else final_response - self._vprint(f"{self.log_prefix} Content: '{content_preview}'") + self._vprint(f"{self.log_prefix}ℹ️ Empty response (no content or reasoning).") - if empty_response_info["should_compress"]: - compression_attempts += 1 - if compression_attempts > max_compression_attempts: - self._vprint(f"{self.log_prefix}❌ Max compression attempts ({max_compression_attempts}) reached.", force=True) - self._vprint(f"{self.log_prefix} 💡 Local/custom backend returned reasoning-only output with no visible content. This often means the resumed/large session exceeds the runtime context window. Try /new or lower model.context_length to the actual runtime limit.", force=True) - else: - self._vprint(f"{self.log_prefix}🗜️ Reasoning-only response looks like implicit context pressure — attempting compression ({compression_attempts}/{max_compression_attempts})...", force=True) - original_len = len(messages) - messages, active_system_prompt = self._compress_context( - messages, system_message, approx_tokens=approx_tokens, - task_id=effective_task_id, - ) - if len(messages) < original_len: - conversation_history = None - self._emit_status(f"🗜️ Compressed {original_len} → {len(messages)} messages after reasoning-only response, retrying...") - time.sleep(2) - api_call_count -= 1 - self.iteration_budget.refund() - retry_count += 1 - continue - self._vprint(f"{self.log_prefix} Compression could not shrink the session; falling back to retry/salvage logic.") - - if ( - reasoning_text - and empty_response_info["repeated_signature"] - and empty_response_info["has_structured_reasoning"] - ): - self._vprint(f"{self.log_prefix}ℹ️ Structured reasoning-only response repeated unchanged — using reasoning text directly.", force=True) - self._empty_content_retries = 0 - final_response = reasoning_text - empty_msg = { - "role": "assistant", - "content": final_response, - "reasoning": reasoning_text, - "finish_reason": finish_reason, - } - messages.append(empty_msg) - break - - if self._empty_content_retries < 3: - self._vprint(f"{self.log_prefix}🔄 Retrying API call ({self._empty_content_retries}/3)...") - continue - else: - self._vprint(f"{self.log_prefix}❌ Max retries (3) for empty content exceeded.", force=True) - self._empty_content_retries = 0 - - # If a prior tool_calls turn had real content, salvage it: - # rewrite that turn's content to a brief tool description, - # and use the original content as the final response here. - fallback = getattr(self, '_last_content_with_tools', None) - if fallback: - self._last_content_with_tools = None - # Find the last assistant message with tool_calls and rewrite it - for i in range(len(messages) - 1, -1, -1): - msg = messages[i] - if msg.get("role") == "assistant" and msg.get("tool_calls"): - tool_names = [] - for tc in msg["tool_calls"]: - if not tc or not isinstance(tc, dict): continue - fn = tc.get("function", {}) - tool_names.append(fn.get("name", "unknown")) - msg["content"] = f"Calling the {', '.join(tool_names)} tool{'s' if len(tool_names) > 1 else ''}..." - break - # Strip blocks from fallback content for user display - final_response = self._strip_think_blocks(fallback).strip() - self._response_was_previewed = True - break - - # No fallback -- if reasoning_text exists, the model put its - # entire response inside tags; use that as the content. - if reasoning_text: - self._vprint(f"{self.log_prefix}Using reasoning as response content (model wrapped entire response in think tags).", force=True) - final_response = reasoning_text - empty_msg = { - "role": "assistant", - "content": final_response, - "reasoning": reasoning_text, - "finish_reason": finish_reason, - } - messages.append(empty_msg) - break - - # Truly empty -- no reasoning and no content - empty_msg = { - "role": "assistant", - "content": final_response, - "reasoning": reasoning_text, - "finish_reason": finish_reason, - } - messages.append(empty_msg) - - self._cleanup_task_resources(effective_task_id) - self._persist_session(messages, conversation_history) - - error_message = "Model generated only think blocks with no actual response after 3 retries" - if empty_response_info["is_local_custom"]: - error_message = ( - "Local/custom backend returned reasoning-only output with no visible response after 3 retries. " - "Likely causes: wrong /v1 endpoint, runtime context window smaller than Hermes expects, " - "or a resumed/large session exceeding the backend's actual context limit." - ) - - return { - "final_response": final_response or None, - "messages": messages, - "api_calls": api_call_count, - "completed": False, - "partial": True, - "error": error_message - } + final_response = "(empty)" + break # Reset retry counter/signature on successful content if hasattr(self, '_empty_content_retries'): diff --git a/tests/test_run_agent.py b/tests/test_run_agent.py index 66ba411a01e..edf2577d6c0 100644 --- a/tests/test_run_agent.py +++ b/tests/test_run_agent.py @@ -1488,19 +1488,14 @@ class TestRunConversation: assert result["completed"] is True assert result["api_calls"] == 2 - def test_empty_content_retry_uses_inline_reasoning_as_response(self, agent): - """Reasoning-only payloads should recover the inline reasoning text.""" + def test_inline_think_blocks_reasoning_only_accepted(self, agent): + """Inline reasoning-only responses accepted with (empty) content, no retries.""" self._setup_agent(agent) empty_resp = _mock_response( content="internal reasoning", finish_reason="stop", ) - # Return empty 3 times to exhaust retries - agent.client.chat.completions.create.side_effect = [ - empty_resp, - empty_resp, - empty_resp, - ] + agent.client.chat.completions.create.side_effect = [empty_resp] with ( patch.object(agent, "_persist_session"), patch.object(agent, "_save_trajectory"), @@ -1508,10 +1503,14 @@ class TestRunConversation: ): result = agent.run_conversation("answer me") assert result["completed"] is True - assert result["final_response"] == "internal reasoning" + assert result["final_response"] == "(empty)" + assert result["api_calls"] == 1 # no retries + # Reasoning should be preserved in the assistant message + assistant_msgs = [m for m in result["messages"] if m.get("role") == "assistant"] + assert any(m.get("reasoning") for m in assistant_msgs) - def test_empty_content_local_resumed_session_triggers_compression(self, agent): - """Local resumed reasoning-only responses should compress before burning retries.""" + def test_reasoning_only_local_resumed_no_compression_triggered(self, agent): + """Reasoning-only responses no longer trigger compression — accepted immediately.""" self._setup_agent(agent) agent.base_url = "http://127.0.0.1:1234/v1" agent.compression_enabled = True @@ -1520,39 +1519,34 @@ class TestRunConversation: finish_reason="stop", reasoning_content="reasoning only", ) - ok_resp = _mock_response(content="Recovered after compression", finish_reason="stop") prefill = [ {"role": "user", "content": "old question"}, {"role": "assistant", "content": "old answer"}, ] with ( - patch.object(agent, "_interruptible_api_call", side_effect=[empty_resp, ok_resp]), + patch.object(agent, "_interruptible_api_call", side_effect=[empty_resp]), patch.object(agent, "_compress_context") as mock_compress, patch.object(agent, "_persist_session"), patch.object(agent, "_save_trajectory"), patch.object(agent, "_cleanup_task_resources"), ): - mock_compress.return_value = ( - [{"role": "user", "content": "compressed user message"}], - "compressed system prompt", - ) result = agent.run_conversation("hello", conversation_history=prefill) - mock_compress.assert_called_once() + mock_compress.assert_not_called() # no compression triggered assert result["completed"] is True - assert result["final_response"] == "Recovered after compression" - assert result["api_calls"] == 1 # compression retry is refunded, same as explicit overflow path + assert result["final_response"] == "(empty)" + assert result["api_calls"] == 1 - def test_empty_content_repeated_structured_reasoning_salvages_early(self, agent): - """Repeated identical structured reasoning-only responses should stop retrying early.""" + def test_reasoning_only_response_accepted_without_retry(self, agent): + """Reasoning-only response should be accepted with (empty) content, no retries.""" self._setup_agent(agent) empty_resp = _mock_response( content=None, finish_reason="stop", reasoning_content="structured reasoning answer", ) - agent.client.chat.completions.create.side_effect = [empty_resp, empty_resp] + agent.client.chat.completions.create.side_effect = [empty_resp] with ( patch.object(agent, "_persist_session"), patch.object(agent, "_save_trajectory"), @@ -1560,24 +1554,24 @@ class TestRunConversation: ): result = agent.run_conversation("answer me") assert result["completed"] is True - assert result["final_response"] == "structured reasoning answer" - assert result["api_calls"] == 2 + assert result["final_response"] == "(empty)" + assert result["api_calls"] == 1 # no retries - def test_empty_content_local_custom_error_is_actionable(self, agent): - """Local/custom retries should return a diagnostic tailored to context/endpoint mismatch.""" + def test_truly_empty_response_accepted_without_retry(self, agent): + """Truly empty response (no content, no reasoning) should still complete with (empty).""" self._setup_agent(agent) agent.base_url = "http://127.0.0.1:1234/v1" empty_resp = _mock_response(content=None, finish_reason="stop") - agent.client.chat.completions.create.side_effect = [empty_resp, empty_resp, empty_resp] + agent.client.chat.completions.create.side_effect = [empty_resp] with ( patch.object(agent, "_persist_session"), patch.object(agent, "_save_trajectory"), patch.object(agent, "_cleanup_task_resources"), ): result = agent.run_conversation("answer me") - assert result["completed"] is False - assert "Local/custom backend returned reasoning-only output" in result["error"] - assert "wrong /v1 endpoint" in result["error"] + assert result["completed"] is True + assert result["final_response"] == "(empty)" + assert result["api_calls"] == 1 # no retries def test_nous_401_refreshes_after_remint_and_retries(self, agent): self._setup_agent(agent)