diff --git a/run_agent.py b/run_agent.py
index 60be561292..fb03ee5c4f 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -1326,6 +1326,24 @@ class AIAgent:
summary = detail.get('summary') or detail.get('content') or detail.get('text')
if summary and summary not in reasoning_parts:
reasoning_parts.append(summary)
+
+        # Some providers embed reasoning inline in assistant content, wrapped
+        # in tag pairs such as <think>...</think>, instead of returning
+        # structured reasoning fields. Only used when none of those was found.
+        content = getattr(assistant_message, "content", None)
+        if not reasoning_parts and isinstance(content, str) and content:
+            inline_patterns = (
+                r"<think>(.*?)</think>",
+                r"<thinking>(.*?)</thinking>",
+                r"<reasoning>(.*?)</reasoning>",
+                r"<REASONING_SCRATCHPAD>(.*?)</REASONING_SCRATCHPAD>",
+            )
+            flags = re.DOTALL | re.IGNORECASE  # blocks may span lines; tag case varies
+            for pattern in inline_patterns:
+                for block in re.findall(pattern, content, flags=flags):
+                    cleaned = block.strip()
+                    if cleaned and cleaned not in reasoning_parts:
+                        reasoning_parts.append(cleaned)
# Combine all reasoning parts
if reasoning_parts:
@@ -6392,6 +6410,7 @@ class AIAgent:
'exceeds the limit', 'context window',
'request entity too large', # OpenRouter/Nous 413 safety net
'prompt is too long', # Anthropic: "prompt is too long: N tokens > M maximum"
+ 'prompt exceeds max length', # Z.AI / GLM: generic 400 overflow wording
])
# Fallback heuristic: Anthropic sometimes returns a generic
diff --git a/tests/test_run_agent.py b/tests/test_run_agent.py
index 81e16b7027..3dd9a134b3 100644
--- a/tests/test_run_agent.py
+++ b/tests/test_run_agent.py
@@ -267,6 +267,21 @@ class TestExtractReasoning:
result = agent._extract_reasoning(msg)
assert result == "same text"
+    @pytest.mark.parametrize(
+        ("content", "expected"),
+        [
+            ("<think>thinking hard</think>", "thinking hard"),
+            ("<thinking>step by step</thinking>", "step by step"),
+            (
+                "<REASONING_SCRATCHPAD>scratch analysis</REASONING_SCRATCHPAD>",
+                "scratch analysis",
+            ),
+        ],
+    )
+    def test_inline_reasoning_blocks_fallback(self, agent, content, expected):
+        """Reasoning wrapped in inline tags inside content is returned without the tags."""
+        assert agent._extract_reasoning(_mock_assistant_msg(content=content)) == expected
+
class TestCleanSessionContent:
def test_none_passthrough(self):
@@ -1202,8 +1217,8 @@ class TestRunConversation:
assert result["completed"] is True
assert result["api_calls"] == 2
- def test_empty_content_retry_and_fallback(self, agent):
- """Empty content (only think block) retries, then falls back to partial."""
+ def test_empty_content_retry_uses_inline_reasoning_as_response(self, agent):
+ """Reasoning-only payloads should recover the inline reasoning text."""
self._setup_agent(agent)
empty_resp = _mock_response(
content="internal reasoning",
@@ -1221,9 +1236,8 @@ class TestRunConversation:
patch.object(agent, "_cleanup_task_resources"),
):
result = agent.run_conversation("answer me")
- # After 3 retries with no real content, should return partial
- assert result["completed"] is False
- assert result.get("partial") is True
+ assert result["completed"] is True
+ assert result["final_response"] == "internal reasoning"
def test_nous_401_refreshes_after_remint_and_retries(self, agent):
self._setup_agent(agent)
@@ -1296,6 +1310,36 @@ class TestRunConversation:
assert result["final_response"] == "All done"
assert result["completed"] is True
+    def test_glm_prompt_exceeds_max_length_triggers_compression(self, agent):
+        """A GLM/Z.AI 'Prompt exceeds max length' 400 triggers compression, then a retry."""
+        self._setup_agent(agent)
+        prior_turns = [
+            {"role": "user", "content": "previous question"},
+            {"role": "assistant", "content": "previous answer"},
+        ]
+        # First API call raises the overflow error; the second one succeeds.
+        overflow_error = Exception(
+            "Error code: 400 - {'error': {'code': '1261', 'message': 'Prompt exceeds max length'}}"
+        )
+        overflow_error.status_code = 400
+        recovered = _mock_response(content="Recovered after compression", finish_reason="stop")
+        agent.client.chat.completions.create.side_effect = [overflow_error, recovered]
+        # Canned compressor output: a message list plus a system prompt string.
+        canned = ([{"role": "user", "content": "hello"}], "compressed system prompt")
+
+        with (
+            patch.object(agent, "_compress_context", return_value=canned) as compress_mock,
+            patch.object(agent, "_persist_session"),
+            patch.object(agent, "_save_trajectory"),
+            patch.object(agent, "_cleanup_task_resources"),
+        ):
+            outcome = agent.run_conversation("hello", conversation_history=prior_turns)
+
+        # Compression must have run exactly once before the successful retry.
+        compress_mock.assert_called_once()
+        assert outcome["completed"] is True
+        assert outcome["final_response"] == "Recovered after compression"
+
@pytest.mark.parametrize(
("first_content", "second_content", "expected_final"),
[