From 43f4de02162d626604457f10557b64a2f48ae8cc Mon Sep 17 00:00:00 2001
From: Teknium <teknium1@gmail.com>
Date: Wed, 15 Apr 2026 17:06:34 -0700
Subject: [PATCH] feat: add strategic re-evaluation guidance to system prompt

Port from google-gemini/gemini-cli#25062. Adds a concise system prompt
block that tells agents to stop and reconsider their approach after more
than 3 failed attempts at fixing the same issue, instead of continuing
to apply small variations of a failing fix.

The guidance is injected for ALL models when tools are loaded (not just
enforcement-target models), since fix-loops affect every model.

3-step process:
1. Stop and re-read the original task description
2. List current assumptions and identify wrong ones
3. Propose a fundamentally different approach

Includes tests for the constant content and system prompt integration.
---
 agent/prompt_builder.py            | 15 +++++++++++++
 run_agent.py                       |  9 +++++++-
 tests/agent/test_prompt_builder.py | 35 ++++++++++++++++++++++++++++++
 tests/run_agent/test_run_agent.py  |  8 +++++++
 4 files changed, 66 insertions(+), 1 deletion(-)

diff --git a/agent/prompt_builder.py b/agent/prompt_builder.py
index c61d6995b..886fb43cd 100644
--- a/agent/prompt_builder.py
+++ b/agent/prompt_builder.py
@@ -185,6 +185,21 @@ TOOL_USE_ENFORCEMENT_GUIDANCE = (
     "without acting are not acceptable."
 )
 
+# Strategic re-evaluation guidance — breaks fix-loops by telling the agent to
+# step back once more than 3 attempts to fix a failing implementation have
+# not succeeded.  Ported from google-gemini/gemini-cli#25062.
+STRATEGIC_REEVALUATION_GUIDANCE = (
+    "# Strategic re-evaluation\n"
+    "If you have attempted to fix a failing implementation more than 3 times "
+    "without success, you must:\n"
+    "1. Stop and re-read the original task description carefully.\n"
+    "2. List your current assumptions and identify which ones might be wrong.\n"
+    "3. Propose a fundamentally different approach rather than continuing to "
+    "patch the current one.\n"
+    "Do not keep applying small variations of the same fix. Step back, "
+    "reconsider the problem from scratch, and try a different strategy."
+)
+
 # Model name substrings that trigger tool-use enforcement guidance.
 # Add new patterns here when a model family needs explicit steering.
 TOOL_USE_ENFORCEMENT_MODELS = ("gpt", "codex", "gemini", "gemma", "grok")
diff --git a/run_agent.py b/run_agent.py
index f199d806d..5ba248518 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -94,7 +94,7 @@ from agent.model_metadata import (
 from agent.context_compressor import ContextCompressor
 from agent.subdirectory_hints import SubdirectoryHintTracker
 from agent.prompt_caching import apply_anthropic_cache_control
-from agent.prompt_builder import build_skills_system_prompt, build_context_files_prompt, build_environment_hints, load_soul_md, TOOL_USE_ENFORCEMENT_GUIDANCE, TOOL_USE_ENFORCEMENT_MODELS, DEVELOPER_ROLE_MODELS, GOOGLE_MODEL_OPERATIONAL_GUIDANCE, OPENAI_MODEL_EXECUTION_GUIDANCE
+from agent.prompt_builder import build_skills_system_prompt, build_context_files_prompt, build_environment_hints, load_soul_md, TOOL_USE_ENFORCEMENT_GUIDANCE, TOOL_USE_ENFORCEMENT_MODELS, DEVELOPER_ROLE_MODELS, GOOGLE_MODEL_OPERATIONAL_GUIDANCE, OPENAI_MODEL_EXECUTION_GUIDANCE, STRATEGIC_REEVALUATION_GUIDANCE
 from agent.usage_pricing import estimate_usage_cost, normalize_usage
 from agent.display import (
     KawaiiSpinner, build_tool_preview as _build_tool_preview,
@@ -3350,6 +3350,13 @@ class AIAgent:
                 if "gpt" in _model_lower or "codex" in _model_lower:
                     prompt_parts.append(OPENAI_MODEL_EXECUTION_GUIDANCE)
 
+        # Strategic re-evaluation guidance — injected for all models with
+        # tools, not just enforcement targets.  Prevents fix-loops where the
+        # agent applies small variations of a failing approach forever.
+        # Ported from google-gemini/gemini-cli#25062.
+        if self.valid_tool_names:
+            prompt_parts.append(STRATEGIC_REEVALUATION_GUIDANCE)
+
         # so it can refer the user to them rather than reinventing answers.
 
         # Note: ephemeral_system_prompt is NOT included here. It's injected at
diff --git a/tests/agent/test_prompt_builder.py b/tests/agent/test_prompt_builder.py
index 5a222cc38..3a7c79a25 100644
--- a/tests/agent/test_prompt_builder.py
+++ b/tests/agent/test_prompt_builder.py
@@ -24,6 +24,7 @@ from agent.prompt_builder import (
     TOOL_USE_ENFORCEMENT_GUIDANCE,
     TOOL_USE_ENFORCEMENT_MODELS,
     OPENAI_MODEL_EXECUTION_GUIDANCE,
+    STRATEGIC_REEVALUATION_GUIDANCE,
     MEMORY_GUIDANCE,
     SESSION_SEARCH_GUIDANCE,
     PLATFORM_HINTS,
@@ -1029,6 +1030,40 @@ class TestOpenAIModelExecutionGuidance:
         assert len(OPENAI_MODEL_EXECUTION_GUIDANCE) > 100
 
 
+# =========================================================================
+# Strategic re-evaluation guidance
+# (ported from google-gemini/gemini-cli#25062)
+# =========================================================================
+
+
+class TestStrategicReevaluationGuidance:
+    """Content checks for the STRATEGIC_REEVALUATION_GUIDANCE prompt constant."""
+
+    def test_guidance_is_string(self):
+        assert isinstance(STRATEGIC_REEVALUATION_GUIDANCE, str)
+        assert len(STRATEGIC_REEVALUATION_GUIDANCE) > 50
+
+    def test_guidance_mentions_3_attempts(self):
+        """Guidance names the threshold: more than 3 failed fix attempts."""
+        assert "3 times" in STRATEGIC_REEVALUATION_GUIDANCE
+
+    def test_guidance_requires_reread_task(self):
+        """Step 1: re-read the original task."""
+        assert "original task" in STRATEGIC_REEVALUATION_GUIDANCE.lower()
+
+    def test_guidance_requires_questioning_assumptions(self):
+        """Step 2: question current assumptions."""
+        assert "assumptions" in STRATEGIC_REEVALUATION_GUIDANCE.lower()
+
+    def test_guidance_requires_different_approach(self):
+        """Step 3: propose a fundamentally different approach."""
+        assert "different approach" in STRATEGIC_REEVALUATION_GUIDANCE.lower()
+
+    def test_guidance_discourages_small_variations(self):
+        """Guidance tells the agent not to keep applying small variations of a fix."""
+        assert "small variations" in STRATEGIC_REEVALUATION_GUIDANCE.lower()
+
+
 # =========================================================================
 # Budget warning history stripping
 # =========================================================================
diff --git a/tests/run_agent/test_run_agent.py b/tests/run_agent/test_run_agent.py
index d71e6a625..6df68b465 100644
--- a/tests/run_agent/test_run_agent.py
+++ b/tests/run_agent/test_run_agent.py
@@ -705,6 +705,14 @@ class TestBuildSystemPrompt:
         assert mock_skills.call_args.kwargs["available_tools"] == set(toolset_map)
         assert mock_skills.call_args.kwargs["available_toolsets"] == {"web", "skills"}
 
+    def test_strategic_reevaluation_guidance_present_when_tools_loaded(self, agent):
+        """The full strategic re-evaluation block is in the prompt when tools are loaded."""
+        from agent.prompt_builder import STRATEGIC_REEVALUATION_GUIDANCE
+
+        prompt = agent._build_system_prompt()
+        # Assert on the constant itself so the check tracks any wording change.
+        assert STRATEGIC_REEVALUATION_GUIDANCE in prompt
+
 
 class TestToolUseEnforcementConfig:
     """Tests for the agent.tool_use_enforcement config option."""