feat: add strategic re-evaluation guidance to system prompt

Port from google-gemini/gemini-cli#25062. Adds a concise system prompt block that tells agents to stop and reconsider their approach once they have tried more than 3 times to fix the same issue without success, instead of continuing to apply small variations of a failing fix. The guidance is injected for ALL models when tools are loaded (not just enforcement-target models), since fix-loops affect every model. The block prescribes a 3-step process: (1) stop and re-read the original task description; (2) list current assumptions and identify the wrong ones; (3) propose a fundamentally different approach. Includes tests for the constant's content and for its integration into the system prompt.
2026-04-25 00:51:20 +00:00 · 2026-04-15 17:06:34 -07:00 · 2026-04-15 17:06:34 -07:00 · 43f4de0216
commit 43f4de0216
parent 9d9b424390
4 changed files with 66 additions and 1 deletions
--- a/agent/prompt_builder.py
+++ b/agent/prompt_builder.py
@ -185,6 +185,21 @@ TOOL_USE_ENFORCEMENT_GUIDANCE = (
    "without acting are not acceptable."
 )
 # Strategic re-evaluation guidance (ported from google-gemini/gemini-cli#25062).
 # Tells the agent to step back after repeated failed fixes instead of staying
 # stuck in a fix-loop.  Each tuple entry below is one line of the rendered
 # prompt block; the entries are joined with newlines and the block carries no
 # trailing newline.
 STRATEGIC_REEVALUATION_GUIDANCE = "\n".join((
    "# Strategic re-evaluation",
    "If you have attempted to fix a failing implementation more than 3 times "
    "without success, you must:",
    "1. Stop and re-read the original task description carefully.",
    "2. List your current assumptions and identify which ones might be wrong.",
    "3. Propose a fundamentally different approach rather than continuing to "
    "patch the current one.",
    "Do not keep applying small variations of the same fix. Step back, "
    "reconsider the problem from scratch, and try a different strategy.",
 ))
 # Model name substrings that trigger tool-use enforcement guidance.
 # Add new patterns here when a model family needs explicit steering.
 # NOTE(review): every entry is lowercase; presumably they are compared against
 # a lowercased model name — confirm at the call site before adding mixed-case
 # patterns.
 TOOL_USE_ENFORCEMENT_MODELS = ("gpt", "codex", "gemini", "gemma", "grok")
--- a/run_agent.py
+++ b/run_agent.py
@ -94,7 +94,7 @@ from agent.model_metadata import (
 from agent.context_compressor import ContextCompressor
 from agent.subdirectory_hints import SubdirectoryHintTracker
 from agent.prompt_caching import apply_anthropic_cache_control
-from agent.prompt_builder import build_skills_system_prompt, build_context_files_prompt, build_environment_hints, load_soul_md, TOOL_USE_ENFORCEMENT_GUIDANCE, TOOL_USE_ENFORCEMENT_MODELS, DEVELOPER_ROLE_MODELS, GOOGLE_MODEL_OPERATIONAL_GUIDANCE, OPENAI_MODEL_EXECUTION_GUIDANCE
+from agent.prompt_builder import build_skills_system_prompt, build_context_files_prompt, build_environment_hints, load_soul_md, TOOL_USE_ENFORCEMENT_GUIDANCE, TOOL_USE_ENFORCEMENT_MODELS, DEVELOPER_ROLE_MODELS, GOOGLE_MODEL_OPERATIONAL_GUIDANCE, OPENAI_MODEL_EXECUTION_GUIDANCE, STRATEGIC_REEVALUATION_GUIDANCE
 from agent.usage_pricing import estimate_usage_cost, normalize_usage
 from agent.display import (
    KawaiiSpinner, build_tool_preview as _build_tool_preview,
@ -3350,6 +3350,13 @@ class AIAgent:
                if "gpt" in _model_lower or "codex" in _model_lower:
                    prompt_parts.append(OPENAI_MODEL_EXECUTION_GUIDANCE)
        # Strategic re-evaluation guidance — injected for all models with
        # tools, not just enforcement targets.  Prevents fix-loops where the
        # agent applies small variations of a failing approach forever.
        # Ported from google-gemini/gemini-cli#25062.
        if self.valid_tool_names:
            prompt_parts.append(STRATEGIC_REEVALUATION_GUIDANCE)
        # so it can refer the user to them rather than reinventing answers.
        # Note: ephemeral_system_prompt is NOT included here. It's injected at
--- a/tests/agent/test_prompt_builder.py
+++ b/tests/agent/test_prompt_builder.py
@ -24,6 +24,7 @@ from agent.prompt_builder import (
    TOOL_USE_ENFORCEMENT_GUIDANCE,
    TOOL_USE_ENFORCEMENT_MODELS,
    OPENAI_MODEL_EXECUTION_GUIDANCE,
    STRATEGIC_REEVALUATION_GUIDANCE,
    MEMORY_GUIDANCE,
    SESSION_SEARCH_GUIDANCE,
    PLATFORM_HINTS,
@ -1029,6 +1030,40 @@ class TestOpenAIModelExecutionGuidance:
        assert len(OPENAI_MODEL_EXECUTION_GUIDANCE) > 100
 # =========================================================================
 # Strategic re-evaluation guidance
 # (ported from google-gemini/gemini-cli#25062)
 # =========================================================================
 class TestStrategicReevaluationGuidance:
    """Content checks for the ``STRATEGIC_REEVALUATION_GUIDANCE`` prompt block."""
    def test_guidance_is_string(self):
        """The constant is a ``str`` of more than 50 characters."""
        guidance = STRATEGIC_REEVALUATION_GUIDANCE
        assert isinstance(guidance, str)
        assert len(guidance) > 50
    def test_guidance_mentions_3_attempts(self):
        """The failure threshold is spelled out (the text contains "3 times")."""
        guidance = STRATEGIC_REEVALUATION_GUIDANCE
        assert "3 times" in guidance
    def test_guidance_requires_reread_task(self):
        """Step 1: the text refers to the original task (case-insensitive)."""
        lowered = STRATEGIC_REEVALUATION_GUIDANCE.lower()
        assert "original task" in lowered
    def test_guidance_requires_questioning_assumptions(self):
        """Step 2: the text mentions assumptions (case-insensitive)."""
        lowered = STRATEGIC_REEVALUATION_GUIDANCE.lower()
        assert "assumptions" in lowered
    def test_guidance_requires_different_approach(self):
        """Step 3: the text asks for a different approach (case-insensitive)."""
        lowered = STRATEGIC_REEVALUATION_GUIDANCE.lower()
        assert "different approach" in lowered
    def test_guidance_discourages_small_variations(self):
        """The text warns against "small variations" of the same fix."""
        lowered = STRATEGIC_REEVALUATION_GUIDANCE.lower()
        assert "small variations" in lowered
 # =========================================================================
 # Budget warning history stripping
 # =========================================================================
--- a/tests/run_agent/test_run_agent.py
+++ b/tests/run_agent/test_run_agent.py
@ -705,6 +705,14 @@ class TestBuildSystemPrompt:
        assert mock_skills.call_args.kwargs["available_tools"] == set(toolset_map)
        assert mock_skills.call_args.kwargs["available_toolsets"] == {"web", "skills"}
    def test_strategic_reevaluation_guidance_present_when_tools_loaded(self, agent):
        """The built system prompt embeds the strategic re-evaluation block.

        The check is made against the imported constant itself, not only
        against hand-copied fragments, so it cannot drift from the real
        guidance text.
        """
        from agent.prompt_builder import STRATEGIC_REEVALUATION_GUIDANCE
        # NOTE(review): relies on the ``agent`` fixture having tools loaded;
        # the fixture is defined outside this hunk — confirm.
        prompt = agent._build_system_prompt()
        # Cheap, readable check first: a failure here means the block is missing.
        assert "Strategic re-evaluation" in prompt
        # Full check: a failure here means the block was altered or truncated.
        assert STRATEGIC_REEVALUATION_GUIDANCE in prompt
 class TestToolUseEnforcementConfig:
    """Tests for the agent.tool_use_enforcement config option."""