From 43f4de02162d626604457f10557b64a2f48ae8cc Mon Sep 17 00:00:00 2001 From: Teknium Date: Wed, 15 Apr 2026 17:06:34 -0700 Subject: [PATCH] feat: add strategic re-evaluation guidance to system prompt Port from google-gemini/gemini-cli#25062. Adds a concise system prompt block that tells agents to stop and reconsider their approach after more than 3 failed attempts at fixing the same issue, instead of continuing to apply small variations of a failing fix. The guidance is injected for ALL models when tools are loaded (not just enforcement-target models), since fix-loops affect every model. 3-step process: 1. Stop and re-read the original task description 2. List current assumptions and identify wrong ones 3. Propose a fundamentally different approach Includes tests for the constant content and system prompt integration. --- agent/prompt_builder.py | 15 +++++++++++++ run_agent.py | 9 +++++++- tests/agent/test_prompt_builder.py | 35 ++++++++++++++++++++++++++++++ tests/run_agent/test_run_agent.py | 8 +++++++ 4 files changed, 66 insertions(+), 1 deletion(-) diff --git a/agent/prompt_builder.py b/agent/prompt_builder.py index c61d6995b..886fb43cd 100644 --- a/agent/prompt_builder.py +++ b/agent/prompt_builder.py @@ -185,6 +185,21 @@ TOOL_USE_ENFORCEMENT_GUIDANCE = ( "without acting are not acceptable." ) +# Strategic re-evaluation guidance — prevents agents from getting stuck in +# fix-loops by forcing a step back after repeated failures. +# Ported from google-gemini/gemini-cli#25062. +STRATEGIC_REEVALUATION_GUIDANCE = ( + "# Strategic re-evaluation\n" + "If you have attempted to fix a failing implementation more than 3 times " + "without success, you must:\n" + "1. Stop and re-read the original task description carefully.\n" + "2. List your current assumptions and identify which ones might be wrong.\n" + "3. Propose a fundamentally different approach rather than continuing to " + "patch the current one.\n" + "Do not keep applying small variations of the same fix. 
Step back, " + "reconsider the problem from scratch, and try a different strategy." +) + # Model name substrings that trigger tool-use enforcement guidance. # Add new patterns here when a model family needs explicit steering. TOOL_USE_ENFORCEMENT_MODELS = ("gpt", "codex", "gemini", "gemma", "grok") diff --git a/run_agent.py b/run_agent.py index f199d806d..5ba248518 100644 --- a/run_agent.py +++ b/run_agent.py @@ -94,7 +94,7 @@ from agent.model_metadata import ( from agent.context_compressor import ContextCompressor from agent.subdirectory_hints import SubdirectoryHintTracker from agent.prompt_caching import apply_anthropic_cache_control -from agent.prompt_builder import build_skills_system_prompt, build_context_files_prompt, build_environment_hints, load_soul_md, TOOL_USE_ENFORCEMENT_GUIDANCE, TOOL_USE_ENFORCEMENT_MODELS, DEVELOPER_ROLE_MODELS, GOOGLE_MODEL_OPERATIONAL_GUIDANCE, OPENAI_MODEL_EXECUTION_GUIDANCE +from agent.prompt_builder import build_skills_system_prompt, build_context_files_prompt, build_environment_hints, load_soul_md, TOOL_USE_ENFORCEMENT_GUIDANCE, TOOL_USE_ENFORCEMENT_MODELS, DEVELOPER_ROLE_MODELS, GOOGLE_MODEL_OPERATIONAL_GUIDANCE, OPENAI_MODEL_EXECUTION_GUIDANCE, STRATEGIC_REEVALUATION_GUIDANCE from agent.usage_pricing import estimate_usage_cost, normalize_usage from agent.display import ( KawaiiSpinner, build_tool_preview as _build_tool_preview, @@ -3350,6 +3350,13 @@ class AIAgent: if "gpt" in _model_lower or "codex" in _model_lower: prompt_parts.append(OPENAI_MODEL_EXECUTION_GUIDANCE) + # Strategic re-evaluation guidance — injected for all models with + # tools, not just enforcement targets. Prevents fix-loops where the + # agent applies small variations of a failing approach forever. + # Ported from google-gemini/gemini-cli#25062. + if self.valid_tool_names: + prompt_parts.append(STRATEGIC_REEVALUATION_GUIDANCE) + # so it can refer the user to them rather than reinventing answers. # Note: ephemeral_system_prompt is NOT included here. 
It's injected at diff --git a/tests/agent/test_prompt_builder.py b/tests/agent/test_prompt_builder.py index 5a222cc38..3a7c79a25 100644 --- a/tests/agent/test_prompt_builder.py +++ b/tests/agent/test_prompt_builder.py @@ -24,6 +24,7 @@ from agent.prompt_builder import ( TOOL_USE_ENFORCEMENT_GUIDANCE, TOOL_USE_ENFORCEMENT_MODELS, OPENAI_MODEL_EXECUTION_GUIDANCE, + STRATEGIC_REEVALUATION_GUIDANCE, MEMORY_GUIDANCE, SESSION_SEARCH_GUIDANCE, PLATFORM_HINTS, @@ -1029,6 +1030,40 @@ class TestOpenAIModelExecutionGuidance: assert len(OPENAI_MODEL_EXECUTION_GUIDANCE) > 100 +# ========================================================================= +# Strategic re-evaluation guidance +# (ported from google-gemini/gemini-cli#25062) +# ========================================================================= + + +class TestStrategicReevaluationGuidance: + """Tests for the strategic re-evaluation guidance constant.""" + + def test_guidance_is_string(self): + assert isinstance(STRATEGIC_REEVALUATION_GUIDANCE, str) + assert len(STRATEGIC_REEVALUATION_GUIDANCE) > 50 + + def test_guidance_mentions_3_attempts(self): + """Should trigger after 3 failed attempts.""" + assert "3 times" in STRATEGIC_REEVALUATION_GUIDANCE + + def test_guidance_requires_reread_task(self): + """Step 1: re-read the original task.""" + assert "original task" in STRATEGIC_REEVALUATION_GUIDANCE.lower() + + def test_guidance_requires_questioning_assumptions(self): + """Step 2: question current assumptions.""" + assert "assumptions" in STRATEGIC_REEVALUATION_GUIDANCE.lower() + + def test_guidance_requires_different_approach(self): + """Step 3: propose a fundamentally different approach.""" + assert "different approach" in STRATEGIC_REEVALUATION_GUIDANCE.lower() + + def test_guidance_discourages_small_variations(self): + """Should tell the agent not to keep doing the same thing.""" + assert "small variations" in STRATEGIC_REEVALUATION_GUIDANCE.lower() + + # 
========================================================================= # Budget warning history stripping # ========================================================================= diff --git a/tests/run_agent/test_run_agent.py b/tests/run_agent/test_run_agent.py index d71e6a625..6df68b465 100644 --- a/tests/run_agent/test_run_agent.py +++ b/tests/run_agent/test_run_agent.py @@ -705,6 +705,14 @@ class TestBuildSystemPrompt: assert mock_skills.call_args.kwargs["available_tools"] == set(toolset_map) assert mock_skills.call_args.kwargs["available_toolsets"] == {"web", "skills"} + def test_strategic_reevaluation_guidance_present_when_tools_loaded(self, agent): + """Strategic re-evaluation guidance should appear for any agent with tools.""" + from agent.prompt_builder import STRATEGIC_REEVALUATION_GUIDANCE + + prompt = agent._build_system_prompt() + assert "Strategic re-evaluation" in prompt + assert "3 times" in prompt + class TestToolUseEnforcementConfig: """Tests for the agent.tool_use_enforcement config option."""