feat: add strategic re-evaluation guidance to system prompt

Port from google-gemini/gemini-cli#25062. Adds a concise system prompt
block that tells agents to stop and reconsider their approach after more
than 3 failed attempts at fixing the same issue, instead of continuing
to apply small variations of a failing fix.

The guidance is injected for ALL models when tools are loaded (not just
enforcement-target models), since fix-loops affect every model.

3-step process:
1. Stop and re-read the original task description
2. List current assumptions and identify wrong ones
3. Propose a fundamentally different approach

Includes tests for the constant content and system prompt integration.
This commit is contained in:
Teknium 2026-04-15 17:06:34 -07:00
parent 9d9b424390
commit 43f4de0216
No known key found for this signature in database
4 changed files with 66 additions and 1 deletions

View file

@ -185,6 +185,21 @@ TOOL_USE_ENFORCEMENT_GUIDANCE = (
"without acting are not acceptable." "without acting are not acceptable."
) )
# System-prompt block that breaks fix-loops: once the agent has retried a
# failing fix more than 3 times, it is told to step back and change strategy
# instead of patching the same approach again.
# Ported from google-gemini/gemini-cli#25062.
STRATEGIC_REEVALUATION_GUIDANCE = "\n".join(
    (
        # Markdown heading of the block.
        "# Strategic re-evaluation",
        # Trigger condition.
        "If you have attempted to fix a failing implementation more than 3 times "
        "without success, you must:",
        # The three mandatory steps.
        "1. Stop and re-read the original task description carefully.",
        "2. List your current assumptions and identify which ones might be wrong.",
        "3. Propose a fundamentally different approach rather than continuing to "
        "patch the current one.",
        # Closing reminder; intentionally no trailing newline.
        "Do not keep applying small variations of the same fix. Step back, "
        "reconsider the problem from scratch, and try a different strategy.",
    )
)
# Model name substrings that trigger tool-use enforcement guidance. # Model name substrings that trigger tool-use enforcement guidance.
# Add new patterns here when a model family needs explicit steering. # Add new patterns here when a model family needs explicit steering.
TOOL_USE_ENFORCEMENT_MODELS = ("gpt", "codex", "gemini", "gemma", "grok") TOOL_USE_ENFORCEMENT_MODELS = ("gpt", "codex", "gemini", "gemma", "grok")

View file

@ -94,7 +94,7 @@ from agent.model_metadata import (
from agent.context_compressor import ContextCompressor from agent.context_compressor import ContextCompressor
from agent.subdirectory_hints import SubdirectoryHintTracker from agent.subdirectory_hints import SubdirectoryHintTracker
from agent.prompt_caching import apply_anthropic_cache_control from agent.prompt_caching import apply_anthropic_cache_control
from agent.prompt_builder import build_skills_system_prompt, build_context_files_prompt, build_environment_hints, load_soul_md, TOOL_USE_ENFORCEMENT_GUIDANCE, TOOL_USE_ENFORCEMENT_MODELS, DEVELOPER_ROLE_MODELS, GOOGLE_MODEL_OPERATIONAL_GUIDANCE, OPENAI_MODEL_EXECUTION_GUIDANCE from agent.prompt_builder import build_skills_system_prompt, build_context_files_prompt, build_environment_hints, load_soul_md, TOOL_USE_ENFORCEMENT_GUIDANCE, TOOL_USE_ENFORCEMENT_MODELS, DEVELOPER_ROLE_MODELS, GOOGLE_MODEL_OPERATIONAL_GUIDANCE, OPENAI_MODEL_EXECUTION_GUIDANCE, STRATEGIC_REEVALUATION_GUIDANCE
from agent.usage_pricing import estimate_usage_cost, normalize_usage from agent.usage_pricing import estimate_usage_cost, normalize_usage
from agent.display import ( from agent.display import (
KawaiiSpinner, build_tool_preview as _build_tool_preview, KawaiiSpinner, build_tool_preview as _build_tool_preview,
@ -3350,6 +3350,13 @@ class AIAgent:
if "gpt" in _model_lower or "codex" in _model_lower: if "gpt" in _model_lower or "codex" in _model_lower:
prompt_parts.append(OPENAI_MODEL_EXECUTION_GUIDANCE) prompt_parts.append(OPENAI_MODEL_EXECUTION_GUIDANCE)
# Strategic re-evaluation guidance — injected for all models with
# tools, not just enforcement targets. Prevents fix-loops where the
# agent applies small variations of a failing approach forever.
# Ported from google-gemini/gemini-cli#25062.
if self.valid_tool_names:
prompt_parts.append(STRATEGIC_REEVALUATION_GUIDANCE)
# so it can refer the user to them rather than reinventing answers. # so it can refer the user to them rather than reinventing answers.
# Note: ephemeral_system_prompt is NOT included here. It's injected at # Note: ephemeral_system_prompt is NOT included here. It's injected at

View file

@ -24,6 +24,7 @@ from agent.prompt_builder import (
TOOL_USE_ENFORCEMENT_GUIDANCE, TOOL_USE_ENFORCEMENT_GUIDANCE,
TOOL_USE_ENFORCEMENT_MODELS, TOOL_USE_ENFORCEMENT_MODELS,
OPENAI_MODEL_EXECUTION_GUIDANCE, OPENAI_MODEL_EXECUTION_GUIDANCE,
STRATEGIC_REEVALUATION_GUIDANCE,
MEMORY_GUIDANCE, MEMORY_GUIDANCE,
SESSION_SEARCH_GUIDANCE, SESSION_SEARCH_GUIDANCE,
PLATFORM_HINTS, PLATFORM_HINTS,
@ -1029,6 +1030,40 @@ class TestOpenAIModelExecutionGuidance:
assert len(OPENAI_MODEL_EXECUTION_GUIDANCE) > 100 assert len(OPENAI_MODEL_EXECUTION_GUIDANCE) > 100
# =========================================================================
# Strategic re-evaluation guidance
# (ported from google-gemini/gemini-cli#25062)
# =========================================================================
class TestStrategicReevaluationGuidance:
    """Content checks for the STRATEGIC_REEVALUATION_GUIDANCE prompt constant."""

    def test_guidance_is_string(self):
        """The constant is a string of more than 50 characters."""
        guidance = STRATEGIC_REEVALUATION_GUIDANCE
        assert isinstance(guidance, str)
        assert len(guidance) > 50

    def test_guidance_mentions_3_attempts(self):
        """The retry threshold is spelled out with the phrase "3 times"."""
        guidance = STRATEGIC_REEVALUATION_GUIDANCE
        assert "3 times" in guidance

    def test_guidance_requires_reread_task(self):
        """Step 1 refers to the original task (case-insensitive match)."""
        lowered = STRATEGIC_REEVALUATION_GUIDANCE.lower()
        assert "original task" in lowered

    def test_guidance_requires_questioning_assumptions(self):
        """Step 2 talks about assumptions (case-insensitive match)."""
        lowered = STRATEGIC_REEVALUATION_GUIDANCE.lower()
        assert "assumptions" in lowered

    def test_guidance_requires_different_approach(self):
        """Step 3 asks for a different approach (case-insensitive match)."""
        lowered = STRATEGIC_REEVALUATION_GUIDANCE.lower()
        assert "different approach" in lowered

    def test_guidance_discourages_small_variations(self):
        """The closing sentence warns against small variations of one fix."""
        lowered = STRATEGIC_REEVALUATION_GUIDANCE.lower()
        assert "small variations" in lowered
# ========================================================================= # =========================================================================
# Budget warning history stripping # Budget warning history stripping
# ========================================================================= # =========================================================================

View file

@ -705,6 +705,14 @@ class TestBuildSystemPrompt:
assert mock_skills.call_args.kwargs["available_tools"] == set(toolset_map) assert mock_skills.call_args.kwargs["available_tools"] == set(toolset_map)
assert mock_skills.call_args.kwargs["available_toolsets"] == {"web", "skills"} assert mock_skills.call_args.kwargs["available_toolsets"] == {"web", "skills"}
def test_strategic_reevaluation_guidance_present_when_tools_loaded(self, agent):
    """The built system prompt contains the strategic re-evaluation block.

    Checks the block's heading and its "3 times" threshold phrase, and also
    that the constant itself is embedded in the prompt so any later edit to
    the constant is covered without updating hard-coded fragments here.

    NOTE(review): presumably the ``agent`` fixture is created with tools
    loaded, as the test name implies — confirm against the fixture.
    """
    from agent.prompt_builder import STRATEGIC_REEVALUATION_GUIDANCE

    prompt = agent._build_system_prompt()

    assert "Strategic re-evaluation" in prompt
    assert "3 times" in prompt
    # Previously the import above was unused; assert on the real constant so
    # the test is tied to what the builder actually appends.
    # NOTE(review): assumes the builder inserts the block verbatim — confirm.
    assert STRATEGIC_REEVALUATION_GUIDANCE in prompt
class TestToolUseEnforcementConfig: class TestToolUseEnforcementConfig:
"""Tests for the agent.tool_use_enforcement config option.""" """Tests for the agent.tool_use_enforcement config option."""