mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
feat: add strategic re-evaluation guidance to system prompt
Port from google-gemini/gemini-cli#25062.

Adds a concise system prompt block that tells agents to stop and reconsider their approach after 3 failed attempts at fixing the same issue, instead of continuing to apply small variations of a failing fix.

The guidance is injected for ALL models when tools are loaded (not just enforcement-target models), since fix-loops affect every model.

3-step process:
1. Stop and re-read the original task description
2. List current assumptions and identify wrong ones
3. Propose a fundamentally different approach

Includes tests for the constant content and system prompt integration.
This commit is contained in:
parent
9d9b424390
commit
43f4de0216
4 changed files with 66 additions and 1 deletion
|
|
@ -185,6 +185,21 @@ TOOL_USE_ENFORCEMENT_GUIDANCE = (
|
||||||
"without acting are not acceptable."
|
"without acting are not acceptable."
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Strategic re-evaluation guidance — prevents agents from getting stuck in
|
||||||
|
# fix-loops by forcing a step back after repeated failures.
|
||||||
|
# Ported from google-gemini/gemini-cli#25062.
|
||||||
|
STRATEGIC_REEVALUATION_GUIDANCE = (
|
||||||
|
"# Strategic re-evaluation\n"
|
||||||
|
"If you have attempted to fix a failing implementation more than 3 times "
|
||||||
|
"without success, you must:\n"
|
||||||
|
"1. Stop and re-read the original task description carefully.\n"
|
||||||
|
"2. List your current assumptions and identify which ones might be wrong.\n"
|
||||||
|
"3. Propose a fundamentally different approach rather than continuing to "
|
||||||
|
"patch the current one.\n"
|
||||||
|
"Do not keep applying small variations of the same fix. Step back, "
|
||||||
|
"reconsider the problem from scratch, and try a different strategy."
|
||||||
|
)
|
||||||
|
|
||||||
# Model name substrings that trigger tool-use enforcement guidance.
|
# Model name substrings that trigger tool-use enforcement guidance.
|
||||||
# Add new patterns here when a model family needs explicit steering.
|
# Add new patterns here when a model family needs explicit steering.
|
||||||
TOOL_USE_ENFORCEMENT_MODELS = ("gpt", "codex", "gemini", "gemma", "grok")
|
TOOL_USE_ENFORCEMENT_MODELS = ("gpt", "codex", "gemini", "gemma", "grok")
|
||||||
|
|
|
||||||
|
|
@ -94,7 +94,7 @@ from agent.model_metadata import (
|
||||||
from agent.context_compressor import ContextCompressor
|
from agent.context_compressor import ContextCompressor
|
||||||
from agent.subdirectory_hints import SubdirectoryHintTracker
|
from agent.subdirectory_hints import SubdirectoryHintTracker
|
||||||
from agent.prompt_caching import apply_anthropic_cache_control
|
from agent.prompt_caching import apply_anthropic_cache_control
|
||||||
from agent.prompt_builder import build_skills_system_prompt, build_context_files_prompt, build_environment_hints, load_soul_md, TOOL_USE_ENFORCEMENT_GUIDANCE, TOOL_USE_ENFORCEMENT_MODELS, DEVELOPER_ROLE_MODELS, GOOGLE_MODEL_OPERATIONAL_GUIDANCE, OPENAI_MODEL_EXECUTION_GUIDANCE
|
from agent.prompt_builder import build_skills_system_prompt, build_context_files_prompt, build_environment_hints, load_soul_md, TOOL_USE_ENFORCEMENT_GUIDANCE, TOOL_USE_ENFORCEMENT_MODELS, DEVELOPER_ROLE_MODELS, GOOGLE_MODEL_OPERATIONAL_GUIDANCE, OPENAI_MODEL_EXECUTION_GUIDANCE, STRATEGIC_REEVALUATION_GUIDANCE
|
||||||
from agent.usage_pricing import estimate_usage_cost, normalize_usage
|
from agent.usage_pricing import estimate_usage_cost, normalize_usage
|
||||||
from agent.display import (
|
from agent.display import (
|
||||||
KawaiiSpinner, build_tool_preview as _build_tool_preview,
|
KawaiiSpinner, build_tool_preview as _build_tool_preview,
|
||||||
|
|
@ -3350,6 +3350,13 @@ class AIAgent:
|
||||||
if "gpt" in _model_lower or "codex" in _model_lower:
|
if "gpt" in _model_lower or "codex" in _model_lower:
|
||||||
prompt_parts.append(OPENAI_MODEL_EXECUTION_GUIDANCE)
|
prompt_parts.append(OPENAI_MODEL_EXECUTION_GUIDANCE)
|
||||||
|
|
||||||
|
# Strategic re-evaluation guidance — injected for all models with
|
||||||
|
# tools, not just enforcement targets. Prevents fix-loops where the
|
||||||
|
# agent applies small variations of a failing approach forever.
|
||||||
|
# Ported from google-gemini/gemini-cli#25062.
|
||||||
|
if self.valid_tool_names:
|
||||||
|
prompt_parts.append(STRATEGIC_REEVALUATION_GUIDANCE)
|
||||||
|
|
||||||
# so it can refer the user to them rather than reinventing answers.
|
# so it can refer the user to them rather than reinventing answers.
|
||||||
|
|
||||||
# Note: ephemeral_system_prompt is NOT included here. It's injected at
|
# Note: ephemeral_system_prompt is NOT included here. It's injected at
|
||||||
|
|
|
||||||
|
|
@ -24,6 +24,7 @@ from agent.prompt_builder import (
|
||||||
TOOL_USE_ENFORCEMENT_GUIDANCE,
|
TOOL_USE_ENFORCEMENT_GUIDANCE,
|
||||||
TOOL_USE_ENFORCEMENT_MODELS,
|
TOOL_USE_ENFORCEMENT_MODELS,
|
||||||
OPENAI_MODEL_EXECUTION_GUIDANCE,
|
OPENAI_MODEL_EXECUTION_GUIDANCE,
|
||||||
|
STRATEGIC_REEVALUATION_GUIDANCE,
|
||||||
MEMORY_GUIDANCE,
|
MEMORY_GUIDANCE,
|
||||||
SESSION_SEARCH_GUIDANCE,
|
SESSION_SEARCH_GUIDANCE,
|
||||||
PLATFORM_HINTS,
|
PLATFORM_HINTS,
|
||||||
|
|
@ -1029,6 +1030,40 @@ class TestOpenAIModelExecutionGuidance:
|
||||||
assert len(OPENAI_MODEL_EXECUTION_GUIDANCE) > 100
|
assert len(OPENAI_MODEL_EXECUTION_GUIDANCE) > 100
|
||||||
|
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# Strategic re-evaluation guidance
|
||||||
|
# (ported from google-gemini/gemini-cli#25062)
|
||||||
|
# =========================================================================
|
||||||
|
|
||||||
|
|
||||||
|
class TestStrategicReevaluationGuidance:
|
||||||
|
"""Tests for the strategic re-evaluation guidance constant."""
|
||||||
|
|
||||||
|
def test_guidance_is_string(self):
|
||||||
|
assert isinstance(STRATEGIC_REEVALUATION_GUIDANCE, str)
|
||||||
|
assert len(STRATEGIC_REEVALUATION_GUIDANCE) > 50
|
||||||
|
|
||||||
|
def test_guidance_mentions_3_attempts(self):
|
||||||
|
"""Should trigger after 3 failed attempts."""
|
||||||
|
assert "3 times" in STRATEGIC_REEVALUATION_GUIDANCE
|
||||||
|
|
||||||
|
def test_guidance_requires_reread_task(self):
|
||||||
|
"""Step 1: re-read the original task."""
|
||||||
|
assert "original task" in STRATEGIC_REEVALUATION_GUIDANCE.lower()
|
||||||
|
|
||||||
|
def test_guidance_requires_questioning_assumptions(self):
|
||||||
|
"""Step 2: question current assumptions."""
|
||||||
|
assert "assumptions" in STRATEGIC_REEVALUATION_GUIDANCE.lower()
|
||||||
|
|
||||||
|
def test_guidance_requires_different_approach(self):
|
||||||
|
"""Step 3: propose a fundamentally different approach."""
|
||||||
|
assert "different approach" in STRATEGIC_REEVALUATION_GUIDANCE.lower()
|
||||||
|
|
||||||
|
def test_guidance_discourages_small_variations(self):
|
||||||
|
"""Should tell the agent not to keep doing the same thing."""
|
||||||
|
assert "small variations" in STRATEGIC_REEVALUATION_GUIDANCE.lower()
|
||||||
|
|
||||||
|
|
||||||
# =========================================================================
|
# =========================================================================
|
||||||
# Budget warning history stripping
|
# Budget warning history stripping
|
||||||
# =========================================================================
|
# =========================================================================
|
||||||
|
|
|
||||||
|
|
@ -705,6 +705,14 @@ class TestBuildSystemPrompt:
|
||||||
assert mock_skills.call_args.kwargs["available_tools"] == set(toolset_map)
|
assert mock_skills.call_args.kwargs["available_tools"] == set(toolset_map)
|
||||||
assert mock_skills.call_args.kwargs["available_toolsets"] == {"web", "skills"}
|
assert mock_skills.call_args.kwargs["available_toolsets"] == {"web", "skills"}
|
||||||
|
|
||||||
|
def test_strategic_reevaluation_guidance_present_when_tools_loaded(self, agent):
|
||||||
|
"""Strategic re-evaluation guidance should appear for any agent with tools."""
|
||||||
|
from agent.prompt_builder import STRATEGIC_REEVALUATION_GUIDANCE
|
||||||
|
|
||||||
|
prompt = agent._build_system_prompt()
|
||||||
|
assert "Strategic re-evaluation" in prompt
|
||||||
|
assert "3 times" in prompt
|
||||||
|
|
||||||
|
|
||||||
class TestToolUseEnforcementConfig:
|
class TestToolUseEnforcementConfig:
|
||||||
"""Tests for the agent.tool_use_enforcement config option."""
|
"""Tests for the agent.tool_use_enforcement config option."""
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue