mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-29 06:31:32 +00:00
feat(grok): apply OpenAI execution guidance to xAI Grok / xai-oauth models (#27797)
Grok models hit the same failure modes that OPENAI_MODEL_EXECUTION_GUIDANCE
addresses for GPT/Codex: claiming completion without tool calls
('to be honest, I didn't create the file yet'), suggesting workarounds
instead of using existing tools (proposing a folder-based memory system
when the memory tool exists), replying with plans instead of executing.
TOOL_USE_ENFORCEMENT_GUIDANCE was already injected for any model whose
name contains 'grok' (TOOL_USE_ENFORCEMENT_MODELS). This extends the
follow-on family-specific block — OPENAI_MODEL_EXECUTION_GUIDANCE
(tool_persistence / mandatory_tool_use / act_dont_ask / prerequisite_checks
/ verification / missing_context) — to grok-named models too.
The OPENAI_ prefix is retained for backwards compat with imports/tests;
docstring + inline comment now note that the body is family-agnostic and
the prefix reflects origin, not exclusivity.
Tests cover the OpenRouter slug (x-ai/grok-4.3) and the xai-oauth bare
name (grok-4.3), plus a negative control on claude.
E2E verified against a real AIAgent build of the system prompt for both
xai-oauth and openrouter grok models.
This commit is contained in:
parent
43e566f77e
commit
9b91377bec
3 changed files with 42 additions and 1 deletions
|
|
@@ -274,6 +274,10 @@ TOOL_USE_ENFORCEMENT_MODELS = ("gpt", "codex", "gemini", "gemma", "grok", "glm")
|
|||
# where GPT models abandon work on partial results, skip prerequisite lookups,
|
||||
# hallucinate instead of using tools, and declare "done" without verification.
|
||||
# Inspired by patterns from OpenAI's GPT-5.4 prompting guide & OpenClaw PR #38953.
|
||||
# Also applied to xAI Grok — same failure modes in practice (claims completion
|
||||
# without tool calls, suggests workarounds instead of using existing tools,
|
||||
# replies with plans/suggestions instead of executing). The body is
|
||||
# family-agnostic; the OPENAI_ prefix reflects origin, not exclusivity.
|
||||
OPENAI_MODEL_EXECUTION_GUIDANCE = (
|
||||
"# Execution discipline\n"
|
||||
"<tool_persistence>\n"
|
||||
|
|
|
|||
|
|
@@ -156,7 +156,10 @@ def build_system_prompt_parts(agent: Any, system_message: Optional[str] = None)
|
|||
stable_parts.append(GOOGLE_MODEL_OPERATIONAL_GUIDANCE)
|
||||
# OpenAI GPT/Codex execution discipline (tool persistence,
|
||||
# prerequisite checks, verification, anti-hallucination).
|
||||
- if "gpt" in _model_lower or "codex" in _model_lower:
|
||||
# Also applied to xAI Grok — same failure modes (claims completion
|
||||
# without tool calls, suggests workarounds instead of using
|
||||
# existing tools, replies with plans instead of executing).
|
||||
if "gpt" in _model_lower or "codex" in _model_lower or "grok" in _model_lower:
|
||||
stable_parts.append(OPENAI_MODEL_EXECUTION_GUIDANCE)
|
||||
|
||||
has_skills_tools = any(name in agent.valid_tool_names for name in ['skills_list', 'skill_view', 'skill_manage'])
|
||||
|
|
|
|||
|
|
@@ -1074,6 +1074,40 @@ class TestToolUseEnforcementConfig:
|
|||
prompt = agent._build_system_prompt()
|
||||
assert TOOL_USE_ENFORCEMENT_GUIDANCE not in prompt
|
||||
|
||||
def test_auto_injects_for_grok(self):
|
||||
"""xAI Grok / xai-oauth models hit the same enforcement path as GPT."""
|
||||
from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE
|
||||
agent = self._make_agent(model="x-ai/grok-4.3", tool_use_enforcement="auto")
|
||||
prompt = agent._build_system_prompt()
|
||||
assert TOOL_USE_ENFORCEMENT_GUIDANCE in prompt
|
||||
|
||||
def test_auto_injects_execution_guidance_for_grok(self):
|
||||
"""Grok also gets OPENAI_MODEL_EXECUTION_GUIDANCE (verification,
|
||||
mandatory_tool_use, act_dont_ask). Same failure modes as GPT in
|
||||
practice — claims completion without tool calls, suggests workarounds
|
||||
instead of using existing tools.
|
||||
"""
|
||||
from agent.prompt_builder import OPENAI_MODEL_EXECUTION_GUIDANCE
|
||||
agent = self._make_agent(model="x-ai/grok-4.3", tool_use_enforcement="auto")
|
||||
prompt = agent._build_system_prompt()
|
||||
assert OPENAI_MODEL_EXECUTION_GUIDANCE in prompt
|
||||
|
||||
def test_auto_injects_execution_guidance_for_xai_oauth_model(self):
|
||||
"""xai-oauth bare model names (no slash) also match the grok pattern."""
|
||||
from agent.prompt_builder import OPENAI_MODEL_EXECUTION_GUIDANCE
|
||||
agent = self._make_agent(model="grok-4.3", tool_use_enforcement="auto")
|
||||
prompt = agent._build_system_prompt()
|
||||
assert OPENAI_MODEL_EXECUTION_GUIDANCE in prompt
|
||||
|
||||
def test_auto_does_not_inject_execution_guidance_for_claude(self):
|
||||
"""Sanity: execution guidance stays off for non-targeted families."""
|
||||
from agent.prompt_builder import OPENAI_MODEL_EXECUTION_GUIDANCE
|
||||
agent = self._make_agent(
|
||||
model="anthropic/claude-sonnet-4", tool_use_enforcement="auto"
|
||||
)
|
||||
prompt = agent._build_system_prompt()
|
||||
assert OPENAI_MODEL_EXECUTION_GUIDANCE not in prompt
|
||||
|
||||
def test_true_forces_for_all_models(self):
|
||||
from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE
|
||||
agent = self._make_agent(model="anthropic/claude-sonnet-4", tool_use_enforcement=True)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue