mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-29 06:31:32 +00:00
fix(agent): add qwen and deepseek to TOOL_USE_ENFORCEMENT_MODELS
Qwen3.x and DeepSeek-V3.x default to chatty/hallucinatory tool use without enforcement steering — agents narrate "calling tool X" without actually emitting a tool call, or run partial loops. Both model families fit the same failure pattern that TOOL_USE_ENFORCEMENT_GUIDANCE was already injected for (gpt, codex, gemini, gemma, grok, glm).

Co-authored-by: briandevans <252620095+briandevans@users.noreply.github.com>

Squashed salvage of:
- 403e567ce fix(agent): add qwen and deepseek to TOOL_USE_ENFORCEMENT_MODELS
- 9433eabe7 test(agent): use realistic qwen-plus identifier in enforcement test

Fixes #28079.
This commit is contained in:
parent
4229facc01
commit
756900723a
3 changed files with 21 additions and 1 deletions
|
|
@ -268,7 +268,7 @@ TOOL_USE_ENFORCEMENT_GUIDANCE = (
|
|||
|
||||
# Model name substrings that trigger tool-use enforcement guidance.
|
||||
# Add new patterns here when a model family needs explicit steering.
|
||||
TOOL_USE_ENFORCEMENT_MODELS = ("gpt", "codex", "gemini", "gemma", "grok", "glm")
|
||||
TOOL_USE_ENFORCEMENT_MODELS = ("gpt", "codex", "gemini", "gemma", "grok", "glm", "qwen", "deepseek")
|
||||
|
||||
# OpenAI GPT/Codex-specific execution guidance. Addresses known failure modes
|
||||
# where GPT models abandon work on partial results, skip prerequisite lookups,
|
||||
|
|
|
|||
|
|
@ -1144,6 +1144,12 @@ class TestToolUseEnforcementGuidance:
|
|||
def test_enforcement_models_includes_grok(self):
|
||||
assert "grok" in TOOL_USE_ENFORCEMENT_MODELS
|
||||
|
||||
def test_enforcement_models_includes_qwen(self):
|
||||
assert "qwen" in TOOL_USE_ENFORCEMENT_MODELS
|
||||
|
||||
def test_enforcement_models_includes_deepseek(self):
|
||||
assert "deepseek" in TOOL_USE_ENFORCEMENT_MODELS
|
||||
|
||||
def test_enforcement_models_is_tuple(self):
|
||||
assert isinstance(TOOL_USE_ENFORCEMENT_MODELS, tuple)
|
||||
|
||||
|
|
|
|||
|
|
@ -1103,6 +1103,20 @@ class TestToolUseEnforcementConfig:
|
|||
prompt = agent._build_system_prompt()
|
||||
assert TOOL_USE_ENFORCEMENT_GUIDANCE in prompt
|
||||
|
||||
def test_auto_injects_for_qwen(self):
|
||||
"""Qwen models default to chatty/hallucinatory tool use without enforcement."""
|
||||
from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE
|
||||
agent = self._make_agent(model="qwen/qwen-plus", tool_use_enforcement="auto")
|
||||
prompt = agent._build_system_prompt()
|
||||
assert TOOL_USE_ENFORCEMENT_GUIDANCE in prompt
|
||||
|
||||
def test_auto_injects_for_deepseek(self):
|
||||
"""DeepSeek models default to chatty/hallucinatory tool use without enforcement."""
|
||||
from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE
|
||||
agent = self._make_agent(model="deepseek/deepseek-r1", tool_use_enforcement="auto")
|
||||
prompt = agent._build_system_prompt()
|
||||
assert TOOL_USE_ENFORCEMENT_GUIDANCE in prompt
|
||||
|
||||
def test_auto_injects_execution_guidance_for_grok(self):
|
||||
"""Grok also gets OPENAI_MODEL_EXECUTION_GUIDANCE (verification,
|
||||
mandatory_tool_use, act_dont_ask). Same failure modes as GPT in
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue