mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-07-01 12:02:05 +00:00
* fix(agent): config-driven intent-ack continuation for all api_modes (#27881) The agent could end a turn after only stating intent ('I will run a health check...') without executing the announced tool call, forcing the user to re-prompt. A continuation guard that catches this and nudges the model to proceed already existed but was hard-gated to the codex_responses api_mode, so Gemini/Claude/OpenRouter turns never benefited. - New agent.intent_ack_continuation config (default 'auto' = codex-only, byte-stable for existing conversations). 'true'/model-list opts every api_mode in; 'false' disables. Mirrors agent.tool_use_enforcement's shape. - looks_like_codex_intermediate_ack gains require_workspace (default True). The opted-in path drops the codebase/filesystem requirement so general autonomous workflows (server ops, deploys, API calls) are caught, not just coding tasks. Future-ack + action-verb + short-content + no-prior-tool guards still apply; the 2-nudge-per-turn cap is unchanged. - Resolution centralized in intent_ack_continuation_mode (off/codex_only/all). * docs(infographic): intent-ack continuation (#27881)
160 lines
6.5 KiB
Python
160 lines
6.5 KiB
Python
"""Intent-ack continuation gate + detector behavior.

Covers the config-driven generalization of the codex intent-ack continuation
(issue #27881): the historical ``codex_responses``-only path is byte-stable
under the default ``"auto"`` mode, while an explicit ``true``/model-list opt-in
extends the "you announced an action but called no tool — keep going" nudge to
every api_mode and relaxes the codebase/workspace requirement so general
autonomous workflows ("I'll run a health check on the server") are caught.

These are invariant assertions about how the mode string and the detector
gates relate, not snapshots of the marker lists.
"""
|
|
|
|
from types import SimpleNamespace
|
|
from typing import Union
|
|
|
|
from agent.agent_runtime_helpers import (
|
|
intent_ack_continuation_enabled,
|
|
intent_ack_continuation_mode,
|
|
looks_like_codex_intermediate_ack,
|
|
)
|
|
|
|
|
|
def _agent(
    mode: Union[str, bool, list] = "auto",
    api_mode: str = "chat_completions",
    model: str = "anthropic/claude-sonnet-4",
) -> SimpleNamespace:
    """Build a minimal stand-in agent object for the intent-ack helpers.

    Args:
        mode: Value stored as ``_intent_ack_continuation`` (the raw config
            value: a string, a bool, or a list of model-name fragments).
        api_mode: Value stored as ``api_mode``.
        model: Value stored as ``model``.

    Returns:
        A ``SimpleNamespace`` carrying exactly those three attributes plus
        an identity ``_strip_think_blocks`` callable.
    """
    # _strip_think_blocks is a no-op for these plain-text fixtures.
    return SimpleNamespace(
        _intent_ack_continuation=mode,
        api_mode=api_mode,
        model=model,
        _strip_think_blocks=lambda c: c,
    )
|
|
|
|
|
|
# The reporter's exact repro (#27881): server-ops task, no filesystem reference.
REPRO_USER = (
    "check the current status of the server, grab the latest error logs, "
    "and let me know if there's anything critical"
)
REPRO_ACK = "I will start by running a health check command on the server to see its current status."

# The codex-coding case the detector was originally built for.
CODE_USER = "review the codebase in /app"
CODE_ACK = "Let me inspect the repository files first."
|
|
|
|
|
|
# ── mode resolution ────────────────────────────────────────────────────────
|
|
|
|
|
|
def test_auto_is_codex_only():
    """``"auto"`` yields ``codex_only`` on codex_responses, ``off`` on the other tested api_modes."""
    assert intent_ack_continuation_mode(_agent("auto", "codex_responses")) == "codex_only"
    assert intent_ack_continuation_mode(_agent("auto", "chat_completions")) == "off"
    assert intent_ack_continuation_mode(_agent("auto", "anthropic")) == "off"
|
|
|
|
|
|
def test_true_is_all_api_modes():
    """Boolean ``True`` and its truthy string spellings resolve to ``all``."""
    for am in ("chat_completions", "anthropic", "codex_responses"):
        # The message names the api_mode that broke the invariant.
        assert intent_ack_continuation_mode(_agent(True, am)) == "all", am
    # "ON" is included to check that matching is case-insensitive.
    for s in ("true", "always", "yes", "on", "ON"):
        assert intent_ack_continuation_mode(_agent(s, "chat_completions")) == "all", s
|
|
|
|
|
|
def test_false_is_off_even_for_codex():
    """Boolean ``False`` and its falsy string spellings resolve to ``off`` on codex_responses."""
    assert intent_ack_continuation_mode(_agent(False, "codex_responses")) == "off"
    for s in ("false", "never", "no", "off"):
        # The message names the spelling that broke the invariant.
        assert intent_ack_continuation_mode(_agent(s, "codex_responses")) == "off", s
|
|
|
|
|
|
def test_list_matches_model_substring():
    """A list value resolves to ``all`` when an entry occurs in the model name, else ``off``."""
    # "gemini" occurs in "google/gemini-3-pro".
    assert intent_ack_continuation_mode(
        _agent(["gemini", "qwen"], "chat_completions", "google/gemini-3-pro")
    ) == "all"
    # Neither entry occurs in the Claude model name.
    assert intent_ack_continuation_mode(
        _agent(["gemini", "qwen"], "chat_completions", "anthropic/claude-sonnet-4")
    ) == "off"
|
|
|
|
|
|
def test_unrecognised_value_falls_back_to_auto():
    """An unknown string resolves the same way ``"auto"`` does for each api_mode."""
    assert intent_ack_continuation_mode(_agent("garbage", "codex_responses")) == "codex_only"
    assert intent_ack_continuation_mode(_agent("garbage", "chat_completions")) == "off"
|
|
|
|
|
|
def test_missing_attr_defaults_to_auto():
    """An agent lacking ``_intent_ack_continuation`` resolves like ``"auto"``."""
    # Built by hand rather than via _agent() so the attribute is truly absent.
    bare = SimpleNamespace(api_mode="chat_completions", model="x", _strip_think_blocks=lambda c: c)
    assert intent_ack_continuation_mode(bare) == "off"
    bare_codex = SimpleNamespace(api_mode="codex_responses", model="x", _strip_think_blocks=lambda c: c)
    assert intent_ack_continuation_mode(bare_codex) == "codex_only"
|
|
|
|
|
|
def test_enabled_is_mode_not_off():
    """``intent_ack_continuation_enabled`` returns a strict bool for each tested config."""
    # Identity checks (``is True`` / ``is False``) pin the return type to
    # bool, not merely a truthy/falsy value.
    assert intent_ack_continuation_enabled(_agent(True, "chat_completions")) is True
    assert intent_ack_continuation_enabled(_agent("auto", "codex_responses")) is True
    assert intent_ack_continuation_enabled(_agent("auto", "chat_completions")) is False
    assert intent_ack_continuation_enabled(_agent(False, "codex_responses")) is False
|
|
|
|
|
|
# ── detector: workspace requirement ─────────────────────────────────────────
|
|
|
|
|
|
def test_codex_only_path_requires_workspace():
    """With ``require_workspace=True`` only the codebase ack fires, not the server-ops ack."""
    a = _agent("auto", "codex_responses")
    msgs = [{"role": "user", "content": CODE_USER}]
    # codebase ack matches workspace markers → fires
    assert looks_like_codex_intermediate_ack(a, CODE_USER, CODE_ACK, msgs, require_workspace=True)
    # server-ops ack has no filesystem reference → does NOT fire (historical scope)
    repro_msgs = [{"role": "user", "content": REPRO_USER}]
    assert not looks_like_codex_intermediate_ack(
        a, REPRO_USER, REPRO_ACK, repro_msgs, require_workspace=True
    )
|
|
|
|
|
|
def test_all_path_drops_workspace_requirement():
    """The #27881 fix: opted-in turns catch non-codebase intent acks."""
    a = _agent(True, "chat_completions")
    msgs = [{"role": "user", "content": REPRO_USER}]
    # Same server-ops ack as the codex-only test, but with the workspace
    # requirement switched off it must now be detected.
    assert looks_like_codex_intermediate_ack(
        a, REPRO_USER, REPRO_ACK, msgs, require_workspace=False
    )
|
|
|
|
|
|
# ── detector: guardrails that hold regardless of workspace ───────────────────
|
|
|
|
|
|
def test_real_final_answer_does_not_fire():
    """A completed-result reply is not flagged as an intermediate ack."""
    a = _agent(True, "chat_completions")
    # Reports an outcome rather than announcing a future action.
    final = "Done. The server is healthy and there are no critical errors in the logs."
    msgs = [{"role": "user", "content": REPRO_USER}]
    assert not looks_like_codex_intermediate_ack(a, REPRO_USER, final, msgs, require_workspace=False)
|
|
|
|
|
|
def test_conversational_reply_without_action_verb_does_not_fire():
    """A future-tense conversational reply with no action verb is not flagged."""
    a = _agent(True, "chat_completions")
    # Starts with "I'll" but announces no tool-style action.
    brainstorm = "I'll help you think through the tradeoffs here."
    msgs = [{"role": "user", "content": "help me decide"}]
    assert not looks_like_codex_intermediate_ack(
        a, "help me decide", brainstorm, msgs, require_workspace=False
    )
|
|
|
|
|
|
def test_does_not_fire_after_a_tool_already_ran():
    """The detector stays quiet when the history already contains a tool message."""
    a = _agent(True, "chat_completions")
    msgs = [
        {"role": "user", "content": REPRO_USER},
        # A tool-role message in the history marks that a tool already ran.
        {"role": "tool", "content": "health check result"},
    ]
    assert not looks_like_codex_intermediate_ack(
        a, REPRO_USER, REPRO_ACK, msgs, require_workspace=False
    )
|
|
|
|
|
|
def test_long_response_is_not_treated_as_an_ack():
    """An ack-style opening followed by a long body is not flagged."""
    a = _agent(True, "chat_completions")
    # Ack-like prefix padded with 1300 filler characters.
    long_ack = "I will run the check. " + ("x" * 1300)
    msgs = [{"role": "user", "content": REPRO_USER}]
    assert not looks_like_codex_intermediate_ack(
        a, REPRO_USER, long_ack, msgs, require_workspace=False
    )
|