mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-30 11:52:04 +00:00
* fix(agent): config-driven intent-ack continuation for all api_modes (#27881)

  The agent could end a turn after only stating intent ('I will run a health check...') without executing the announced tool call, forcing the user to re-prompt. A continuation guard that catches this and nudges the model to proceed already existed but was hard-gated to the codex_responses api_mode, so Gemini/Claude/OpenRouter turns never benefited.

  - New agent.intent_ack_continuation config (default 'auto' = codex-only, byte-stable for existing conversations). 'true'/model-list opts every api_mode in; 'false' disables. Mirrors agent.tool_use_enforcement's shape.
  - looks_like_codex_intermediate_ack gains require_workspace (default True). The opted-in path drops the codebase/filesystem requirement so general autonomous workflows (server ops, deploys, API calls) are caught, not just coding tasks. Future-ack + action-verb + short-content + no-prior-tool guards still apply; the 2-nudge-per-turn cap is unchanged.
  - Resolution centralized in intent_ack_continuation_mode (off/codex_only/all).

* docs(infographic): intent-ack continuation (#27881)
This commit is contained in:
parent
56abbaeac3
commit
d43e0cf304
7 changed files with 253 additions and 5 deletions
|
|
@ -1307,6 +1307,12 @@ def init_agent(
|
|||
_agent_section = {}
|
||||
agent._tool_use_enforcement = _agent_section.get("tool_use_enforcement", "auto")
|
||||
|
||||
# Intent-ack continuation config: "auto" (default — codex_responses only,
|
||||
# the historical gate), true (all api_modes), false (never), or a list of
|
||||
# model-name substrings. Resolved against the active api_mode/model in the
|
||||
# conversation loop's intent-ack block.
|
||||
agent._intent_ack_continuation = _agent_section.get("intent_ack_continuation", "auto")
|
||||
|
||||
# Universal task-completion guidance toggle. Default True. Surfaced
|
||||
# as a separate flag from tool_use_enforcement because the guidance
|
||||
# applies to ALL models, not just the model families enforcement
|
||||
|
|
|
|||
|
|
@ -2205,8 +2205,21 @@ def looks_like_codex_intermediate_ack(
|
|||
user_message: str,
|
||||
assistant_content: str,
|
||||
messages: List[Dict[str, Any]],
|
||||
require_workspace: bool = True,
|
||||
) -> bool:
|
||||
"""Detect a planning/ack message that should continue instead of ending the turn."""
|
||||
"""Detect a planning/ack message that should continue instead of ending the turn.
|
||||
|
||||
``require_workspace`` (default True) keeps the original codex-coding scope:
|
||||
the ack must reference a filesystem/repo workspace. The conversation loop
|
||||
passes ``require_workspace=False`` when the user has explicitly opted into
|
||||
intent-ack continuation for all api_modes (``agent.intent_ack_continuation``
|
||||
is ``true`` or a model-list), so general autonomous workflows ("I'll run a
|
||||
health check on the server", "I'll start the deployment") — which carry a
|
||||
future-ack and an action verb but no filesystem reference — are caught too.
|
||||
The future-ack + short-content + no-prior-tools + action-verb requirements
|
||||
always apply, which is what keeps conversational "I'll help you brainstorm"
|
||||
replies from tripping it.
|
||||
"""
|
||||
if any(isinstance(msg, dict) and msg.get("role") == "tool" for msg in messages):
|
||||
return False
|
||||
|
||||
|
|
@ -2259,17 +2272,67 @@ def looks_like_codex_intermediate_ack(
|
|||
"path",
|
||||
)
|
||||
|
||||
assistant_mentions_action = any(marker in assistant_text for marker in action_markers)
|
||||
if not assistant_mentions_action:
|
||||
return False
|
||||
|
||||
# Opted-in (all-api_mode) path: a future-ack + action verb + no prior tool
|
||||
# call is enough — the user asked us to keep going when the model only
|
||||
# announces intent, regardless of whether a filesystem is involved.
|
||||
if not require_workspace:
|
||||
return True
|
||||
|
||||
user_text = (user_message or "").strip().lower()
|
||||
user_targets_workspace = (
|
||||
any(marker in user_text for marker in workspace_markers)
|
||||
or "~/" in user_text
|
||||
or "/" in user_text
|
||||
)
|
||||
assistant_mentions_action = any(marker in assistant_text for marker in action_markers)
|
||||
assistant_targets_workspace = any(
|
||||
marker in assistant_text for marker in workspace_markers
|
||||
)
|
||||
return (user_targets_workspace or assistant_targets_workspace) and assistant_mentions_action
|
||||
return user_targets_workspace or assistant_targets_workspace
|
||||
|
||||
|
||||
def intent_ack_continuation_mode(agent) -> str:
    """Classify the resolved intent-ack continuation mode for this turn.

    Reads ``agent._intent_ack_continuation`` (missing attribute is treated as
    ``"auto"``) and resolves it against the agent's ``api_mode`` / ``model``.

    Returns one of:

    * ``"off"`` — never continue.
    * ``"codex_only"`` — historical scope: continue only on the
      ``codex_responses`` api_mode, and only for codebase/workspace acks
      (``require_workspace=True``).
    * ``"all"`` — user opted in for every api_mode; continue on any
      future-ack + action verb (``require_workspace=False``).

    Accepted config values:

    * ``"auto"`` (default) or any unrecognised value → ``"codex_only"`` when
      ``api_mode == "codex_responses"``, otherwise ``"off"``.
    * ``True`` / ``"true"`` / ``"always"`` / ``"yes"`` / ``"on"`` → ``"all"``.
    * ``False`` / ``"false"`` / ``"never"`` / ``"no"`` / ``"off"`` → ``"off"``.
    * ``list`` / ``tuple`` of model-name substrings → ``"all"`` when one
      matches the active model name, else ``"off"``.

    String values and patterns are compared case-insensitively with
    surrounding whitespace ignored. Non-string and empty patterns are skipped.
    """
    mode = getattr(agent, "_intent_ack_continuation", "auto")

    # True/False are singletons, so this is equivalent to ``mode is True`` /
    # ``mode is False`` and deliberately does not treat 0/1 as booleans.
    if isinstance(mode, bool):
        return "all" if mode else "off"

    if isinstance(mode, str):
        keyword = mode.strip().lower()
        if keyword in {"true", "always", "yes", "on"}:
            return "all"
        if keyword in {"false", "never", "no", "off"}:
            return "off"
    elif isinstance(mode, (list, tuple)):
        model_lower = str(getattr(agent, "model", "") or "").lower()
        patterns = (p.strip().lower() for p in mode if isinstance(p, str))
        # An empty pattern is a substring of every model name; skip it so a
        # stray "" entry in the config cannot silently opt in every model.
        return "all" if any(p and p in model_lower for p in patterns) else "off"

    # "auto" or any unrecognised value — historical codex-only behavior.
    return "codex_only" if getattr(agent, "api_mode", None) == "codex_responses" else "off"
|
||||
|
||||
|
||||
def intent_ack_continuation_enabled(agent) -> bool:
    """Report whether intent-ack continuation may fire at all for this turn.

    This is only the on/off gate: it is true for every resolved mode except
    ``"off"``. The per-turn ``codex_ack_continuations < 2`` cap and the
    ``looks_like_codex_intermediate_ack`` detector remain the caller's job.
    Callers that also need the workspace requirement should call
    ``intent_ack_continuation_mode`` themselves (``"codex_only"`` ⇒
    require_workspace=True, ``"all"`` ⇒ False).
    """
    resolved_mode = intent_ack_continuation_mode(agent)
    return resolved_mode != "off"
|
||||
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -4637,14 +4637,20 @@ def run_conversation(
|
|||
# status from earlier failed attempts in this turn.
|
||||
agent._clear_status_buffer()
|
||||
|
||||
from agent.agent_runtime_helpers import (
|
||||
intent_ack_continuation_mode,
|
||||
)
|
||||
|
||||
_ack_mode = intent_ack_continuation_mode(agent)
|
||||
if (
|
||||
agent.api_mode == "codex_responses"
|
||||
_ack_mode != "off"
|
||||
and agent.valid_tool_names
|
||||
and codex_ack_continuations < 2
|
||||
and agent._looks_like_codex_intermediate_ack(
|
||||
user_message=user_message,
|
||||
assistant_content=final_response,
|
||||
messages=messages,
|
||||
require_workspace=(_ack_mode == "codex_only"),
|
||||
)
|
||||
):
|
||||
codex_ack_continuations += 1
|
||||
|
|
|
|||
|
|
@ -939,6 +939,16 @@ DEFAULT_CONFIG = {
|
|||
# (force on/off for all models), or a list of model-name substrings
|
||||
# to match (e.g. ["gpt", "codex", "gemini", "qwen"]).
|
||||
"tool_use_enforcement": "auto",
|
||||
# Intent-ack continuation: when the model opens a turn by narrating an
|
||||
# action it will take ("I'll go check the logs...") but emits no tool
|
||||
# call, intercept the turn-end, inject a "continue now, execute the
|
||||
# tools" nudge, and loop instead of ending the turn (capped at 2 nudges
|
||||
# per turn). This is the corrective sibling of tool_use_enforcement (the
|
||||
# preventive prompt-side guard). Values: "auto" (default — fires only on
|
||||
# the codex_responses api_mode, the historical behavior), true (all
|
||||
# api_modes — fixes the Gemini/Claude "stops after stating intent" case),
|
||||
# false (never), or a list of model-name substrings to match.
|
||||
"intent_ack_continuation": "auto",
|
||||
# Universal "finish the job" guidance — short prompt block applied to
|
||||
# all models that targets two cross-family failure modes: (1) stopping
|
||||
# after a stub instead of finishing the artifact, (2) fabricating
|
||||
|
|
|
|||
BIN
infographic/intent-ack-continuation/infographic.png
Normal file
BIN
infographic/intent-ack-continuation/infographic.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 1.2 MiB |
|
|
@ -1424,10 +1424,13 @@ class AIAgent:
|
|||
user_message: str,
|
||||
assistant_content: str,
|
||||
messages: List[Dict[str, Any]],
|
||||
require_workspace: bool = True,
|
||||
) -> bool:
|
||||
"""Forwarder — see ``agent.agent_runtime_helpers.looks_like_codex_intermediate_ack``."""
|
||||
from agent.agent_runtime_helpers import looks_like_codex_intermediate_ack
|
||||
return looks_like_codex_intermediate_ack(self, user_message, assistant_content, messages)
|
||||
return looks_like_codex_intermediate_ack(
|
||||
self, user_message, assistant_content, messages, require_workspace
|
||||
)
|
||||
|
||||
def _extract_reasoning(self, assistant_message) -> Optional[str]:
|
||||
"""Forwarder — see ``agent.agent_runtime_helpers.extract_reasoning``."""
|
||||
|
|
|
|||
160
tests/agent/test_intent_ack_continuation.py
Normal file
160
tests/agent/test_intent_ack_continuation.py
Normal file
|
|
@ -0,0 +1,160 @@
|
|||
"""Intent-ack continuation gate + detector behavior.
|
||||
|
||||
Covers the config-driven generalization of the codex intent-ack continuation
|
||||
(issue #27881): the historical ``codex_responses``-only path is byte-stable
|
||||
under the default ``"auto"`` mode, while an explicit ``true``/model-list opt-in
|
||||
extends the "you announced an action but called no tool — keep going" nudge to
|
||||
every api_mode and relaxes the codebase/workspace requirement so general
|
||||
autonomous workflows ("I'll run a health check on the server") are caught.
|
||||
|
||||
These are invariant assertions about how the mode string and the detector
|
||||
gates relate, not snapshots of the marker lists.
|
||||
"""
|
||||
|
||||
from types import SimpleNamespace
|
||||
from typing import Union
|
||||
|
||||
from agent.agent_runtime_helpers import (
|
||||
intent_ack_continuation_enabled,
|
||||
intent_ack_continuation_mode,
|
||||
looks_like_codex_intermediate_ack,
|
||||
)
|
||||
|
||||
|
||||
def _agent(
|
||||
mode: Union[str, bool, list] = "auto",
|
||||
api_mode="chat_completions",
|
||||
model="anthropic/claude-sonnet-4",
|
||||
):
|
||||
# _strip_think_blocks is a no-op for these plain-text fixtures.
|
||||
return SimpleNamespace(
|
||||
_intent_ack_continuation=mode,
|
||||
api_mode=api_mode,
|
||||
model=model,
|
||||
_strip_think_blocks=lambda c: c,
|
||||
)
|
||||
|
||||
|
||||
# The reporter's exact repro (#27881): a server-ops task whose user message
# and assistant ack contain no filesystem reference.
REPRO_USER = (
    "check the current status of the server, grab the latest error logs, "
    "and let me know if there's anything critical"
)
# Assistant reply that only announces an action; no tool call accompanies it.
REPRO_ACK = "I will start by running a health check command on the server to see its current status."

# The codex-coding case the detector was originally built for: the request
# and the ack both reference a codebase/repository.
CODE_USER = "review the codebase in /app"
CODE_ACK = "Let me inspect the repository files first."
|
||||
|
||||
|
||||
# ── mode resolution ────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def test_auto_is_codex_only():
    """Default "auto" resolves to codex_only on codex_responses, off elsewhere."""
    expected_by_api_mode = {
        "codex_responses": "codex_only",
        "chat_completions": "off",
        "anthropic": "off",
    }
    for api_mode, expected in expected_by_api_mode.items():
        assert intent_ack_continuation_mode(_agent("auto", api_mode)) == expected
|
||||
|
||||
|
||||
def test_true_is_all_api_modes():
    """Boolean True and its string spellings opt every api_mode in."""
    bool_cases = [(True, am) for am in ("chat_completions", "anthropic", "codex_responses")]
    string_cases = [(s, "chat_completions") for s in ("true", "always", "yes", "on", "ON")]
    for value, api_mode in bool_cases + string_cases:
        assert intent_ack_continuation_mode(_agent(value, api_mode)) == "all"
|
||||
|
||||
|
||||
def test_false_is_off_even_for_codex():
    """An explicit opt-out disables the continuation even on codex_responses."""
    for value in (False, "false", "never", "no", "off"):
        resolved = intent_ack_continuation_mode(_agent(value, "codex_responses"))
        assert resolved == "off"
|
||||
|
||||
|
||||
def test_list_matches_model_substring():
    """A model list resolves to "all" only when a substring hits the model name."""
    patterns = ["gemini", "qwen"]
    matching = _agent(patterns, "chat_completions", "google/gemini-3-pro")
    non_matching = _agent(patterns, "chat_completions", "anthropic/claude-sonnet-4")

    assert intent_ack_continuation_mode(matching) == "all"
    assert intent_ack_continuation_mode(non_matching) == "off"
|
||||
|
||||
|
||||
def test_unrecognised_value_falls_back_to_auto():
    """Unknown config strings behave exactly like "auto"."""
    on_codex = _agent("garbage", "codex_responses")
    on_chat = _agent("garbage", "chat_completions")

    assert intent_ack_continuation_mode(on_codex) == "codex_only"
    assert intent_ack_continuation_mode(on_chat) == "off"
|
||||
|
||||
|
||||
def test_missing_attr_defaults_to_auto():
    """An agent without ``_intent_ack_continuation`` is treated as "auto"."""

    def _bare(api_mode):
        # Deliberately omits _intent_ack_continuation.
        return SimpleNamespace(api_mode=api_mode, model="x", _strip_think_blocks=lambda c: c)

    assert intent_ack_continuation_mode(_bare("chat_completions")) == "off"
    assert intent_ack_continuation_mode(_bare("codex_responses")) == "codex_only"
|
||||
|
||||
|
||||
def test_enabled_is_mode_not_off():
    """The boolean gate is True exactly when the resolved mode is not "off"."""
    should_be_enabled = [(True, "chat_completions"), ("auto", "codex_responses")]
    should_be_disabled = [("auto", "chat_completions"), (False, "codex_responses")]

    for mode, api_mode in should_be_enabled:
        assert intent_ack_continuation_enabled(_agent(mode, api_mode)) is True
    for mode, api_mode in should_be_disabled:
        assert intent_ack_continuation_enabled(_agent(mode, api_mode)) is False
|
||||
|
||||
|
||||
# ── detector: workspace requirement ─────────────────────────────────────────
|
||||
|
||||
|
||||
def test_codex_only_path_requires_workspace():
    """With require_workspace=True only workspace-related acks fire."""
    agent = _agent("auto", "codex_responses")

    # Codebase ack matches the workspace markers, so the detector fires.
    code_history = [{"role": "user", "content": CODE_USER}]
    fires_for_code = looks_like_codex_intermediate_ack(
        agent, CODE_USER, CODE_ACK, code_history, require_workspace=True
    )
    assert fires_for_code

    # Server-ops ack has no filesystem reference, so the historical scope
    # does NOT fire.
    ops_history = [{"role": "user", "content": REPRO_USER}]
    fires_for_ops = looks_like_codex_intermediate_ack(
        agent, REPRO_USER, REPRO_ACK, ops_history, require_workspace=True
    )
    assert not fires_for_ops
|
||||
|
||||
|
||||
def test_all_path_drops_workspace_requirement():
    """The #27881 fix: opted-in turns catch non-codebase intent acks."""
    agent = _agent(True, "chat_completions")
    history = [{"role": "user", "content": REPRO_USER}]

    fired = looks_like_codex_intermediate_ack(
        agent, REPRO_USER, REPRO_ACK, history, require_workspace=False
    )
    assert fired
|
||||
|
||||
|
||||
# ── detector: guardrails that hold regardless of workspace ───────────────────
|
||||
|
||||
|
||||
def test_real_final_answer_does_not_fire():
    """A genuine final answer is not mistaken for an intent ack."""
    agent = _agent(True, "chat_completions")
    history = [{"role": "user", "content": REPRO_USER}]
    final_answer = "Done. The server is healthy and there are no critical errors in the logs."

    fired = looks_like_codex_intermediate_ack(
        agent, REPRO_USER, final_answer, history, require_workspace=False
    )
    assert not fired
|
||||
|
||||
|
||||
def test_conversational_reply_without_action_verb_does_not_fire():
    """A future-tense but purely conversational reply must not trigger a nudge."""
    agent = _agent(True, "chat_completions")
    user_text = "help me decide"
    reply = "I'll help you think through the tradeoffs here."
    history = [{"role": "user", "content": user_text}]

    fired = looks_like_codex_intermediate_ack(
        agent, user_text, reply, history, require_workspace=False
    )
    assert not fired
|
||||
|
||||
|
||||
def test_does_not_fire_after_a_tool_already_ran():
    """Any prior tool message in the history suppresses the detector."""
    agent = _agent(True, "chat_completions")
    user_turn = {"role": "user", "content": REPRO_USER}
    tool_turn = {"role": "tool", "content": "health check result"}
    history = [user_turn, tool_turn]

    fired = looks_like_codex_intermediate_ack(
        agent, REPRO_USER, REPRO_ACK, history, require_workspace=False
    )
    assert not fired
|
||||
|
||||
|
||||
def test_long_response_is_not_treated_as_an_ack():
    """A long reply is not classified as a short intent ack."""
    agent = _agent(True, "chat_completions")
    history = [{"role": "user", "content": REPRO_USER}]
    padding = "x" * 1300
    long_reply = "I will run the check. " + padding

    fired = looks_like_codex_intermediate_ack(
        agent, REPRO_USER, long_reply, history, require_workspace=False
    )
    assert not fired
|
||||
Loading…
Add table
Add a link
Reference in a new issue