fix(agent): config-driven intent-ack continuation for all api_modes (#27881) (#53943)

* fix(agent): config-driven intent-ack continuation for all api_modes (#27881)

The agent could end a turn after only stating intent ('I will run a health
check...') without executing the announced tool call, forcing the user to
re-prompt. A continuation guard that catches this and nudges the model to
proceed already existed but was hard-gated to the codex_responses api_mode,
so Gemini/Claude/OpenRouter turns never benefited.

- New agent.intent_ack_continuation config (default 'auto' = codex-only,
  byte-stable for existing conversations). 'true'/model-list opts every
  api_mode in; 'false' disables. Mirrors agent.tool_use_enforcement's shape.
- looks_like_codex_intermediate_ack gains require_workspace (default True).
  The opted-in path drops the codebase/filesystem requirement so general
  autonomous workflows (server ops, deploys, API calls) are caught, not just
  coding tasks. Future-ack + action-verb + short-content + no-prior-tool
  guards still apply; the 2-nudge-per-turn cap is unchanged.
- Resolution centralized in intent_ack_continuation_mode (off/codex_only/all).

* docs(infographic): intent-ack continuation (#27881)
This commit is contained in:
Teknium 2026-06-27 20:46:00 -07:00 committed by GitHub
parent 56abbaeac3
commit d43e0cf304
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 253 additions and 5 deletions

View file

@ -1307,6 +1307,12 @@ def init_agent(
_agent_section = {}
agent._tool_use_enforcement = _agent_section.get("tool_use_enforcement", "auto")
# Intent-ack continuation config: "auto" (default — codex_responses only,
# the historical gate), true (all api_modes), false (never), or a list of
# model-name substrings. Resolved against the active api_mode/model in the
# conversation loop's intent-ack block.
agent._intent_ack_continuation = _agent_section.get("intent_ack_continuation", "auto")
# Universal task-completion guidance toggle. Default True. Surfaced
# as a separate flag from tool_use_enforcement because the guidance
# applies to ALL models, not just the model families enforcement

View file

@ -2205,8 +2205,21 @@ def looks_like_codex_intermediate_ack(
user_message: str,
assistant_content: str,
messages: List[Dict[str, Any]],
require_workspace: bool = True,
) -> bool:
"""Detect a planning/ack message that should continue instead of ending the turn."""
"""Detect a planning/ack message that should continue instead of ending the turn.
``require_workspace`` (default True) keeps the original codex-coding scope:
the ack must reference a filesystem/repo workspace. The conversation loop
passes ``require_workspace=False`` when the user has explicitly opted into
intent-ack continuation for all api_modes (``agent.intent_ack_continuation``
is ``true`` or a model-list), so general autonomous workflows ("I'll run a
health check on the server", "I'll start the deployment") — which carry a
future-ack and an action verb but no filesystem reference — are caught too.
The future-ack + short-content + no-prior-tools + action-verb requirements
always apply, which is what keeps conversational "I'll help you brainstorm"
replies from tripping it.
"""
if any(isinstance(msg, dict) and msg.get("role") == "tool" for msg in messages):
return False
@ -2259,17 +2272,67 @@ def looks_like_codex_intermediate_ack(
"path",
)
assistant_mentions_action = any(marker in assistant_text for marker in action_markers)
if not assistant_mentions_action:
return False
# Opted-in (all-api_mode) path: a future-ack + action verb + no prior tool
# call is enough — the user asked us to keep going when the model only
# announces intent, regardless of whether a filesystem is involved.
if not require_workspace:
return True
user_text = (user_message or "").strip().lower()
user_targets_workspace = (
any(marker in user_text for marker in workspace_markers)
or "~/" in user_text
or "/" in user_text
)
assistant_mentions_action = any(marker in assistant_text for marker in action_markers)
assistant_targets_workspace = any(
marker in assistant_text for marker in workspace_markers
)
return (user_targets_workspace or assistant_targets_workspace) and assistant_mentions_action
return user_targets_workspace or assistant_targets_workspace
def intent_ack_continuation_mode(agent) -> str:
    """Classify the resolved intent-ack continuation mode for this turn.

    Reads ``agent._intent_ack_continuation`` (treated as ``"auto"`` when the
    attribute is absent) and resolves it against the agent's active
    ``api_mode`` and ``model``.

    Returns one of:

    * ``"off"`` -- never continue.
    * ``"codex_only"`` -- historical scope: continue only on the
      ``codex_responses`` api_mode, and only for codebase/workspace acks
      (``require_workspace=True``).
    * ``"all"`` -- user opted in for every api_mode; continue on any
      future-ack + action verb (``require_workspace=False``).

    Mirrors the four-mode shape of ``agent.tool_use_enforcement``:

    * ``"auto"`` (default) or any unrecognised value -> ``"codex_only"`` on
      the ``codex_responses`` api_mode, ``"off"`` everywhere else.
    * ``True`` / ``"true"`` / ``"always"`` / ``"yes"`` / ``"on"`` -> ``"all"``.
    * ``False`` / ``"false"`` / ``"never"`` / ``"no"`` / ``"off"`` -> ``"off"``.
    * ``list`` -> ``"all"`` when one of its string entries is a substring of
      the active model name (case-insensitive), else ``"off"``.

    String values and list entries are compared after stripping surrounding
    whitespace. Empty list entries are ignored: the empty string is a
    substring of every model name, so a stray ``""`` would otherwise opt
    every model in.
    """
    mode = getattr(agent, "_intent_ack_continuation", "auto")

    if mode is True:
        return "all"
    if mode is False:
        return "off"

    if isinstance(mode, str):
        token = mode.strip().lower()
        if token in {"true", "always", "yes", "on"}:
            return "all"
        if token in {"false", "never", "no", "off"}:
            return "off"

    if isinstance(mode, list):
        model_lower = (getattr(agent, "model", None) or "").lower()
        patterns = (p.strip().lower() for p in mode if isinstance(p, str))
        # ``p and ...`` drops empty patterns, which would match any model.
        matched = any(p and p in model_lower for p in patterns)
        return "all" if matched else "off"

    # "auto" or any unrecognised value: historical codex-only behavior.
    if getattr(agent, "api_mode", None) == "codex_responses":
        return "codex_only"
    return "off"
def intent_ack_continuation_enabled(agent) -> bool:
    """Report whether intent-ack continuation may fire at all for this turn.

    This is only the on/off gate: it is true whenever
    ``intent_ack_continuation_mode`` resolves to anything other than
    ``"off"``. The ``codex_ack_continuations < 2`` per-turn cap and the
    ``looks_like_codex_intermediate_ack`` detector remain the caller's
    responsibility. Callers that also need to know whether the workspace
    requirement applies should call ``intent_ack_continuation_mode`` directly
    (``"codex_only"`` means ``require_workspace=True``; ``"all"`` means
    ``require_workspace=False``).
    """
    resolved_mode = intent_ack_continuation_mode(agent)
    return resolved_mode != "off"

View file

@ -4637,14 +4637,20 @@ def run_conversation(
# status from earlier failed attempts in this turn.
agent._clear_status_buffer()
from agent.agent_runtime_helpers import (
intent_ack_continuation_mode,
)
_ack_mode = intent_ack_continuation_mode(agent)
if (
agent.api_mode == "codex_responses"
_ack_mode != "off"
and agent.valid_tool_names
and codex_ack_continuations < 2
and agent._looks_like_codex_intermediate_ack(
user_message=user_message,
assistant_content=final_response,
messages=messages,
require_workspace=(_ack_mode == "codex_only"),
)
):
codex_ack_continuations += 1

View file

@ -939,6 +939,16 @@ DEFAULT_CONFIG = {
# (force on/off for all models), or a list of model-name substrings
# to match (e.g. ["gpt", "codex", "gemini", "qwen"]).
"tool_use_enforcement": "auto",
# Intent-ack continuation: when the model opens a turn by narrating an
# action it will take ("I'll go check the logs...") but emits no tool
# call, intercept the turn-end, inject a "continue now, execute the
# tools" nudge, and loop instead of ending the turn (capped at 2 nudges
# per turn). This is the corrective sibling of tool_use_enforcement (the
# preventive prompt-side guard). Values: "auto" (default — fires only on
# the codex_responses api_mode, the historical behavior), true (all
# api_modes — fixes the Gemini/Claude "stops after stating intent" case),
# false (never), or a list of model-name substrings to match.
"intent_ack_continuation": "auto",
# Universal "finish the job" guidance — short prompt block applied to
# all models that targets two cross-family failure modes: (1) stopping
# after a stub instead of finishing the artifact, (2) fabricating

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.2 MiB

View file

@ -1424,10 +1424,13 @@ class AIAgent:
user_message: str,
assistant_content: str,
messages: List[Dict[str, Any]],
require_workspace: bool = True,
) -> bool:
"""Forwarder — see ``agent.agent_runtime_helpers.looks_like_codex_intermediate_ack``."""
from agent.agent_runtime_helpers import looks_like_codex_intermediate_ack
return looks_like_codex_intermediate_ack(self, user_message, assistant_content, messages)
return looks_like_codex_intermediate_ack(
self, user_message, assistant_content, messages, require_workspace
)
def _extract_reasoning(self, assistant_message) -> Optional[str]:
"""Forwarder — see ``agent.agent_runtime_helpers.extract_reasoning``."""

View file

@ -0,0 +1,160 @@
"""Intent-ack continuation gate + detector behavior.
Covers the config-driven generalization of the codex intent-ack continuation
(issue #27881): the historical ``codex_responses``-only path is byte-stable
under the default ``"auto"`` mode, while an explicit ``true``/model-list opt-in
extends the "you announced an action but called no tool — keep going" nudge to
every api_mode and relaxes the codebase/workspace requirement so general
autonomous workflows ("I'll run a health check on the server") are caught.
These are invariant assertions about how the mode string and the detector
gates relate, not snapshots of the marker lists.
"""
from types import SimpleNamespace
from typing import Union
from agent.agent_runtime_helpers import (
intent_ack_continuation_enabled,
intent_ack_continuation_mode,
looks_like_codex_intermediate_ack,
)
def _agent(
    mode: Union[str, bool, list] = "auto",
    api_mode="chat_completions",
    model="anthropic/claude-sonnet-4",
):
    """Build a minimal stand-in agent exposing only the attributes under test.

    ``mode`` is stored as ``_intent_ack_continuation``; ``api_mode`` and
    ``model`` are stored under their own names.
    """

    def _passthrough(content):
        # _strip_think_blocks is a no-op for these plain-text fixtures.
        return content

    attributes = {
        "_intent_ack_continuation": mode,
        "api_mode": api_mode,
        "model": model,
        "_strip_think_blocks": _passthrough,
    }
    return SimpleNamespace(**attributes)
# Shared fixtures for the detector tests below.
#
# The reporter's exact repro (#27881): a server-ops task whose request and
# acknowledgement carry no filesystem reference. The tests below expect the
# detector to fire on this pair only when require_workspace=False.
REPRO_USER = (
"check the current status of the server, grab the latest error logs, "
"and let me know if there's anything critical"
)
REPRO_ACK = "I will start by running a health check command on the server to see its current status."
# The codex-coding case the detector was originally built for: the tests
# below expect it to fire on this pair with require_workspace=True.
CODE_USER = "review the codebase in /app"
CODE_ACK = "Let me inspect the repository files first."
# ── mode resolution ────────────────────────────────────────────────────────
def test_auto_is_codex_only():
    """``"auto"`` resolves to codex_only on codex_responses and off elsewhere."""
    expected_by_api_mode = (
        ("codex_responses", "codex_only"),
        ("chat_completions", "off"),
        ("anthropic", "off"),
    )
    for api_mode, expected in expected_by_api_mode:
        assert intent_ack_continuation_mode(_agent("auto", api_mode)) == expected
def test_true_is_all_api_modes():
    """Boolean ``True`` and its string spellings resolve to "all" everywhere."""
    boolean_cases = [
        (True, api_mode)
        for api_mode in ("chat_completions", "anthropic", "codex_responses")
    ]
    string_cases = [
        (spelling, "chat_completions")
        for spelling in ("true", "always", "yes", "on", "ON")
    ]
    for mode, api_mode in boolean_cases + string_cases:
        assert intent_ack_continuation_mode(_agent(mode, api_mode)) == "all"
def test_false_is_off_even_for_codex():
    """Boolean ``False`` and its string spellings disable the codex path too."""
    for disabled in (False, "false", "never", "no", "off"):
        resolved = intent_ack_continuation_mode(_agent(disabled, "codex_responses"))
        assert resolved == "off"
def test_list_matches_model_substring():
    """A model-list opts in only the models whose name contains an entry."""
    patterns = ["gemini", "qwen"]

    matching = _agent(patterns, "chat_completions", "google/gemini-3-pro")
    assert intent_ack_continuation_mode(matching) == "all"

    non_matching = _agent(patterns, "chat_completions", "anthropic/claude-sonnet-4")
    assert intent_ack_continuation_mode(non_matching) == "off"
def test_unrecognised_value_falls_back_to_auto():
    """An unknown config string resolves the same way ``"auto"`` does."""
    for api_mode, expected in (
        ("codex_responses", "codex_only"),
        ("chat_completions", "off"),
    ):
        assert intent_ack_continuation_mode(_agent("garbage", api_mode)) == expected
def test_missing_attr_defaults_to_auto():
    """An agent without ``_intent_ack_continuation`` resolves like ``"auto"``."""
    for api_mode, expected in (
        ("chat_completions", "off"),
        ("codex_responses", "codex_only"),
    ):
        bare = SimpleNamespace(
            api_mode=api_mode,
            model="x",
            _strip_think_blocks=lambda c: c,
        )
        assert intent_ack_continuation_mode(bare) == expected
def test_enabled_is_mode_not_off():
    """The boolean gate is true exactly when the resolved mode is not "off"."""
    expectations = (
        (True, "chat_completions", True),
        ("auto", "codex_responses", True),
        ("auto", "chat_completions", False),
        (False, "codex_responses", False),
    )
    for mode, api_mode, expected in expectations:
        assert intent_ack_continuation_enabled(_agent(mode, api_mode)) is expected
# ── detector: workspace requirement ─────────────────────────────────────────
def test_codex_only_path_requires_workspace():
    """With ``require_workspace=True`` only workspace-referencing acks fire."""
    codex_agent = _agent("auto", "codex_responses")

    # Codebase ack matches the workspace markers, so the detector fires.
    code_history = [{"role": "user", "content": CODE_USER}]
    code_fired = looks_like_codex_intermediate_ack(
        codex_agent, CODE_USER, CODE_ACK, code_history, require_workspace=True
    )
    assert code_fired

    # Server-ops ack has no filesystem reference, so it does NOT fire
    # (historical scope).
    ops_history = [{"role": "user", "content": REPRO_USER}]
    ops_fired = looks_like_codex_intermediate_ack(
        codex_agent, REPRO_USER, REPRO_ACK, ops_history, require_workspace=True
    )
    assert not ops_fired
def test_all_path_drops_workspace_requirement():
    """The #27881 fix: opted-in turns catch non-codebase intent acks."""
    opted_in = _agent(True, "chat_completions")
    history = [{"role": "user", "content": REPRO_USER}]
    fired = looks_like_codex_intermediate_ack(
        opted_in, REPRO_USER, REPRO_ACK, history, require_workspace=False
    )
    assert fired
# ── detector: guardrails that hold regardless of workspace ───────────────────
def test_real_final_answer_does_not_fire():
    """A completed answer is not mistaken for an intent acknowledgement."""
    opted_in = _agent(True, "chat_completions")
    history = [{"role": "user", "content": REPRO_USER}]
    final_answer = (
        "Done. The server is healthy and there are no critical errors in the logs."
    )
    fired = looks_like_codex_intermediate_ack(
        opted_in, REPRO_USER, final_answer, history, require_workspace=False
    )
    assert not fired
def test_conversational_reply_without_action_verb_does_not_fire():
    """A future-tense but purely conversational reply does not trip the detector."""
    opted_in = _agent(True, "chat_completions")
    request = "help me decide"
    reply = "I'll help you think through the tradeoffs here."
    history = [{"role": "user", "content": request}]
    fired = looks_like_codex_intermediate_ack(
        opted_in, request, reply, history, require_workspace=False
    )
    assert not fired
def test_does_not_fire_after_a_tool_already_ran():
    """A tool-role message in the history suppresses the detector."""
    opted_in = _agent(True, "chat_completions")
    user_turn = {"role": "user", "content": REPRO_USER}
    tool_turn = {"role": "tool", "content": "health check result"}
    fired = looks_like_codex_intermediate_ack(
        opted_in,
        REPRO_USER,
        REPRO_ACK,
        [user_turn, tool_turn],
        require_workspace=False,
    )
    assert not fired
def test_long_response_is_not_treated_as_an_ack():
    """An ack-like opening followed by a long body is not treated as an ack."""
    opted_in = _agent(True, "chat_completions")
    filler = "x" * 1300
    long_ack = f"I will run the check. {filler}"
    history = [{"role": "user", "content": REPRO_USER}]
    fired = looks_like_codex_intermediate_ack(
        opted_in, REPRO_USER, long_ack, history, require_workspace=False
    )
    assert not fired