fix(agent): config-driven intent-ack continuation for all api_modes (#27881) (#53943)

* fix(agent): config-driven intent-ack continuation for all api_modes (#27881) The agent could end a turn after only stating intent ('I will run a health check...') without executing the announced tool call, forcing the user to re-prompt. A continuation guard that catches this and nudges the model to proceed already existed but was hard-gated to the codex_responses api_mode, so Gemini/Claude/OpenRouter turns never benefited. - New agent.intent_ack_continuation config (default 'auto' = codex-only, byte-stable for existing conversations). 'true'/model-list opts every api_mode in; 'false' disables. Mirrors agent.tool_use_enforcement's shape. - looks_like_codex_intermediate_ack gains require_workspace (default True). The opted-in path drops the codebase/filesystem requirement so general autonomous workflows (server ops, deploys, API calls) are caught, not just coding tasks. Future-ack + action-verb + short-content + no-prior-tool guards still apply; the 2-nudge-per-turn cap is unchanged. - Resolution centralized in intent_ack_continuation_mode (off/codex_only/all). * docs(infographic): intent-ack continuation (#27881)
2026-06-30 11:52:04 +00:00 · 2026-06-27 20:46:00 -07:00 · 2026-06-27 20:46:00 -07:00 · d43e0cf304
commit d43e0cf304
parent 56abbaeac3
7 changed files with 253 additions and 5 deletions
--- a/agent/agent_init.py
+++ b/agent/agent_init.py
@ -1307,6 +1307,12 @@ def init_agent(
        _agent_section = {}
    agent._tool_use_enforcement = _agent_section.get("tool_use_enforcement", "auto")

+    # Intent-ack continuation config: "auto" (default — codex_responses only,
+    # the historical gate), true (all api_modes), false (never), or a list of
+    # model-name substrings.  Resolved against the active api_mode/model in the
+    # conversation loop's intent-ack block.
+    agent._intent_ack_continuation = _agent_section.get("intent_ack_continuation", "auto")
+
    # Universal task-completion guidance toggle.  Default True.  Surfaced
    # as a separate flag from tool_use_enforcement because the guidance
    # applies to ALL models, not just the model families enforcement
--- a/agent/agent_runtime_helpers.py
+++ b/agent/agent_runtime_helpers.py
@ -2205,8 +2205,21 @@ def looks_like_codex_intermediate_ack(
    user_message: str,
    assistant_content: str,
    messages: List[Dict[str, Any]],
+    require_workspace: bool = True,
 ) -> bool:
-    """Detect a planning/ack message that should continue instead of ending the turn."""
+    """Detect a planning/ack message that should continue instead of ending the turn.
+
+    ``require_workspace`` (default True) keeps the original codex-coding scope:
+    the ack must reference a filesystem/repo workspace. The conversation loop
+    passes ``require_workspace=False`` when the user has explicitly opted into
+    intent-ack continuation for all api_modes (``agent.intent_ack_continuation``
+    is ``true`` or a model-list), so general autonomous workflows ("I'll run a
+    health check on the server", "I'll start the deployment") — which carry a
+    future-ack and an action verb but no filesystem reference — are caught too.
+    The future-ack + short-content + no-prior-tools + action-verb requirements
+    always apply, which is what keeps conversational "I'll help you brainstorm"
+    replies from tripping it.
+    """
    if any(isinstance(msg, dict) and msg.get("role") == "tool" for msg in messages):
        return False

@ -2259,17 +2272,67 @@ def looks_like_codex_intermediate_ack(
        "path",
    )

+    assistant_mentions_action = any(marker in assistant_text for marker in action_markers)
+    if not assistant_mentions_action:
+        return False
+
+    # Opted-in (all-api_mode) path: a future-ack + action verb + no prior tool
+    # call is enough — the user asked us to keep going when the model only
+    # announces intent, regardless of whether a filesystem is involved.
+    if not require_workspace:
+        return True
+
    user_text = (user_message or "").strip().lower()
    user_targets_workspace = (
        any(marker in user_text for marker in workspace_markers)
        or "~/" in user_text
        or "/" in user_text
    )
-    assistant_mentions_action = any(marker in assistant_text for marker in action_markers)
    assistant_targets_workspace = any(
        marker in assistant_text for marker in workspace_markers
    )
-    return (user_targets_workspace or assistant_targets_workspace) and assistant_mentions_action
+    return user_targets_workspace or assistant_targets_workspace
+
+
+def intent_ack_continuation_mode(agent) -> str:
+    """Classify the resolved intent-ack continuation mode for this turn.
+
+    Reads ``agent._intent_ack_continuation`` (a missing attribute is treated
+    as ``"auto"``) and returns one of:
+      * ``"off"``        — never continue.
+      * ``"codex_only"`` — historical scope: continue only on the
+        ``codex_responses`` api_mode, and only for codebase/workspace acks
+        (``require_workspace=True``).
+      * ``"all"``        — user opted in for every api_mode; continue on any
+        future-ack + action verb (``require_workspace=False``).
+
+    Mirrors the four-mode shape of ``agent.tool_use_enforcement``: ``"auto"``
+    (default) → codex_only; ``True``/"true"/"always"/"yes"/"on" → all;
+    ``False``/"false"/"never"/"no"/"off" → off; ``list``/``tuple`` → all when
+    a non-empty substring matches the active model name, else off.
+
+    String keywords are compared case-insensitively with surrounding
+    whitespace ignored. Non-string and blank entries of a model list are
+    skipped rather than matched.
+    """
+    mode = getattr(agent, "_intent_ack_continuation", "auto")
+
+    if isinstance(mode, bool):
+        return "all" if mode else "off"
+    if isinstance(mode, str):
+        keyword = mode.strip().lower()
+        if keyword in {"true", "always", "yes", "on"}:
+            return "all"
+        if keyword in {"false", "never", "no", "off"}:
+            return "off"
+    elif isinstance(mode, (list, tuple)):
+        model_lower = (agent.model or "").lower()
+        patterns = (p.strip().lower() for p in mode if isinstance(p, str))
+        # A blank pattern must not count as a match: "" is a substring of
+        # every string, so it would silently opt every model in.
+        return "all" if any(p and p in model_lower for p in patterns) else "off"
+    # "auto" or any unrecognised value — historical codex-only behavior.
+    return "codex_only" if agent.api_mode == "codex_responses" else "off"
+
+
+def intent_ack_continuation_enabled(agent) -> bool:
+    """Report whether intent-ack continuation is switched on for this turn.
+
+    This is purely the on/off gate: it is True exactly when
+    ``intent_ack_continuation_mode(agent)`` resolves to something other than
+    ``"off"``. The per-turn ``codex_ack_continuations < 2`` cap and the
+    ``looks_like_codex_intermediate_ack`` detector remain the caller's job.
+    Callers that must also know whether the workspace requirement applies
+    should call ``intent_ack_continuation_mode`` themselves
+    (``"codex_only"`` ⇒ require_workspace=True, ``"all"`` ⇒ False).
+    """
+    resolved = intent_ack_continuation_mode(agent)
+    return resolved != "off"



--- a/agent/conversation_loop.py
+++ b/agent/conversation_loop.py
@ -4637,14 +4637,20 @@ def run_conversation(
                # status from earlier failed attempts in this turn.
                agent._clear_status_buffer()

+                from agent.agent_runtime_helpers import (
+                    intent_ack_continuation_mode,
+                )
+
+                _ack_mode = intent_ack_continuation_mode(agent)
                if (
-                    agent.api_mode == "codex_responses"
+                    _ack_mode != "off"
                    and agent.valid_tool_names
                    and codex_ack_continuations < 2
                    and agent._looks_like_codex_intermediate_ack(
                        user_message=user_message,
                        assistant_content=final_response,
                        messages=messages,
+                        require_workspace=(_ack_mode == "codex_only"),
                    )
                ):
                    codex_ack_continuations += 1
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@ -939,6 +939,16 @@ DEFAULT_CONFIG = {
        # (force on/off for all models), or a list of model-name substrings
        # to match (e.g. ["gpt", "codex", "gemini", "qwen"]).
        "tool_use_enforcement": "auto",
+        # Intent-ack continuation: when the model opens a turn by narrating an
+        # action it will take ("I'll go check the logs...") but emits no tool
+        # call, intercept the turn-end, inject a "continue now, execute the
+        # tools" nudge, and loop instead of ending the turn (capped at 2 nudges
+        # per turn). This is the corrective sibling of tool_use_enforcement (the
+        # preventive prompt-side guard). Values: "auto" (default — fires only on
+        # the codex_responses api_mode, the historical behavior), true (all
+        # api_modes — fixes the Gemini/Claude "stops after stating intent" case),
+        # false (never), or a list of model-name substrings to match.
+        "intent_ack_continuation": "auto",
        # Universal "finish the job" guidance — short prompt block applied to
        # all models that targets two cross-family failure modes: (1) stopping
        # after a stub instead of finishing the artifact, (2) fabricating
--- a/infographic/intent-ack-continuation/infographic.png
+++ b/infographic/intent-ack-continuation/infographic.png
--- a/run_agent.py
+++ b/run_agent.py
@ -1424,10 +1424,13 @@ class AIAgent:
        user_message: str,
        assistant_content: str,
        messages: List[Dict[str, Any]],
+        require_workspace: bool = True,
    ) -> bool:
        """Forwarder — see ``agent.agent_runtime_helpers.looks_like_codex_intermediate_ack``."""
        from agent.agent_runtime_helpers import looks_like_codex_intermediate_ack
-        return looks_like_codex_intermediate_ack(self, user_message, assistant_content, messages)
+        return looks_like_codex_intermediate_ack(
+            self, user_message, assistant_content, messages, require_workspace
+        )

    def _extract_reasoning(self, assistant_message) -> Optional[str]:
        """Forwarder — see ``agent.agent_runtime_helpers.extract_reasoning``."""
--- a/tests/agent/test_intent_ack_continuation.py
+++ b/tests/agent/test_intent_ack_continuation.py
@ -0,0 +1,160 @@
+"""Intent-ack continuation gate + detector behavior.
+
+Covers the config-driven generalization of the codex intent-ack continuation
+(issue #27881): the historical ``codex_responses``-only path is byte-stable
+under the default ``"auto"`` mode, while an explicit ``true``/model-list opt-in
+extends the "you announced an action but called no tool — keep going" nudge to
+every api_mode and relaxes the codebase/workspace requirement so general
+autonomous workflows ("I'll run a health check on the server") are caught.
+
+These are invariant assertions about how the mode string and the detector
+gates relate, not snapshots of the marker lists.
+"""
+
+from types import SimpleNamespace
+from typing import Union
+
+from agent.agent_runtime_helpers import (
+    intent_ack_continuation_enabled,
+    intent_ack_continuation_mode,
+    looks_like_codex_intermediate_ack,
+)
+
+
+def _agent(
+    mode: Union[str, bool, list] = "auto",
+    api_mode="chat_completions",
+    model="anthropic/claude-sonnet-4",
+):
+    """Build a minimal stand-in agent carrying only the attributes under test.
+
+    ``mode`` is stored as ``_intent_ack_continuation``; ``api_mode`` and
+    ``model`` are stored under their own names. ``_strip_think_blocks``
+    returns its argument unchanged, which is sufficient for these
+    plain-text fixtures.
+    """
+    stub = SimpleNamespace()
+    stub._intent_ack_continuation = mode
+    stub.api_mode = api_mode
+    stub.model = model
+    stub._strip_think_blocks = lambda c: c
+    return stub
+
+
+# The reporter's exact repro (#27881): server-ops task, no filesystem reference.
+REPRO_USER = (
+    "check the current status of the server, grab the latest error logs, "
+    "and let me know if there's anything critical"
+)
+REPRO_ACK = "I will start by running a health check command on the server to see its current status."
+
+# The codex-coding case the detector was originally built for.
+CODE_USER = "review the codebase in /app"
+CODE_ACK = "Let me inspect the repository files first."
+
+
+# ── mode resolution ────────────────────────────────────────────────────────
+
+
+def test_auto_is_codex_only():
+    """``"auto"`` resolves to codex_only on codex_responses and off elsewhere."""
+    expected_by_api_mode = {
+        "codex_responses": "codex_only",
+        "chat_completions": "off",
+        "anthropic": "off",
+    }
+    for api_mode, expected in expected_by_api_mode.items():
+        assert intent_ack_continuation_mode(_agent("auto", api_mode)) == expected
+
+
+def test_true_is_all_api_modes():
+    """Boolean True and its string spellings resolve to "all" on any api_mode."""
+    api_modes = ("chat_completions", "anthropic", "codex_responses")
+    by_api_mode = [intent_ack_continuation_mode(_agent(True, am)) for am in api_modes]
+    assert by_api_mode == ["all"] * len(api_modes)
+
+    spellings = ("true", "always", "yes", "on", "ON")
+    by_spelling = [
+        intent_ack_continuation_mode(_agent(s, "chat_completions")) for s in spellings
+    ]
+    assert by_spelling == ["all"] * len(spellings)
+
+
+def test_false_is_off_even_for_codex():
+    """Boolean False and its string spellings resolve to "off" even on codex_responses."""
+    disabling_values = (False, "false", "never", "no", "off")
+    for value in disabling_values:
+        resolved = intent_ack_continuation_mode(_agent(value, "codex_responses"))
+        assert resolved == "off"
+
+
+def test_list_matches_model_substring():
+    """A model list yields "all" on a substring hit and "off" otherwise."""
+    expected_by_model = {
+        "google/gemini-3-pro": "all",
+        "anthropic/claude-sonnet-4": "off",
+    }
+    for model_name, expected in expected_by_model.items():
+        candidate = _agent(["gemini", "qwen"], "chat_completions", model_name)
+        assert intent_ack_continuation_mode(candidate) == expected
+
+
+def test_unrecognised_value_falls_back_to_auto():
+    """An unknown string behaves like "auto": codex_only on codex, off elsewhere."""
+    on_codex = _agent("garbage", "codex_responses")
+    on_chat = _agent("garbage", "chat_completions")
+    assert intent_ack_continuation_mode(on_codex) == "codex_only"
+    assert intent_ack_continuation_mode(on_chat) == "off"
+
+
+def test_missing_attr_defaults_to_auto():
+    """An agent lacking ``_intent_ack_continuation`` resolves as if set to "auto"."""
+    cases = (("chat_completions", "off"), ("codex_responses", "codex_only"))
+    for api_mode, expected in cases:
+        bare = SimpleNamespace(
+            api_mode=api_mode, model="x", _strip_think_blocks=lambda c: c
+        )
+        assert intent_ack_continuation_mode(bare) == expected
+
+
+def test_enabled_is_mode_not_off():
+    """``intent_ack_continuation_enabled`` is True exactly when the mode is not "off"."""
+    table = (
+        (True, "chat_completions", True),
+        ("auto", "codex_responses", True),
+        ("auto", "chat_completions", False),
+        (False, "codex_responses", False),
+    )
+    for mode, api_mode, expected in table:
+        assert intent_ack_continuation_enabled(_agent(mode, api_mode)) is expected
+
+
+# ── detector: workspace requirement ─────────────────────────────────────────
+
+
+def test_codex_only_path_requires_workspace():
+    """With ``require_workspace=True`` only the codebase ack fires."""
+    agent = _agent("auto", "codex_responses")
+
+    # Codebase ack matches workspace markers, so the detector fires.
+    code_history = [{"role": "user", "content": CODE_USER}]
+    code_hit = looks_like_codex_intermediate_ack(
+        agent, CODE_USER, CODE_ACK, code_history, require_workspace=True
+    )
+    assert code_hit
+
+    # Server-ops ack has no filesystem reference, so it stays silent
+    # (historical scope).
+    repro_history = [{"role": "user", "content": REPRO_USER}]
+    repro_hit = looks_like_codex_intermediate_ack(
+        agent, REPRO_USER, REPRO_ACK, repro_history, require_workspace=True
+    )
+    assert not repro_hit
+
+
+def test_all_path_drops_workspace_requirement():
+    """The #27881 fix: opted-in turns catch non-codebase intent acks."""
+    agent = _agent(True, "chat_completions")
+    history = [{"role": "user", "content": REPRO_USER}]
+    fired = looks_like_codex_intermediate_ack(
+        agent, REPRO_USER, REPRO_ACK, history, require_workspace=False
+    )
+    assert fired
+
+
+# ── detector: guardrails that hold regardless of workspace ───────────────────
+
+
+def test_real_final_answer_does_not_fire():
+    """A genuine completed answer is not flagged as an intermediate ack."""
+    agent = _agent(True, "chat_completions")
+    history = [{"role": "user", "content": REPRO_USER}]
+    final = "Done. The server is healthy and there are no critical errors in the logs."
+    fired = looks_like_codex_intermediate_ack(
+        agent, REPRO_USER, final, history, require_workspace=False
+    )
+    assert not fired
+
+
+def test_conversational_reply_without_action_verb_does_not_fire():
+    """A conversational "I'll help you think..." reply is not flagged."""
+    agent = _agent(True, "chat_completions")
+    user_text = "help me decide"
+    brainstorm = "I'll help you think through the tradeoffs here."
+    history = [{"role": "user", "content": user_text}]
+    fired = looks_like_codex_intermediate_ack(
+        agent, user_text, brainstorm, history, require_workspace=False
+    )
+    assert not fired
+
+
+def test_does_not_fire_after_a_tool_already_ran():
+    """A ``tool`` message already in the history suppresses the detector."""
+    agent = _agent(True, "chat_completions")
+    history = [{"role": "user", "content": REPRO_USER}]
+    history.append({"role": "tool", "content": "health check result"})
+    fired = looks_like_codex_intermediate_ack(
+        agent, REPRO_USER, REPRO_ACK, history, require_workspace=False
+    )
+    assert not fired
+
+
+def test_long_response_is_not_treated_as_an_ack():
+    """An ack-style opener followed by 1300 filler characters is not flagged."""
+    agent = _agent(True, "chat_completions")
+    history = [{"role": "user", "content": REPRO_USER}]
+    long_ack = "".join(["I will run the check. ", "x" * 1300])
+    fired = looks_like_codex_intermediate_ack(
+        agent, REPRO_USER, long_ack, history, require_workspace=False
+    )
+    assert not fired