hermes-agent/tests/agent/test_intent_ack_continuation.py
Tao Yan b8ebe32866 fix(agent): flatten multi-part user_message in codex intermediate-ack detector
Vision requests routed through the OpenAI-compat API server forward the
raw multi-part content list ([{type:"text"}, {type:"image_url"}, ...])
straight through as user_message. The codex intermediate-ack detector
flattened it with (user_message or "").strip(), so a truthy list survived
and .strip() raised AttributeError — killing any Codex-routed vision turn
that took the require_workspace path.

Route through the existing _summarize_user_message_for_log helper (which
already backs the logging/banner previews on main), and widen the param
type hint from str to Any to match how the function is actually called.

The two logging-preview sites the original PR also touched were fixed
independently on main by the conversation-loop refactor.

Co-authored-by: Hermes Agent <agent@nousresearch.com>
2026-06-30 03:20:11 -07:00

183 lines
7.5 KiB
Python

"""Intent-ack continuation gate + detector behavior.
Covers the config-driven generalization of the codex intent-ack continuation
(issue #27881): the historical ``codex_responses``-only path is byte-stable
under the default ``"auto"`` mode, while an explicit ``true``/model-list opt-in
extends the "you announced an action but called no tool — keep going" nudge to
every api_mode and relaxes the codebase/workspace requirement so general
autonomous workflows ("I'll run a health check on the server") are caught.
These are invariant assertions about how the mode string and the detector
gates relate, not snapshots of the marker lists.
"""
from types import SimpleNamespace
from typing import Union
from agent.agent_runtime_helpers import (
intent_ack_continuation_enabled,
intent_ack_continuation_mode,
looks_like_codex_intermediate_ack,
)
def _agent(
mode: Union[str, bool, list] = "auto",
api_mode="chat_completions",
model="anthropic/claude-sonnet-4",
):
# _strip_think_blocks is a no-op for these plain-text fixtures.
return SimpleNamespace(
_intent_ack_continuation=mode,
api_mode=api_mode,
model=model,
_strip_think_blocks=lambda c: c,
)
# The reporter's exact repro (#27881): server-ops task, no filesystem reference.
REPRO_USER = (
"check the current status of the server, grab the latest error logs, "
"and let me know if there's anything critical"
)
REPRO_ACK = "I will start by running a health check command on the server to see its current status."
# The codex-coding case the detector was originally built for.
CODE_USER = "review the codebase in /app"
CODE_ACK = "Let me inspect the repository files first."
# ── mode resolution ────────────────────────────────────────────────────────
def test_auto_is_codex_only():
assert intent_ack_continuation_mode(_agent("auto", "codex_responses")) == "codex_only"
assert intent_ack_continuation_mode(_agent("auto", "chat_completions")) == "off"
assert intent_ack_continuation_mode(_agent("auto", "anthropic")) == "off"
def test_true_is_all_api_modes():
for am in ("chat_completions", "anthropic", "codex_responses"):
assert intent_ack_continuation_mode(_agent(True, am)) == "all"
for s in ("true", "always", "yes", "on", "ON"):
assert intent_ack_continuation_mode(_agent(s, "chat_completions")) == "all"
def test_false_is_off_even_for_codex():
assert intent_ack_continuation_mode(_agent(False, "codex_responses")) == "off"
for s in ("false", "never", "no", "off"):
assert intent_ack_continuation_mode(_agent(s, "codex_responses")) == "off"
def test_list_matches_model_substring():
assert intent_ack_continuation_mode(
_agent(["gemini", "qwen"], "chat_completions", "google/gemini-3-pro")
) == "all"
assert intent_ack_continuation_mode(
_agent(["gemini", "qwen"], "chat_completions", "anthropic/claude-sonnet-4")
) == "off"
def test_unrecognised_value_falls_back_to_auto():
assert intent_ack_continuation_mode(_agent("garbage", "codex_responses")) == "codex_only"
assert intent_ack_continuation_mode(_agent("garbage", "chat_completions")) == "off"
def test_missing_attr_defaults_to_auto():
bare = SimpleNamespace(api_mode="chat_completions", model="x", _strip_think_blocks=lambda c: c)
assert intent_ack_continuation_mode(bare) == "off"
bare_codex = SimpleNamespace(api_mode="codex_responses", model="x", _strip_think_blocks=lambda c: c)
assert intent_ack_continuation_mode(bare_codex) == "codex_only"
def test_enabled_is_mode_not_off():
assert intent_ack_continuation_enabled(_agent(True, "chat_completions")) is True
assert intent_ack_continuation_enabled(_agent("auto", "codex_responses")) is True
assert intent_ack_continuation_enabled(_agent("auto", "chat_completions")) is False
assert intent_ack_continuation_enabled(_agent(False, "codex_responses")) is False
# ── detector: workspace requirement ─────────────────────────────────────────
def test_codex_only_path_requires_workspace():
a = _agent("auto", "codex_responses")
msgs = [{"role": "user", "content": CODE_USER}]
# codebase ack matches workspace markers → fires
assert looks_like_codex_intermediate_ack(a, CODE_USER, CODE_ACK, msgs, require_workspace=True)
# server-ops ack has no filesystem reference → does NOT fire (historical scope)
repro_msgs = [{"role": "user", "content": REPRO_USER}]
assert not looks_like_codex_intermediate_ack(
a, REPRO_USER, REPRO_ACK, repro_msgs, require_workspace=True
)
def test_multipart_user_message_does_not_crash_on_workspace_path():
"""#9562: vision requests forward ``user_message`` as a multi-part list.
The OpenAI-compat API server passes the raw ``content`` field straight
through for vision turns, so ``user_message`` reaches the detector as
``[{type:"text",...}, {type:"image_url",...}]``. The ``require_workspace``
path flattened it with ``(user_message or "").strip()`` — a truthy list
survived and ``.strip()`` raised ``AttributeError``, killing the turn.
The text part still has to drive workspace detection.
"""
a = _agent("auto", "codex_responses")
multipart = [
{"type": "text", "text": CODE_USER},
{"type": "image_url", "image_url": {"url": "data:image/png;base64,AAAA"}},
]
msgs = [{"role": "user", "content": multipart}]
# No crash, and the text part ("review the codebase in /app") still
# satisfies the workspace requirement so the ack fires.
assert looks_like_codex_intermediate_ack(
a, multipart, CODE_ACK, msgs, require_workspace=True
)
def test_all_path_drops_workspace_requirement():
"""The #27881 fix: opted-in turns catch non-codebase intent acks."""
a = _agent(True, "chat_completions")
msgs = [{"role": "user", "content": REPRO_USER}]
assert looks_like_codex_intermediate_ack(
a, REPRO_USER, REPRO_ACK, msgs, require_workspace=False
)
# ── detector: guardrails that hold regardless of workspace ───────────────────
def test_real_final_answer_does_not_fire():
a = _agent(True, "chat_completions")
final = "Done. The server is healthy and there are no critical errors in the logs."
msgs = [{"role": "user", "content": REPRO_USER}]
assert not looks_like_codex_intermediate_ack(a, REPRO_USER, final, msgs, require_workspace=False)
def test_conversational_reply_without_action_verb_does_not_fire():
a = _agent(True, "chat_completions")
brainstorm = "I'll help you think through the tradeoffs here."
msgs = [{"role": "user", "content": "help me decide"}]
assert not looks_like_codex_intermediate_ack(
a, "help me decide", brainstorm, msgs, require_workspace=False
)
def test_does_not_fire_after_a_tool_already_ran():
a = _agent(True, "chat_completions")
msgs = [
{"role": "user", "content": REPRO_USER},
{"role": "tool", "content": "health check result"},
]
assert not looks_like_codex_intermediate_ack(
a, REPRO_USER, REPRO_ACK, msgs, require_workspace=False
)
def test_long_response_is_not_treated_as_an_ack():
a = _agent(True, "chat_completions")
long_ack = "I will run the check. " + ("x" * 1300)
msgs = [{"role": "user", "content": REPRO_USER}]
assert not looks_like_codex_intermediate_ack(
a, REPRO_USER, long_ack, msgs, require_workspace=False
)