test(guardrail): assert halt message reaches stream_delta_callback

Regression guard for #30770 — verifies the guardrail-halt branch in
agent/conversation_loop.py pushes the synthesized halt message through
stream_delta_callback before breaking out of the loop.  Without the
emit, chat-completions SSE writers drain an empty queue and clients
(Open WebUI, etc.) see a finish chunk with zero content delta —
indistinguishable from a crash.

Verified: the test fails when the production fix is reverted.
This commit is contained in:
Teknium 2026-05-24 04:33:51 -07:00
parent 38b8d0da85
commit 186bf25cb1

View file

@ -304,3 +304,52 @@ def test_config_enabled_hard_stop_run_conversation_returns_controlled_guardrail_
call_ids = [tc["id"] for tc in assistant_msg["tool_calls"]]
following_results = [m for m in result["messages"] if m.get("role") == "tool" and m.get("tool_call_id") in call_ids]
assert len(following_results) == len(call_ids)
def test_guardrail_halt_emits_final_response_through_stream_delta_callback():
"""Regression for #30770: when the guardrail halts the loop, the
synthesized halt message must be pushed through ``stream_delta_callback``
so SSE/TUI clients see why the agent stopped instead of a silent stream
close. Without this the chat-completions SSE writer drains an empty
queue and emits a finish chunk with zero content (indistinguishable
from a crash for Open WebUI and similar clients).
"""
agent = _make_agent("web_search", max_iterations=10, config=_hard_stop_config())
same_args = {"query": "same"}
responses = [
_mock_response(
content="",
finish_reason="tool_calls",
tool_calls=[_mock_tool_call("web_search", json.dumps(same_args), f"c{i}")],
)
for i in range(1, 10)
]
agent.client.chat.completions.create.side_effect = responses
deltas: list = []
agent.stream_delta_callback = lambda d: deltas.append(d)
# The mocked client returns SimpleNamespace responses which aren't
# iterable as streaming chunks; force the non-streaming code path so
# the guardrail-halt branch is reached without engaging the real
# streaming machinery.
agent._disable_streaming = True
with (
patch("run_agent.handle_function_call", return_value=json.dumps({"error": "boom"})),
patch.object(agent, "_persist_session"),
patch.object(agent, "_save_trajectory"),
patch.object(agent, "_cleanup_task_resources"),
):
result = agent.run_conversation("search repeatedly")
assert result["turn_exit_reason"] == "guardrail_halt"
halt_text = result["final_response"]
assert "stopped retrying" in halt_text
# The halt message must have been pushed through the callback at least
# once. Empty-queue SSE writers were the bug — clients saw no content
# delta before the finish chunk.
text_deltas = [d for d in deltas if isinstance(d, str)]
assert halt_text in text_deltas, (
f"halt message was never streamed; callback only saw {deltas!r}"
)