test(guardrail): assert halt message reaches stream_delta_callback

Regression guard for #30770 — verifies the guardrail-halt branch in agent/conversation_loop.py pushes the synthesized halt message through stream_delta_callback before breaking out of the loop. Without the emit, chat-completions SSE writers drain an empty queue and clients (Open WebUI, etc.) see a finish chunk with zero content delta — indistinguishable from a crash. Verified: the test fails when the production fix is reverted.
2026-07-15 14:22:43 +00:00 · 2026-05-24 04:33:51 -07:00 · 2026-05-24 04:33:51 -07:00 · 186bf25cb1
commit 186bf25cb1
parent 38b8d0da85
1 changed files with 49 additions and 0 deletions
--- a/tests/run_agent/test_tool_call_guardrail_runtime.py
+++ b/tests/run_agent/test_tool_call_guardrail_runtime.py
@ -304,3 +304,52 @@ def test_config_enabled_hard_stop_run_conversation_returns_controlled_guardrail_
        call_ids = [tc["id"] for tc in assistant_msg["tool_calls"]]
        following_results = [m for m in result["messages"] if m.get("role") == "tool" and m.get("tool_call_id") in call_ids]
        assert len(following_results) == len(call_ids)
+
+
+def test_guardrail_halt_emits_final_response_through_stream_delta_callback():
+    """Regression for #30770: when the guardrail halts the loop, the
+    synthesized halt message must be pushed through ``stream_delta_callback``
+    so SSE/TUI clients see why the agent stopped instead of a silent stream
+    close.  Without this the chat-completions SSE writer drains an empty
+    queue and emits a finish chunk with zero content (indistinguishable
+    from a crash for Open WebUI and similar clients).
+    """
+    agent = _make_agent("web_search", max_iterations=10, config=_hard_stop_config())
+    same_args = {"query": "same"}
+    responses = [
+        _mock_response(
+            content="",
+            finish_reason="tool_calls",
+            tool_calls=[_mock_tool_call("web_search", json.dumps(same_args), f"c{i}")],
+        )
+        for i in range(1, 10)
+    ]
+    agent.client.chat.completions.create.side_effect = responses
+
+    deltas: list = []
+    agent.stream_delta_callback = lambda d: deltas.append(d)
+    # The mocked client returns SimpleNamespace responses which aren't
+    # iterable as streaming chunks; force the non-streaming code path so
+    # the guardrail-halt branch is reached without engaging the real
+    # streaming machinery.
+    agent._disable_streaming = True
+
+    with (
+        patch("run_agent.handle_function_call", return_value=json.dumps({"error": "boom"})),
+        patch.object(agent, "_persist_session"),
+        patch.object(agent, "_save_trajectory"),
+        patch.object(agent, "_cleanup_task_resources"),
+    ):
+        result = agent.run_conversation("search repeatedly")
+
+    assert result["turn_exit_reason"] == "guardrail_halt"
+    halt_text = result["final_response"]
+    assert "stopped retrying" in halt_text
+
+    # The halt message must have been pushed through the callback at least
+    # once.  Empty-queue SSE writers were the bug — clients saw no content
+    # delta before the finish chunk.
+    text_deltas = [d for d in deltas if isinstance(d, str)]
+    assert halt_text in text_deltas, (
+        f"halt message was never streamed; callback only saw {deltas!r}"
+    )