mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-01 01:51:44 +00:00
fix(api-server): collapse tool start/lifecycle into a single SSE event
Address Copilot review on PR #16666: 1. **Duplicate event on every tool start** — both ``tool_progress_callback`` and ``tool_start_callback`` fire side-by-side in ``run_agent.py``, so wiring both into chat completions emitted *two* ``hermes.tool.progress`` events per real tool call. Drop the legacy ``_on_tool_progress`` emit entirely; ``_on_tool_start`` now produces a single unified event that carries the legacy ``tool``/``emoji``/``label`` fields plus the new ``toolCallId``/``status`` correlation fields. Label is computed inline via ``build_tool_preview`` so callers do not need to pre-format it. 2. **Weak per-event correlation in the regression test** — the previous assertion checked that a ``toolCallId`` appeared *somewhere* in the aggregate, which would have passed even if ``running`` lacked the id. Collect ``(status, toolCallId)`` per event and assert each event carries the correct pair, plus exactly two events on the wire (no silent duplication regression). The two existing chat-completions tool-progress tests are updated to fire ``tool_start_callback`` instead of ``tool_progress_callback``, matching production reality where ``run_agent`` always pairs them. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
13c238327e
commit
e0a03f3f40
2 changed files with 197 additions and 37 deletions
|
|
@ -688,17 +688,17 @@ class TestChatCompletionsEndpoint:
|
|||
|
||||
@pytest.mark.asyncio
|
||||
async def test_stream_includes_tool_progress(self, adapter):
|
||||
"""tool_progress_callback fires → progress appears as custom SSE event, not in delta.content."""
|
||||
"""tool_start_callback fires → progress appears as custom SSE event, not in delta.content."""
|
||||
import asyncio
|
||||
|
||||
app = _create_app(adapter)
|
||||
async with TestClient(TestServer(app)) as cli:
|
||||
async def _mock_run_agent(**kwargs):
|
||||
cb = kwargs.get("stream_delta_callback")
|
||||
tp_cb = kwargs.get("tool_progress_callback")
|
||||
# Simulate tool progress before streaming content
|
||||
if tp_cb:
|
||||
tp_cb("tool.started", "terminal", "ls -la", {"command": "ls -la"})
|
||||
ts_cb = kwargs.get("tool_start_callback")
|
||||
# Simulate the structured tool start the gateway now consumes.
|
||||
if ts_cb:
|
||||
ts_cb("call_terminal_1", "terminal", {"command": "ls -la"})
|
||||
if cb:
|
||||
await asyncio.sleep(0.05)
|
||||
cb("Here are the files.")
|
||||
|
|
@ -724,7 +724,10 @@ class TestChatCompletionsEndpoint:
|
|||
# markers instead of calling tools (#6972).
|
||||
assert "event: hermes.tool.progress" in body
|
||||
assert '"tool": "terminal"' in body
|
||||
assert '"label": "ls -la"' in body
|
||||
# ``label`` is now derived by ``build_tool_preview`` from the
|
||||
# tool args rather than passed by the caller, so we assert
|
||||
# only that *some* label exists rather than a literal value.
|
||||
assert '"label":' in body
|
||||
# The progress marker must NOT appear inside any
|
||||
# chat.completion.chunk delta.content field.
|
||||
import json as _json
|
||||
|
|
@ -744,17 +747,17 @@ class TestChatCompletionsEndpoint:
|
|||
|
||||
@pytest.mark.asyncio
|
||||
async def test_stream_tool_progress_skips_internal_events(self, adapter):
|
||||
"""Internal events (name starting with _) are not streamed."""
|
||||
"""Internal tool calls (name starting with ``_``) are not streamed."""
|
||||
import asyncio
|
||||
|
||||
app = _create_app(adapter)
|
||||
async with TestClient(TestServer(app)) as cli:
|
||||
async def _mock_run_agent(**kwargs):
|
||||
cb = kwargs.get("stream_delta_callback")
|
||||
tp_cb = kwargs.get("tool_progress_callback")
|
||||
if tp_cb:
|
||||
tp_cb("tool.started", "_thinking", "some internal state", {})
|
||||
tp_cb("tool.started", "web_search", "Python docs", {"query": "Python docs"})
|
||||
ts_cb = kwargs.get("tool_start_callback")
|
||||
if ts_cb:
|
||||
ts_cb("call_internal_1", "_thinking", {"text": "some internal state"})
|
||||
ts_cb("call_search_1", "web_search", {"query": "Python docs"})
|
||||
if cb:
|
||||
await asyncio.sleep(0.05)
|
||||
cb("Found it.")
|
||||
|
|
@ -776,10 +779,142 @@ class TestChatCompletionsEndpoint:
|
|||
body = await resp.text()
|
||||
# Internal _thinking event should NOT appear anywhere
|
||||
assert "some internal state" not in body
|
||||
assert "call_internal_1" not in body
|
||||
# Real tool progress should appear as custom SSE event
|
||||
assert "event: hermes.tool.progress" in body
|
||||
assert '"tool": "web_search"' in body
|
||||
assert '"label": "Python docs"' in body
|
||||
# Label is derived from the args dict by build_tool_preview;
|
||||
# asserting on the structural fact (label exists, call id
|
||||
# is correlated) rather than a literal preview string keeps
|
||||
# the test robust against preview-formatter tweaks.
|
||||
assert '"label":' in body
|
||||
assert '"toolCallId": "call_search_1"' in body
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_stream_emits_tool_lifecycle_with_call_id(self, adapter):
|
||||
"""Regression for #16588.
|
||||
|
||||
``/v1/chat/completions`` streaming previously emitted only a
|
||||
``tool.started``-style ``hermes.tool.progress`` event; clients
|
||||
rendering tool lifecycle UI had no way to mark a tool as finished
|
||||
because no matching ``status: completed`` event was emitted, and
|
||||
no ``toolCallId`` was carried for correlation.
|
||||
|
||||
The fix adds ``tool_start_callback`` / ``tool_complete_callback``
|
||||
to the chat completions agent invocation and writes both halves
|
||||
of the lifecycle pair on the same ``event: hermes.tool.progress``
|
||||
SSE line, with stable ``toolCallId`` and ``status``.
|
||||
"""
|
||||
import asyncio
|
||||
import json as _json
|
||||
|
||||
app = _create_app(adapter)
|
||||
async with TestClient(TestServer(app)) as cli:
|
||||
async def _mock_run_agent(**kwargs):
|
||||
cb = kwargs.get("stream_delta_callback")
|
||||
ts_cb = kwargs.get("tool_start_callback")
|
||||
tc_cb = kwargs.get("tool_complete_callback")
|
||||
# The structured callbacks own the chat-completions SSE
|
||||
# channel now; ``tool_progress_callback`` is intentionally
|
||||
# not wired so each tool start emits exactly one event.
|
||||
if ts_cb:
|
||||
ts_cb("call_terminal_1", "terminal", {"command": "ls -la"})
|
||||
if tc_cb:
|
||||
tc_cb("call_terminal_1", "terminal", {"command": "ls -la"}, "ok")
|
||||
if cb:
|
||||
await asyncio.sleep(0.05)
|
||||
cb("done.")
|
||||
return (
|
||||
{"final_response": "done.", "messages": [], "api_calls": 1},
|
||||
{"input_tokens": 1, "output_tokens": 1, "total_tokens": 2},
|
||||
)
|
||||
|
||||
with patch.object(adapter, "_run_agent", side_effect=_mock_run_agent):
|
||||
resp = await cli.post(
|
||||
"/v1/chat/completions",
|
||||
json={
|
||||
"model": "test",
|
||||
"messages": [{"role": "user", "content": "list"}],
|
||||
"stream": True,
|
||||
},
|
||||
)
|
||||
assert resp.status == 200
|
||||
body = await resp.text()
|
||||
|
||||
# Walk the SSE body and collect *(status, toolCallId)* pairs
|
||||
# per event so the assertions verify per-event correlation —
|
||||
# an event missing ``toolCallId`` would not pass even if a
|
||||
# different event happens to carry the right id.
|
||||
pairs: list[tuple[str | None, str | None]] = []
|
||||
lines = body.splitlines()
|
||||
for i, line in enumerate(lines):
|
||||
if line.strip() != "event: hermes.tool.progress":
|
||||
continue
|
||||
for follow in lines[i + 1: i + 4]:
|
||||
if follow.startswith("data: "):
|
||||
try:
|
||||
payload = _json.loads(follow[len("data: "):])
|
||||
except _json.JSONDecodeError:
|
||||
break
|
||||
pairs.append((payload.get("status"), payload.get("toolCallId")))
|
||||
break
|
||||
|
||||
# Each tool start must emit exactly one event (no duplicate
|
||||
# legacy + new emit), and each lifecycle pair must carry the
|
||||
# same toolCallId on every event — not just somewhere in the
|
||||
# aggregate.
|
||||
assert len(pairs) == 2, f"expected 2 events (running+completed), got {pairs}"
|
||||
assert pairs[0] == ("running", "call_terminal_1"), pairs
|
||||
assert pairs[1] == ("completed", "call_terminal_1"), pairs
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_stream_tool_lifecycle_skips_internal_and_orphan_completes(self, adapter):
|
||||
"""Internal tools (``_thinking``-style) and ``completed`` events
|
||||
without a prior matching ``running`` must produce no lifecycle
|
||||
events on the wire — otherwise clients would see orphaned
|
||||
``status: completed`` updates they cannot correlate."""
|
||||
import asyncio
|
||||
|
||||
app = _create_app(adapter)
|
||||
async with TestClient(TestServer(app)) as cli:
|
||||
async def _mock_run_agent(**kwargs):
|
||||
cb = kwargs.get("stream_delta_callback")
|
||||
ts_cb = kwargs.get("tool_start_callback")
|
||||
tc_cb = kwargs.get("tool_complete_callback")
|
||||
# Internal tool — must be filtered.
|
||||
if ts_cb:
|
||||
ts_cb("call_internal_1", "_thinking", {})
|
||||
if tc_cb:
|
||||
tc_cb("call_internal_1", "_thinking", {}, "")
|
||||
# Completion without start — orphan, must be dropped.
|
||||
if tc_cb:
|
||||
tc_cb("call_orphan_1", "web_search", {}, "ok")
|
||||
if cb:
|
||||
await asyncio.sleep(0.05)
|
||||
cb("ok.")
|
||||
return (
|
||||
{"final_response": "ok.", "messages": [], "api_calls": 1},
|
||||
{"input_tokens": 1, "output_tokens": 1, "total_tokens": 2},
|
||||
)
|
||||
|
||||
with patch.object(adapter, "_run_agent", side_effect=_mock_run_agent):
|
||||
resp = await cli.post(
|
||||
"/v1/chat/completions",
|
||||
json={
|
||||
"model": "test",
|
||||
"messages": [{"role": "user", "content": "ok"}],
|
||||
"stream": True,
|
||||
},
|
||||
)
|
||||
assert resp.status == 200
|
||||
body = await resp.text()
|
||||
|
||||
# Neither the internal call_id nor the orphan call_id should
|
||||
# surface as a lifecycle payload on the wire.
|
||||
assert "call_internal_1" not in body
|
||||
assert "call_orphan_1" not in body
|
||||
assert '"status": "running"' not in body
|
||||
assert '"status": "completed"' not in body
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_no_user_message_returns_400(self, adapter):
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue