mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-04 07:31:58 +00:00
test(streaming): pin partial-stream-stub finish_reason + continuation contract
Three test classes lock in the #30963 fix: 1. TestPartialStreamStubFinishReason — drives _interruptible_streaming_api_call through the two recovery branches and asserts: - text-only partial → finish_reason="length" (the new behaviour), - mid-tool-call partial → finish_reason="stop" (unchanged on purpose). 2. TestLengthContinuationPromptBranching — pure-Python check on the branch that picks the continuation prompt by response.id. Locks the network error wording for partial-stream-stub vs. the output-length wording for everything else. 3. TestConversationLoopPartialStreamContinuation — feeds a stub + continuation pair into run_conversation, verifies the loop makes a second API call (instead of exiting with text_response(stop)), confirms the network-error continuation prompt actually reaches the model on call #2, and that final_response stitches both halves. Refs: NousResearch/hermes-agent#30963
This commit is contained in:
parent
20b3703a42
commit
6cafcf9c77
1 changed files with 258 additions and 0 deletions
258
tests/run_agent/test_partial_stream_finish_reason.py
Normal file
258
tests/run_agent/test_partial_stream_finish_reason.py
Normal file
|
|
@ -0,0 +1,258 @@
|
|||
"""Regression tests for issue #30963 — partial-stream stub finish_reason.
|
||||
|
||||
Pins the contract:
|
||||
|
||||
- text-only partial stream → stub.finish_reason == "length" so the
|
||||
conversation loop's existing length-continuation path can keep the
|
||||
agent moving against an unfinished goal.
|
||||
- partial mid-tool-call → stub.finish_reason == "stop" so the loop
|
||||
hands control back to the user (matches the user-visible warning
|
||||
"Ask me to retry if you want to continue").
|
||||
- conversation_loop's length-continuation prompt distinguishes a real
|
||||
output-length truncation from a partial-stream-stub network error
|
||||
via response.id.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
# ── Helpers (mirrors test_streaming.py) ────────────────────────────────────
|
||||
|
||||
def _make_stream_chunk(content=None, tool_calls=None, finish_reason=None):
|
||||
delta = SimpleNamespace(
|
||||
content=content, tool_calls=tool_calls,
|
||||
reasoning_content=None, reasoning=None,
|
||||
)
|
||||
choice = SimpleNamespace(index=0, delta=delta, finish_reason=finish_reason)
|
||||
return SimpleNamespace(choices=[choice], model=None, usage=None)
|
||||
|
||||
|
||||
def _make_tool_call_delta(index=0, tc_id=None, name=None, arguments=None):
|
||||
func = SimpleNamespace(name=name, arguments=arguments)
|
||||
return SimpleNamespace(index=index, id=tc_id, function=func)
|
||||
|
||||
|
||||
def _make_agent():
|
||||
from run_agent import AIAgent
|
||||
agent = AIAgent(
|
||||
api_key="test-key",
|
||||
base_url="https://example.com/v1",
|
||||
model="test/model",
|
||||
quiet_mode=True,
|
||||
skip_context_files=True,
|
||||
skip_memory=True,
|
||||
)
|
||||
agent.api_mode = "chat_completions"
|
||||
agent._interrupt_requested = False
|
||||
return agent
|
||||
|
||||
|
||||
# ── Stub finish_reason ────────────────────────────────────────────────────
|
||||
|
||||
class TestPartialStreamStubFinishReason:
|
||||
"""The stub returned by interruptible_streaming_api_call when the
|
||||
upstream connection dies mid-flight."""
|
||||
|
||||
@patch("run_agent.AIAgent._create_request_openai_client")
|
||||
@patch("run_agent.AIAgent._close_request_openai_client")
|
||||
def test_text_only_partial_returns_length(self, _mock_close, mock_create, monkeypatch):
|
||||
"""#30963: text-only partials must classify as length so the loop
|
||||
keeps continuing instead of exiting with budget remaining."""
|
||||
|
||||
def _stalling_stream():
|
||||
yield _make_stream_chunk(content="Here's my answer so far")
|
||||
raise RuntimeError("simulated upstream stall")
|
||||
|
||||
mock_client = MagicMock()
|
||||
mock_client.chat.completions.create.side_effect = lambda *a, **kw: _stalling_stream()
|
||||
mock_create.return_value = mock_client
|
||||
|
||||
agent = _make_agent()
|
||||
agent._current_streamed_assistant_text = "Here's my answer so far"
|
||||
|
||||
monkeypatch.setenv("HERMES_STREAM_RETRIES", "0")
|
||||
response = agent._interruptible_streaming_api_call({})
|
||||
|
||||
assert response.id == "partial-stream-stub"
|
||||
assert response.choices[0].finish_reason == "length", (
|
||||
"Text-only partial streams must use finish_reason=length so the "
|
||||
"conversation loop continues from where the network died "
|
||||
"(issue #30963)."
|
||||
)
|
||||
assert response.choices[0].message.content == "Here's my answer so far"
|
||||
assert response.choices[0].message.tool_calls is None
|
||||
|
||||
@patch("run_agent.AIAgent._create_request_openai_client")
|
||||
@patch("run_agent.AIAgent._close_request_openai_client")
|
||||
def test_partial_tool_call_keeps_stop(self, _mock_close, mock_create, monkeypatch):
|
||||
"""Mid-tool-call partials keep finish_reason=stop on purpose — the
|
||||
warning text asks the user to drive the retry, not the agent."""
|
||||
|
||||
def _stalling_stream():
|
||||
yield _make_stream_chunk(content="Let me write the audit: ")
|
||||
yield _make_stream_chunk(tool_calls=[
|
||||
_make_tool_call_delta(index=0, tc_id="call_1", name="write_file"),
|
||||
])
|
||||
yield _make_stream_chunk(tool_calls=[
|
||||
_make_tool_call_delta(index=0, arguments='{"path": "/tmp/x", '),
|
||||
])
|
||||
raise RuntimeError("simulated upstream stall")
|
||||
|
||||
mock_client = MagicMock()
|
||||
mock_client.chat.completions.create.side_effect = lambda *a, **kw: _stalling_stream()
|
||||
mock_create.return_value = mock_client
|
||||
|
||||
agent = _make_agent()
|
||||
agent._fire_stream_delta = lambda text: None
|
||||
agent._current_streamed_assistant_text = "Let me write the audit: "
|
||||
|
||||
monkeypatch.setenv("HERMES_STREAM_RETRIES", "0")
|
||||
response = agent._interruptible_streaming_api_call({})
|
||||
|
||||
assert response.id == "partial-stream-stub"
|
||||
assert response.choices[0].finish_reason == "stop", (
|
||||
"Partial mid-tool-call must keep finish_reason=stop — the warning "
|
||||
"appended to content asks the user to retry, so the agent must "
|
||||
"not auto-replay a tool call with possible side-effects."
|
||||
)
|
||||
content = response.choices[0].message.content or ""
|
||||
assert "Stream stalled mid tool-call" in content
|
||||
assert "write_file" in content
|
||||
|
||||
|
||||
# ── Length-continuation prompt branching ──────────────────────────────────
|
||||
|
||||
class TestLengthContinuationPromptBranching:
|
||||
"""When finish_reason=length, the continuation prompt that reaches the
|
||||
model has to tell the truth: real truncation vs. network interruption.
|
||||
Lying ("you were truncated") on a partial-stream stub leads the model
|
||||
to no-op ("I wasn't truncated, I'm done"), defeating recovery."""
|
||||
|
||||
def _simulate_branch(self, response_id: str) -> str:
|
||||
"""Return the continuation prompt text the loop would inject for
|
||||
a `finish_reason=length` response with the given id. Mirrors the
|
||||
exact branch in agent/conversation_loop.py."""
|
||||
response = SimpleNamespace(id=response_id)
|
||||
if getattr(response, "id", "") == "partial-stream-stub":
|
||||
return (
|
||||
"[System: The previous response was cut off by a "
|
||||
"network error mid-stream. Continue exactly where "
|
||||
"you left off. Do not restart or repeat prior text. "
|
||||
"Finish the answer directly.]"
|
||||
)
|
||||
return (
|
||||
"[System: Your previous response was truncated by the output "
|
||||
"length limit. Continue exactly where you left off. Do not "
|
||||
"restart or repeat prior text. Finish the answer directly.]"
|
||||
)
|
||||
|
||||
def test_partial_stream_stub_uses_network_prompt(self):
|
||||
prompt = self._simulate_branch("partial-stream-stub")
|
||||
assert "network error mid-stream" in prompt
|
||||
assert "output length limit" not in prompt
|
||||
|
||||
def test_real_truncation_uses_length_prompt(self):
|
||||
prompt = self._simulate_branch("chatcmpl-abc123")
|
||||
assert "output length limit" in prompt
|
||||
assert "network error" not in prompt
|
||||
|
||||
def test_no_id_falls_through_to_length_prompt(self):
|
||||
prompt = self._simulate_branch("")
|
||||
assert "output length limit" in prompt
|
||||
|
||||
|
||||
# ── Integration: live conversation loop ───────────────────────────────────
|
||||
|
||||
@pytest.fixture()
|
||||
def loop_agent():
|
||||
"""AIAgent with a mocked OpenAI client (mirrors test_run_agent's fixture)
|
||||
so we can stage a stub + continuation pair on .chat.completions.create."""
|
||||
from run_agent import AIAgent
|
||||
with (
|
||||
patch("run_agent.get_tool_definitions", return_value=[]),
|
||||
patch("run_agent.check_toolset_requirements", return_value={}),
|
||||
patch("run_agent.OpenAI"),
|
||||
):
|
||||
a = AIAgent(
|
||||
api_key="test-key-1234567890",
|
||||
base_url="https://openrouter.ai/api/v1",
|
||||
quiet_mode=True,
|
||||
skip_context_files=True,
|
||||
skip_memory=True,
|
||||
)
|
||||
a.client = MagicMock()
|
||||
a._cached_system_prompt = "You are helpful."
|
||||
a._use_prompt_caching = False
|
||||
a.tool_delay = 0
|
||||
a.compression_enabled = False
|
||||
a.save_trajectories = False
|
||||
return a
|
||||
|
||||
|
||||
class TestConversationLoopPartialStreamContinuation:
|
||||
"""End-to-end: a partial-stream stub feeds the loop and the loop
|
||||
asks for continuation instead of exiting with finish_reason=stop."""
|
||||
|
||||
def test_partial_stream_stub_does_not_exit_loop_immediately(self, loop_agent):
|
||||
"""The stub from chat_completion_helpers used to exit the loop with
|
||||
text_response(finish_reason=stop). Now finish_reason=length routes
|
||||
through length_continue_retries — the loop persists the partial
|
||||
content and asks the model to continue."""
|
||||
|
||||
from tests.run_agent.test_run_agent import _mock_response, _mock_assistant_msg
|
||||
|
||||
# First API call: the partial-stream stub (length on partial-stream-stub id).
|
||||
partial_stub = SimpleNamespace(
|
||||
id="partial-stream-stub",
|
||||
model="test/model",
|
||||
choices=[SimpleNamespace(
|
||||
index=0,
|
||||
message=_mock_assistant_msg(content="The first half of "),
|
||||
finish_reason="length",
|
||||
)],
|
||||
usage=None,
|
||||
)
|
||||
# Second API call: model continues with the rest, clean stop.
|
||||
continuation = _mock_response(
|
||||
content="the answer is forty-two.", finish_reason="stop",
|
||||
)
|
||||
|
||||
loop_agent.client.chat.completions.create.side_effect = [
|
||||
partial_stub, continuation,
|
||||
]
|
||||
|
||||
with (
|
||||
patch.object(loop_agent, "_persist_session"),
|
||||
patch.object(loop_agent, "_save_trajectory"),
|
||||
patch.object(loop_agent, "_cleanup_task_resources"),
|
||||
):
|
||||
result = loop_agent.run_conversation("ask me something")
|
||||
|
||||
# The loop made TWO API calls (stub + continuation), not one.
|
||||
assert loop_agent.client.chat.completions.create.call_count == 2, (
|
||||
"Partial-stream-stub must trigger a continuation API call, not "
|
||||
"exit the loop after one call."
|
||||
)
|
||||
# The continuation prompt the loop appended must be the network-error
|
||||
# variant, not the "output length limit" lie — otherwise the model
|
||||
# no-ops with "I wasn't truncated, I'm done."
|
||||
# We assert it indirectly by inspecting the second-call kwargs.
|
||||
second_call_kwargs = loop_agent.client.chat.completions.create.call_args_list[1]
|
||||
msgs = second_call_kwargs.kwargs.get("messages") or second_call_kwargs.args[0].get("messages")
|
||||
last_user = next(
|
||||
(m for m in reversed(msgs) if m.get("role") == "user"), None,
|
||||
)
|
||||
assert last_user is not None
|
||||
assert "network error mid-stream" in (last_user.get("content") or ""), (
|
||||
"Continuation prompt for partial-stream-stub must mention the "
|
||||
"network error, not the 'output length limit'."
|
||||
)
|
||||
|
||||
# And the final response stitches both halves together.
|
||||
assert "first half of" in result["final_response"]
|
||||
assert "forty-two" in result["final_response"]
|
||||
Loading…
Add table
Add a link
Reference in a new issue