mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-01 07:01:41 +00:00
fix(conversation-loop): tailor length-continuation prompt for partial stream
The length-continue path's user-facing vprint and continuation prompt both told the model "your response was truncated by the output length limit." That's a lie when the stub came from a partial-stream network error (issue #30963) — and a lie the model can detect, leading to "I wasn't truncated, I'm done" no-op responses that defeat the continuation entirely. Detect the partial-stream-stub via response.id and swap in: - vprint: "Stream interrupted by network error (finish_reason='length' on partial-stream-stub)" - prompt: "[System: The previous response was cut off by a network error mid-stream. Continue exactly where you left off. Do not restart or repeat prior text. Finish the answer directly.]" Real length truncations still see the original "truncated by output length limit" prompt — the model needs to know which class of failure it's recovering from. Same length_continue_retries=3 budget, truncated_response_parts merging, and final-response stitching infrastructure on both branches. Refs: NousResearch/hermes-agent#30963
This commit is contained in:
parent
9140be7c22
commit
20b3703a42
1 changed files with 42 additions and 8 deletions
|
|
@ -1414,7 +1414,18 @@ def run_conversation(
|
|||
finish_reason = "length"
|
||||
|
||||
if finish_reason == "length":
|
||||
agent._vprint(f"{agent.log_prefix}⚠️ Response truncated (finish_reason='length') - model hit max output tokens", force=True)
|
||||
if getattr(response, "id", "") == "partial-stream-stub":
|
||||
agent._vprint(
|
||||
f"{agent.log_prefix}⚠️ Stream interrupted by network error "
|
||||
f"(finish_reason='length' on partial-stream-stub)",
|
||||
force=True,
|
||||
)
|
||||
else:
|
||||
agent._vprint(
|
||||
f"{agent.log_prefix}⚠️ Response truncated "
|
||||
f"(finish_reason='length') - model hit max output tokens",
|
||||
force=True,
|
||||
)
|
||||
|
||||
# Normalize the truncated response to a single OpenAI-style
|
||||
# message shape so text-continuation and tool-call retry
|
||||
|
|
@ -1507,17 +1518,40 @@ def run_conversation(
|
|||
truncated_response_parts.append(assistant_message.content)
|
||||
|
||||
if length_continue_retries < 3:
|
||||
agent._vprint(
|
||||
f"{agent.log_prefix}↻ Requesting continuation "
|
||||
f"({length_continue_retries}/3)..."
|
||||
# Distinguish a real output-token truncation
|
||||
# from a partial-stream-stub network error
|
||||
# (#30963). Same continuation machinery,
|
||||
# but the prompt has to tell the truth or
|
||||
# the model goes off rails ("I wasn't
|
||||
# truncated, I'm done").
|
||||
_is_partial_stream_stub = (
|
||||
getattr(response, "id", "") == "partial-stream-stub"
|
||||
)
|
||||
continue_msg = {
|
||||
"role": "user",
|
||||
"content": (
|
||||
if _is_partial_stream_stub:
|
||||
agent._vprint(
|
||||
f"{agent.log_prefix}↻ Stream interrupted — "
|
||||
f"requesting continuation "
|
||||
f"({length_continue_retries}/3)..."
|
||||
)
|
||||
_continue_content = (
|
||||
"[System: The previous response was cut off by a "
|
||||
"network error mid-stream. Continue exactly where "
|
||||
"you left off. Do not restart or repeat prior text. "
|
||||
"Finish the answer directly.]"
|
||||
)
|
||||
else:
|
||||
agent._vprint(
|
||||
f"{agent.log_prefix}↻ Requesting continuation "
|
||||
f"({length_continue_retries}/3)..."
|
||||
)
|
||||
_continue_content = (
|
||||
"[System: Your previous response was truncated by the output "
|
||||
"length limit. Continue exactly where you left off. Do not "
|
||||
"restart or repeat prior text. Finish the answer directly.]"
|
||||
),
|
||||
)
|
||||
continue_msg = {
|
||||
"role": "user",
|
||||
"content": _continue_content,
|
||||
}
|
||||
messages.append(continue_msg)
|
||||
agent._session_messages = messages
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue