mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
fix(streaming): surface dropped tool-call on mid-stream stall (#12072)
When streaming died after text was already delivered to the user but before a tool-call's arguments finished streaming, the partial-stream stub at the end of `_interruptible_streaming_api_call` silently set `tool_calls=None` on the returned message and kept `finish_reason=stop`. The agent treated the turn as complete, the session exited cleanly with code 0, and the attempted action was lost with zero user-facing signal.

Live-observed Apr 2026 with MiniMax M2.7 on a ~6-minute audit task: agent streamed 'Let me write the audit:', started emitting a `write_file` tool call, MiniMax stalled for 240s mid-arguments, the stale-stream detector killed the connection, the stub fired, session ended, no file written, no error shown.

Fix: the streaming accumulator now records each tool-call's name into `result['partial_tool_names']` as soon as the name is known. When the stub builder fires after a partial delivery and finds any recorded tool names, it appends a human-visible warning to the stub's content — and also fires it as a live stream delta so the user sees it immediately, not only in the persisted transcript. The next turn's model also sees the warning in conversation history and can retry on its own. Text-only partial streams keep the original bare-recovery behaviour (no warning).

Validation:

| Scenario | Before | After |
|---|---|---|
| Stream dies mid tool-call, text already sent | Silent exit, no indication | User sees ⚠ warning naming the dropped tool |
| Text-only partial stream | Bare recovered text | Unchanged |
| tests/run_agent/test_streaming.py | 24 passed | 26 passed (2 new) |
This commit is contained in:
parent
285bb2b915
commit
8322b42c6c
2 changed files with 182 additions and 8 deletions
55
run_agent.py
55
run_agent.py
|
|
@ -5579,7 +5579,7 @@ class AIAgent:
|
|||
raise result["error"]
|
||||
return result["response"]
|
||||
|
||||
result = {"response": None, "error": None}
|
||||
result = {"response": None, "error": None, "partial_tool_names": []}
|
||||
request_client_holder = {"client": None}
|
||||
first_delta_fired = {"done": False}
|
||||
deltas_were_sent = {"yes": False} # Track if any deltas were fired (for fallback)
|
||||
|
|
@ -5751,6 +5751,14 @@ class AIAgent:
|
|||
tool_gen_notified.add(idx)
|
||||
_fire_first_delta()
|
||||
self._fire_tool_gen_started(name)
|
||||
# Record the partial tool-call name so the outer
|
||||
# stub-builder can surface a user-visible warning
|
||||
# if streaming dies before this tool's arguments
|
||||
# are fully delivered. Without this, a stall
|
||||
# during tool-call JSON generation lets the stub
|
||||
# in _interruptible_streaming_api_call return `tool_calls=None`, silently
|
||||
# discarding the attempted action.
|
||||
result["partial_tool_names"].append(name)
|
||||
|
||||
if chunk.choices[0].finish_reason:
|
||||
finish_reason = chunk.choices[0].finish_reason
|
||||
|
|
@ -6117,13 +6125,44 @@ class AIAgent:
|
|||
_partial_text = (
|
||||
getattr(self, "_current_streamed_assistant_text", "") or ""
|
||||
).strip() or None
|
||||
logger.warning(
|
||||
"Partial stream delivered before error; returning stub "
|
||||
"response with %s chars of recovered content to prevent "
|
||||
"duplicate messages: %s",
|
||||
len(_partial_text or ""),
|
||||
result["error"],
|
||||
)
|
||||
|
||||
# If the stream died while the model was emitting a tool call,
|
||||
# the stub below will silently set `tool_calls=None` and the
|
||||
# agent loop will treat the turn as complete — the attempted
|
||||
# action is lost with no user-facing signal. Append a
|
||||
# human-visible warning to the stub content so (a) the user
|
||||
# knows something failed, and (b) the next turn's model sees
|
||||
# in conversation history what was attempted and can retry.
|
||||
_partial_names = list(result.get("partial_tool_names") or [])
|
||||
if _partial_names:
|
||||
_name_str = ", ".join(_partial_names[:3])
|
||||
if len(_partial_names) > 3:
|
||||
_name_str += f", +{len(_partial_names) - 3} more"
|
||||
_warn = (
|
||||
f"\n\n⚠ Stream stalled mid tool-call "
|
||||
f"({_name_str}); the action was not executed. "
|
||||
f"Ask me to retry if you want to continue."
|
||||
)
|
||||
_partial_text = (_partial_text or "") + _warn
|
||||
# Also fire as a streaming delta so the user sees it now
|
||||
# instead of only in the persisted transcript.
|
||||
try:
|
||||
self._fire_stream_delta(_warn)
|
||||
except Exception:
|
||||
pass
|
||||
logger.warning(
|
||||
"Partial stream dropped tool call(s) %s after %s chars "
|
||||
"of text; surfaced warning to user: %s",
|
||||
_partial_names, len(_partial_text or ""), result["error"],
|
||||
)
|
||||
else:
|
||||
logger.warning(
|
||||
"Partial stream delivered before error; returning stub "
|
||||
"response with %s chars of recovered content to prevent "
|
||||
"duplicate messages: %s",
|
||||
len(_partial_text or ""),
|
||||
result["error"],
|
||||
)
|
||||
_stub_msg = SimpleNamespace(
|
||||
role="assistant", content=_partial_text, tool_calls=None,
|
||||
reasoning_content=None,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue