mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
fix: reset retry counters after compression and stop poisoning conversation history
Three bugfixes in the agent loop: 1. Reset retry counters after context compression. Without this, pre-compression retry counts carry over, causing the model to hit empty-response recovery immediately after a compression- induced context loss, wasting API calls on a now-valid context. 2. Unmute output in the final-response (no-tool-call) branch. _mute_post_response could be left True from a prior housekeeping turn, silently suppressing empty-response warnings and recovery status that the user should see. 3. Stop injecting 'Calling the X tools...' into assistant message content when falling back to prior-turn content. This mutated conversation history with synthetic text that the model never produced, poisoning subsequent turns.
This commit is contained in:
parent
82f364ffd1
commit
92385679b6
1 changed files with 20 additions and 10 deletions
30
run_agent.py
30
run_agent.py
|
|
@ -8012,6 +8012,15 @@ class AIAgent:
|
|||
# skipping them because conversation_history is still the
|
||||
# pre-compression length.
|
||||
conversation_history = None
|
||||
# Fix: reset retry counters after compression so the model
|
||||
# gets a fresh budget on the compressed context. Without
|
||||
# this, pre-compression retries carry over and the model
|
||||
# hits "(empty)" immediately after compression-induced
|
||||
# context loss.
|
||||
self._empty_content_retries = 0
|
||||
self._thinking_prefill_retries = 0
|
||||
self._last_content_with_tools = None
|
||||
self._mute_post_response = False
|
||||
# Re-estimate after compression
|
||||
_preflight_tokens = estimate_request_tokens_rough(
|
||||
messages,
|
||||
|
|
@ -10202,6 +10211,13 @@ class AIAgent:
|
|||
# No tool calls - this is the final response
|
||||
final_response = assistant_message.content or ""
|
||||
|
||||
# Fix: unmute output when entering the no-tool-call branch
|
||||
# so the user can see empty-response warnings and recovery
|
||||
# status messages. _mute_post_response was set during a
|
||||
# prior housekeeping tool turn and should not silence the
|
||||
# final response path.
|
||||
self._mute_post_response = False
|
||||
|
||||
# Check if response only has think block with no actual content after it
|
||||
if not self._has_content_after_think_block(final_response):
|
||||
# ── Partial stream recovery ─────────────────────
|
||||
|
|
@ -10239,16 +10255,10 @@ class AIAgent:
|
|||
self._emit_status("↻ Empty response after tool calls — using earlier content as final answer")
|
||||
self._last_content_with_tools = None
|
||||
self._empty_content_retries = 0
|
||||
for i in range(len(messages) - 1, -1, -1):
|
||||
msg = messages[i]
|
||||
if msg.get("role") == "assistant" and msg.get("tool_calls"):
|
||||
tool_names = []
|
||||
for tc in msg["tool_calls"]:
|
||||
if not tc or not isinstance(tc, dict): continue
|
||||
fn = tc.get("function", {})
|
||||
tool_names.append(fn.get("name", "unknown"))
|
||||
msg["content"] = f"Calling the {', '.join(tool_names)} tool{'s' if len(tool_names) > 1 else ''}..."
|
||||
break
|
||||
# Do NOT modify the assistant message content — the
|
||||
# old code injected "Calling the X tools..." which
|
||||
# poisoned the conversation history. Just use the
|
||||
# fallback text as the final response and break.
|
||||
final_response = self._strip_think_blocks(fallback).strip()
|
||||
self._response_was_previewed = True
|
||||
break
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue