mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
Merge branch 'main' of github.com:NousResearch/hermes-agent into feat/ink-refactor
This commit is contained in:
commit
561cea0d4a
23 changed files with 1082 additions and 149 deletions
50
run_agent.py
50
run_agent.py
|
|
@ -8013,6 +8013,15 @@ class AIAgent:
|
|||
# skipping them because conversation_history is still the
|
||||
# pre-compression length.
|
||||
conversation_history = None
|
||||
# Fix: reset retry counters after compression so the model
|
||||
# gets a fresh budget on the compressed context. Without
|
||||
# this, pre-compression retries carry over and the model
|
||||
# hits "(empty)" immediately after compression-induced
|
||||
# context loss.
|
||||
self._empty_content_retries = 0
|
||||
self._thinking_prefill_retries = 0
|
||||
self._last_content_with_tools = None
|
||||
self._mute_post_response = False
|
||||
# Re-estimate after compression
|
||||
_preflight_tokens = estimate_request_tokens_rough(
|
||||
messages,
|
||||
|
|
@ -9304,7 +9313,9 @@ class AIAgent:
|
|||
"completed": False,
|
||||
"api_calls": api_call_count,
|
||||
"error": f"Request payload too large: max compression attempts ({max_compression_attempts}) reached.",
|
||||
"partial": True
|
||||
"partial": True,
|
||||
"failed": True,
|
||||
"compression_exhausted": True,
|
||||
}
|
||||
self._emit_status(f"⚠️ Request payload too large (413) — compression attempt {compression_attempts}/{max_compression_attempts}...")
|
||||
|
||||
|
|
@ -9333,7 +9344,9 @@ class AIAgent:
|
|||
"completed": False,
|
||||
"api_calls": api_call_count,
|
||||
"error": "Request payload too large (413). Cannot compress further.",
|
||||
"partial": True
|
||||
"partial": True,
|
||||
"failed": True,
|
||||
"compression_exhausted": True,
|
||||
}
|
||||
|
||||
# Check for context-length errors BEFORE generic 4xx handler.
|
||||
|
|
@ -9384,7 +9397,9 @@ class AIAgent:
|
|||
"completed": False,
|
||||
"api_calls": api_call_count,
|
||||
"error": f"Context length exceeded: max compression attempts ({max_compression_attempts}) reached.",
|
||||
"partial": True
|
||||
"partial": True,
|
||||
"failed": True,
|
||||
"compression_exhausted": True,
|
||||
}
|
||||
restart_with_compressed_messages = True
|
||||
break
|
||||
|
|
@ -9434,7 +9449,9 @@ class AIAgent:
|
|||
"completed": False,
|
||||
"api_calls": api_call_count,
|
||||
"error": f"Context length exceeded: max compression attempts ({max_compression_attempts}) reached.",
|
||||
"partial": True
|
||||
"partial": True,
|
||||
"failed": True,
|
||||
"compression_exhausted": True,
|
||||
}
|
||||
self._emit_status(f"🗜️ Context too large (~{approx_tokens:,} tokens) — compressing ({compression_attempts}/{max_compression_attempts})...")
|
||||
|
||||
|
|
@ -9465,7 +9482,9 @@ class AIAgent:
|
|||
"completed": False,
|
||||
"api_calls": api_call_count,
|
||||
"error": f"Context length exceeded ({approx_tokens:,} tokens). Cannot compress further.",
|
||||
"partial": True
|
||||
"partial": True,
|
||||
"failed": True,
|
||||
"compression_exhausted": True,
|
||||
}
|
||||
|
||||
# Check for non-retryable client errors. The classifier
|
||||
|
|
@ -10203,6 +10222,13 @@ class AIAgent:
|
|||
# No tool calls - this is the final response
|
||||
final_response = assistant_message.content or ""
|
||||
|
||||
# Fix: unmute output when entering the no-tool-call branch
|
||||
# so the user can see empty-response warnings and recovery
|
||||
# status messages. _mute_post_response was set during a
|
||||
# prior housekeeping tool turn and should not silence the
|
||||
# final response path.
|
||||
self._mute_post_response = False
|
||||
|
||||
# Check if response only has think block with no actual content after it
|
||||
if not self._has_content_after_think_block(final_response):
|
||||
# ── Partial stream recovery ─────────────────────
|
||||
|
|
@ -10240,16 +10266,10 @@ class AIAgent:
|
|||
self._emit_status("↻ Empty response after tool calls — using earlier content as final answer")
|
||||
self._last_content_with_tools = None
|
||||
self._empty_content_retries = 0
|
||||
for i in range(len(messages) - 1, -1, -1):
|
||||
msg = messages[i]
|
||||
if msg.get("role") == "assistant" and msg.get("tool_calls"):
|
||||
tool_names = []
|
||||
for tc in msg["tool_calls"]:
|
||||
if not tc or not isinstance(tc, dict): continue
|
||||
fn = tc.get("function", {})
|
||||
tool_names.append(fn.get("name", "unknown"))
|
||||
msg["content"] = f"Calling the {', '.join(tool_names)} tool{'s' if len(tool_names) > 1 else ''}..."
|
||||
break
|
||||
# Do NOT modify the assistant message content — the
|
||||
# old code injected "Calling the X tools..." which
|
||||
# poisoned the conversation history. Just use the
|
||||
# fallback text as the final response and break.
|
||||
final_response = self._strip_think_blocks(fallback).strip()
|
||||
self._response_was_previewed = True
|
||||
break
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue