Merge branch 'main' of github.com:NousResearch/hermes-agent into feat/ink-refactor

This commit is contained in:
Brooklyn Nicholson 2026-04-15 00:02:31 -05:00
commit 561cea0d4a
23 changed files with 1082 additions and 149 deletions

View file

@@ -8013,6 +8013,15 @@ class AIAgent:
# skipping them because conversation_history is still the
# pre-compression length.
conversation_history = None
# Fix: reset retry counters after compression so the model
# gets a fresh budget on the compressed context. Without
# this, pre-compression retries carry over and the model
# hits "(empty)" immediately after compression-induced
# context loss.
self._empty_content_retries = 0
self._thinking_prefill_retries = 0
self._last_content_with_tools = None
self._mute_post_response = False
# Re-estimate after compression
_preflight_tokens = estimate_request_tokens_rough(
messages,
@@ -9304,7 +9313,9 @@ class AIAgent:
"completed": False,
"api_calls": api_call_count,
"error": f"Request payload too large: max compression attempts ({max_compression_attempts}) reached.",
"partial": True
"partial": True,
"failed": True,
"compression_exhausted": True,
}
self._emit_status(f"⚠️ Request payload too large (413) — compression attempt {compression_attempts}/{max_compression_attempts}...")
@@ -9333,7 +9344,9 @@ class AIAgent:
"completed": False,
"api_calls": api_call_count,
"error": "Request payload too large (413). Cannot compress further.",
"partial": True
"partial": True,
"failed": True,
"compression_exhausted": True,
}
# Check for context-length errors BEFORE generic 4xx handler.
@@ -9384,7 +9397,9 @@ class AIAgent:
"completed": False,
"api_calls": api_call_count,
"error": f"Context length exceeded: max compression attempts ({max_compression_attempts}) reached.",
"partial": True
"partial": True,
"failed": True,
"compression_exhausted": True,
}
restart_with_compressed_messages = True
break
@@ -9434,7 +9449,9 @@ class AIAgent:
"completed": False,
"api_calls": api_call_count,
"error": f"Context length exceeded: max compression attempts ({max_compression_attempts}) reached.",
"partial": True
"partial": True,
"failed": True,
"compression_exhausted": True,
}
self._emit_status(f"🗜️ Context too large (~{approx_tokens:,} tokens) — compressing ({compression_attempts}/{max_compression_attempts})...")
@@ -9465,7 +9482,9 @@ class AIAgent:
"completed": False,
"api_calls": api_call_count,
"error": f"Context length exceeded ({approx_tokens:,} tokens). Cannot compress further.",
"partial": True
"partial": True,
"failed": True,
"compression_exhausted": True,
}
# Check for non-retryable client errors. The classifier
@@ -10203,6 +10222,13 @@ class AIAgent:
# No tool calls - this is the final response
final_response = assistant_message.content or ""
# Fix: unmute output when entering the no-tool-call branch
# so the user can see empty-response warnings and recovery
# status messages. _mute_post_response was set during a
# prior housekeeping tool turn and should not silence the
# final response path.
self._mute_post_response = False
# Check if response only has think block with no actual content after it
if not self._has_content_after_think_block(final_response):
# ── Partial stream recovery ─────────────────────
@@ -10240,16 +10266,10 @@ class AIAgent:
self._emit_status("↻ Empty response after tool calls — using earlier content as final answer")
self._last_content_with_tools = None
self._empty_content_retries = 0
for i in range(len(messages) - 1, -1, -1):
msg = messages[i]
if msg.get("role") == "assistant" and msg.get("tool_calls"):
tool_names = []
for tc in msg["tool_calls"]:
if not tc or not isinstance(tc, dict): continue
fn = tc.get("function", {})
tool_names.append(fn.get("name", "unknown"))
msg["content"] = f"Calling the {', '.join(tool_names)} tool{'s' if len(tool_names) > 1 else ''}..."
break
# Do NOT modify the assistant message content — the
# old code injected "Calling the X tools..." which
# poisoned the conversation history. Just use the
# fallback text as the final response and break.
final_response = self._strip_think_blocks(fallback).strip()
self._response_was_previewed = True
break