mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
Merge branch 'main' of github.com:NousResearch/hermes-agent into feat/ink-refactor
This commit is contained in:
commit
baa0de7649
27 changed files with 1594 additions and 151 deletions
147
run_agent.py
147
run_agent.py
|
|
@ -754,6 +754,7 @@ class AIAgent:
|
|||
self._interrupt_requested = False
|
||||
self._interrupt_message = None # Optional message that triggered interrupt
|
||||
self._execution_thread_id: int | None = None # Set at run_conversation() start
|
||||
self._interrupt_thread_signal_pending = False
|
||||
self._client_lock = threading.RLock()
|
||||
|
||||
# Subagent delegation state
|
||||
|
|
@ -1223,14 +1224,27 @@ class AIAgent:
|
|||
logger.warning("Memory provider plugin init failed: %s", _mpe)
|
||||
self._memory_manager = None
|
||||
|
||||
# Inject memory provider tool schemas into the tool surface
|
||||
# Inject memory provider tool schemas into the tool surface.
|
||||
# Skip tools whose names already exist (plugins may register the
|
||||
# same tools via ctx.register_tool(), which lands in self.tools
|
||||
# through get_tool_definitions()). Duplicate function names cause
|
||||
# 400 errors on providers that enforce unique names (e.g. Xiaomi
|
||||
# MiMo via Nous Portal).
|
||||
if self._memory_manager and self.tools is not None:
|
||||
_existing_tool_names = {
|
||||
t.get("function", {}).get("name")
|
||||
for t in self.tools
|
||||
if isinstance(t, dict)
|
||||
}
|
||||
for _schema in self._memory_manager.get_all_tool_schemas():
|
||||
_tname = _schema.get("name", "")
|
||||
if _tname and _tname in _existing_tool_names:
|
||||
continue # already registered via plugin path
|
||||
_wrapped = {"type": "function", "function": _schema}
|
||||
self.tools.append(_wrapped)
|
||||
_tname = _schema.get("name", "")
|
||||
if _tname:
|
||||
self.valid_tool_names.add(_tname)
|
||||
_existing_tool_names.add(_tname)
|
||||
|
||||
# Skills config: nudge interval for skill creation reminders
|
||||
self._skill_nudge_interval = 10
|
||||
|
|
@ -2949,7 +2963,15 @@ class AIAgent:
|
|||
# Signal all tools to abort any in-flight operations immediately.
|
||||
# Scope the interrupt to this agent's execution thread so other
|
||||
# agents running in the same process (gateway) are not affected.
|
||||
_set_interrupt(True, self._execution_thread_id)
|
||||
if self._execution_thread_id is not None:
|
||||
_set_interrupt(True, self._execution_thread_id)
|
||||
self._interrupt_thread_signal_pending = False
|
||||
else:
|
||||
# The interrupt arrived before run_conversation() finished
|
||||
# binding the agent to its execution thread. Defer the tool-level
|
||||
# interrupt signal until startup completes instead of targeting
|
||||
# the caller thread by mistake.
|
||||
self._interrupt_thread_signal_pending = True
|
||||
# Propagate interrupt to any running child agents (subagent delegation)
|
||||
with self._active_children_lock:
|
||||
children_copy = list(self._active_children)
|
||||
|
|
@ -2965,7 +2987,9 @@ class AIAgent:
|
|||
"""Clear any pending interrupt request and the per-thread tool interrupt signal."""
|
||||
self._interrupt_requested = False
|
||||
self._interrupt_message = None
|
||||
_set_interrupt(False, self._execution_thread_id)
|
||||
self._interrupt_thread_signal_pending = False
|
||||
if self._execution_thread_id is not None:
|
||||
_set_interrupt(False, self._execution_thread_id)
|
||||
|
||||
def _touch_activity(self, desc: str) -> None:
|
||||
"""Update the last-activity timestamp and description (thread-safe)."""
|
||||
|
|
@ -5523,9 +5547,27 @@ class AIAgent:
|
|||
|
||||
t = threading.Thread(target=_call, daemon=True)
|
||||
t.start()
|
||||
_last_heartbeat = time.time()
|
||||
_HEARTBEAT_INTERVAL = 30.0 # seconds between gateway activity touches
|
||||
while t.is_alive():
|
||||
t.join(timeout=0.3)
|
||||
|
||||
# Periodic heartbeat: touch the agent's activity tracker so the
|
||||
# gateway's inactivity monitor knows we're alive while waiting
|
||||
# for stream chunks. Without this, long thinking pauses (e.g.
|
||||
# reasoning models) or slow prefill on local providers (Ollama)
|
||||
# trigger false inactivity timeouts. The _call thread touches
|
||||
# activity on each chunk, but the gap between API call start
|
||||
# and first chunk can exceed the gateway timeout — especially
|
||||
# when the stale-stream timeout is disabled (local providers).
|
||||
_hb_now = time.time()
|
||||
if _hb_now - _last_heartbeat >= _HEARTBEAT_INTERVAL:
|
||||
_last_heartbeat = _hb_now
|
||||
_waiting_secs = int(_hb_now - last_chunk_time["t"])
|
||||
self._touch_activity(
|
||||
f"waiting for stream response ({_waiting_secs}s, no chunks yet)"
|
||||
)
|
||||
|
||||
# Detect stale streams: connections kept alive by SSE pings
|
||||
# but delivering no real chunks. Kill the client so the
|
||||
# inner retry loop can start a fresh connection.
|
||||
|
|
@ -7142,8 +7184,22 @@ class AIAgent:
|
|||
# Each slot holds (function_name, function_args, function_result, duration, error_flag)
|
||||
results = [None] * num_tools
|
||||
|
||||
# Touch activity before launching workers so the gateway knows
|
||||
# we're executing tools (not stuck).
|
||||
self._current_tool = tool_names_str
|
||||
self._touch_activity(f"executing {num_tools} tools concurrently: {tool_names_str}")
|
||||
|
||||
def _run_tool(index, tool_call, function_name, function_args):
|
||||
"""Worker function executed in a thread."""
|
||||
# Set the activity callback on THIS worker thread so
|
||||
# _wait_for_process (terminal commands) can fire heartbeats.
|
||||
# The callback is thread-local; the main thread's callback
|
||||
# is invisible to worker threads.
|
||||
try:
|
||||
from tools.environments.base import set_activity_callback
|
||||
set_activity_callback(self._touch_activity)
|
||||
except Exception:
|
||||
pass
|
||||
start = time.time()
|
||||
try:
|
||||
result = self._invoke_tool(function_name, function_args, effective_task_id, tool_call.id)
|
||||
|
|
@ -7173,8 +7229,26 @@ class AIAgent:
|
|||
f = executor.submit(_run_tool, i, tc, name, args)
|
||||
futures.append(f)
|
||||
|
||||
# Wait for all to complete (exceptions are captured inside _run_tool)
|
||||
concurrent.futures.wait(futures)
|
||||
# Wait for all to complete with periodic heartbeats so the
|
||||
# gateway's inactivity monitor doesn't kill us during long
|
||||
# concurrent tool batches.
|
||||
_conc_start = time.time()
|
||||
while True:
|
||||
done, not_done = concurrent.futures.wait(
|
||||
futures, timeout=30.0,
|
||||
)
|
||||
if not not_done:
|
||||
break
|
||||
_conc_elapsed = int(time.time() - _conc_start)
|
||||
_still_running = [
|
||||
parsed_calls[futures.index(f)][1]
|
||||
for f in not_done
|
||||
if f in futures
|
||||
]
|
||||
self._touch_activity(
|
||||
f"concurrent tools running ({_conc_elapsed}s, "
|
||||
f"{len(not_done)} remaining: {', '.join(_still_running[:3])})"
|
||||
)
|
||||
finally:
|
||||
if spinner:
|
||||
# Build a summary message for the spinner stop
|
||||
|
|
@ -7406,6 +7480,16 @@ class AIAgent:
|
|||
old_text=function_args.get("old_text"),
|
||||
store=self._memory_store,
|
||||
)
|
||||
# Bridge: notify external memory provider of built-in memory writes
|
||||
if self._memory_manager and function_args.get("action") in ("add", "replace"):
|
||||
try:
|
||||
self._memory_manager.on_memory_write(
|
||||
function_args.get("action", ""),
|
||||
target,
|
||||
function_args.get("content", ""),
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
tool_duration = time.time() - tool_start_time
|
||||
if self._should_emit_quiet_tool_messages():
|
||||
self._vprint(f" {_get_cute_tool_message_impl('memory', function_args, tool_duration, result=function_result)}")
|
||||
|
|
@ -7882,6 +7966,7 @@ class AIAgent:
|
|||
self._thinking_prefill_retries = 0
|
||||
self._post_tool_empty_retried = False
|
||||
self._last_content_with_tools = None
|
||||
self._last_content_tools_all_housekeeping = False
|
||||
self._mute_post_response = False
|
||||
self._unicode_sanitization_passes = 0
|
||||
|
||||
|
|
@ -8069,6 +8154,7 @@ class AIAgent:
|
|||
self._empty_content_retries = 0
|
||||
self._thinking_prefill_retries = 0
|
||||
self._last_content_with_tools = None
|
||||
self._last_content_tools_all_housekeeping = False
|
||||
self._mute_post_response = False
|
||||
# Re-estimate after compression
|
||||
_preflight_tokens = estimate_request_tokens_rough(
|
||||
|
|
@ -8128,11 +8214,19 @@ class AIAgent:
|
|||
|
||||
# Record the execution thread so interrupt()/clear_interrupt() can
|
||||
# scope the tool-level interrupt signal to THIS agent's thread only.
|
||||
# Must be set before clear_interrupt() which uses it.
|
||||
# Must be set before any thread-scoped interrupt syncing.
|
||||
self._execution_thread_id = threading.current_thread().ident
|
||||
|
||||
# Clear any stale interrupt state at start
|
||||
self.clear_interrupt()
|
||||
# Always clear stale per-thread state from a previous turn. If an
|
||||
# interrupt arrived before startup finished, preserve it and bind it
|
||||
# to this execution thread now instead of dropping it on the floor.
|
||||
_set_interrupt(False, self._execution_thread_id)
|
||||
if self._interrupt_requested:
|
||||
_set_interrupt(True, self._execution_thread_id)
|
||||
self._interrupt_thread_signal_pending = False
|
||||
else:
|
||||
self._interrupt_message = None
|
||||
self._interrupt_thread_signal_pending = False
|
||||
|
||||
# External memory provider: prefetch once before the tool loop.
|
||||
# Reuse the cached result on every iteration to avoid re-calling
|
||||
|
|
@ -10131,6 +10225,7 @@ class AIAgent:
|
|||
tc.function.name in _HOUSEKEEPING_TOOLS
|
||||
for tc in assistant_message.tool_calls
|
||||
)
|
||||
self._last_content_tools_all_housekeeping = _all_housekeeping
|
||||
if _all_housekeeping and self._has_stream_consumers():
|
||||
self._mute_post_response = True
|
||||
elif self.quiet_mode:
|
||||
|
|
@ -10320,15 +10415,22 @@ class AIAgent:
|
|||
break
|
||||
|
||||
# If the previous turn already delivered real content alongside
|
||||
# tool calls (e.g. "You're welcome!" + memory save), the model
|
||||
# has nothing more to say. Use the earlier content immediately
|
||||
# instead of wasting API calls on retries that won't help.
|
||||
# HOUSEKEEPING tool calls (e.g. "You're welcome!" + memory save),
|
||||
# the model has nothing more to say. Use the earlier content
|
||||
# immediately instead of wasting API calls on retries.
|
||||
# NOTE: Only use this shortcut when ALL tools in that turn were
|
||||
# housekeeping (memory, todo, etc.). When substantive tools
|
||||
# were called (terminal, search_files, etc.), the content was
|
||||
# likely mid-task narration ("I'll scan the directory...") and
|
||||
# the empty follow-up means the model choked — let the
|
||||
# post-tool nudge below handle that instead of exiting early.
|
||||
fallback = getattr(self, '_last_content_with_tools', None)
|
||||
if fallback:
|
||||
if fallback and getattr(self, '_last_content_tools_all_housekeeping', False):
|
||||
_turn_exit_reason = "fallback_prior_turn_content"
|
||||
logger.info("Empty follow-up after tool calls — using prior turn content as final response")
|
||||
self._emit_status("↻ Empty response after tool calls — using earlier content as final answer")
|
||||
self._last_content_with_tools = None
|
||||
self._last_content_tools_all_housekeeping = False
|
||||
self._empty_content_retries = 0
|
||||
# Do NOT modify the assistant message content — the
|
||||
# old code injected "Calling the X tools..." which
|
||||
|
|
@ -10339,13 +10441,18 @@ class AIAgent:
|
|||
break
|
||||
|
||||
# ── Post-tool-call empty response nudge ───────────
|
||||
# The model returned empty after executing tool calls
|
||||
# but there's no prior-turn content to fall back on.
|
||||
# The model returned empty after executing tool calls.
|
||||
# This covers two cases:
|
||||
# (a) No prior-turn content at all — model went silent
|
||||
# (b) Prior turn had content + SUBSTANTIVE tools (the
|
||||
# fallback above was skipped because the content
|
||||
# was mid-task narration, not a final answer)
|
||||
# Instead of giving up, nudge the model to continue by
|
||||
# appending a user-level hint. This is the #9400 case:
|
||||
# weaker models (GLM-5, etc.) sometimes return empty
|
||||
# after tool results instead of continuing to the next
|
||||
# step. One retry with a nudge usually fixes it.
|
||||
# weaker models (mimo-v2-pro, GLM-5, etc.) sometimes
|
||||
# return empty after tool results instead of continuing
|
||||
# to the next step. One retry with a nudge usually
|
||||
# fixes it.
|
||||
_prior_was_tool = any(
|
||||
m.get("role") == "tool"
|
||||
for m in messages[-5:] # check recent messages
|
||||
|
|
@ -10355,6 +10462,10 @@ class AIAgent:
|
|||
and not getattr(self, "_post_tool_empty_retried", False)
|
||||
):
|
||||
self._post_tool_empty_retried = True
|
||||
# Clear stale narration so it doesn't resurface
|
||||
# on a later empty response after the nudge.
|
||||
self._last_content_with_tools = None
|
||||
self._last_content_tools_all_housekeeping = False
|
||||
logger.info(
|
||||
"Empty response after tool calls — nudging model "
|
||||
"to continue processing"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue