mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
fix: word-wrap spinner, interruptible agent join, and delegate_task interrupt (#10940)
* fix: stop /model from silently rerouting direct providers to OpenRouter (#10300)

  detect_provider_for_model() silently remapped models to OpenRouter when the direct provider's credentials weren't found via env vars.

  Three bugs:
  1. Credential check only looked at env vars from PROVIDER_REGISTRY, missing credential pool entries, auth store, and OAuth tokens
  2. When env var check failed, silently returned ('openrouter', slug) instead of the direct provider the model actually belongs to
  3. Users with valid credentials via non-env-var mechanisms (pool, OAuth, Claude Code tokens) got silently rerouted

  Fix:
  - Expand credential check to also query credential pool and auth store
  - Always return the direct provider match regardless of credential status -- let client init handle missing creds with a clear error rather than silently routing through the wrong provider

  Same philosophy as the provider-required fix: don't guess, don't silently reroute, error clearly when something is missing.

  Closes #10300

* fix: word-wrap spinner, interruptible agent join, and delegate_task interrupt

  Three fixes:
  1. Spinner widget clips long tool commands — prompt_toolkit Window had height=1 and wrap_lines=False. Now uses wrap_lines=True with dynamic height from text length / terminal width. Long commands wrap naturally.
  2. agent_thread.join() blocked forever after interrupt — if the agent thread took time to clean up, the process_loop thread froze. Now polls with 0.2s timeout on the interrupt path, checking _should_exit so double Ctrl+C breaks out immediately.
  3. Root cause of 5-hour CLI hang: delegate_task() used as_completed() with no interrupt check. When subagent children got stuck, the parent blocked forever inside the ThreadPoolExecutor. Now polls with wait(timeout=0.5) and checks parent_agent._interrupt_requested each iteration. Stuck children are reported as interrupted, and the parent returns immediately.
This commit is contained in:
parent
f05590796e
commit
e66b373351
2 changed files with 94 additions and 17 deletions
41
cli.py
41
cli.py
|
|
@ -2013,7 +2013,17 @@ class HermesCLI:
|
|||
"""Return the visible height for the spinner/status text line above the status bar."""
|
||||
if not getattr(self, "_spinner_text", ""):
|
||||
return 0
|
||||
return 0 if self._use_minimal_tui_chrome(width=width) else 1
|
||||
if self._use_minimal_tui_chrome(width=width):
|
||||
return 0
|
||||
# Compute how many lines the spinner text needs when wrapped.
|
||||
# The rendered text is " {emoji} {label} ({elapsed})" — about
|
||||
# len(_spinner_text) + 16 chars for indent + timer suffix.
|
||||
width = width or self._get_tui_terminal_width()
|
||||
if width and width > 10:
|
||||
import math
|
||||
text_len = len(self._spinner_text) + 16 # indent + timer
|
||||
return max(1, math.ceil(text_len / width))
|
||||
return 1
|
||||
|
||||
def _get_voice_status_fragments(self, width: Optional[int] = None):
|
||||
"""Return the voice status bar fragments for the interactive TUI."""
|
||||
|
|
@ -7750,7 +7760,33 @@ class HermesCLI:
|
|||
# Fallback for non-interactive mode (e.g., single-query)
|
||||
agent_thread.join(0.1)
|
||||
|
||||
agent_thread.join() # Ensure agent thread completes
|
||||
# Wait for the agent thread to finish. After an interrupt the
|
||||
# agent may take a few seconds to clean up (kill subprocess, persist
|
||||
# session). Poll instead of a blocking join so the process_loop
|
||||
# stays responsive — if the user sent another interrupt or the
|
||||
# agent gets stuck, we can break out instead of freezing forever.
|
||||
if interrupt_msg is not None:
|
||||
# Interrupt path: poll briefly, then move on. The agent
|
||||
# thread is daemon — it dies on process exit regardless.
|
||||
for _wait_tick in range(50): # 50 * 0.2s = 10s max
|
||||
agent_thread.join(timeout=0.2)
|
||||
if not agent_thread.is_alive():
|
||||
break
|
||||
# Check if user fired ANOTHER interrupt (Ctrl+C sets
|
||||
# _should_exit which process_loop checks on next pass).
|
||||
if getattr(self, '_should_exit', False):
|
||||
break
|
||||
if agent_thread.is_alive():
|
||||
logger.warning(
|
||||
"Agent thread still alive after interrupt "
|
||||
"(thread %s). Daemon thread will be cleaned up "
|
||||
"on exit.",
|
||||
agent_thread.ident,
|
||||
)
|
||||
else:
|
||||
# Normal completion: agent thread should be done already,
|
||||
# but guard against edge cases.
|
||||
agent_thread.join(timeout=30)
|
||||
|
||||
# Proactively clean up async clients whose event loop is dead.
|
||||
# The agent thread may have created AsyncOpenAI clients bound
|
||||
|
|
@ -9043,6 +9079,7 @@ class HermesCLI:
|
|||
spinner_widget = Window(
|
||||
content=FormattedTextControl(get_spinner_text),
|
||||
height=get_spinner_height,
|
||||
wrap_lines=True,
|
||||
)
|
||||
|
||||
spacer = Window(
|
||||
|
|
|
|||
|
|
@ -750,21 +750,61 @@ def delegate_task(
|
|||
)
|
||||
futures[future] = i
|
||||
|
||||
for future in as_completed(futures):
|
||||
try:
|
||||
entry = future.result()
|
||||
except Exception as exc:
|
||||
idx = futures[future]
|
||||
entry = {
|
||||
"task_index": idx,
|
||||
"status": "error",
|
||||
"summary": None,
|
||||
"error": str(exc),
|
||||
"api_calls": 0,
|
||||
"duration_seconds": 0,
|
||||
}
|
||||
results.append(entry)
|
||||
completed_count += 1
|
||||
# Poll futures with interrupt checking. as_completed() blocks
|
||||
# until ALL futures finish — if a child agent gets stuck,
|
||||
# the parent blocks forever even after interrupt propagation.
|
||||
# Instead, use wait() with a short timeout so we can bail
|
||||
# when the parent is interrupted.
|
||||
pending = set(futures.keys())
|
||||
while pending:
|
||||
if getattr(parent_agent, "_interrupt_requested", False) is True:
|
||||
# Parent interrupted — collect whatever finished and
|
||||
# abandon the rest. Children already received the
|
||||
# interrupt signal; we just can't wait forever.
|
||||
for f in pending:
|
||||
idx = futures[f]
|
||||
if f.done():
|
||||
try:
|
||||
entry = f.result()
|
||||
except Exception as exc:
|
||||
entry = {
|
||||
"task_index": idx,
|
||||
"status": "error",
|
||||
"summary": None,
|
||||
"error": str(exc),
|
||||
"api_calls": 0,
|
||||
"duration_seconds": 0,
|
||||
}
|
||||
else:
|
||||
entry = {
|
||||
"task_index": idx,
|
||||
"status": "interrupted",
|
||||
"summary": None,
|
||||
"error": "Parent agent interrupted — child did not finish in time",
|
||||
"api_calls": 0,
|
||||
"duration_seconds": 0,
|
||||
}
|
||||
results.append(entry)
|
||||
completed_count += 1
|
||||
break
|
||||
|
||||
from concurrent.futures import wait as _cf_wait, FIRST_COMPLETED
|
||||
done, pending = _cf_wait(pending, timeout=0.5, return_when=FIRST_COMPLETED)
|
||||
for future in done:
|
||||
try:
|
||||
entry = future.result()
|
||||
except Exception as exc:
|
||||
idx = futures[future]
|
||||
entry = {
|
||||
"task_index": idx,
|
||||
"status": "error",
|
||||
"summary": None,
|
||||
"error": str(exc),
|
||||
"api_calls": 0,
|
||||
"duration_seconds": 0,
|
||||
}
|
||||
results.append(entry)
|
||||
completed_count += 1
|
||||
|
||||
# Print per-task completion line above the spinner
|
||||
idx = entry["task_index"]
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue