mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
refactor: simplify and harden PR fixes after review
- Fix cron ThreadPoolExecutor blocking on timeout: use shutdown(wait=False, cancel_futures=True) instead of context manager that waits indefinitely - Extract _dequeue_pending_text() to deduplicate media-placeholder logic in interrupt and normal-completion dequeue paths - Remove hasattr guards for _running_agents_ts: add class-level default so partial test construction works without scattered defensive checks - Move `import concurrent.futures` to top of cron/scheduler.py - Progress throttle: sleep remaining interval instead of busy-looping 0.1s (~15 wakeups per 1.5s window → 1 wakeup) - Deduplicate _load_stt_config() in transcription_tools.py: _has_openai_audio_backend() now delegates to _resolve_openai_audio_client_config()
This commit is contained in:
parent
28380e7aed
commit
0ed28ab80c
3 changed files with 57 additions and 50 deletions
|
|
@ -9,6 +9,7 @@ runs at a time if multiple processes overlap.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import asyncio
|
import asyncio
|
||||||
|
import concurrent.futures
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
|
|
@ -448,22 +449,24 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
|
||||||
# override via env var. Uses a separate thread because
|
# override via env var. Uses a separate thread because
|
||||||
# run_conversation is synchronous.
|
# run_conversation is synchronous.
|
||||||
_cron_timeout = float(os.getenv("HERMES_CRON_TIMEOUT", 600))
|
_cron_timeout = float(os.getenv("HERMES_CRON_TIMEOUT", 600))
|
||||||
import concurrent.futures
|
_cron_pool = concurrent.futures.ThreadPoolExecutor(max_workers=1)
|
||||||
with concurrent.futures.ThreadPoolExecutor(max_workers=1) as _cron_pool:
|
_cron_future = _cron_pool.submit(agent.run_conversation, prompt)
|
||||||
_cron_future = _cron_pool.submit(agent.run_conversation, prompt)
|
try:
|
||||||
try:
|
result = _cron_future.result(timeout=_cron_timeout)
|
||||||
result = _cron_future.result(timeout=_cron_timeout)
|
except concurrent.futures.TimeoutError:
|
||||||
except concurrent.futures.TimeoutError:
|
logger.error(
|
||||||
logger.error(
|
"Job '%s' timed out after %.0fs — interrupting agent",
|
||||||
"Job '%s' timed out after %.0fs — interrupting agent",
|
job_name, _cron_timeout,
|
||||||
job_name, _cron_timeout,
|
)
|
||||||
)
|
if hasattr(agent, "interrupt"):
|
||||||
if hasattr(agent, "interrupt"):
|
agent.interrupt("Cron job timed out")
|
||||||
agent.interrupt("Cron job timed out")
|
_cron_pool.shutdown(wait=False, cancel_futures=True)
|
||||||
raise TimeoutError(
|
raise TimeoutError(
|
||||||
f"Cron job '{job_name}' timed out after "
|
f"Cron job '{job_name}' timed out after "
|
||||||
f"{int(_cron_timeout // 60)} minutes"
|
f"{int(_cron_timeout // 60)} minutes"
|
||||||
)
|
)
|
||||||
|
finally:
|
||||||
|
_cron_pool.shutdown(wait=False)
|
||||||
|
|
||||||
final_response = result.get("final_response", "") or ""
|
final_response = result.get("final_response", "") or ""
|
||||||
# Use a separate variable for log display; keep final_response clean
|
# Use a separate variable for log display; keep final_response clean
|
||||||
|
|
|
||||||
|
|
@ -325,6 +325,21 @@ def _build_media_placeholder(event) -> str:
|
||||||
return "\n".join(parts)
|
return "\n".join(parts)
|
||||||
|
|
||||||
|
|
||||||
|
def _dequeue_pending_text(adapter, session_key: str) -> str | None:
|
||||||
|
"""Consume and return the text of a pending queued message.
|
||||||
|
|
||||||
|
Preserves media context for captionless photo/document events by
|
||||||
|
building a placeholder so the message isn't silently dropped.
|
||||||
|
"""
|
||||||
|
event = adapter.get_pending_message(session_key)
|
||||||
|
if not event:
|
||||||
|
return None
|
||||||
|
text = event.text
|
||||||
|
if not text and getattr(event, "media_urls", None):
|
||||||
|
text = _build_media_placeholder(event)
|
||||||
|
return text
|
||||||
|
|
||||||
|
|
||||||
def _check_unavailable_skill(command_name: str) -> str | None:
|
def _check_unavailable_skill(command_name: str) -> str | None:
|
||||||
"""Check if a command matches a known-but-inactive skill.
|
"""Check if a command matches a known-but-inactive skill.
|
||||||
|
|
||||||
|
|
@ -431,9 +446,13 @@ def _resolve_hermes_bin() -> Optional[list[str]]:
|
||||||
|
|
||||||
|
|
||||||
class GatewayRunner:
|
class GatewayRunner:
|
||||||
|
# Class-level defaults so partial construction in tests doesn't
|
||||||
|
# blow up on attribute access.
|
||||||
|
_running_agents_ts: Dict[str, float] = {}
|
||||||
|
|
||||||
"""
|
"""
|
||||||
Main gateway controller.
|
Main gateway controller.
|
||||||
|
|
||||||
Manages the lifecycle of all platform adapters and routes
|
Manages the lifecycle of all platform adapters and routes
|
||||||
messages to/from the agent.
|
messages to/from the agent.
|
||||||
"""
|
"""
|
||||||
|
|
@ -1726,15 +1745,14 @@ class GatewayRunner:
|
||||||
# longer than the agent timeout, it's a leaked lock from a hung or
|
# longer than the agent timeout, it's a leaked lock from a hung or
|
||||||
# crashed handler. Evict it so the session isn't permanently stuck.
|
# crashed handler. Evict it so the session isn't permanently stuck.
|
||||||
_STALE_TTL = float(os.getenv("HERMES_AGENT_TIMEOUT", 600)) + 60 # timeout + 1 min grace
|
_STALE_TTL = float(os.getenv("HERMES_AGENT_TIMEOUT", 600)) + 60 # timeout + 1 min grace
|
||||||
_ts_dict = getattr(self, "_running_agents_ts", {})
|
_stale_ts = self._running_agents_ts.get(_quick_key, 0)
|
||||||
_stale_ts = _ts_dict.get(_quick_key, 0)
|
|
||||||
if _quick_key in self._running_agents and _stale_ts and (time.time() - _stale_ts) > _STALE_TTL:
|
if _quick_key in self._running_agents and _stale_ts and (time.time() - _stale_ts) > _STALE_TTL:
|
||||||
logger.warning(
|
logger.warning(
|
||||||
"Evicting stale _running_agents entry for %s (age: %.0fs)",
|
"Evicting stale _running_agents entry for %s (age: %.0fs)",
|
||||||
_quick_key[:30], time.time() - _stale_ts,
|
_quick_key[:30], time.time() - _stale_ts,
|
||||||
)
|
)
|
||||||
del self._running_agents[_quick_key]
|
del self._running_agents[_quick_key]
|
||||||
_ts_dict.pop(_quick_key, None)
|
self._running_agents_ts.pop(_quick_key, None)
|
||||||
|
|
||||||
if _quick_key in self._running_agents:
|
if _quick_key in self._running_agents:
|
||||||
if event.get_command() == "status":
|
if event.get_command() == "status":
|
||||||
|
|
@ -2061,8 +2079,7 @@ class GatewayRunner:
|
||||||
# "already running" guard and spin up a duplicate agent for the
|
# "already running" guard and spin up a duplicate agent for the
|
||||||
# same session — corrupting the transcript.
|
# same session — corrupting the transcript.
|
||||||
self._running_agents[_quick_key] = _AGENT_PENDING_SENTINEL
|
self._running_agents[_quick_key] = _AGENT_PENDING_SENTINEL
|
||||||
if hasattr(self, "_running_agents_ts"):
|
self._running_agents_ts[_quick_key] = time.time()
|
||||||
self._running_agents_ts[_quick_key] = time.time()
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
return await self._handle_message_with_agent(event, source, _quick_key)
|
return await self._handle_message_with_agent(event, source, _quick_key)
|
||||||
|
|
@ -2073,8 +2090,7 @@ class GatewayRunner:
|
||||||
# not linger or the session would be permanently locked out.
|
# not linger or the session would be permanently locked out.
|
||||||
if self._running_agents.get(_quick_key) is _AGENT_PENDING_SENTINEL:
|
if self._running_agents.get(_quick_key) is _AGENT_PENDING_SENTINEL:
|
||||||
del self._running_agents[_quick_key]
|
del self._running_agents[_quick_key]
|
||||||
if hasattr(self, "_running_agents_ts"):
|
self._running_agents_ts.pop(_quick_key, None)
|
||||||
self._running_agents_ts.pop(_quick_key, None)
|
|
||||||
|
|
||||||
async def _handle_message_with_agent(self, event, source, _quick_key: str):
|
async def _handle_message_with_agent(self, event, source, _quick_key: str):
|
||||||
"""Inner handler that runs under the _running_agents sentinel guard."""
|
"""Inner handler that runs under the _running_agents sentinel guard."""
|
||||||
|
|
@ -5448,8 +5464,9 @@ class GatewayRunner:
|
||||||
# (grammY auto-retry pattern: proactively rate-limit
|
# (grammY auto-retry pattern: proactively rate-limit
|
||||||
# instead of reacting to 429s.)
|
# instead of reacting to 429s.)
|
||||||
_now = time.monotonic()
|
_now = time.monotonic()
|
||||||
if _now - _last_edit_ts < _PROGRESS_EDIT_INTERVAL:
|
_remaining = _PROGRESS_EDIT_INTERVAL - (_now - _last_edit_ts)
|
||||||
await asyncio.sleep(0.1)
|
if _remaining > 0:
|
||||||
|
await asyncio.sleep(_remaining)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if can_edit and progress_msg_id is not None:
|
if can_edit and progress_msg_id is not None:
|
||||||
|
|
@ -6064,27 +6081,13 @@ class GatewayRunner:
|
||||||
pending = None
|
pending = None
|
||||||
if result and adapter and session_key:
|
if result and adapter and session_key:
|
||||||
if result.get("interrupted"):
|
if result.get("interrupted"):
|
||||||
# Interrupted — consume the interrupt message
|
pending = _dequeue_pending_text(adapter, session_key)
|
||||||
pending_event = adapter.get_pending_message(session_key)
|
if not pending and result.get("interrupt_message"):
|
||||||
if pending_event:
|
|
||||||
pending = pending_event.text
|
|
||||||
# Preserve media context for photo/document events
|
|
||||||
# whose text is empty (no caption). Without this,
|
|
||||||
# captionless photos are silently dropped.
|
|
||||||
if not pending and getattr(pending_event, "media_urls", None):
|
|
||||||
pending = _build_media_placeholder(pending_event)
|
|
||||||
elif result.get("interrupt_message"):
|
|
||||||
pending = result.get("interrupt_message")
|
pending = result.get("interrupt_message")
|
||||||
else:
|
else:
|
||||||
# Normal completion — check for /queue'd messages that were
|
pending = _dequeue_pending_text(adapter, session_key)
|
||||||
# stored without triggering an interrupt.
|
if pending:
|
||||||
pending_event = adapter.get_pending_message(session_key)
|
logger.debug("Processing queued message after agent completion: '%s...'", pending[:40])
|
||||||
if pending_event:
|
|
||||||
pending = pending_event.text
|
|
||||||
if not pending and getattr(pending_event, "media_urls", None):
|
|
||||||
pending = _build_media_placeholder(pending_event)
|
|
||||||
if pending:
|
|
||||||
logger.debug("Processing queued message after agent completion: '%s...'", pending[:40])
|
|
||||||
|
|
||||||
if pending:
|
if pending:
|
||||||
logger.debug("Processing pending message: '%s...'", pending[:40])
|
logger.debug("Processing pending message: '%s...'", pending[:40])
|
||||||
|
|
@ -6159,7 +6162,7 @@ class GatewayRunner:
|
||||||
tracking_task.cancel()
|
tracking_task.cancel()
|
||||||
if session_key and session_key in self._running_agents:
|
if session_key and session_key in self._running_agents:
|
||||||
del self._running_agents[session_key]
|
del self._running_agents[session_key]
|
||||||
if session_key and hasattr(self, "_running_agents_ts"):
|
if session_key:
|
||||||
self._running_agents_ts.pop(session_key, None)
|
self._running_agents_ts.pop(session_key, None)
|
||||||
|
|
||||||
# Wait for cancelled tasks
|
# Wait for cancelled tasks
|
||||||
|
|
|
||||||
|
|
@ -128,10 +128,11 @@ def is_stt_enabled(stt_config: Optional[dict] = None) -> bool:
|
||||||
|
|
||||||
def _has_openai_audio_backend() -> bool:
|
def _has_openai_audio_backend() -> bool:
|
||||||
"""Return True when OpenAI audio can use config credentials, env credentials, or the managed gateway."""
|
"""Return True when OpenAI audio can use config credentials, env credentials, or the managed gateway."""
|
||||||
stt_config = _load_stt_config()
|
try:
|
||||||
openai_cfg = stt_config.get("openai", {})
|
_resolve_openai_audio_client_config()
|
||||||
cfg_api_key = openai_cfg.get("api_key", "")
|
return True
|
||||||
return bool(cfg_api_key or resolve_openai_audio_api_key() or resolve_managed_tool_gateway("openai-audio"))
|
except ValueError:
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
def _find_binary(binary_name: str) -> Optional[str]:
|
def _find_binary(binary_name: str) -> Optional[str]:
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue