mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-23 05:31:23 +00:00
fix(async): close unscheduled coroutines in all threadsafe bridges (#26584)
Wraps every sync->async coroutine-scheduling site in the codebase with a new agent.async_utils.safe_schedule_threadsafe() helper that closes the coroutine on scheduling failure (closed loop, shutdown race, etc.) instead of leaking it as 'coroutine was never awaited' RuntimeWarnings plus reference leaks. 22 production call sites migrated across the codebase: - acp_adapter/events.py, acp_adapter/permissions.py - agent/lsp/manager.py - cron/scheduler.py (media + text delivery paths) - gateway/platforms/feishu.py (5 sites, via existing _submit_on_loop helper which now delegates to safe_schedule_threadsafe) - gateway/run.py (10 sites: telegram rename, agent:step hook, status callback, interim+bg-review, clarify send, exec-approval button+text, temp-bubble cleanup, channel-directory refresh) - plugins/memory/hindsight, plugins/platforms/google_chat - tools/browser_supervisor.py (3), browser_cdp_tool.py, computer_use/cua_backend.py, slash_confirm.py - tools/environments/modal.py (_AsyncWorker) - tools/mcp_tool.py (2 + 8 _run_on_mcp_loop callers converted to factory-style so the coroutine is never constructed on a dead loop) - tui_gateway/ws.py Tests: new tests/agent/test_async_utils.py covers helper behavior under live loop, dead loop, None loop, and scheduling exceptions. Regression tests added at three PR-original sites (acp events, acp permissions, mcp loop runner) mirroring contributor's intent. Live-tested end-to-end: - Helper stress test: 1500 schedules across live/dead/race scenarios, zero leaked coroutines - Race exercised: 5000 schedules with loop killed mid-flight, 100 ok / 4900 None returns, zero leaks - hermes chat -q with terminal tool call (exercises step_callback bridge) - MCP probe against failing subprocess servers + factory path - Real gateway daemon boot + SIGINT shutdown across multiple platform adapter inits - WSTransport 100 live + 50 dead-loop writes - Cron delivery path live + dead loop Salvages PR #2657 — adopts contributor's intent over a much wider site list and a single centralized helper instead of inline try/except at each site. 3 of the original PR's 6 sites no longer exist on main (environments/patches.py deleted, DingTalk refactored to native async); the equivalent fix lives in tools/environments/modal.py instead. Co-authored-by: JithendraNara <jithendranaidunara@gmail.com>
This commit is contained in:
parent
931caf2b2d
commit
4e89c53082
23 changed files with 690 additions and 186 deletions
|
|
@ -31,10 +31,17 @@ def _send_update(
|
||||||
update: Any,
|
update: Any,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Fire-and-forget an ACP session update from a worker thread."""
|
"""Fire-and-forget an ACP session update from a worker thread."""
|
||||||
|
from agent.async_utils import safe_schedule_threadsafe
|
||||||
|
|
||||||
|
future = safe_schedule_threadsafe(
|
||||||
|
conn.session_update(session_id, update),
|
||||||
|
loop,
|
||||||
|
logger=logger,
|
||||||
|
log_message="Failed to send ACP update",
|
||||||
|
)
|
||||||
|
if future is None:
|
||||||
|
return
|
||||||
try:
|
try:
|
||||||
future = asyncio.run_coroutine_threadsafe(
|
|
||||||
conn.session_update(session_id, update), loop
|
|
||||||
)
|
|
||||||
future.result(timeout=5)
|
future.result(timeout=5)
|
||||||
except Exception:
|
except Exception:
|
||||||
logger.debug("Failed to send ACP update", exc_info=True)
|
logger.debug("Failed to send ACP update", exc_info=True)
|
||||||
|
|
|
||||||
|
|
@ -111,21 +111,28 @@ def make_approval_callback(
|
||||||
allow_permanent: bool = True,
|
allow_permanent: bool = True,
|
||||||
**_: object,
|
**_: object,
|
||||||
) -> str:
|
) -> str:
|
||||||
|
from agent.async_utils import safe_schedule_threadsafe
|
||||||
|
|
||||||
options = _build_permission_options(allow_permanent=allow_permanent)
|
options = _build_permission_options(allow_permanent=allow_permanent)
|
||||||
|
|
||||||
future = None
|
tool_call = _build_permission_tool_call(command, description)
|
||||||
|
coro = request_permission_fn(
|
||||||
|
session_id=session_id,
|
||||||
|
tool_call=tool_call,
|
||||||
|
options=options,
|
||||||
|
)
|
||||||
|
future = safe_schedule_threadsafe(
|
||||||
|
coro, loop,
|
||||||
|
logger=logger,
|
||||||
|
log_message="Permission request: failed to schedule on loop",
|
||||||
|
)
|
||||||
|
if future is None:
|
||||||
|
return "deny"
|
||||||
|
|
||||||
try:
|
try:
|
||||||
tool_call = _build_permission_tool_call(command, description)
|
|
||||||
coro = request_permission_fn(
|
|
||||||
session_id=session_id,
|
|
||||||
tool_call=tool_call,
|
|
||||||
options=options,
|
|
||||||
)
|
|
||||||
future = asyncio.run_coroutine_threadsafe(coro, loop)
|
|
||||||
response = future.result(timeout=timeout)
|
response = future.result(timeout=timeout)
|
||||||
except (FutureTimeout, Exception) as exc:
|
except (FutureTimeout, Exception) as exc:
|
||||||
if future is not None:
|
future.cancel()
|
||||||
future.cancel()
|
|
||||||
logger.warning("Permission request timed out or failed: %s", exc)
|
logger.warning("Permission request timed out or failed: %s", exc)
|
||||||
return "deny"
|
return "deny"
|
||||||
|
|
||||||
|
|
|
||||||
68
agent/async_utils.py
Normal file
68
agent/async_utils.py
Normal file
|
|
@ -0,0 +1,68 @@
|
||||||
|
"""Async/sync bridging helpers.
|
||||||
|
|
||||||
|
The codebase has ~30 sites that schedule a coroutine onto an event loop from a
|
||||||
|
worker thread via :func:`asyncio.run_coroutine_threadsafe`. That function can
|
||||||
|
raise :class:`RuntimeError` (e.g. the loop was closed during a shutdown race),
|
||||||
|
and when it does the coroutine object is never awaited and never closed —
|
||||||
|
which triggers a ``"coroutine '<name>' was never awaited"`` RuntimeWarning and
|
||||||
|
leaks the coroutine's frame until GC.
|
||||||
|
|
||||||
|
:func:`safe_schedule_threadsafe` wraps the call, closes the coroutine on
|
||||||
|
scheduling failure, and returns ``None`` (instead of a half-formed future) so
|
||||||
|
callers can branch cleanly:
|
||||||
|
|
||||||
|
fut = safe_schedule_threadsafe(coro, loop)
|
||||||
|
if fut is None:
|
||||||
|
return # or fallback behavior
|
||||||
|
fut.result(timeout=5)
|
||||||
|
|
||||||
|
The helper deliberately does NOT also handle ``future.result()`` failures —
|
||||||
|
that is a separate concern. Once the loop has accepted the coroutine, its
|
||||||
|
lifecycle belongs to the loop, not the scheduling thread.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import logging
|
||||||
|
from concurrent.futures import Future
|
||||||
|
from typing import Any, Coroutine, Optional
|
||||||
|
|
||||||
|
|
||||||
|
_DEFAULT_LOGGER = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def safe_schedule_threadsafe(
|
||||||
|
coro: Coroutine[Any, Any, Any],
|
||||||
|
loop: Optional[asyncio.AbstractEventLoop],
|
||||||
|
*,
|
||||||
|
logger: Optional[logging.Logger] = None,
|
||||||
|
log_message: str = "Failed to schedule coroutine on loop",
|
||||||
|
log_level: int = logging.DEBUG,
|
||||||
|
) -> Optional[Future]:
|
||||||
|
"""Schedule ``coro`` on ``loop`` from a sync context, leak-safe.
|
||||||
|
|
||||||
|
Returns the :class:`concurrent.futures.Future` on success, or ``None`` if
|
||||||
|
the loop is missing or :func:`asyncio.run_coroutine_threadsafe` raised
|
||||||
|
(e.g. the loop was closed during a shutdown race). In all failure paths
|
||||||
|
the coroutine is :meth:`close`-d so it does not trigger
|
||||||
|
``"coroutine was never awaited"`` warnings or leak its frame.
|
||||||
|
|
||||||
|
Callers retain full control over what to do with the returned future
|
||||||
|
(call ``.result(timeout=...)``, attach ``add_done_callback``, ignore it
|
||||||
|
fire-and-forget, etc.).
|
||||||
|
"""
|
||||||
|
log = logger if logger is not None else _DEFAULT_LOGGER
|
||||||
|
|
||||||
|
if loop is None:
|
||||||
|
if asyncio.iscoroutine(coro):
|
||||||
|
coro.close()
|
||||||
|
log.log(log_level, "%s: loop is None", log_message)
|
||||||
|
return None
|
||||||
|
|
||||||
|
try:
|
||||||
|
return asyncio.run_coroutine_threadsafe(coro, loop)
|
||||||
|
except Exception as exc:
|
||||||
|
if asyncio.iscoroutine(coro):
|
||||||
|
coro.close()
|
||||||
|
log.log(log_level, "%s: %s", log_message, exc)
|
||||||
|
return None
|
||||||
|
|
@ -107,9 +107,14 @@ class _BackgroundLoop:
|
||||||
|
|
||||||
Returns the coroutine's result, or raises its exception.
|
Returns the coroutine's result, or raises its exception.
|
||||||
"""
|
"""
|
||||||
|
from agent.async_utils import safe_schedule_threadsafe
|
||||||
if self._loop is None:
|
if self._loop is None:
|
||||||
|
if asyncio.iscoroutine(coro):
|
||||||
|
coro.close()
|
||||||
raise RuntimeError("background loop not started")
|
raise RuntimeError("background loop not started")
|
||||||
fut: ConcurrentFuture = asyncio.run_coroutine_threadsafe(coro, self._loop)
|
fut = safe_schedule_threadsafe(coro, self._loop)
|
||||||
|
if fut is None:
|
||||||
|
raise RuntimeError("background loop not running")
|
||||||
try:
|
try:
|
||||||
return fut.result(timeout=timeout)
|
return fut.result(timeout=timeout)
|
||||||
except Exception:
|
except Exception:
|
||||||
|
|
|
||||||
|
|
@ -464,7 +464,14 @@ def _send_media_via_adapter(
|
||||||
else:
|
else:
|
||||||
coro = adapter.send_document(chat_id=chat_id, file_path=media_path, metadata=metadata)
|
coro = adapter.send_document(chat_id=chat_id, file_path=media_path, metadata=metadata)
|
||||||
|
|
||||||
future = asyncio.run_coroutine_threadsafe(coro, loop)
|
from agent.async_utils import safe_schedule_threadsafe
|
||||||
|
future = safe_schedule_threadsafe(coro, loop)
|
||||||
|
if future is None:
|
||||||
|
logger.warning(
|
||||||
|
"Job '%s': cannot send media %s, gateway loop unavailable",
|
||||||
|
job.get("id", "?"), media_path,
|
||||||
|
)
|
||||||
|
return
|
||||||
try:
|
try:
|
||||||
result = future.result(timeout=30)
|
result = future.result(timeout=30)
|
||||||
except TimeoutError:
|
except TimeoutError:
|
||||||
|
|
@ -585,22 +592,26 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option
|
||||||
text_to_send = cleaned_delivery_content.strip()
|
text_to_send = cleaned_delivery_content.strip()
|
||||||
adapter_ok = True
|
adapter_ok = True
|
||||||
if text_to_send:
|
if text_to_send:
|
||||||
future = asyncio.run_coroutine_threadsafe(
|
from agent.async_utils import safe_schedule_threadsafe
|
||||||
|
future = safe_schedule_threadsafe(
|
||||||
runtime_adapter.send(chat_id, text_to_send, metadata=send_metadata),
|
runtime_adapter.send(chat_id, text_to_send, metadata=send_metadata),
|
||||||
loop,
|
loop,
|
||||||
)
|
)
|
||||||
try:
|
if future is None:
|
||||||
send_result = future.result(timeout=60)
|
adapter_ok = False
|
||||||
except TimeoutError:
|
else:
|
||||||
future.cancel()
|
try:
|
||||||
raise
|
send_result = future.result(timeout=60)
|
||||||
if send_result and not getattr(send_result, "success", True):
|
except TimeoutError:
|
||||||
err = getattr(send_result, "error", "unknown")
|
future.cancel()
|
||||||
logger.warning(
|
raise
|
||||||
"Job '%s': live adapter send to %s:%s failed (%s), falling back to standalone",
|
if send_result and not getattr(send_result, "success", True):
|
||||||
job["id"], platform_name, chat_id, err,
|
err = getattr(send_result, "error", "unknown")
|
||||||
)
|
logger.warning(
|
||||||
adapter_ok = False # fall through to standalone path
|
"Job '%s': live adapter send to %s:%s failed (%s), falling back to standalone",
|
||||||
|
job["id"], platform_name, chat_id, err,
|
||||||
|
)
|
||||||
|
adapter_ok = False # fall through to standalone path
|
||||||
|
|
||||||
# Send extracted media files as native attachments via the live adapter
|
# Send extracted media files as native attachments via the live adapter
|
||||||
if adapter_ok and media_files:
|
if adapter_ok and media_files:
|
||||||
|
|
|
||||||
|
|
@ -2273,11 +2273,7 @@ class FeishuAdapter(BasePlatformAdapter):
|
||||||
daemon=True,
|
daemon=True,
|
||||||
).start()
|
).start()
|
||||||
return
|
return
|
||||||
future = asyncio.run_coroutine_threadsafe(
|
self._submit_on_loop(loop, self._handle_message_event_data(data))
|
||||||
self._handle_message_event_data(data),
|
|
||||||
loop,
|
|
||||||
)
|
|
||||||
future.add_done_callback(self._log_background_failure)
|
|
||||||
|
|
||||||
def _enqueue_pending_inbound_event(self, data: Any) -> bool:
|
def _enqueue_pending_inbound_event(self, data: Any) -> bool:
|
||||||
"""Append an event to the pending-inbound queue.
|
"""Append an event to the pending-inbound queue.
|
||||||
|
|
@ -2353,16 +2349,12 @@ class FeishuAdapter(BasePlatformAdapter):
|
||||||
dispatched = 0
|
dispatched = 0
|
||||||
requeue: List[Any] = []
|
requeue: List[Any] = []
|
||||||
for event in batch:
|
for event in batch:
|
||||||
try:
|
if self._submit_on_loop(
|
||||||
fut = asyncio.run_coroutine_threadsafe(
|
loop, self._handle_message_event_data(event)
|
||||||
self._handle_message_event_data(event),
|
):
|
||||||
loop,
|
|
||||||
)
|
|
||||||
fut.add_done_callback(self._log_background_failure)
|
|
||||||
dispatched += 1
|
dispatched += 1
|
||||||
except RuntimeError:
|
else:
|
||||||
# Loop closed between check and submit — requeue
|
# Loop closed/unavailable — requeue and poll again.
|
||||||
# and poll again.
|
|
||||||
requeue.append(event)
|
requeue.append(event)
|
||||||
if requeue:
|
if requeue:
|
||||||
with self._pending_inbound_lock:
|
with self._pending_inbound_lock:
|
||||||
|
|
@ -2466,11 +2458,10 @@ class FeishuAdapter(BasePlatformAdapter):
|
||||||
if not self._loop_accepts_callbacks(loop):
|
if not self._loop_accepts_callbacks(loop):
|
||||||
logger.warning("[Feishu] Dropping drive comment event before adapter loop is ready")
|
logger.warning("[Feishu] Dropping drive comment event before adapter loop is ready")
|
||||||
return
|
return
|
||||||
future = asyncio.run_coroutine_threadsafe(
|
self._submit_on_loop(
|
||||||
handle_drive_comment_event(self._client, data, self_open_id=self._bot_open_id),
|
|
||||||
loop,
|
loop,
|
||||||
|
handle_drive_comment_event(self._client, data, self_open_id=self._bot_open_id),
|
||||||
)
|
)
|
||||||
future.add_done_callback(self._log_background_failure)
|
|
||||||
|
|
||||||
def _on_reaction_event(self, event_type: str, data: Any) -> None:
|
def _on_reaction_event(self, event_type: str, data: Any) -> None:
|
||||||
"""Route user reactions on bot messages as synthetic text events."""
|
"""Route user reactions on bot messages as synthetic text events."""
|
||||||
|
|
@ -2498,11 +2489,7 @@ class FeishuAdapter(BasePlatformAdapter):
|
||||||
or bool(getattr(loop, "is_closed", lambda: False)())
|
or bool(getattr(loop, "is_closed", lambda: False)())
|
||||||
):
|
):
|
||||||
return
|
return
|
||||||
future = asyncio.run_coroutine_threadsafe(
|
self._submit_on_loop(loop, self._handle_reaction_event(event_type, data))
|
||||||
self._handle_reaction_event(event_type, data),
|
|
||||||
loop,
|
|
||||||
)
|
|
||||||
future.add_done_callback(self._log_background_failure)
|
|
||||||
|
|
||||||
def _on_card_action_trigger(self, data: Any) -> Any:
|
def _on_card_action_trigger(self, data: Any) -> Any:
|
||||||
"""Handle card-action callback from the Feishu SDK (synchronous).
|
"""Handle card-action callback from the Feishu SDK (synchronous).
|
||||||
|
|
@ -2548,11 +2535,14 @@ class FeishuAdapter(BasePlatformAdapter):
|
||||||
|
|
||||||
def _submit_on_loop(self, loop: Any, coro: Any) -> bool:
|
def _submit_on_loop(self, loop: Any, coro: Any) -> bool:
|
||||||
"""Schedule background work on the adapter loop with shared failure logging."""
|
"""Schedule background work on the adapter loop with shared failure logging."""
|
||||||
try:
|
from agent.async_utils import safe_schedule_threadsafe
|
||||||
future = asyncio.run_coroutine_threadsafe(coro, loop)
|
future = safe_schedule_threadsafe(
|
||||||
except Exception:
|
coro, loop,
|
||||||
coro.close()
|
logger=logger,
|
||||||
logger.warning("[Feishu] Failed to schedule background callback work", exc_info=True)
|
log_message="[Feishu] Failed to schedule background callback work",
|
||||||
|
log_level=logging.WARNING,
|
||||||
|
)
|
||||||
|
if future is None:
|
||||||
return False
|
return False
|
||||||
future.add_done_callback(self._log_background_failure)
|
future.add_done_callback(self._log_background_failure)
|
||||||
return True
|
return True
|
||||||
|
|
|
||||||
206
gateway/run.py
206
gateway/run.py
|
|
@ -50,6 +50,7 @@ from typing import Dict, Optional, Any, List, Union
|
||||||
# gateway is a long-running daemon, so its boot cost matters less than
|
# gateway is a long-running daemon, so its boot cost matters less than
|
||||||
# preserving the established test-patch surface.
|
# preserving the established test-patch surface.
|
||||||
from agent.account_usage import fetch_account_usage, render_account_usage_lines
|
from agent.account_usage import fetch_account_usage, render_account_usage_lines
|
||||||
|
from agent.async_utils import safe_schedule_threadsafe
|
||||||
from agent.i18n import t
|
from agent.i18n import t
|
||||||
from hermes_cli.config import cfg_get
|
from hermes_cli.config import cfg_get
|
||||||
|
|
||||||
|
|
@ -11217,10 +11218,14 @@ class GatewayRunner:
|
||||||
copied_source = dataclasses.replace(source)
|
copied_source = dataclasses.replace(source)
|
||||||
except Exception:
|
except Exception:
|
||||||
copied_source = source
|
copied_source = source
|
||||||
future = asyncio.run_coroutine_threadsafe(
|
future = safe_schedule_threadsafe(
|
||||||
self._rename_telegram_topic_for_session_title(copied_source, session_id, title),
|
self._rename_telegram_topic_for_session_title(copied_source, session_id, title),
|
||||||
loop,
|
loop,
|
||||||
|
logger=logger,
|
||||||
|
log_message="Telegram topic title rename failed to schedule",
|
||||||
)
|
)
|
||||||
|
if future is None:
|
||||||
|
return
|
||||||
def _log_rename_failure(fut) -> None:
|
def _log_rename_failure(fut) -> None:
|
||||||
try:
|
try:
|
||||||
fut.result()
|
fut.result()
|
||||||
|
|
@ -14810,29 +14815,28 @@ class GatewayRunner:
|
||||||
def _step_callback_sync(iteration: int, prev_tools: list) -> None:
|
def _step_callback_sync(iteration: int, prev_tools: list) -> None:
|
||||||
if not _run_still_current():
|
if not _run_still_current():
|
||||||
return
|
return
|
||||||
try:
|
# prev_tools may be list[str] or list[dict] with "name"/"result"
|
||||||
# prev_tools may be list[str] or list[dict] with "name"/"result"
|
# keys. Normalise to keep "tool_names" backward-compatible for
|
||||||
# keys. Normalise to keep "tool_names" backward-compatible for
|
# user-authored hooks that do ', '.join(tool_names)'.
|
||||||
# user-authored hooks that do ', '.join(tool_names)'.
|
_names: list[str] = []
|
||||||
_names: list[str] = []
|
for _t in (prev_tools or []):
|
||||||
for _t in (prev_tools or []):
|
if isinstance(_t, dict):
|
||||||
if isinstance(_t, dict):
|
_names.append(_t.get("name") or "")
|
||||||
_names.append(_t.get("name") or "")
|
else:
|
||||||
else:
|
_names.append(str(_t))
|
||||||
_names.append(str(_t))
|
safe_schedule_threadsafe(
|
||||||
asyncio.run_coroutine_threadsafe(
|
_hooks_ref.emit("agent:step", {
|
||||||
_hooks_ref.emit("agent:step", {
|
"platform": source.platform.value if source.platform else "",
|
||||||
"platform": source.platform.value if source.platform else "",
|
"user_id": source.user_id,
|
||||||
"user_id": source.user_id,
|
"session_id": session_id,
|
||||||
"session_id": session_id,
|
"iteration": iteration,
|
||||||
"iteration": iteration,
|
"tool_names": _names,
|
||||||
"tool_names": _names,
|
"tools": prev_tools,
|
||||||
"tools": prev_tools,
|
}),
|
||||||
}),
|
_loop_for_step,
|
||||||
_loop_for_step,
|
logger=logger,
|
||||||
)
|
log_message="agent:step hook scheduling error",
|
||||||
except Exception as _e:
|
)
|
||||||
logger.debug("agent:step hook error: %s", _e)
|
|
||||||
|
|
||||||
# Bridge sync status_callback → async adapter.send for context pressure
|
# Bridge sync status_callback → async adapter.send for context pressure
|
||||||
_status_adapter = self.adapters.get(source.platform)
|
_status_adapter = self.adapters.get(source.platform)
|
||||||
|
|
@ -14852,27 +14856,28 @@ class GatewayRunner:
|
||||||
def _status_callback_sync(event_type: str, message: str) -> None:
|
def _status_callback_sync(event_type: str, message: str) -> None:
|
||||||
if not _status_adapter or not _run_still_current():
|
if not _status_adapter or not _run_still_current():
|
||||||
return
|
return
|
||||||
try:
|
_fut = safe_schedule_threadsafe(
|
||||||
_fut = asyncio.run_coroutine_threadsafe(
|
_status_adapter.send(
|
||||||
_status_adapter.send(
|
_status_chat_id,
|
||||||
_status_chat_id,
|
message,
|
||||||
message,
|
metadata=_status_thread_metadata,
|
||||||
metadata=_status_thread_metadata,
|
),
|
||||||
),
|
_loop_for_step,
|
||||||
_loop_for_step,
|
logger=logger,
|
||||||
)
|
log_message=f"status_callback ({event_type}) scheduling error",
|
||||||
if _cleanup_progress:
|
)
|
||||||
def _track_status_id(fut) -> None:
|
if _fut is None:
|
||||||
try:
|
return
|
||||||
res = fut.result()
|
if _cleanup_progress:
|
||||||
except Exception:
|
def _track_status_id(fut) -> None:
|
||||||
return
|
try:
|
||||||
mid = getattr(res, "message_id", None)
|
res = fut.result()
|
||||||
if getattr(res, "success", False) and mid:
|
except Exception:
|
||||||
_cleanup_msg_ids.append(str(mid))
|
return
|
||||||
_fut.add_done_callback(_track_status_id)
|
mid = getattr(res, "message_id", None)
|
||||||
except Exception as _e:
|
if getattr(res, "success", False) and mid:
|
||||||
logger.debug("status_callback error (%s): %s", event_type, _e)
|
_cleanup_msg_ids.append(str(mid))
|
||||||
|
_fut.add_done_callback(_track_status_id)
|
||||||
|
|
||||||
def run_sync():
|
def run_sync():
|
||||||
# The conditional re-assignment of `message` further below
|
# The conditional re-assignment of `message` further below
|
||||||
|
|
@ -15026,17 +15031,16 @@ class GatewayRunner:
|
||||||
return
|
return
|
||||||
if already_streamed or not _status_adapter or not str(text or "").strip():
|
if already_streamed or not _status_adapter or not str(text or "").strip():
|
||||||
return
|
return
|
||||||
try:
|
safe_schedule_threadsafe(
|
||||||
asyncio.run_coroutine_threadsafe(
|
_status_adapter.send(
|
||||||
_status_adapter.send(
|
_status_chat_id,
|
||||||
_status_chat_id,
|
text,
|
||||||
text,
|
metadata=_status_thread_metadata,
|
||||||
metadata=_status_thread_metadata,
|
),
|
||||||
),
|
_loop_for_step,
|
||||||
_loop_for_step,
|
logger=logger,
|
||||||
)
|
log_message="interim_assistant_callback scheduling error",
|
||||||
except Exception as _e:
|
)
|
||||||
logger.debug("interim_assistant_callback error: %s", _e)
|
|
||||||
|
|
||||||
turn_route = self._resolve_turn_agent_config(message, model, runtime_kwargs)
|
turn_route = self._resolve_turn_agent_config(message, model, runtime_kwargs)
|
||||||
|
|
||||||
|
|
@ -15125,17 +15129,16 @@ class GatewayRunner:
|
||||||
def _deliver_bg_review_message(message: str) -> None:
|
def _deliver_bg_review_message(message: str) -> None:
|
||||||
if not _status_adapter or not _run_still_current():
|
if not _status_adapter or not _run_still_current():
|
||||||
return
|
return
|
||||||
try:
|
safe_schedule_threadsafe(
|
||||||
asyncio.run_coroutine_threadsafe(
|
_status_adapter.send(
|
||||||
_status_adapter.send(
|
_status_chat_id,
|
||||||
_status_chat_id,
|
message,
|
||||||
message,
|
metadata=_status_thread_metadata,
|
||||||
metadata=_status_thread_metadata,
|
),
|
||||||
),
|
_loop_for_step,
|
||||||
_loop_for_step,
|
logger=logger,
|
||||||
)
|
log_message="background_review_callback scheduling error",
|
||||||
except Exception as _e:
|
)
|
||||||
logger.debug("background_review_callback error: %s", _e)
|
|
||||||
|
|
||||||
def _release_bg_review_messages() -> None:
|
def _release_bg_review_messages() -> None:
|
||||||
_bg_review_release.set()
|
_bg_review_release.set()
|
||||||
|
|
@ -15207,23 +15210,28 @@ class GatewayRunner:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
send_ok = False
|
send_ok = False
|
||||||
try:
|
fut = safe_schedule_threadsafe(
|
||||||
fut = asyncio.run_coroutine_threadsafe(
|
_status_adapter.send_clarify(
|
||||||
_status_adapter.send_clarify(
|
chat_id=_status_chat_id,
|
||||||
chat_id=_status_chat_id,
|
question=question,
|
||||||
question=question,
|
choices=list(choices) if choices else None,
|
||||||
choices=list(choices) if choices else None,
|
clarify_id=clarify_id,
|
||||||
clarify_id=clarify_id,
|
session_key=session_key or "",
|
||||||
session_key=session_key or "",
|
metadata=_status_thread_metadata,
|
||||||
metadata=_status_thread_metadata,
|
),
|
||||||
),
|
_loop_for_step,
|
||||||
_loop_for_step,
|
logger=logger,
|
||||||
)
|
log_message="Clarify send failed to schedule",
|
||||||
result = fut.result(timeout=15)
|
)
|
||||||
send_ok = bool(getattr(result, "success", False))
|
if fut is None:
|
||||||
except Exception as exc:
|
|
||||||
logger.warning("Clarify send failed: %s", exc)
|
|
||||||
send_ok = False
|
send_ok = False
|
||||||
|
else:
|
||||||
|
try:
|
||||||
|
result = fut.result(timeout=15)
|
||||||
|
send_ok = bool(getattr(result, "success", False))
|
||||||
|
except Exception as exc:
|
||||||
|
logger.warning("Clarify send failed: %s", exc)
|
||||||
|
send_ok = False
|
||||||
|
|
||||||
if not send_ok:
|
if not send_ok:
|
||||||
# Couldn't deliver the prompt — clean up and return
|
# Couldn't deliver the prompt — clean up and return
|
||||||
|
|
@ -15343,7 +15351,7 @@ class GatewayRunner:
|
||||||
# false positives from MagicMock auto-attribute creation in tests.
|
# false positives from MagicMock auto-attribute creation in tests.
|
||||||
if getattr(type(_status_adapter), "send_exec_approval", None) is not None:
|
if getattr(type(_status_adapter), "send_exec_approval", None) is not None:
|
||||||
try:
|
try:
|
||||||
_approval_result = asyncio.run_coroutine_threadsafe(
|
_approval_fut = safe_schedule_threadsafe(
|
||||||
_status_adapter.send_exec_approval(
|
_status_adapter.send_exec_approval(
|
||||||
chat_id=_status_chat_id,
|
chat_id=_status_chat_id,
|
||||||
command=cmd,
|
command=cmd,
|
||||||
|
|
@ -15352,7 +15360,12 @@ class GatewayRunner:
|
||||||
metadata=_status_thread_metadata,
|
metadata=_status_thread_metadata,
|
||||||
),
|
),
|
||||||
_loop_for_step,
|
_loop_for_step,
|
||||||
).result(timeout=15)
|
logger=logger,
|
||||||
|
log_message="send_exec_approval scheduling error",
|
||||||
|
)
|
||||||
|
if _approval_fut is None:
|
||||||
|
raise RuntimeError("send_exec_approval: loop unavailable")
|
||||||
|
_approval_result = _approval_fut.result(timeout=15)
|
||||||
if _approval_result.success:
|
if _approval_result.success:
|
||||||
return
|
return
|
||||||
logger.warning(
|
logger.warning(
|
||||||
|
|
@ -15374,14 +15387,18 @@ class GatewayRunner:
|
||||||
f"for the session, `/approve always` to approve permanently, or `/deny` to cancel."
|
f"for the session, `/approve always` to approve permanently, or `/deny` to cancel."
|
||||||
)
|
)
|
||||||
try:
|
try:
|
||||||
asyncio.run_coroutine_threadsafe(
|
_approval_send_fut = safe_schedule_threadsafe(
|
||||||
_status_adapter.send(
|
_status_adapter.send(
|
||||||
_status_chat_id,
|
_status_chat_id,
|
||||||
msg,
|
msg,
|
||||||
metadata=_status_thread_metadata,
|
metadata=_status_thread_metadata,
|
||||||
),
|
),
|
||||||
_loop_for_step,
|
_loop_for_step,
|
||||||
).result(timeout=15)
|
logger=logger,
|
||||||
|
log_message="Approval text-send scheduling error",
|
||||||
|
)
|
||||||
|
if _approval_send_fut is not None:
|
||||||
|
_approval_send_fut.result(timeout=15)
|
||||||
except Exception as _e:
|
except Exception as _e:
|
||||||
logger.error("Failed to send approval request: %s", _e)
|
logger.error("Failed to send approval request: %s", _e)
|
||||||
|
|
||||||
|
|
@ -16343,7 +16360,11 @@ class GatewayRunner:
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
try:
|
try:
|
||||||
asyncio.run_coroutine_threadsafe(_delete_all(), _loop_snapshot)
|
safe_schedule_threadsafe(
|
||||||
|
_delete_all(), _loop_snapshot,
|
||||||
|
logger=logger,
|
||||||
|
log_message="Temp bubble cleanup scheduling error",
|
||||||
|
)
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
@ -16400,10 +16421,13 @@ def _start_cron_ticker(stop_event: threading.Event, adapters=None, loop=None, in
|
||||||
# this ticker runs in a background thread. Schedule onto
|
# this ticker runs in a background thread. Schedule onto
|
||||||
# the gateway event loop and wait briefly for completion
|
# the gateway event loop and wait briefly for completion
|
||||||
# so refresh failures are still logged via the except.
|
# so refresh failures are still logged via the except.
|
||||||
fut = asyncio.run_coroutine_threadsafe(
|
fut = safe_schedule_threadsafe(
|
||||||
build_channel_directory(adapters), loop
|
build_channel_directory(adapters), loop,
|
||||||
|
logger=logger,
|
||||||
|
log_message="Channel directory refresh scheduling error",
|
||||||
)
|
)
|
||||||
fut.result(timeout=30)
|
if fut is not None:
|
||||||
|
fut.result(timeout=30)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.debug("Channel directory refresh error: %s", e)
|
logger.debug("Channel directory refresh error: %s", e)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -221,8 +221,11 @@ def _get_loop() -> asyncio.AbstractEventLoop:
|
||||||
|
|
||||||
def _run_sync(coro, timeout: float = _DEFAULT_TIMEOUT):
|
def _run_sync(coro, timeout: float = _DEFAULT_TIMEOUT):
|
||||||
"""Schedule *coro* on the shared loop and block until done."""
|
"""Schedule *coro* on the shared loop and block until done."""
|
||||||
|
from agent.async_utils import safe_schedule_threadsafe
|
||||||
loop = _get_loop()
|
loop = _get_loop()
|
||||||
future = asyncio.run_coroutine_threadsafe(coro, loop)
|
future = safe_schedule_threadsafe(coro, loop)
|
||||||
|
if future is None:
|
||||||
|
raise RuntimeError("Hindsight loop unavailable")
|
||||||
return future.result(timeout=timeout)
|
return future.result(timeout=timeout)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -670,10 +670,18 @@ class GoogleChatAdapter(BasePlatformAdapter):
|
||||||
logger.warning("[GoogleChat] Loop not accepting callbacks; dropping event")
|
logger.warning("[GoogleChat] Loop not accepting callbacks; dropping event")
|
||||||
return
|
return
|
||||||
try:
|
try:
|
||||||
future = asyncio.run_coroutine_threadsafe(coro, loop)
|
from agent.async_utils import safe_schedule_threadsafe
|
||||||
|
future = safe_schedule_threadsafe(
|
||||||
|
coro, loop,
|
||||||
|
logger=logger,
|
||||||
|
log_message="[GoogleChat] Failed to schedule background callback",
|
||||||
|
log_level=logging.WARNING,
|
||||||
|
)
|
||||||
except RuntimeError:
|
except RuntimeError:
|
||||||
logger.warning("[GoogleChat] Loop closed between check and submit")
|
logger.warning("[GoogleChat] Loop closed between check and submit")
|
||||||
return
|
return
|
||||||
|
if future is None:
|
||||||
|
return
|
||||||
future.add_done_callback(self._log_background_failure)
|
future.add_done_callback(self._log_background_failure)
|
||||||
|
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
|
|
|
||||||
|
|
@ -62,6 +62,7 @@ AUTHOR_MAP = {
|
||||||
"nidhi2894@gmail.com": "nidhi-singh02",
|
"nidhi2894@gmail.com": "nidhi-singh02",
|
||||||
"30312689+aashizpoudel@users.noreply.github.com": "aashizpoudel",
|
"30312689+aashizpoudel@users.noreply.github.com": "aashizpoudel",
|
||||||
"oleksii.lisikh@gmail.com": "olisikh",
|
"oleksii.lisikh@gmail.com": "olisikh",
|
||||||
|
"jithendranaidunara@gmail.com": "JithendraNara",
|
||||||
"jeremy@geocaching.com": "outdoorsea",
|
"jeremy@geocaching.com": "outdoorsea",
|
||||||
"leone.parise@gmail.com": "leoneparise",
|
"leone.parise@gmail.com": "leoneparise",
|
||||||
"mr@shu.io": "mrshu",
|
"mr@shu.io": "mrshu",
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,8 @@
|
||||||
"""Tests for acp_adapter.events — callback factories for ACP notifications."""
|
"""Tests for acp_adapter.events — callback factories for ACP notifications."""
|
||||||
|
|
||||||
import asyncio
|
import asyncio
|
||||||
|
import gc
|
||||||
|
import warnings
|
||||||
from concurrent.futures import Future
|
from concurrent.futures import Future
|
||||||
from unittest.mock import AsyncMock, MagicMock, patch
|
from unittest.mock import AsyncMock, MagicMock, patch
|
||||||
|
|
||||||
|
|
@ -10,6 +12,7 @@ import acp
|
||||||
from acp.schema import ToolCallStart, ToolCallProgress, AgentThoughtChunk, AgentMessageChunk
|
from acp.schema import ToolCallStart, ToolCallProgress, AgentThoughtChunk, AgentMessageChunk
|
||||||
|
|
||||||
from acp_adapter.events import (
|
from acp_adapter.events import (
|
||||||
|
_send_update,
|
||||||
make_message_cb,
|
make_message_cb,
|
||||||
make_step_cb,
|
make_step_cb,
|
||||||
make_thinking_cb,
|
make_thinking_cb,
|
||||||
|
|
@ -325,3 +328,46 @@ class TestMessageCallback:
|
||||||
cb("")
|
cb("")
|
||||||
|
|
||||||
mock_rcts.assert_not_called()
|
mock_rcts.assert_not_called()
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Scheduler-failure regression
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
class TestSendUpdate:
|
||||||
|
def test_scheduler_failure_closes_update_coroutine(self, event_loop_fixture):
|
||||||
|
"""If run_coroutine_threadsafe raises, _send_update must close the coro."""
|
||||||
|
created = {"coro": None}
|
||||||
|
|
||||||
|
async def _session_update(session_id, update):
|
||||||
|
return None
|
||||||
|
|
||||||
|
conn = MagicMock()
|
||||||
|
|
||||||
|
def _capture_update(session_id, update):
|
||||||
|
created["coro"] = _session_update(session_id, update)
|
||||||
|
return created["coro"]
|
||||||
|
|
||||||
|
conn.session_update = _capture_update
|
||||||
|
|
||||||
|
with warnings.catch_warnings(record=True) as caught:
|
||||||
|
warnings.simplefilter("always")
|
||||||
|
with patch(
|
||||||
|
"agent.async_utils.asyncio.run_coroutine_threadsafe",
|
||||||
|
side_effect=RuntimeError("scheduler down"),
|
||||||
|
):
|
||||||
|
_send_update(conn, "session-1", event_loop_fixture, {"type": "noop"})
|
||||||
|
gc.collect()
|
||||||
|
|
||||||
|
assert created["coro"] is not None
|
||||||
|
assert created["coro"].cr_frame is None
|
||||||
|
# Only count warnings about THIS test's coroutine; other tests in the
|
||||||
|
# same xdist worker (or stdlib mock internals) may emit unrelated
|
||||||
|
# "coroutine was never awaited" warnings that bleed through.
|
||||||
|
runtime_warnings = [
|
||||||
|
w for w in caught
|
||||||
|
if issubclass(w.category, RuntimeWarning)
|
||||||
|
and "was never awaited" in str(w.message)
|
||||||
|
and "_session_update" in str(w.message)
|
||||||
|
]
|
||||||
|
assert runtime_warnings == []
|
||||||
|
|
|
||||||
|
|
@ -38,7 +38,7 @@ def _invoke_callback(
|
||||||
scheduled["loop"] = passed_loop
|
scheduled["loop"] = passed_loop
|
||||||
return future
|
return future
|
||||||
|
|
||||||
with patch("acp_adapter.permissions.asyncio.run_coroutine_threadsafe", side_effect=_schedule):
|
with patch("agent.async_utils.asyncio.run_coroutine_threadsafe", side_effect=_schedule):
|
||||||
cb = make_approval_callback(request_permission, loop, session_id="s1", timeout=timeout)
|
cb = make_approval_callback(request_permission, loop, session_id="s1", timeout=timeout)
|
||||||
if use_prompt_path:
|
if use_prompt_path:
|
||||||
result = prompt_dangerous_approval(
|
result = prompt_dangerous_approval(
|
||||||
|
|
@ -135,7 +135,7 @@ class TestApprovalBridge:
|
||||||
scheduled["loop"] = passed_loop
|
scheduled["loop"] = passed_loop
|
||||||
return future
|
return future
|
||||||
|
|
||||||
with patch("acp_adapter.permissions.asyncio.run_coroutine_threadsafe", side_effect=_schedule):
|
with patch("agent.async_utils.asyncio.run_coroutine_threadsafe", side_effect=_schedule):
|
||||||
cb = make_approval_callback(request_permission, loop, session_id="s1", timeout=0.01)
|
cb = make_approval_callback(request_permission, loop, session_id="s1", timeout=0.01)
|
||||||
result = cb("rm -rf /", "dangerous command")
|
result = cb("rm -rf /", "dangerous command")
|
||||||
|
|
||||||
|
|
@ -159,10 +159,53 @@ class TestApprovalBridge:
|
||||||
scheduled["loop"] = passed_loop
|
scheduled["loop"] = passed_loop
|
||||||
return future
|
return future
|
||||||
|
|
||||||
with patch("acp_adapter.permissions.asyncio.run_coroutine_threadsafe", side_effect=_schedule):
|
with patch("agent.async_utils.asyncio.run_coroutine_threadsafe", side_effect=_schedule):
|
||||||
cb = make_approval_callback(request_permission, loop, session_id="s1", timeout=1.0)
|
cb = make_approval_callback(request_permission, loop, session_id="s1", timeout=1.0)
|
||||||
result = cb("echo hi", "demo")
|
result = cb("echo hi", "demo")
|
||||||
|
|
||||||
scheduled["coro"].close()
|
scheduled["coro"].close()
|
||||||
|
|
||||||
assert result == "deny"
|
assert result == "deny"
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Scheduler-failure regression
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
import gc # noqa: E402
|
||||||
|
import warnings # noqa: E402
|
||||||
|
|
||||||
|
|
||||||
|
class TestSchedulerFailure:
|
||||||
|
def test_scheduler_failure_closes_permission_coroutine(self):
|
||||||
|
"""If run_coroutine_threadsafe raises, the coro is closed and we return 'deny'."""
|
||||||
|
loop = MagicMock(spec=asyncio.AbstractEventLoop)
|
||||||
|
created = {"coro": None}
|
||||||
|
|
||||||
|
async def _response_coro(**kwargs):
|
||||||
|
return _make_response(AllowedOutcome(option_id="allow_once", outcome="selected"))
|
||||||
|
|
||||||
|
def _request_permission(**kwargs):
|
||||||
|
created["coro"] = _response_coro(**kwargs)
|
||||||
|
return created["coro"]
|
||||||
|
|
||||||
|
with warnings.catch_warnings(record=True) as caught:
|
||||||
|
warnings.simplefilter("always")
|
||||||
|
with patch(
|
||||||
|
"agent.async_utils.asyncio.run_coroutine_threadsafe",
|
||||||
|
side_effect=RuntimeError("scheduler down"),
|
||||||
|
):
|
||||||
|
cb = make_approval_callback(_request_permission, loop, session_id="s1", timeout=0.01)
|
||||||
|
result = cb("rm -rf /", "dangerous")
|
||||||
|
gc.collect()
|
||||||
|
|
||||||
|
assert result == "deny"
|
||||||
|
assert created["coro"] is not None
|
||||||
|
assert created["coro"].cr_frame is None
|
||||||
|
runtime_warnings = [
|
||||||
|
w for w in caught
|
||||||
|
if issubclass(w.category, RuntimeWarning)
|
||||||
|
and "was never awaited" in str(w.message)
|
||||||
|
and "_response_coro" in str(w.message)
|
||||||
|
]
|
||||||
|
assert runtime_warnings == []
|
||||||
|
|
|
||||||
157
tests/agent/test_async_utils.py
Normal file
157
tests/agent/test_async_utils.py
Normal file
|
|
@ -0,0 +1,157 @@
|
||||||
|
"""Tests for agent.async_utils.safe_schedule_threadsafe."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import gc
|
||||||
|
import warnings
|
||||||
|
from concurrent.futures import Future
|
||||||
|
from unittest.mock import patch
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from agent.async_utils import safe_schedule_threadsafe
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Helpers
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def _no_unawaited_warnings(caught, *, coro_name: str = "") -> bool:
|
||||||
|
"""Return True if no "X was never awaited" warning slipped through.
|
||||||
|
|
||||||
|
When *coro_name* is provided, only warnings naming that coroutine are
|
||||||
|
counted — xdist workers may emit unrelated unawaited-coroutine warnings
|
||||||
|
(e.g. ``AsyncMockMixin._execute_mock_call``) from concurrent tests.
|
||||||
|
"""
|
||||||
|
bad = [
|
||||||
|
w for w in caught
|
||||||
|
if issubclass(w.category, RuntimeWarning)
|
||||||
|
and "was never awaited" in str(w.message)
|
||||||
|
and (not coro_name or coro_name in str(w.message))
|
||||||
|
]
|
||||||
|
return not bad
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Tests
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
class TestSafeScheduleThreadsafe:
|
||||||
|
def test_returns_future_on_success(self):
|
||||||
|
loop = asyncio.new_event_loop()
|
||||||
|
try:
|
||||||
|
import threading
|
||||||
|
ready = threading.Event()
|
||||||
|
stop = threading.Event()
|
||||||
|
|
||||||
|
def _runner():
|
||||||
|
asyncio.set_event_loop(loop)
|
||||||
|
ready.set()
|
||||||
|
loop.run_until_complete(_wait_for_stop(stop))
|
||||||
|
|
||||||
|
async def _wait_for_stop(ev):
|
||||||
|
while not ev.is_set():
|
||||||
|
await asyncio.sleep(0.005)
|
||||||
|
|
||||||
|
t = threading.Thread(target=_runner, daemon=True)
|
||||||
|
t.start()
|
||||||
|
ready.wait(timeout=2)
|
||||||
|
|
||||||
|
async def _sample():
|
||||||
|
return 42
|
||||||
|
|
||||||
|
fut = safe_schedule_threadsafe(_sample(), loop)
|
||||||
|
assert isinstance(fut, Future)
|
||||||
|
assert fut.result(timeout=2) == 42
|
||||||
|
|
||||||
|
stop.set()
|
||||||
|
t.join(timeout=2)
|
||||||
|
finally:
|
||||||
|
if loop.is_running():
|
||||||
|
loop.call_soon_threadsafe(loop.stop)
|
||||||
|
loop.close()
|
||||||
|
|
||||||
|
def test_closed_loop_returns_none_and_closes_coroutine(self):
|
||||||
|
loop = asyncio.new_event_loop()
|
||||||
|
loop.close()
|
||||||
|
|
||||||
|
async def _sample():
|
||||||
|
return "ok"
|
||||||
|
|
||||||
|
coro = _sample()
|
||||||
|
with warnings.catch_warnings(record=True) as caught:
|
||||||
|
warnings.simplefilter("always")
|
||||||
|
result = safe_schedule_threadsafe(coro, loop)
|
||||||
|
del coro
|
||||||
|
gc.collect()
|
||||||
|
|
||||||
|
assert result is None
|
||||||
|
assert _no_unawaited_warnings(caught, coro_name='_sample')
|
||||||
|
|
||||||
|
def test_none_loop_returns_none_and_closes_coroutine(self):
|
||||||
|
async def _sample():
|
||||||
|
return "ok"
|
||||||
|
|
||||||
|
coro = _sample()
|
||||||
|
with warnings.catch_warnings(record=True) as caught:
|
||||||
|
warnings.simplefilter("always")
|
||||||
|
result = safe_schedule_threadsafe(coro, None)
|
||||||
|
del coro
|
||||||
|
gc.collect()
|
||||||
|
|
||||||
|
assert result is None
|
||||||
|
assert _no_unawaited_warnings(caught, coro_name='_sample')
|
||||||
|
|
||||||
|
def test_scheduling_exception_closes_coroutine(self):
|
||||||
|
"""If run_coroutine_threadsafe raises, close the coroutine and return None."""
|
||||||
|
# A loop that *looks* open but raises on submission
|
||||||
|
loop = asyncio.new_event_loop()
|
||||||
|
try:
|
||||||
|
async def _sample():
|
||||||
|
return "ok"
|
||||||
|
|
||||||
|
coro = _sample()
|
||||||
|
with warnings.catch_warnings(record=True) as caught:
|
||||||
|
warnings.simplefilter("always")
|
||||||
|
with patch(
|
||||||
|
"agent.async_utils.asyncio.run_coroutine_threadsafe",
|
||||||
|
side_effect=RuntimeError("scheduler down"),
|
||||||
|
):
|
||||||
|
result = safe_schedule_threadsafe(coro, loop)
|
||||||
|
del coro
|
||||||
|
gc.collect()
|
||||||
|
|
||||||
|
assert result is None
|
||||||
|
assert _no_unawaited_warnings(caught, coro_name='_sample')
|
||||||
|
finally:
|
||||||
|
loop.close()
|
||||||
|
|
||||||
|
def test_logs_at_specified_level(self, caplog):
|
||||||
|
import logging
|
||||||
|
loop = asyncio.new_event_loop()
|
||||||
|
loop.close()
|
||||||
|
|
||||||
|
async def _sample():
|
||||||
|
return None
|
||||||
|
|
||||||
|
custom = logging.getLogger("test_async_utils")
|
||||||
|
with caplog.at_level(logging.WARNING, logger="test_async_utils"):
|
||||||
|
result = safe_schedule_threadsafe(
|
||||||
|
_sample(), loop,
|
||||||
|
logger=custom,
|
||||||
|
log_message="custom-msg",
|
||||||
|
log_level=logging.WARNING,
|
||||||
|
)
|
||||||
|
|
||||||
|
assert result is None
|
||||||
|
assert any("custom-msg" in rec.message for rec in caplog.records)
|
||||||
|
|
||||||
|
def test_non_coroutine_arg_does_not_crash(self):
|
||||||
|
"""Defensive: even if the caller hands us something weird, don't blow up."""
|
||||||
|
loop = asyncio.new_event_loop()
|
||||||
|
loop.close()
|
||||||
|
|
||||||
|
# Pass a non-coroutine sentinel
|
||||||
|
result = safe_schedule_threadsafe("not-a-coroutine", loop) # type: ignore[arg-type]
|
||||||
|
assert result is None
|
||||||
|
|
@ -69,7 +69,8 @@ class TestProbeMcpServerTools:
|
||||||
patch("tools.mcp_tool._stop_mcp_loop"):
|
patch("tools.mcp_tool._stop_mcp_loop"):
|
||||||
|
|
||||||
# Simulate running the async probe
|
# Simulate running the async probe
|
||||||
def run_coro(coro, timeout=120):
|
def run_coro(coro_or_factory, timeout=120):
|
||||||
|
coro = coro_or_factory() if callable(coro_or_factory) else coro_or_factory
|
||||||
loop = asyncio.new_event_loop()
|
loop = asyncio.new_event_loop()
|
||||||
try:
|
try:
|
||||||
return loop.run_until_complete(coro)
|
return loop.run_until_complete(coro)
|
||||||
|
|
@ -110,7 +111,8 @@ class TestProbeMcpServerTools:
|
||||||
patch("tools.mcp_tool._run_on_mcp_loop") as mock_run, \
|
patch("tools.mcp_tool._run_on_mcp_loop") as mock_run, \
|
||||||
patch("tools.mcp_tool._stop_mcp_loop"):
|
patch("tools.mcp_tool._stop_mcp_loop"):
|
||||||
|
|
||||||
def run_coro(coro, timeout=120):
|
def run_coro(coro_or_factory, timeout=120):
|
||||||
|
coro = coro_or_factory() if callable(coro_or_factory) else coro_or_factory
|
||||||
loop = asyncio.new_event_loop()
|
loop = asyncio.new_event_loop()
|
||||||
try:
|
try:
|
||||||
return loop.run_until_complete(coro)
|
return loop.run_until_complete(coro)
|
||||||
|
|
@ -144,7 +146,8 @@ class TestProbeMcpServerTools:
|
||||||
patch("tools.mcp_tool._run_on_mcp_loop") as mock_run, \
|
patch("tools.mcp_tool._run_on_mcp_loop") as mock_run, \
|
||||||
patch("tools.mcp_tool._stop_mcp_loop"):
|
patch("tools.mcp_tool._stop_mcp_loop"):
|
||||||
|
|
||||||
def run_coro(coro, timeout=120):
|
def run_coro(coro_or_factory, timeout=120):
|
||||||
|
coro = coro_or_factory() if callable(coro_or_factory) else coro_or_factory
|
||||||
loop = asyncio.new_event_loop()
|
loop = asyncio.new_event_loop()
|
||||||
try:
|
try:
|
||||||
return loop.run_until_complete(coro)
|
return loop.run_until_complete(coro)
|
||||||
|
|
@ -198,7 +201,8 @@ class TestProbeMcpServerTools:
|
||||||
patch("tools.mcp_tool._run_on_mcp_loop") as mock_run, \
|
patch("tools.mcp_tool._run_on_mcp_loop") as mock_run, \
|
||||||
patch("tools.mcp_tool._stop_mcp_loop"):
|
patch("tools.mcp_tool._stop_mcp_loop"):
|
||||||
|
|
||||||
def run_coro(coro, timeout=120):
|
def run_coro(coro_or_factory, timeout=120):
|
||||||
|
coro = coro_or_factory() if callable(coro_or_factory) else coro_or_factory
|
||||||
loop = asyncio.new_event_loop()
|
loop = asyncio.new_event_loop()
|
||||||
try:
|
try:
|
||||||
return loop.run_until_complete(coro)
|
return loop.run_until_complete(coro)
|
||||||
|
|
|
||||||
|
|
@ -31,7 +31,8 @@ class _FakeCallToolResult:
|
||||||
self.structuredContent = structuredContent
|
self.structuredContent = structuredContent
|
||||||
|
|
||||||
|
|
||||||
def _fake_run_on_mcp_loop(coro, timeout=30):
|
def _fake_run_on_mcp_loop(coro_or_factory, timeout=30):
|
||||||
|
coro = coro_or_factory() if callable(coro_or_factory) else coro_or_factory
|
||||||
"""Run an MCP coroutine directly in a fresh event loop."""
|
"""Run an MCP coroutine directly in a fresh event loop."""
|
||||||
loop = asyncio.new_event_loop()
|
loop = asyncio.new_event_loop()
|
||||||
try:
|
try:
|
||||||
|
|
|
||||||
|
|
@ -397,6 +397,77 @@ class TestCheckFunction:
|
||||||
_servers.pop("test_server", None)
|
_servers.pop("test_server", None)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# MCP loop runner
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
class TestRunOnMcpLoop:
|
||||||
|
def test_scheduler_failure_closes_factory_coroutine(self):
|
||||||
|
"""If run_coroutine_threadsafe raises, the factory's coroutine is closed."""
|
||||||
|
import gc
|
||||||
|
import warnings
|
||||||
|
import tools.mcp_tool as mcp
|
||||||
|
|
||||||
|
created = {"coro": None}
|
||||||
|
|
||||||
|
async def _sample():
|
||||||
|
return "ok"
|
||||||
|
|
||||||
|
def factory():
|
||||||
|
created["coro"] = _sample()
|
||||||
|
return created["coro"]
|
||||||
|
|
||||||
|
fake_loop = MagicMock()
|
||||||
|
fake_loop.is_running.return_value = True
|
||||||
|
|
||||||
|
with patch.object(mcp, "_mcp_loop", fake_loop):
|
||||||
|
with warnings.catch_warnings(record=True) as caught:
|
||||||
|
warnings.simplefilter("always")
|
||||||
|
with patch(
|
||||||
|
"agent.async_utils.asyncio.run_coroutine_threadsafe",
|
||||||
|
side_effect=RuntimeError("scheduler down"),
|
||||||
|
):
|
||||||
|
with pytest.raises(RuntimeError):
|
||||||
|
mcp._run_on_mcp_loop(factory)
|
||||||
|
gc.collect()
|
||||||
|
|
||||||
|
assert created["coro"] is not None
|
||||||
|
assert created["coro"].cr_frame is None
|
||||||
|
runtime_warnings = [
|
||||||
|
w for w in caught
|
||||||
|
if issubclass(w.category, RuntimeWarning)
|
||||||
|
and "was never awaited" in str(w.message)
|
||||||
|
and "_sample" in str(w.message)
|
||||||
|
]
|
||||||
|
assert runtime_warnings == []
|
||||||
|
|
||||||
|
def test_dead_loop_closes_passed_coroutine(self):
|
||||||
|
"""If loop is None, a passed coroutine (not factory) is closed."""
|
||||||
|
import gc
|
||||||
|
import warnings
|
||||||
|
import tools.mcp_tool as mcp
|
||||||
|
|
||||||
|
async def _sample():
|
||||||
|
return "ok"
|
||||||
|
|
||||||
|
coro = _sample()
|
||||||
|
with patch.object(mcp, "_mcp_loop", None):
|
||||||
|
with warnings.catch_warnings(record=True) as caught:
|
||||||
|
warnings.simplefilter("always")
|
||||||
|
with pytest.raises(RuntimeError, match="not running"):
|
||||||
|
mcp._run_on_mcp_loop(coro)
|
||||||
|
gc.collect()
|
||||||
|
|
||||||
|
assert coro.cr_frame is None
|
||||||
|
runtime_warnings = [
|
||||||
|
w for w in caught
|
||||||
|
if issubclass(w.category, RuntimeWarning)
|
||||||
|
and "was never awaited" in str(w.message)
|
||||||
|
and "_sample" in str(w.message)
|
||||||
|
]
|
||||||
|
assert runtime_warnings == []
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# Tool handler
|
# Tool handler
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
@ -406,7 +477,8 @@ class TestToolHandler:
|
||||||
|
|
||||||
def _patch_mcp_loop(self, coro_side_effect=None):
|
def _patch_mcp_loop(self, coro_side_effect=None):
|
||||||
"""Return a patch for _run_on_mcp_loop that runs the coroutine directly."""
|
"""Return a patch for _run_on_mcp_loop that runs the coroutine directly."""
|
||||||
def fake_run(coro, timeout=30):
|
def fake_run(coro_or_factory, timeout=30):
|
||||||
|
coro = coro_or_factory() if callable(coro_or_factory) else coro_or_factory
|
||||||
return asyncio.run(coro)
|
return asyncio.run(coro)
|
||||||
if coro_side_effect:
|
if coro_side_effect:
|
||||||
return patch("tools.mcp_tool._run_on_mcp_loop", side_effect=coro_side_effect)
|
return patch("tools.mcp_tool._run_on_mcp_loop", side_effect=coro_side_effect)
|
||||||
|
|
@ -485,7 +557,8 @@ class TestToolHandler:
|
||||||
|
|
||||||
try:
|
try:
|
||||||
handler = _make_tool_handler("test_srv", "greet", 120)
|
handler = _make_tool_handler("test_srv", "greet", 120)
|
||||||
def _interrupting_run(coro, timeout=30):
|
def _interrupting_run(coro_or_factory, timeout=30):
|
||||||
|
coro = coro_or_factory() if callable(coro_or_factory) else coro_or_factory
|
||||||
coro.close()
|
coro.close()
|
||||||
raise InterruptedError("User sent a new message")
|
raise InterruptedError("User sent a new message")
|
||||||
with patch(
|
with patch(
|
||||||
|
|
@ -1792,7 +1865,8 @@ class TestUtilityHandlers:
|
||||||
|
|
||||||
def _patch_mcp_loop(self):
|
def _patch_mcp_loop(self):
|
||||||
"""Return a patch for _run_on_mcp_loop that runs the coroutine directly."""
|
"""Return a patch for _run_on_mcp_loop that runs the coroutine directly."""
|
||||||
def fake_run(coro, timeout=30):
|
def fake_run(coro_or_factory, timeout=30):
|
||||||
|
coro = coro_or_factory() if callable(coro_or_factory) else coro_or_factory
|
||||||
return asyncio.run(coro)
|
return asyncio.run(coro)
|
||||||
return patch("tools.mcp_tool._run_on_mcp_loop", side_effect=fake_run)
|
return patch("tools.mcp_tool._run_on_mcp_loop", side_effect=fake_run)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -274,7 +274,13 @@ def _browser_cdp_via_supervisor(
|
||||||
)
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
fut = _asyncio.run_coroutine_threadsafe(_do_cdp(), loop)
|
from agent.async_utils import safe_schedule_threadsafe
|
||||||
|
fut = safe_schedule_threadsafe(_do_cdp(), loop)
|
||||||
|
if fut is None:
|
||||||
|
return tool_error(
|
||||||
|
"CDP call via supervisor failed: loop unavailable",
|
||||||
|
cdp_docs=CDP_DOCS_URL,
|
||||||
|
)
|
||||||
result_msg = fut.result(timeout=timeout + 2)
|
result_msg = fut.result(timeout=timeout + 2)
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
return tool_error(
|
return tool_error(
|
||||||
|
|
|
||||||
|
|
@ -368,11 +368,13 @@ class CDPSupervisor:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
try:
|
try:
|
||||||
fut = asyncio.run_coroutine_threadsafe(_close_ws(), loop)
|
from agent.async_utils import safe_schedule_threadsafe
|
||||||
try:
|
fut = safe_schedule_threadsafe(_close_ws(), loop)
|
||||||
fut.result(timeout=2.0)
|
if fut is not None:
|
||||||
except Exception:
|
try:
|
||||||
pass
|
fut.result(timeout=2.0)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
except RuntimeError:
|
except RuntimeError:
|
||||||
pass # loop already shutting down
|
pass # loop already shutting down
|
||||||
if self._thread is not None:
|
if self._thread is not None:
|
||||||
|
|
@ -451,7 +453,10 @@ class CDPSupervisor:
|
||||||
)
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
fut = asyncio.run_coroutine_threadsafe(_do_respond(), loop)
|
from agent.async_utils import safe_schedule_threadsafe
|
||||||
|
fut = safe_schedule_threadsafe(_do_respond(), loop)
|
||||||
|
if fut is None:
|
||||||
|
return {"ok": False, "error": "Browser supervisor loop unavailable"}
|
||||||
fut.result(timeout=timeout)
|
fut.result(timeout=timeout)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return {"ok": False, "error": f"{type(e).__name__}: {e}"}
|
return {"ok": False, "error": f"{type(e).__name__}: {e}"}
|
||||||
|
|
@ -507,7 +512,10 @@ class CDPSupervisor:
|
||||||
)
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
fut = asyncio.run_coroutine_threadsafe(_do_eval(), loop)
|
from agent.async_utils import safe_schedule_threadsafe
|
||||||
|
fut = safe_schedule_threadsafe(_do_eval(), loop)
|
||||||
|
if fut is None:
|
||||||
|
return {"ok": False, "error": "Browser supervisor loop unavailable"}
|
||||||
response = fut.result(timeout=timeout + 1)
|
response = fut.result(timeout=timeout + 1)
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
return {"ok": False, "error": f"{type(exc).__name__}: {exc}"}
|
return {"ok": False, "error": f"{type(exc).__name__}: {exc}"}
|
||||||
|
|
|
||||||
|
|
@ -183,9 +183,14 @@ class _AsyncBridge:
|
||||||
raise RuntimeError("cua-driver asyncio bridge failed to start")
|
raise RuntimeError("cua-driver asyncio bridge failed to start")
|
||||||
|
|
||||||
def run(self, coro, timeout: Optional[float] = 30.0) -> Any:
|
def run(self, coro, timeout: Optional[float] = 30.0) -> Any:
|
||||||
|
from agent.async_utils import safe_schedule_threadsafe
|
||||||
if not self._loop or not self._thread or not self._thread.is_alive():
|
if not self._loop or not self._thread or not self._thread.is_alive():
|
||||||
|
if asyncio.iscoroutine(coro):
|
||||||
|
coro.close()
|
||||||
|
raise RuntimeError("cua-driver bridge not started")
|
||||||
|
fut = safe_schedule_threadsafe(coro, self._loop)
|
||||||
|
if fut is None:
|
||||||
raise RuntimeError("cua-driver bridge not started")
|
raise RuntimeError("cua-driver bridge not started")
|
||||||
fut: Future = asyncio.run_coroutine_threadsafe(coro, self._loop)
|
|
||||||
return fut.result(timeout=timeout)
|
return fut.result(timeout=timeout)
|
||||||
|
|
||||||
def stop(self) -> None:
|
def stop(self) -> None:
|
||||||
|
|
|
||||||
|
|
@ -144,9 +144,14 @@ class _AsyncWorker:
|
||||||
self._loop.run_forever()
|
self._loop.run_forever()
|
||||||
|
|
||||||
def run_coroutine(self, coro, timeout=600):
|
def run_coroutine(self, coro, timeout=600):
|
||||||
|
from agent.async_utils import safe_schedule_threadsafe
|
||||||
if self._loop is None or self._loop.is_closed():
|
if self._loop is None or self._loop.is_closed():
|
||||||
|
if asyncio.iscoroutine(coro):
|
||||||
|
coro.close()
|
||||||
|
raise RuntimeError("AsyncWorker loop is not running")
|
||||||
|
future = safe_schedule_threadsafe(coro, self._loop)
|
||||||
|
if future is None:
|
||||||
raise RuntimeError("AsyncWorker loop is not running")
|
raise RuntimeError("AsyncWorker loop is not running")
|
||||||
future = asyncio.run_coroutine_threadsafe(coro, self._loop)
|
|
||||||
return future.result(timeout=timeout)
|
return future.result(timeout=timeout)
|
||||||
|
|
||||||
def stop(self):
|
def stop(self):
|
||||||
|
|
|
||||||
|
|
@ -1781,7 +1781,7 @@ def _handle_auth_error_and_retry(
|
||||||
return await manager.handle_401(server_name, None)
|
return await manager.handle_401(server_name, None)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
recovered = _run_on_mcp_loop(_recover(), timeout=10)
|
recovered = _run_on_mcp_loop(_recover, timeout=10)
|
||||||
except Exception as rec_exc:
|
except Exception as rec_exc:
|
||||||
logger.warning(
|
logger.warning(
|
||||||
"MCP OAuth '%s': recovery attempt failed: %s",
|
"MCP OAuth '%s': recovery attempt failed: %s",
|
||||||
|
|
@ -2054,19 +2054,35 @@ def _ensure_mcp_loop():
|
||||||
_mcp_thread.start()
|
_mcp_thread.start()
|
||||||
|
|
||||||
|
|
||||||
def _run_on_mcp_loop(coro, timeout: float = 30):
|
def _run_on_mcp_loop(coro_or_factory, timeout: float = 30):
|
||||||
"""Schedule a coroutine on the MCP event loop and block until done.
|
"""Schedule a coroutine on the MCP event loop and block until done.
|
||||||
|
|
||||||
|
Accepts either a coroutine object or a zero-arg callable that returns one.
|
||||||
|
Callers can pass a factory to avoid constructing coroutine objects when
|
||||||
|
the MCP loop is unavailable (which would otherwise leak the coroutine
|
||||||
|
frame and emit ``"coroutine was never awaited"`` warnings).
|
||||||
|
|
||||||
Poll in short intervals so the calling agent thread can honor user
|
Poll in short intervals so the calling agent thread can honor user
|
||||||
interrupts while the MCP work is still running on the background loop.
|
interrupts while the MCP work is still running on the background loop.
|
||||||
"""
|
"""
|
||||||
from tools.interrupt import is_interrupted
|
from tools.interrupt import is_interrupted
|
||||||
|
from agent.async_utils import safe_schedule_threadsafe
|
||||||
|
|
||||||
with _lock:
|
with _lock:
|
||||||
loop = _mcp_loop
|
loop = _mcp_loop
|
||||||
if loop is None or not loop.is_running():
|
if loop is None or not loop.is_running():
|
||||||
|
if asyncio.iscoroutine(coro_or_factory):
|
||||||
|
coro_or_factory.close()
|
||||||
raise RuntimeError("MCP event loop is not running")
|
raise RuntimeError("MCP event loop is not running")
|
||||||
future = asyncio.run_coroutine_threadsafe(coro, loop)
|
|
||||||
|
coro = coro_or_factory() if callable(coro_or_factory) else coro_or_factory
|
||||||
|
future = safe_schedule_threadsafe(
|
||||||
|
coro, loop,
|
||||||
|
logger=logger,
|
||||||
|
log_message="MCP scheduling failed",
|
||||||
|
)
|
||||||
|
if future is None:
|
||||||
|
raise RuntimeError("MCP event loop unavailable (failed to schedule)")
|
||||||
start_time = time.monotonic()
|
start_time = time.monotonic()
|
||||||
deadline = None if timeout is None else start_time + timeout
|
deadline = None if timeout is None else start_time + timeout
|
||||||
|
|
||||||
|
|
@ -2263,7 +2279,7 @@ def _make_tool_handler(server_name: str, tool_name: str, tool_timeout: float):
|
||||||
return json.dumps({"result": text_result}, ensure_ascii=False)
|
return json.dumps({"result": text_result}, ensure_ascii=False)
|
||||||
|
|
||||||
def _call_once():
|
def _call_once():
|
||||||
return _run_on_mcp_loop(_call(), timeout=tool_timeout)
|
return _run_on_mcp_loop(_call, timeout=tool_timeout)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
result = _call_once()
|
result = _call_once()
|
||||||
|
|
@ -2343,7 +2359,7 @@ def _make_list_resources_handler(server_name: str, tool_timeout: float):
|
||||||
return json.dumps({"resources": resources}, ensure_ascii=False)
|
return json.dumps({"resources": resources}, ensure_ascii=False)
|
||||||
|
|
||||||
def _call_once():
|
def _call_once():
|
||||||
return _run_on_mcp_loop(_call(), timeout=tool_timeout)
|
return _run_on_mcp_loop(_call, timeout=tool_timeout)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
return _call_once()
|
return _call_once()
|
||||||
|
|
@ -2403,7 +2419,7 @@ def _make_read_resource_handler(server_name: str, tool_timeout: float):
|
||||||
return json.dumps({"result": "\n".join(parts) if parts else ""}, ensure_ascii=False)
|
return json.dumps({"result": "\n".join(parts) if parts else ""}, ensure_ascii=False)
|
||||||
|
|
||||||
def _call_once():
|
def _call_once():
|
||||||
return _run_on_mcp_loop(_call(), timeout=tool_timeout)
|
return _run_on_mcp_loop(_call, timeout=tool_timeout)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
return _call_once()
|
return _call_once()
|
||||||
|
|
@ -2466,7 +2482,7 @@ def _make_list_prompts_handler(server_name: str, tool_timeout: float):
|
||||||
return json.dumps({"prompts": prompts}, ensure_ascii=False)
|
return json.dumps({"prompts": prompts}, ensure_ascii=False)
|
||||||
|
|
||||||
def _call_once():
|
def _call_once():
|
||||||
return _run_on_mcp_loop(_call(), timeout=tool_timeout)
|
return _run_on_mcp_loop(_call, timeout=tool_timeout)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
return _call_once()
|
return _call_once()
|
||||||
|
|
@ -2537,7 +2553,7 @@ def _make_get_prompt_handler(server_name: str, tool_timeout: float):
|
||||||
return json.dumps(resp, ensure_ascii=False)
|
return json.dumps(resp, ensure_ascii=False)
|
||||||
|
|
||||||
def _call_once():
|
def _call_once():
|
||||||
return _run_on_mcp_loop(_call(), timeout=tool_timeout)
|
return _run_on_mcp_loop(_call, timeout=tool_timeout)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
return _call_once()
|
return _call_once()
|
||||||
|
|
@ -3121,7 +3137,7 @@ def register_mcp_servers(servers: Dict[str, dict]) -> List[str]:
|
||||||
if _was_interrupted:
|
if _was_interrupted:
|
||||||
_set_interrupt(False)
|
_set_interrupt(False)
|
||||||
try:
|
try:
|
||||||
_run_on_mcp_loop(_discover_all(), timeout=120)
|
_run_on_mcp_loop(_discover_all, timeout=120)
|
||||||
finally:
|
finally:
|
||||||
if _was_interrupted:
|
if _was_interrupted:
|
||||||
_set_interrupt(True)
|
_set_interrupt(True)
|
||||||
|
|
@ -3289,7 +3305,7 @@ def probe_mcp_server_tools() -> Dict[str, List[tuple]]:
|
||||||
)
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
_run_on_mcp_loop(_probe_all(), timeout=120)
|
_run_on_mcp_loop(_probe_all, timeout=120)
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
logger.debug("MCP probe failed: %s", exc)
|
logger.debug("MCP probe failed: %s", exc)
|
||||||
finally:
|
finally:
|
||||||
|
|
@ -3329,11 +3345,17 @@ def shutdown_mcp_servers():
|
||||||
with _lock:
|
with _lock:
|
||||||
loop = _mcp_loop
|
loop = _mcp_loop
|
||||||
if loop is not None and loop.is_running():
|
if loop is not None and loop.is_running():
|
||||||
try:
|
from agent.async_utils import safe_schedule_threadsafe
|
||||||
future = asyncio.run_coroutine_threadsafe(_shutdown(), loop)
|
future = safe_schedule_threadsafe(
|
||||||
future.result(timeout=15)
|
_shutdown(), loop,
|
||||||
except Exception as exc:
|
logger=logger,
|
||||||
logger.debug("Error during MCP shutdown: %s", exc)
|
log_message="MCP shutdown: failed to schedule",
|
||||||
|
)
|
||||||
|
if future is not None:
|
||||||
|
try:
|
||||||
|
future.result(timeout=15)
|
||||||
|
except Exception as exc:
|
||||||
|
logger.debug("Error during MCP shutdown: %s", exc)
|
||||||
|
|
||||||
_stop_mcp_loop()
|
_stop_mcp_loop()
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -153,9 +153,14 @@ def resolve_sync_compat(
|
||||||
Prefer the async ``resolve()`` from an async context.
|
Prefer the async ``resolve()`` from an async context.
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
fut = asyncio.run_coroutine_threadsafe(
|
from agent.async_utils import safe_schedule_threadsafe
|
||||||
|
fut = safe_schedule_threadsafe(
|
||||||
resolve(session_key, confirm_id, choice), loop,
|
resolve(session_key, confirm_id, choice), loop,
|
||||||
|
logger=logger,
|
||||||
|
log_message="resolve_sync_compat scheduling failed",
|
||||||
)
|
)
|
||||||
|
if fut is None:
|
||||||
|
return None
|
||||||
return fut.result(timeout=30)
|
return fut.result(timeout=30)
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
logger.error("resolve_sync_compat failed: %s", exc)
|
logger.error("resolve_sync_compat failed: %s", exc)
|
||||||
|
|
|
||||||
|
|
@ -83,7 +83,11 @@ class WSTransport:
|
||||||
return True
|
return True
|
||||||
|
|
||||||
try:
|
try:
|
||||||
fut = asyncio.run_coroutine_threadsafe(self._safe_send(line), self._loop)
|
from agent.async_utils import safe_schedule_threadsafe
|
||||||
|
fut = safe_schedule_threadsafe(self._safe_send(line), self._loop)
|
||||||
|
if fut is None:
|
||||||
|
self._closed = True
|
||||||
|
return False
|
||||||
fut.result(timeout=_WS_WRITE_TIMEOUT_S)
|
fut.result(timeout=_WS_WRITE_TIMEOUT_S)
|
||||||
return not self._closed
|
return not self._closed
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue