mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-30 11:52:04 +00:00
When the Desktop forcibly closes its WebSocket mid-write, asyncio logs a full traceback for every pending connection-lost callback — 50+ identical WinError 10054 (ConnectionResetError) lines per disconnect on Windows, or the equivalent ConnectionResetError/BrokenPipeError on POSIX. These are not actionable: they are the expected side effect of the peer hanging up before our writes drained. Install a loop exception handler on the gateway serving loop that collapses exactly this teardown class (ConnectionResetError/ConnectionAbortedError/BrokenPipeError originating from _call_connection_lost) to a single debug line, forwarding every other loop error to the existing/default handler unchanged so genuine loop bugs still surface. Idempotent per loop.
This commit is contained in:
parent
6eec0d4f08
commit
fde1c8570f
3 changed files with 210 additions and 0 deletions
|
|
@ -13686,6 +13686,19 @@ def start_server(
|
|||
print(f" Hermes Web UI → http://{host}:{actual_port}")
|
||||
_maybe_open_browser(host, actual_port, open_browser, initial_profile)
|
||||
|
||||
# Collapse the peer-hangup teardown flood (#50005). When the Desktop
|
||||
# forcibly closes its WebSocket mid-write, asyncio logs a full
|
||||
# traceback per pending connection-lost callback — 50+ identical
|
||||
# WinError 10054 (ConnectionResetError) lines per disconnect on
|
||||
# Windows. This filter downgrades exactly that class to one debug
|
||||
# line and passes every other loop error through unchanged.
|
||||
try:
|
||||
from tui_gateway.loop_noise import install_loop_noise_filter
|
||||
|
||||
install_loop_noise_filter(asyncio.get_running_loop())
|
||||
except Exception as exc: # pragma: no cover - best-effort
|
||||
_log.debug("loop noise filter install skipped: %s", exc)
|
||||
|
||||
await server.main_loop()
|
||||
if server.started:
|
||||
await server.shutdown()
|
||||
|
|
|
|||
114
tests/test_tui_gateway_loop_noise.py
Normal file
114
tests/test_tui_gateway_loop_noise.py
Normal file
|
|
@ -0,0 +1,114 @@
|
|||
"""Tests for tui_gateway.loop_noise — the WS peer-hangup teardown filter (#50005)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
|
||||
import pytest
|
||||
|
||||
from tui_gateway.loop_noise import (
|
||||
_is_benign_teardown,
|
||||
install_loop_noise_filter,
|
||||
)
|
||||
|
||||
|
||||
class _FakeConnectionLostCallback:
    """Test double that only mimics the repr of asyncio's teardown handle.

    The filter under test matches on the ``_call_connection_lost`` marker
    in the repr of the context's ``handle``/``callback`` entry, so a fixed
    repr is all this stand-in needs to provide.
    """

    # Text mimicking the repr of the handle behind the teardown flood.
    _REPR = "<Handle _ProactorBasePipeTransport._call_connection_lost(None)>"

    def __repr__(self) -> str:
        return self._REPR
|
||||
|
||||
|
||||
def test_benign_teardown_matches_reset_in_connection_lost():
    """A reset raised from the connection-lost handle is classified benign."""
    reset = ConnectionResetError(10054, "forcibly closed")
    context = {"exception": reset, "handle": _FakeConnectionLostCallback()}
    assert _is_benign_teardown(context) is True
|
||||
|
||||
|
||||
def test_benign_teardown_matches_aborted_and_broken_pipe():
    """Aborted-connection and broken-pipe errors are benign too.

    These contexts carry the teardown marker under the ``callback`` key
    rather than ``handle``, exercising the second lookup in the filter.
    """
    candidates = [
        ConnectionAbortedError(10053, "aborted"),
        BrokenPipeError("epipe"),
    ]
    for error in candidates:
        context = {"exception": error, "callback": _FakeConnectionLostCallback()}
        assert _is_benign_teardown(context) is True
|
||||
|
||||
|
||||
def test_reset_outside_connection_lost_is_not_suppressed():
    """A reset from any other callback must not be classified as benign."""
    # The exception type matches, but the handle is not the connection-lost
    # teardown path, so the error has to reach the default handler.
    unrelated_handle = "<Handle some_other_handler()>"
    context = {
        "exception": ConnectionResetError("reset in a real handler"),
        "handle": unrelated_handle,
    }
    assert _is_benign_teardown(context) is False
|
||||
|
||||
|
||||
def test_unrelated_exception_is_not_suppressed():
    """The teardown marker alone is not enough; the error type must match."""
    context = {"exception": ValueError("boom"), "handle": _FakeConnectionLostCallback()}
    assert _is_benign_teardown(context) is False
|
||||
|
||||
|
||||
def test_no_exception_is_not_suppressed():
    """A context carrying only a message (no exception) is never benign."""
    message_only = {"message": "loop warning, no exc"}
    assert _is_benign_teardown(message_only) is False
|
||||
|
||||
|
||||
def test_install_suppresses_flood_and_forwards_real_errors():
    """The installed filter swallows the flood and forwards everything else."""
    loop = asyncio.new_event_loop()
    forwarded: list[dict] = []

    def _record(_loop, ctx):
        # Stands in for a pre-existing handler; records what reaches it.
        forwarded.append(ctx)

    try:
        loop.set_exception_handler(_record)
        install_loop_noise_filter(loop)

        # Benign teardown flood: swallowed, never reaches the previous handler.
        benign_ctx = {
            "exception": ConnectionResetError(10054, "forcibly closed"),
            "handle": _FakeConnectionLostCallback(),
        }
        loop.call_exception_handler(benign_ctx)
        assert forwarded == []

        # Genuine loop error: handed to the previous handler, same dict object.
        real_ctx = {"exception": RuntimeError("genuine loop bug")}
        loop.call_exception_handler(real_ctx)
        assert len(forwarded) == 1
        assert forwarded[0] is real_ctx
    finally:
        loop.close()
|
||||
|
||||
|
||||
def test_install_is_idempotent():
    """Installing twice on one loop leaves the first handler in place."""
    loop = asyncio.new_event_loop()
    try:
        install_loop_noise_filter(loop)
        handler_after_first = loop.get_exception_handler()

        install_loop_noise_filter(loop)
        handler_after_second = loop.get_exception_handler()

        # A second install must not wrap again: same handler object.
        assert handler_after_second is handler_after_first
    finally:
        loop.close()
|
||||
|
||||
|
||||
def test_install_falls_back_to_default_handler_when_none_set():
    """With no prior handler, neither the benign nor the real path raises."""
    loop = asyncio.new_event_loop()
    try:
        # Nothing was installed beforehand, so the filter has no previous
        # handler to chain to.
        install_loop_noise_filter(loop)

        # The benign flood is still swallowed.
        benign_ctx = {
            "exception": ConnectionResetError(10054, "reset"),
            "handle": _FakeConnectionLostCallback(),
        }
        loop.call_exception_handler(benign_ctx)

        # A genuine error routes to default_exception_handler and must not
        # raise out of the filter.
        warning_ctx = {"message": "some loop warning"}
        loop.call_exception_handler(warning_ctx)
    finally:
        loop.close()
|
||||
83
tui_gateway/loop_noise.py
Normal file
83
tui_gateway/loop_noise.py
Normal file
|
|
@ -0,0 +1,83 @@
|
|||
"""Suppress benign event-loop teardown noise on the gateway serving loop.
|
||||
|
||||
When the Desktop client forcibly closes its WebSocket while the gateway still
|
||||
has pending socket operations, asyncio's transport teardown logs a full
|
||||
traceback for every pending ``_call_connection_lost`` callback. On Windows this
|
||||
surfaces as ``ConnectionResetError: [WinError 10054]`` (and the rarer
|
||||
``ConnectionAbortedError: [WinError 10053]``); on POSIX it is the equivalent
|
||||
``ConnectionResetError``/``BrokenPipeError``. A single client disconnect can
|
||||
emit 50+ identical tracebacks into ``errors.log`` (#50005).
|
||||
|
||||
These are not actionable — they are the expected side effect of the peer
|
||||
hanging up before our writes drained. We install a loop exception handler that
|
||||
collapses exactly this class of teardown error to one debug line and forwards
|
||||
everything else to the previously installed handler (or asyncio's default
handler when none was set) unchanged, so genuine loop bugs
|
||||
still surface.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
from typing import Any
|
||||
|
||||
# Module logger; suppressed teardown errors are reported through it at DEBUG.
_log = logging.getLogger(__name__)

# Connection-teardown errors that mean "the peer hung up mid-write". WinError
# 10054 (connection reset) and 10053 (connection aborted) raise as these.
# Used as the ``isinstance`` target when classifying loop error contexts.
_BENIGN_TEARDOWN_ERRORS = (
    ConnectionResetError,
    ConnectionAbortedError,
    BrokenPipeError,
)
|
||||
|
||||
|
||||
def _is_benign_teardown(context: dict[str, Any]) -> bool:
    """Report whether a loop error context is the peer-hangup teardown flood.

    Two conditions must both hold: the context's ``exception`` is one of
    ``_BENIGN_TEARDOWN_ERRORS``, and the repr of its ``callback`` or
    ``handle`` entry mentions ``_call_connection_lost``. The same error
    types raised anywhere else (a real handler, a custom callback) are not
    matched, so they still reach the regular exception handler.

    Args:
        context: The context dict passed to a loop exception handler.

    Returns:
        True only for a benign teardown error, False otherwise (including
        contexts that carry no exception at all).
    """
    if isinstance(context.get("exception"), _BENIGN_TEARDOWN_ERRORS):
        # The flood originates from the transport's connection-lost callback;
        # match on its repr so the same error type raised elsewhere is kept.
        origins = (context.get("callback"), context.get("handle"))
        return any("_call_connection_lost" in repr(origin) for origin in origins)
    return False
|
||||
|
||||
|
||||
def install_loop_noise_filter(loop: asyncio.AbstractEventLoop) -> None:
    """Chain a teardown-noise filter ahead of the loop's existing handler.

    Benign peer-hangup teardown errors (see ``_is_benign_teardown``) are
    collapsed to one debug log line. Every other context is forwarded
    unchanged to the handler that was installed before this call, or to
    ``loop.default_exception_handler`` when none was set.

    Idempotent: if the loop's *current* exception handler is already this
    filter, the call is a no-op, so it is safe to call on every
    reconnect/serve entry. Idempotency is keyed to the installed handler,
    not to a flag on the loop, so the filter is re-installed if other code
    replaced the loop's handler in the meantime. It also works on loop
    implementations that do not accept ad-hoc attributes.

    Args:
        loop: The event loop whose exception handler should be wrapped.
    """
    previous = loop.get_exception_handler()
    if getattr(previous, "_hermes_noise_filter", False):
        # The filter is the active handler already; do not stack another.
        return

    def _handler(loop: asyncio.AbstractEventLoop, context: dict[str, Any]) -> None:
        if _is_benign_teardown(context):
            _log.debug(
                "ws peer hangup during teardown (suppressed): %s",
                context.get("exception"),
            )
            return
        if previous is not None:
            previous(loop, context)
        else:
            loop.default_exception_handler(context)

    # Tag the handler itself so a later install can recognise it through
    # loop.get_exception_handler(), whatever the loop implementation.
    _handler._hermes_noise_filter = True  # type: ignore[attr-defined]
    loop.set_exception_handler(_handler)

    # Legacy marker on the loop instance, kept best-effort for any external
    # code that inspects it; it is no longer consulted for idempotency.
    try:
        loop._hermes_noise_filter_installed = True  # type: ignore[attr-defined]
    except (AttributeError, TypeError):  # pragma: no cover - exotic loop impls
        pass
|
||||
Loading…
Add table
Add a link
Reference in a new issue