mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-09 08:21:50 +00:00
Salvaged from #35626 (banditburai) and re-scoped after maintainers landed the parent-death watchdog (slash_worker.py) and PTY process-group teardown (pty_bridge.py) directly on main. Those pieces are intentionally NOT included here — this carries only what is still missing:

- C1 disconnect reap: ws.py's `finally` only re-pointed the dead transport at stdio. `_close_sessions_for_transport` now reaps `close_on_disconnect` sessions and schedules the grace-reap for the rest, offloaded via `asyncio.to_thread` so the blocking worker.close() + DB write never stalls the uvicorn loop.
- C2 create/close orphan race: `_attach_worker` stores the worker iff `_sessions.get(sid) is session` under the lock (else closes it), applied at every spawn site incl. the post-turn `_restart_slash_worker`.
- Single idempotent teardown funnel: session.close, WS disconnect, the generous-TTL idle reaper, shutdown, and the WS grace-reap all reach `_close_session_by_id` → `_teardown_session`; `_finalized`/`_closed` flags make concurrent/double teardown a no-op. `_sessions_lock` upgraded to RLock.
- uvicorn `ws_ping_interval/timeout=20s` so a half-open socket (reverse-proxy 524) becomes a `WebSocketDisconnect` and the C1 path runs.

Plus two review-driven hardening fixes (mine):

- `session.active_list` now skips `_finalized` sessions so the footer "N sessions" count reflects attachable sessions instead of only ever growing until restart (#38950). Keys on `_finalized` only, NOT the stdio sentinel, so a standalone `hermes --tui` session stays visible.
- `_schedule_ws_orphan_reap._reap` pops via `_close_session_by_id` (under `_sessions_lock`) instead of `_sessions.pop` under the unrelated `_session_resume_lock` (#39591); the resume_lock now only guards the orphan re-check against `session.resume`.
- Float env knobs (`HERMES_SLASH_WATCHDOG_*`, `HERMES_TUI_SESSION_TTL_S`) parse with a fallback helper so a malformed value can't crash the worker at import.
Fixes #32377
Fixes #38950
Addresses #22855

Co-authored-by: banditburai <123342691+banditburai@users.noreply.github.com>
Co-authored-by: kshitijk4poor <82637225+kshitijk4poor@users.noreply.github.com>
73 lines
2.1 KiB
Python
73 lines
2.1 KiB
Python
import asyncio
|
|
|
|
from tui_gateway import server
|
|
from tui_gateway import ws as ws_mod
|
|
|
|
|
|
def _run_disconnect(monkeypatch, seed):
    """Run ``ws_mod.handle_ws`` against a fake socket that disconnects at once.

    ``seed`` is called with the first ``WSTransport`` built during the run,
    from inside the fake socket's ``receive_text``, immediately before that
    call raises ``ws_mod._WebSocketDisconnect``.  Tests use it to register
    sessions bound to the live transport.  Nothing is returned; callers
    inspect ``server._sessions`` afterwards.
    """

    # Replace session finalization with a do-nothing stand-in for this run.
    def _noop_finalize(s, end_reason="tui_close"):
        return None

    monkeypatch.setattr(server, "_finalize_session", _noop_finalize)

    transports = []
    original_transport_cls = ws_mod.WSTransport

    def _recording_transport(ws, loop, **kw):
        # Build the real transport, but remember it so the fake socket can
        # hand it to ``seed`` later.
        transport = original_transport_cls(ws, loop, **kw)
        transports.append(transport)
        return transport

    monkeypatch.setattr(ws_mod, "WSTransport", _recording_transport)

    class FakeWS:
        """Socket stand-in: every call is a no-op except ``receive_text``."""

        async def accept(self):
            return None

        async def send_text(self, line):
            return None

        async def close(self):
            return None

        async def receive_text(self):
            # The transport exists by now: let the test attach it to its
            # sessions, then simulate the peer going away.
            seed(transports[0])
            raise ws_mod._WebSocketDisconnect()

    asyncio.run(ws_mod.handle_ws(FakeWS()))
|
|
|
|
|
|
def test_ws_disconnect_reaps_flagged_session_and_closes_worker(monkeypatch):
    """A ``close_on_disconnect`` session is dropped and its worker closed once."""
    close_calls = []

    class FakeWorker:
        def close(self):
            close_calls.append(True)

    def _seed_flagged(transport):
        # Register one session bound to the live transport and flagged for
        # teardown on disconnect.
        flagged_session = {
            "transport": transport,
            "close_on_disconnect": True,
            "slash_worker": FakeWorker(),
            "session_key": "k",
        }
        server._sessions.update(flagged=flagged_session)

    server._sessions.clear()
    try:
        _run_disconnect(monkeypatch, _seed_flagged)

        assert "flagged" not in server._sessions
        assert closed_once(close_calls) if False else close_calls == [True]
    finally:
        # Leave the module-level registry empty regardless of the outcome.
        server._sessions.clear()
|
|
|
|
|
|
def test_ws_disconnect_preserves_and_repoints_reconnectable_session(monkeypatch):
    """A session without ``close_on_disconnect`` stays, re-pointed at stdio."""

    def _seed_plain(transport):
        # Register one session bound to the live transport that is NOT
        # flagged for teardown on disconnect.
        plain_session = {
            "transport": transport,
            "close_on_disconnect": False,
            "session_key": "k",
        }
        server._sessions.update(plain=plain_session)

    server._sessions.clear()
    try:
        _run_disconnect(monkeypatch, _seed_plain)

        surviving = server._sessions["plain"]
        assert surviving["transport"] is server._stdio_transport
    finally:
        # Leave the module-level registry empty regardless of the outcome.
        server._sessions.clear()
|