mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
Crash-log stack trace (tui_gateway_crash.log) from the user's session pinned the regression: SIGPIPE arrived while main thread was blocked on for-raw-in-sys.stdin — i.e., a background thread (debug print to stderr, most likely from HERMES_VOICE_DEBUG=1) wrote to a pipe whose buffer the TUI hadn't drained yet, and SIG_DFL promptly killed the process. Two fixes that together restore CLI parity: - entry.py: SIGPIPE → SIG_IGN instead of the _log_signal handler that then exited. With SIG_IGN, Python raises BrokenPipeError on the offending write, which write_json already handles with a clean exit via _log_exit. SIGTERM / SIGHUP still route through _log_signal so real termination signals remain diagnosable. - hermes_cli/voice.py:_debug: wrap the stderr print in a BrokenPipeError / OSError try/except. This runs from daemon threads (silence callback, TTS playback, beep), so a broken stderr must not escape and ride up into the main event loop. Verified by spawning the gateway subprocess locally: voice.toggle status → 200 OK, process stays alive, clean exit on stdin close logs "reason=stdin EOF" instead of a silent reap.
117 lines
4.5 KiB
Python
117 lines
4.5 KiB
Python
import json
|
|
import os
|
|
import signal
|
|
import sys
|
|
import time
|
|
import traceback
|
|
|
|
from tui_gateway.server import _CRASH_LOG, dispatch, resolve_skin, write_json
|
|
|
|
|
|
def _log_signal(signum: int, frame) -> None:
    """Record a termination signal in the crash log, then exit with status 0.

    Appends to ``_CRASH_LOG`` a timestamped banner naming the signal, the
    stack of the frame that was interrupted (when one is supplied), and the
    stack of every thread registered with ``threading``. This leaves a trace
    of WHICH thread was doing WHAT when the process was told to stop.

    Everything here is best-effort: a failure while writing the log, or
    while echoing the one-line notice to stderr, must never prevent the
    handler from reaching ``sys.exit(0)``.

    Args:
        signum: Number of the signal being handled.
        frame: Frame interrupted by the signal, or ``None``.

    Raises:
        SystemExit: Always, with exit code 0.
    """
    name = {
        signal.SIGPIPE: "SIGPIPE",
        signal.SIGTERM: "SIGTERM",
        signal.SIGHUP: "SIGHUP",
    }.get(signum, f"signal {signum}")
    try:
        # dirname is "" for a bare filename; makedirs("") would raise and
        # the whole log entry would be lost.
        log_dir = os.path.dirname(_CRASH_LOG)
        if log_dir:
            os.makedirs(log_dir, exist_ok=True)
        with open(_CRASH_LOG, "a", encoding="utf-8") as f:
            f.write(
                f"\n=== {name} received · {time.strftime('%Y-%m-%d %H:%M:%S')} ===\n"
            )
            if frame is not None:
                f.write("main-thread stack at signal delivery:\n")
                traceback.print_stack(frame, file=f)
            # All live threads — the interesting activity may be in a
            # background thread rather than the interrupted frame.
            import threading as _threading

            # One snapshot of the frames and of the thread table, so both
            # stay consistent while other threads keep running.
            frames = sys._current_frames()
            for tid, th in list(_threading._active.items()):
                f.write(f"\n--- thread {th.name} (id={tid}) ---\n")
                thread_frame = frames.get(tid)
                if thread_frame is None:
                    # format_stack(None) would dump THIS handler's stack and
                    # mislabel it as the thread's.
                    f.write("(no Python frame available)\n")
                    continue
                f.write("".join(traceback.format_stack(thread_frame)))
    except Exception:
        # Deliberate best-effort: diagnostics must not block the exit.
        pass
    try:
        print(f"[gateway-signal] {name}", file=sys.stderr, flush=True)
    except (OSError, ValueError):
        # stderr may itself be a broken pipe (BrokenPipeError is an OSError)
        # or already closed (ValueError); still exit cleanly.
        pass
    sys.exit(0)
|
|
|
|
|
|
# Signal dispositions, installed at import time.
#
# SIGPIPE is ignored rather than left fatal. Under SIG_DFL the process was
# killed silently whenever a *background* thread (TTS playback chain, voice
# debug stderr emitter, beep thread) wrote to a pipe the TUI had gone quiet
# on, even though the main thread was perfectly fine waiting on stdin. With
# the signal ignored, Python raises BrokenPipeError on the offending write
# instead (write_json already turns that into a clean sys.exit(0) plus a
# _log_exit entry), so the gateway stays alive as long as the main command
# pipe is still readable.
#
# SIGTERM and SIGHUP are real termination requests; they go through
# _log_signal so kills and hangups remain diagnosable.
#
# SIGINT is ignored as well.
# NOTE(review): presumably so an interrupt aimed at the TUI does not also
# take down the gateway — confirm.
for _signum, _disposition in (
    (signal.SIGPIPE, signal.SIG_IGN),
    (signal.SIGTERM, _log_signal),
    (signal.SIGHUP, _log_signal),
    (signal.SIGINT, signal.SIG_IGN),
):
    signal.signal(_signum, _disposition)
# Keep the module namespace free of the loop temporaries.
del _signum, _disposition
|
|
|
|
|
|
def _log_exit(reason: str) -> None:
    """Record why the gateway subprocess is shutting down.

    Four exit paths (startup write fail, parse-error-response write fail,
    dispatch-response write fail, stdin EOF) call this just before the
    process ends with a silent exit status of 0. Without this trail the TUI
    shows "gateway exited" with no actionable clue about WHICH broken pipe
    or WHICH message triggered it — the main reason voice-mode turns look
    like phantom crashes when the real story is "TUI read pipe closed on
    this event".

    The entry is appended to ``_CRASH_LOG`` and echoed to stderr. Both steps
    are best-effort: this function never raises on an I/O failure, so the
    caller's own clean exit is not replaced by a traceback.

    Args:
        reason: Human-readable description of the exit path.
    """
    try:
        # dirname is "" for a bare filename; makedirs("") would raise and
        # the log entry would be lost.
        log_dir = os.path.dirname(_CRASH_LOG)
        if log_dir:
            os.makedirs(log_dir, exist_ok=True)
        with open(_CRASH_LOG, "a", encoding="utf-8") as f:
            f.write(
                f"\n=== gateway exit · {time.strftime('%Y-%m-%d %H:%M:%S')} "
                f"· reason={reason} ===\n"
            )
    except Exception:
        # Deliberate best-effort: logging must not mask the real exit.
        pass
    try:
        print(f"[gateway-exit] {reason}", file=sys.stderr, flush=True)
    except (OSError, ValueError):
        # stderr can be a broken pipe too (BrokenPipeError is an OSError) or
        # already closed (ValueError); the file entry above is the record.
        pass
|
|
|
|
|
|
def main():
    """Serve JSON-RPC requests from stdin until the TUI disconnects.

    First announces readiness with a ``gateway.ready`` event carrying the
    resolved skin. Then every non-blank stdin line is parsed as JSON and
    handed to ``dispatch``; a non-``None`` result is written back with
    ``write_json``. Lines that are not valid JSON are answered with a
    JSON-RPC parse error (code -32700).

    A falsy return from ``write_json`` is treated as a broken stdout pipe:
    the reason is recorded through ``_log_exit`` and the process exits with
    status 0. Reaching the end of stdin is logged the same way and the
    function simply returns.
    """
    ready_event = {
        "jsonrpc": "2.0",
        "method": "event",
        "params": {"type": "gateway.ready", "payload": {"skin": resolve_skin()}},
    }
    if not write_json(ready_event):
        _log_exit("startup write failed (broken stdout pipe before first event)")
        sys.exit(0)

    for raw in sys.stdin:
        text = raw.strip()
        if text == "":
            # Blank lines carry no request.
            continue

        try:
            request = json.loads(text)
        except json.JSONDecodeError:
            parse_error = {
                "jsonrpc": "2.0",
                "error": {"code": -32700, "message": "parse error"},
                "id": None,
            }
            if write_json(parse_error):
                continue
            _log_exit("parse-error-response write failed (broken stdout pipe)")
            sys.exit(0)

        # Captured before dispatch so a failed reply can name the method.
        if isinstance(request, dict):
            method = request.get("method")
        else:
            method = None

        response = dispatch(request)
        if response is None:
            # Nothing to send back for this request.
            continue
        if write_json(response):
            continue
        _log_exit(f"response write failed for method={method!r} (broken stdout pipe)")
        sys.exit(0)

    _log_exit("stdin EOF (TUI closed the command pipe)")
|
|
|
|
|
|
# Run the gateway loop only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|