From 2af0848f3c61449b73b9ad68c98cf0386695c1fa Mon Sep 17 00:00:00 2001 From: 0xbyt4 <35742124+0xbyt4@users.noreply.github.com> Date: Fri, 24 Apr 2026 01:54:20 +0300 Subject: [PATCH] fix(tui): ignore SIGPIPE so stderr back-pressure can't kill the gateway MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Crash-log stack trace (tui_gateway_crash.log) from the user's session pinned the regression: SIGPIPE arrived while the main thread was blocked on for-raw-in-sys.stdin — i.e., a background thread (debug print to stderr, most likely from HERMES_VOICE_DEBUG=1) wrote to a pipe whose buffer the TUI hadn't drained yet, and the installed _log_signal handler promptly exited the process. Two fixes that together restore CLI parity: - entry.py: SIGPIPE → SIG_IGN instead of the _log_signal handler that then exited. With SIG_IGN, Python raises BrokenPipeError on the offending write, which write_json already handles with a clean exit via _log_exit. SIGTERM / SIGHUP still route through _log_signal so real termination signals remain diagnosable. - hermes_cli/voice.py:_debug: wrap the stderr print in a BrokenPipeError / OSError try/except. This runs from daemon threads (silence callback, TTS playback, beep), so a broken stderr must not escape and ride up into the main event loop. Verified by spawning the gateway subprocess locally: voice.toggle status → 200 OK, process stays alive, clean exit on stdin close logs "reason=stdin EOF" instead of a silent reap. --- hermes_cli/voice.py | 11 ++++++++++- tui_gateway/entry.py | 11 ++++++++++- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/hermes_cli/voice.py b/hermes_cli/voice.py index 4deee8636..0a355ce4f 100644 --- a/hermes_cli/voice.py +++ b/hermes_cli/voice.py @@ -44,9 +44,18 @@ def _debug(msg: str) -> None: which createGatewayEventHandler shows as an Activity line — exactly what we need to diagnose "why didn't the loop auto-restart?" in the user's real terminal without shipping a separate debug RPC. 
+ + Any OSError / BrokenPipeError is swallowed because this fires from + background threads (silence callback, TTS daemon, beep) where a + broken stderr pipe must not kill the whole gateway — the main + command pipe (stdin+stdout) is what actually matters. """ - if os.environ.get("HERMES_VOICE_DEBUG", "").strip() == "1": + if os.environ.get("HERMES_VOICE_DEBUG", "").strip() != "1": + return + try: print(f"[voice] {msg}", file=sys.stderr, flush=True) + except (BrokenPipeError, OSError): + pass def _beeps_enabled() -> bool: diff --git a/tui_gateway/entry.py b/tui_gateway/entry.py index 9974ccbeb..7eac6057e 100644 --- a/tui_gateway/entry.py +++ b/tui_gateway/entry.py @@ -44,7 +44,16 @@ def _log_signal(signum: int, frame) -> None: sys.exit(0) -signal.signal(signal.SIGPIPE, _log_signal) +# SIGPIPE: ignore, don't exit. The old _log_signal handler killed the process +# silently whenever a *background* thread (TTS playback chain, voice +# debug stderr emitter, beep thread) wrote to a pipe the TUI had gone +# quiet on — even though the main thread was perfectly fine waiting on +# stdin. Ignoring the signal lets Python raise BrokenPipeError on the +# offending write (write_json already handles that with a clean +# sys.exit(0) + _log_exit), which keeps the gateway alive as long as +# the main command pipe is still readable. Terminal signals still +# route through _log_signal so kills and hangups are diagnosable. +signal.signal(signal.SIGPIPE, signal.SIG_IGN) signal.signal(signal.SIGTERM, _log_signal) signal.signal(signal.SIGHUP, _log_signal) signal.signal(signal.SIGINT, signal.SIG_IGN)