# Reconstructed from the flattened patch "chore(tui): capture
# signal-triggered gateway exits in crash log" (commit 7baf370d3dde),
# which targets tui_gateway/entry.py.
import os
import signal
import sys
import time
import traceback

from tui_gateway.server import _CRASH_LOG, dispatch, resolve_skin, write_json


def _write_thread_stacks(out) -> None:
    """Write the current stack of every live thread to *out*.

    Args:
        out: Writable text stream (the open crash-log file).
    """
    import threading

    # Take both snapshots before writing anything: formatting a stack
    # reads source files, which lets other threads start or exit while
    # we loop. Iterating the live registry directly can then fail with
    # "dictionary changed size during iteration" and truncate the dump.
    frames = sys._current_frames()
    # threading.enumerate() is the public API, but it acquires
    # threading's internal registry lock. This code runs inside a signal
    # handler, so the interrupted main thread may already hold that lock
    # (it is not reentrant on every CPython version). A plain dict copy
    # needs no lock.
    known = dict(getattr(threading, "_active", {}))
    for tid, stack in frames.items():
        thread = known.get(tid)
        label = thread.name if thread is not None else "<unregistered>"
        out.write(f"\n--- thread {label} (id={tid}) ---\n")
        out.write("".join(traceback.format_stack(stack)))


def _log_signal(signum: int, frame) -> None:
    """Capture WHICH thread and WHERE a termination signal hit us.

    SIG_DFL for SIGPIPE kills the process silently the instant any
    background thread (TTS playback, beep, voice status emitter, etc.)
    writes to a stdout the TUI has stopped reading. Without this
    handler the gateway-exited banner in the TUI has no trace: the
    crash log never sees a Python exception because the kernel reaps
    the process before the interpreter runs anything.

    The report is appended to ``_CRASH_LOG`` and a ``[gateway-signal]``
    breadcrumb is printed to stderr. Both steps are best-effort; the
    process always exits afterwards.

    Args:
        signum: Number of the delivered signal.
        frame: Main-thread frame that was executing when the signal
            arrived, or ``None``.

    Raises:
        SystemExit: Always, with status 0.
    """
    name = {
        signal.SIGPIPE: "SIGPIPE",
        signal.SIGTERM: "SIGTERM",
        signal.SIGHUP: "SIGHUP",
    }.get(signum, f"signal {signum}")

    log_error = None
    try:
        log_dir = os.path.dirname(_CRASH_LOG)
        if log_dir:  # makedirs("") raises when the path has no directory part
            os.makedirs(log_dir, exist_ok=True)
        with open(_CRASH_LOG, "a", encoding="utf-8") as f:
            f.write(
                f"\n=== {name} received · {time.strftime('%Y-%m-%d %H:%M:%S')} ===\n"
            )
            if frame is not None:
                f.write("main-thread stack at signal delivery:\n")
                traceback.print_stack(frame, file=f)
            # All live threads: the signal may have been triggered by a
            # background thread (write to broken stdout from TTS, etc.).
            _write_thread_stacks(f)
    except Exception as exc:
        # Deliberately broad: nothing that goes wrong while logging may
        # stop the shutdown below. The failure is reported on stderr.
        log_error = exc

    try:
        print(f"[gateway-signal] {name}", file=sys.stderr, flush=True)
        if log_error is not None:
            print(
                f"[gateway-signal] crash log not written: {log_error!r}",
                file=sys.stderr,
                flush=True,
            )
    except (OSError, ValueError):
        # stderr may be a closed pipe too; with SIGPIPE handled in Python
        # that surfaces as BrokenPipeError instead of killing the process.
        pass

    # NOTE(review): status 0 hides from the parent that a signal ended the
    # process; kept as-is because the TUI may depend on it. Confirm.
    sys.exit(0)


signal.signal(signal.SIGPIPE, _log_signal)
signal.signal(signal.SIGTERM, _log_signal)
signal.signal(signal.SIGHUP, _log_signal)
# SIGINT stays ignored: per the commit message, the TUI handles Ctrl+C.
signal.signal(signal.SIGINT, signal.SIG_IGN)