From 56b99e823950cebeef0da6f23bbe9db6e02f3655 Mon Sep 17 00:00:00 2001 From: opriz Date: Sat, 18 Apr 2026 13:55:03 +0800 Subject: [PATCH] fix(gateway): force-unlink stale PID file after --replace takeover If the old process crashed without firing its atexit handler, remove_pid_file() is a no-op. Force-unlink the stale gateway.pid so write_pid_file() (O_CREAT|O_EXCL) does not hit FileExistsError. --- gateway/run.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/gateway/run.py b/gateway/run.py index d3ee8d4a018..4bb85ea7d69 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -10807,6 +10807,12 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool = except (ProcessLookupError, PermissionError, OSError): pass remove_pid_file() + # remove_pid_file() is a no-op when the PID doesn't match. + # Force-unlink to cover the old-process-crashed case. + try: + (get_hermes_home() / "gateway.pid").unlink(missing_ok=True) + except Exception: + pass # Clean up any takeover marker the old process didn't consume # (e.g. SIGKILL'd before its shutdown handler could read it). try: