mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-27 11:22:03 +00:00
fix(gateway): propagate fatal-config exit code through start_gateway clean-exit path
The contributor PR stamped runner._exit_code=78 on non-retryable startup errors, but start_gateway()'s clean-exit branch returned True before the SystemExit(runner.exit_code) site, so main() exited 0. The s6 finish script's [ "$1" = "78" ] check never matched and s6 crash-looped the gateway anyway — the fix was dead as shipped (#51228). Honor runner.exit_code in the clean-exit branch: raise SystemExit(code) when set, else return True (normal /restart clean exit). Add a start_gateway()-level test that asserts process-level SystemExit(78) propagation — the gap the PR's object-level test missed — plus exit_code on the existing _CleanExitRunner mocks.
This commit is contained in:
parent
776f68e1ee
commit
366c2a3766
2 changed files with 52 additions and 0 deletions
|
|
@@ -17783,6 +17783,15 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool =
|
|||
if runner.should_exit_cleanly:
|
||||
if runner.exit_reason:
|
||||
logger.error("Gateway exiting cleanly: %s", runner.exit_reason)
|
||||
# A clean exit that carries an explicit exit code (e.g. a fatal
|
||||
# config error stamped with GATEWAY_FATAL_CONFIG_EXIT_CODE) must
|
||||
# propagate that code to the process so the s6 finish script can
|
||||
# translate it (78 → 125) and stop the supervisor restart loop.
|
||||
# Without this, the early `return True` below makes main() exit 0,
|
||||
# the finish script's `[ "$1" = "78" ]` check never matches, and
|
||||
# s6 crash-loops the gateway anyway (#51228).
|
||||
if runner.exit_code is not None:
|
||||
raise SystemExit(runner.exit_code)
|
||||
return True
|
||||
|
||||
# Start the background cron scheduler via the resolved provider so
|
||||
|
|
|
|||
|
|
@@ -152,6 +152,7 @@ async def test_start_gateway_verbosity_imports_redacting_formatter(monkeypatch,
|
|||
self.config = config
|
||||
self.should_exit_cleanly = True
|
||||
self.exit_reason = None
|
||||
self.exit_code = None
|
||||
self.adapters = {}
|
||||
|
||||
async def start(self):
|
||||
|
|
@@ -186,6 +187,7 @@ async def test_start_gateway_replace_force_uses_terminate_pid(monkeypatch, tmp_p
|
|||
self.config = config
|
||||
self.should_exit_cleanly = True
|
||||
self.exit_reason = None
|
||||
self.exit_code = None
|
||||
self.adapters = {}
|
||||
|
||||
async def start(self):
|
||||
|
|
@@ -334,6 +336,7 @@ async def test_start_gateway_replace_writes_takeover_marker_before_sigterm(
|
|||
self.config = config
|
||||
self.should_exit_cleanly = True
|
||||
self.exit_reason = None
|
||||
self.exit_code = None
|
||||
self.adapters = {}
|
||||
|
||||
async def start(self):
|
||||
|
|
@@ -507,6 +510,46 @@ async def test_runner_exits_with_ex_config_on_nonretryable_startup_error(monkeyp
|
|||
assert state["gateway_state"] == "startup_failed"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_start_gateway_propagates_fatal_config_exit_code(monkeypatch, tmp_path):
    """start_gateway() must raise SystemExit carrying the runner's exit code.

    The stub runner below requests a clean exit while also carrying
    GATEWAY_FATAL_CONFIG_EXIT_CODE. The required outcome is a process-level
    SystemExit with that code -- not a truthy return value -- so that main()
    exits 78 and the s6 finish script can translate it to 125 (no restart).

    Regression guard for #51228: the clean-exit branch of start_gateway()
    used to `return True` before reaching the SystemExit(exit_code) site,
    which made main() exit 0 and let s6 crash-loop the gateway anyway.
    """
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))

    class _FatalConfigRunner:
        # Stand-in for GatewayRunner that reports a fatal-config clean exit.
        def __init__(self, config):
            self.config = config
            self.should_exit_cleanly = True
            self.exit_reason = "discord: Discord bot token already in use"
            self.exit_code = GATEWAY_FATAL_CONFIG_EXIT_CODE
            self.adapters = {}

        async def start(self):
            return True

        async def stop(self):
            return None

    # Patch targets are applied in this exact order; the runner class goes last.
    stubbed_targets = (
        ("gateway.status.get_running_pid", lambda: None),
        ("tools.skills_sync.sync_skills", lambda quiet=True: None),
        ("hermes_logging.setup_logging", lambda hermes_home, mode: tmp_path),
        ("hermes_logging._add_rotating_handler", lambda *args, **kwargs: None),
        ("gateway.run.GatewayRunner", _FatalConfigRunner),
    )
    for dotted_path, replacement in stubbed_targets:
        monkeypatch.setattr(dotted_path, replacement)

    from gateway.run import start_gateway

    with pytest.raises(SystemExit) as raised:
        await start_gateway(config=GatewayConfig(), replace=False, verbosity=0)

    assert raised.value.code == GATEWAY_FATAL_CONFIG_EXIT_CODE
|
||||
|
||||
|
||||
def test_runner_warns_when_docker_gateway_lacks_explicit_output_mount(monkeypatch, tmp_path, caplog):
|
||||
monkeypatch.setenv("HERMES_HOME", str(tmp_path))
|
||||
monkeypatch.setenv("TERMINAL_ENV", "docker")
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue