mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-15 09:21:36 +00:00
fix: dedupe concurrent gateway restarts + surface restart outcome in onboarding UI
Follow-ups to the salvaged Telegram QR onboarding auto-restart:
- _spawn_gateway_restart() reuses a live in-flight 'hermes gateway restart' child instead of spawning a second racing one (e.g. a stale cached frontend and the new backend both requesting a restart, or a restart-button double-click). Both /api/gateway/restart and the onboarding apply path go through it.
- ChannelsPage polls /api/actions/gateway-restart/status after a server-initiated restart and surfaces a non-zero exit (e.g. systemd linger missing) via the manual-restart banner, since restart_started only means the child was spawned.
- Adds a test for the reuse path, plus _ACTION_PROCS isolation in existing tests.
This commit is contained in:
parent
984e69ff62
commit
fa32af886f
3 changed files with 112 additions and 2 deletions
|
|
@ -1372,11 +1372,28 @@ def _tail_lines(path: Path, n: int) -> List[str]:
|
|||
return lines[-n:] if n > 0 else lines
|
||||
|
||||
|
||||
def _spawn_gateway_restart() -> Tuple[subprocess.Popen, bool]:
    """Start ``hermes gateway restart`` unless one is already running.

    Several dashboard paths may ask for a restart almost simultaneously:
    a double-click on the restart button, or a stale cached frontend that
    fires its own restart after the server has already auto-restarted
    following onboarding. Two concurrent ``hermes gateway restart``
    children would race each other on the manual kill-and-start path, so
    a restart that is still running is handed back instead of spawning a
    second one.

    Returns:
        ``(proc, reused)``. ``reused`` is ``True`` when ``proc`` is the
        process already stored under ``"gateway-restart"`` in
        ``_ACTION_PROCS`` and its ``poll()`` still returns ``None``;
        otherwise ``proc`` comes from a fresh ``_spawn_hermes_action``
        call and ``reused`` is ``False``.
    """
    in_flight = _ACTION_PROCS.get("gateway-restart")
    # poll() returning None means the child has not exited yet.
    still_running = in_flight is not None and in_flight.poll() is None
    if not still_running:
        fresh = _spawn_hermes_action(["gateway", "restart"], "gateway-restart")
        return fresh, False
    return in_flight, True
|
||||
|
||||
|
||||
@app.post("/api/gateway/restart")
|
||||
async def restart_gateway():
|
||||
"""Kick off a ``hermes gateway restart`` in the background."""
|
||||
try:
|
||||
proc = _spawn_hermes_action(["gateway", "restart"], "gateway-restart")
|
||||
proc, _reused = _spawn_gateway_restart()
|
||||
except Exception as exc:
|
||||
_log.exception("Failed to spawn gateway restart")
|
||||
raise HTTPException(status_code=500, detail=f"Failed to restart gateway: {exc}")
|
||||
|
|
@ -3757,13 +3774,18 @@ def _restart_gateway_after_telegram_onboarding() -> dict[str, Any]:
|
|||
restart failures so the UI can fall back to the existing manual banner.
|
||||
"""
|
||||
try:
|
||||
proc = _spawn_hermes_action(["gateway", "restart"], "gateway-restart")
|
||||
proc, reused = _spawn_gateway_restart()
|
||||
except Exception as exc:
|
||||
_log.exception("Failed to auto-restart gateway after Telegram onboarding")
|
||||
return {
|
||||
"restart_started": False,
|
||||
"restart_error": str(exc),
|
||||
}
|
||||
if reused:
|
||||
_log.info(
|
||||
"Telegram onboarding: reusing in-flight gateway restart (pid %s)",
|
||||
proc.pid,
|
||||
)
|
||||
return {
|
||||
"restart_started": True,
|
||||
"restart_action": "gateway-restart",
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue