mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-29 01:31:41 +00:00
feat(dashboard): add HTTP health probe for cross-container gateway detection
The dashboard's gateway status detection relied solely on local PID checks (os.kill + /proc), which fails when the gateway runs in a separate container. Changes: - web_server.py: Add _probe_gateway_health() that queries the gateway's HTTP /health/detailed endpoint when the local PID check fails. Activated by setting the GATEWAY_HEALTH_URL env var (e.g. http://gateway:8642/health). Falls back to standard PID check when the env var is not set. - api_server.py: Add GET /health/detailed endpoint that returns full gateway state (platforms, gateway_state, active_agents, pid, etc.) without auth. The existing GET /health remains unchanged for backwards compatibility. - StatusPage.tsx: Handle the case where gateway_pid is null but the gateway is running remotely, displaying 'Running (remote)' instead of 'PID null'. Environment variables: - GATEWAY_HEALTH_URL: URL of the gateway health endpoint (e.g. http://gateway-container:8642/health). Unset = local PID check only. - GATEWAY_HEALTH_TIMEOUT: Probe timeout in seconds (default: 3).
This commit is contained in:
parent
f0b353bade
commit
28c39fda3d
3 changed files with 75 additions and 1 deletions
|
|
@ -10,6 +10,7 @@ Exposes an HTTP server with endpoints:
|
|||
- POST /v1/runs — start a run, returns run_id immediately (202)
|
||||
- GET /v1/runs/{run_id}/events — SSE stream of structured lifecycle events
|
||||
- GET /health — health check
|
||||
- GET /health/detailed — rich status for cross-container dashboard probing
|
||||
|
||||
Any OpenAI-compatible frontend (Open WebUI, LobeChat, LibreChat,
|
||||
AnythingLLM, NextChat, ChatBox, etc.) can connect to hermes-agent
|
||||
|
|
@ -565,6 +566,27 @@ class APIServerAdapter(BasePlatformAdapter):
|
|||
"""GET /health — simple health check."""
|
||||
return web.json_response({"status": "ok", "platform": "hermes-agent"})
|
||||
|
||||
async def _handle_health_detailed(self, request: "web.Request") -> "web.Response":
|
||||
"""GET /health/detailed — rich status for cross-container dashboard probing.
|
||||
|
||||
Returns gateway state, connected platforms, PID, and uptime so the
|
||||
dashboard can display full status without needing a shared PID file or
|
||||
/proc access. No authentication required.
|
||||
"""
|
||||
from gateway.status import read_runtime_status
|
||||
|
||||
runtime = read_runtime_status() or {}
|
||||
return web.json_response({
|
||||
"status": "ok",
|
||||
"platform": "hermes-agent",
|
||||
"gateway_state": runtime.get("gateway_state"),
|
||||
"platforms": runtime.get("platforms", {}),
|
||||
"active_agents": runtime.get("active_agents", 0),
|
||||
"exit_reason": runtime.get("exit_reason"),
|
||||
"updated_at": runtime.get("updated_at"),
|
||||
"pid": os.getpid(),
|
||||
})
|
||||
|
||||
async def _handle_models(self, request: "web.Request") -> "web.Response":
|
||||
"""GET /v1/models — return hermes-agent as an available model."""
|
||||
auth_err = self._check_auth(request)
|
||||
|
|
@ -1783,6 +1805,7 @@ class APIServerAdapter(BasePlatformAdapter):
|
|||
self._app = web.Application(middlewares=mws)
|
||||
self._app["api_server_adapter"] = self
|
||||
self._app.router.add_get("/health", self._handle_health)
|
||||
self._app.router.add_get("/health/detailed", self._handle_health_detailed)
|
||||
self._app.router.add_get("/v1/health", self._handle_health)
|
||||
self._app.router.add_get("/v1/models", self._handle_models)
|
||||
self._app.router.add_post("/v1/chat/completions", self._handle_chat_completions)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue