mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
feat(gateway): notify /restart requester when gateway comes back online
When a user sends /restart, the gateway now persists their routing info (platform, chat_id, thread_id) to .restart_notify.json. After the new gateway process starts and adapters connect, it reads the file, sends a 'Gateway restarted successfully' message to that specific chat, and cleans up the file. This follows the same pattern as _send_update_notification (used by /update). Thread IDs are preserved so the notification lands in the correct Telegram topic or Discord thread. Previously, after /restart the user had no feedback that the gateway was back — they had to send a message to find out. Now they get a proactive notification and know their session continues.
This commit is contained in:
parent
b22663ea69
commit
8a64f3e368
2 changed files with 234 additions and 0 deletions
|
|
@ -1717,6 +1717,9 @@ class GatewayRunner:
|
|||
):
|
||||
self._schedule_update_notification_watch()
|
||||
|
||||
# Notify the chat that initiated /restart that the gateway is back.
|
||||
await self._send_restart_notification()
|
||||
|
||||
# Drain any recovered process watchers (from crash recovery checkpoint)
|
||||
try:
|
||||
from tools.process_registry import process_registry
|
||||
|
|
@ -4147,6 +4150,22 @@ class GatewayRunner:
|
|||
return f"⏳ Draining {count} active agent(s) before restart..."
|
||||
return "⏳ Gateway restart already in progress..."
|
||||
|
||||
# Save the requester's routing info so the new gateway process can
|
||||
# notify them once it comes back online.
|
||||
try:
|
||||
import json as _json
|
||||
notify_data = {
|
||||
"platform": event.source.platform.value if event.source.platform else None,
|
||||
"chat_id": event.source.chat_id,
|
||||
}
|
||||
if event.source.thread_id:
|
||||
notify_data["thread_id"] = event.source.thread_id
|
||||
(_hermes_home / ".restart_notify.json").write_text(
|
||||
_json.dumps(notify_data)
|
||||
)
|
||||
except Exception as e:
|
||||
logger.debug("Failed to write restart notify file: %s", e)
|
||||
|
||||
active_agents = self._running_agent_count()
|
||||
self.request_restart(detached=True, via_service=False)
|
||||
if active_agents:
|
||||
|
|
@ -6880,6 +6899,48 @@ class GatewayRunner:
|
|||
|
||||
return True
|
||||
|
||||
async def _send_restart_notification(self) -> None:
|
||||
"""Notify the chat that initiated /restart that the gateway is back."""
|
||||
import json as _json
|
||||
|
||||
notify_path = _hermes_home / ".restart_notify.json"
|
||||
if not notify_path.exists():
|
||||
return
|
||||
|
||||
try:
|
||||
data = _json.loads(notify_path.read_text())
|
||||
platform_str = data.get("platform")
|
||||
chat_id = data.get("chat_id")
|
||||
thread_id = data.get("thread_id")
|
||||
|
||||
if not platform_str or not chat_id:
|
||||
return
|
||||
|
||||
platform = Platform(platform_str)
|
||||
adapter = self.adapters.get(platform)
|
||||
if not adapter:
|
||||
logger.debug(
|
||||
"Restart notification skipped: %s adapter not connected",
|
||||
platform_str,
|
||||
)
|
||||
return
|
||||
|
||||
metadata = {"thread_id": thread_id} if thread_id else None
|
||||
await adapter.send(
|
||||
chat_id,
|
||||
"♻ Gateway restarted successfully. Your session continues.",
|
||||
metadata=metadata,
|
||||
)
|
||||
logger.info(
|
||||
"Sent restart notification to %s:%s",
|
||||
platform_str,
|
||||
chat_id,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning("Restart notification failed: %s", e)
|
||||
finally:
|
||||
notify_path.unlink(missing_ok=True)
|
||||
|
||||
def _set_session_env(self, context: SessionContext) -> list:
|
||||
"""Set session context variables for the current async task.
|
||||
|
||||
|
|
|
|||
173
tests/gateway/test_restart_notification.py
Normal file
173
tests/gateway/test_restart_notification.py
Normal file
|
|
@ -0,0 +1,173 @@
|
|||
"""Tests for /restart notification — the gateway notifies the requester on comeback."""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
from pathlib import Path
|
||||
from unittest.mock import AsyncMock, MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
import gateway.run as gateway_run
|
||||
from gateway.config import Platform
|
||||
from gateway.platforms.base import MessageEvent, MessageType
|
||||
from gateway.session import build_session_key
|
||||
from tests.gateway.restart_test_helpers import (
|
||||
make_restart_runner,
|
||||
make_restart_source,
|
||||
)
|
||||
|
||||
|
||||
# ── _handle_restart_command writes .restart_notify.json ──────────────────
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_restart_command_writes_notify_file(tmp_path, monkeypatch):
|
||||
"""When /restart fires, the requester's routing info is persisted to disk."""
|
||||
monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
|
||||
|
||||
runner, _adapter = make_restart_runner()
|
||||
runner.request_restart = MagicMock(return_value=True)
|
||||
|
||||
source = make_restart_source(chat_id="42")
|
||||
event = MessageEvent(
|
||||
text="/restart",
|
||||
message_type=MessageType.TEXT,
|
||||
source=source,
|
||||
message_id="m1",
|
||||
)
|
||||
|
||||
result = await runner._handle_restart_command(event)
|
||||
assert "Restarting" in result
|
||||
|
||||
notify_path = tmp_path / ".restart_notify.json"
|
||||
assert notify_path.exists()
|
||||
data = json.loads(notify_path.read_text())
|
||||
assert data["platform"] == "telegram"
|
||||
assert data["chat_id"] == "42"
|
||||
assert "thread_id" not in data # no thread → omitted
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_restart_command_preserves_thread_id(tmp_path, monkeypatch):
|
||||
"""Thread ID is saved when the requester is in a threaded chat."""
|
||||
monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
|
||||
|
||||
runner, _adapter = make_restart_runner()
|
||||
runner.request_restart = MagicMock(return_value=True)
|
||||
|
||||
source = make_restart_source(chat_id="99")
|
||||
source.thread_id = "topic_7"
|
||||
|
||||
event = MessageEvent(
|
||||
text="/restart",
|
||||
message_type=MessageType.TEXT,
|
||||
source=source,
|
||||
message_id="m2",
|
||||
)
|
||||
|
||||
await runner._handle_restart_command(event)
|
||||
|
||||
data = json.loads((tmp_path / ".restart_notify.json").read_text())
|
||||
assert data["thread_id"] == "topic_7"
|
||||
|
||||
|
||||
# ── _send_restart_notification ───────────────────────────────────────────
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_send_restart_notification_delivers_and_cleans_up(tmp_path, monkeypatch):
|
||||
"""On startup, the notification is sent and the file is removed."""
|
||||
monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
|
||||
|
||||
notify_path = tmp_path / ".restart_notify.json"
|
||||
notify_path.write_text(json.dumps({
|
||||
"platform": "telegram",
|
||||
"chat_id": "42",
|
||||
}))
|
||||
|
||||
runner, adapter = make_restart_runner()
|
||||
adapter.send = AsyncMock()
|
||||
|
||||
await runner._send_restart_notification()
|
||||
|
||||
adapter.send.assert_called_once()
|
||||
call_args = adapter.send.call_args
|
||||
assert call_args[0][0] == "42" # chat_id
|
||||
assert "restarted" in call_args[0][1].lower()
|
||||
assert call_args[1].get("metadata") is None # no thread
|
||||
assert not notify_path.exists()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_send_restart_notification_with_thread(tmp_path, monkeypatch):
|
||||
"""Thread ID is passed as metadata so the message lands in the right topic."""
|
||||
monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
|
||||
|
||||
notify_path = tmp_path / ".restart_notify.json"
|
||||
notify_path.write_text(json.dumps({
|
||||
"platform": "telegram",
|
||||
"chat_id": "99",
|
||||
"thread_id": "topic_7",
|
||||
}))
|
||||
|
||||
runner, adapter = make_restart_runner()
|
||||
adapter.send = AsyncMock()
|
||||
|
||||
await runner._send_restart_notification()
|
||||
|
||||
call_args = adapter.send.call_args
|
||||
assert call_args[1]["metadata"] == {"thread_id": "topic_7"}
|
||||
assert not notify_path.exists()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_send_restart_notification_noop_when_no_file(tmp_path, monkeypatch):
|
||||
"""Nothing happens if there's no pending restart notification."""
|
||||
monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
|
||||
|
||||
runner, adapter = make_restart_runner()
|
||||
adapter.send = AsyncMock()
|
||||
|
||||
await runner._send_restart_notification()
|
||||
|
||||
adapter.send.assert_not_called()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_send_restart_notification_skips_when_adapter_missing(tmp_path, monkeypatch):
|
||||
"""If the requester's platform isn't connected, clean up without crashing."""
|
||||
monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
|
||||
|
||||
notify_path = tmp_path / ".restart_notify.json"
|
||||
notify_path.write_text(json.dumps({
|
||||
"platform": "discord", # runner only has telegram adapter
|
||||
"chat_id": "42",
|
||||
}))
|
||||
|
||||
runner, _adapter = make_restart_runner()
|
||||
|
||||
await runner._send_restart_notification()
|
||||
|
||||
# File cleaned up even though we couldn't send
|
||||
assert not notify_path.exists()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_send_restart_notification_cleans_up_on_send_failure(
|
||||
tmp_path, monkeypatch
|
||||
):
|
||||
"""If the adapter.send() raises, the file is still cleaned up."""
|
||||
monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
|
||||
|
||||
notify_path = tmp_path / ".restart_notify.json"
|
||||
notify_path.write_text(json.dumps({
|
||||
"platform": "telegram",
|
||||
"chat_id": "42",
|
||||
}))
|
||||
|
||||
runner, adapter = make_restart_runner()
|
||||
adapter.send = AsyncMock(side_effect=RuntimeError("network down"))
|
||||
|
||||
await runner._send_restart_notification()
|
||||
|
||||
assert not notify_path.exists() # cleaned up despite error
|
||||
Loading…
Add table
Add a link
Reference in a new issue