From 8a64f3e3681924f1059edf9a01d95d565f9a001d Mon Sep 17 00:00:00 2001 From: Teknium Date: Sun, 12 Apr 2026 21:19:44 -0700 Subject: [PATCH] feat(gateway): notify /restart requester when gateway comes back online MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a user sends /restart, the gateway now persists their routing info (platform, chat_id, thread_id) to .restart_notify.json. After the new gateway process starts and adapters connect, it reads the file, sends a 'Gateway restarted successfully' message to that specific chat, and cleans up the file. This follows the same pattern as _send_update_notification (used by /update). Thread IDs are preserved so the notification lands in the correct Telegram topic or Discord thread. Previously, after /restart the user had no feedback that the gateway was back — they had to send a message to find out. Now they get a proactive notification and know their session continues. --- gateway/run.py | 61 ++++++++ tests/gateway/test_restart_notification.py | 173 +++++++++++++++++++++ 2 files changed, 234 insertions(+) create mode 100644 tests/gateway/test_restart_notification.py diff --git a/gateway/run.py b/gateway/run.py index 31fe724f2..bf493846b 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -1717,6 +1717,9 @@ class GatewayRunner: ): self._schedule_update_notification_watch() + # Notify the chat that initiated /restart that the gateway is back. + await self._send_restart_notification() + # Drain any recovered process watchers (from crash recovery checkpoint) try: from tools.process_registry import process_registry @@ -4147,6 +4150,22 @@ class GatewayRunner: return f"⏳ Draining {count} active agent(s) before restart..." return "⏳ Gateway restart already in progress..." + # Save the requester's routing info so the new gateway process can + # notify them once it comes back online. + try: + import json as _json + notify_data = { + "platform": event.source.platform.value if event.source.platform else None, + "chat_id": event.source.chat_id, + } + if event.source.thread_id: + notify_data["thread_id"] = event.source.thread_id + (_hermes_home / ".restart_notify.json").write_text( + _json.dumps(notify_data) + ) + except Exception as e: + logger.debug("Failed to write restart notify file: %s", e) + active_agents = self._running_agent_count() self.request_restart(detached=True, via_service=False) if active_agents: @@ -6880,6 +6899,48 @@ class GatewayRunner: return True + async def _send_restart_notification(self) -> None: + """Notify the chat that initiated /restart that the gateway is back.""" + import json as _json + + notify_path = _hermes_home / ".restart_notify.json" + if not notify_path.exists(): + return + + try: + data = _json.loads(notify_path.read_text()) + platform_str = data.get("platform") + chat_id = data.get("chat_id") + thread_id = data.get("thread_id") + + if not platform_str or not chat_id: + return + + platform = Platform(platform_str) + adapter = self.adapters.get(platform) + if not adapter: + logger.debug( + "Restart notification skipped: %s adapter not connected", + platform_str, + ) + return + + metadata = {"thread_id": thread_id} if thread_id else None + await adapter.send( + chat_id, + "♻ Gateway restarted successfully. Your session continues.", + metadata=metadata, + ) + logger.info( + "Sent restart notification to %s:%s", + platform_str, + chat_id, + ) + except Exception as e: + logger.warning("Restart notification failed: %s", e) + finally: + notify_path.unlink(missing_ok=True) + def _set_session_env(self, context: SessionContext) -> list: """Set session context variables for the current async task. diff --git a/tests/gateway/test_restart_notification.py b/tests/gateway/test_restart_notification.py new file mode 100644 index 000000000..ac7a89f27 --- /dev/null +++ b/tests/gateway/test_restart_notification.py @@ -0,0 +1,173 @@ +"""Tests for /restart notification — the gateway notifies the requester on comeback.""" + +import asyncio +import json +from pathlib import Path +from unittest.mock import AsyncMock, MagicMock + +import pytest + +import gateway.run as gateway_run +from gateway.config import Platform +from gateway.platforms.base import MessageEvent, MessageType +from gateway.session import build_session_key +from tests.gateway.restart_test_helpers import ( + make_restart_runner, + make_restart_source, +) + + +# ── _handle_restart_command writes .restart_notify.json ────────────────── + + +@pytest.mark.asyncio +async def test_restart_command_writes_notify_file(tmp_path, monkeypatch): + """When /restart fires, the requester's routing info is persisted to disk.""" + monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path) + + runner, _adapter = make_restart_runner() + runner.request_restart = MagicMock(return_value=True) + + source = make_restart_source(chat_id="42") + event = MessageEvent( + text="/restart", + message_type=MessageType.TEXT, + source=source, + message_id="m1", + ) + + result = await runner._handle_restart_command(event) + assert "Restarting" in result + + notify_path = tmp_path / ".restart_notify.json" + assert notify_path.exists() + data = json.loads(notify_path.read_text()) + assert data["platform"] == "telegram" + assert data["chat_id"] == "42" + assert "thread_id" not in data # no thread → omitted + + +@pytest.mark.asyncio +async def test_restart_command_preserves_thread_id(tmp_path, monkeypatch): + """Thread ID is saved when the requester is in a threaded chat.""" + monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path) + + runner, _adapter = make_restart_runner() + runner.request_restart = MagicMock(return_value=True) + + source = make_restart_source(chat_id="99") + source.thread_id = "topic_7" + + event = MessageEvent( + text="/restart", + message_type=MessageType.TEXT, + source=source, + message_id="m2", + ) + + await runner._handle_restart_command(event) + + data = json.loads((tmp_path / ".restart_notify.json").read_text()) + assert data["thread_id"] == "topic_7" + + +# ── _send_restart_notification ─────────────────────────────────────────── + + +@pytest.mark.asyncio +async def test_send_restart_notification_delivers_and_cleans_up(tmp_path, monkeypatch): + """On startup, the notification is sent and the file is removed.""" + monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path) + + notify_path = tmp_path / ".restart_notify.json" + notify_path.write_text(json.dumps({ + "platform": "telegram", + "chat_id": "42", + })) + + runner, adapter = make_restart_runner() + adapter.send = AsyncMock() + + await runner._send_restart_notification() + + adapter.send.assert_called_once() + call_args = adapter.send.call_args + assert call_args[0][0] == "42" # chat_id + assert "restarted" in call_args[0][1].lower() + assert call_args[1].get("metadata") is None # no thread + assert not notify_path.exists() + + +@pytest.mark.asyncio +async def test_send_restart_notification_with_thread(tmp_path, monkeypatch): + """Thread ID is passed as metadata so the message lands in the right topic.""" + monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path) + + notify_path = tmp_path / ".restart_notify.json" + notify_path.write_text(json.dumps({ + "platform": "telegram", + "chat_id": "99", + "thread_id": "topic_7", + })) + + runner, adapter = make_restart_runner() + adapter.send = AsyncMock() + + await runner._send_restart_notification() + + call_args = adapter.send.call_args + assert call_args[1]["metadata"] == {"thread_id": "topic_7"} + assert not notify_path.exists() + + +@pytest.mark.asyncio +async def test_send_restart_notification_noop_when_no_file(tmp_path, monkeypatch): + """Nothing happens if there's no pending restart notification.""" + monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path) + + runner, adapter = make_restart_runner() + adapter.send = AsyncMock() + + await runner._send_restart_notification() + + adapter.send.assert_not_called() + + +@pytest.mark.asyncio +async def test_send_restart_notification_skips_when_adapter_missing(tmp_path, monkeypatch): + """If the requester's platform isn't connected, clean up without crashing.""" + monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path) + + notify_path = tmp_path / ".restart_notify.json" + notify_path.write_text(json.dumps({ + "platform": "discord", # runner only has telegram adapter + "chat_id": "42", + })) + + runner, _adapter = make_restart_runner() + + await runner._send_restart_notification() + + # File cleaned up even though we couldn't send + assert not notify_path.exists() + + +@pytest.mark.asyncio +async def test_send_restart_notification_cleans_up_on_send_failure( + tmp_path, monkeypatch +): + """If the adapter.send() raises, the file is still cleaned up.""" + monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path) + + notify_path = tmp_path / ".restart_notify.json" + notify_path.write_text(json.dumps({ + "platform": "telegram", + "chat_id": "42", + })) + + runner, adapter = make_restart_runner() + adapter.send = AsyncMock(side_effect=RuntimeError("network down")) + + await runner._send_restart_notification() + + assert not notify_path.exists() # cleaned up despite error