fix(gateway): preserve home-channel thread targets across restart notifications

This commit is contained in:
leprincep35700 2026-05-01 15:19:25 +00:00 committed by Teknium
parent d87fd9f039
commit b59bb4e351
8 changed files with 544 additions and 39 deletions

View file

@ -12,6 +12,7 @@ class RestartTestAdapter(BasePlatformAdapter):
def __init__(self):
super().__init__(PlatformConfig(enabled=True, token="***"), Platform.TELEGRAM)
self.sent: list[str] = []
self.sent_calls: list[tuple[str, str, object]] = []
async def connect(self):
return True
@ -21,6 +22,7 @@ class RestartTestAdapter(BasePlatformAdapter):
async def send(self, chat_id, content, reply_to=None, metadata=None):
self.sent.append(content)
self.sent_calls.append((chat_id, content, metadata))
return SendResult(success=True, message_id="1")
async def send_typing(self, chat_id, metadata=None):
@ -30,12 +32,17 @@ class RestartTestAdapter(BasePlatformAdapter):
return {"id": chat_id}
def make_restart_source(
    chat_id: str = "123456",
    chat_type: str = "dm",
    thread_id: str | None = None,
) -> SessionSource:
    """Build a Telegram ``SessionSource`` for restart tests.

    Args:
        chat_id: Chat identifier placed on the source.
        chat_type: Chat kind placed on the source (defaults to ``"dm"``).
        thread_id: Optional thread/topic identifier. The default ``None``
            keeps the previous two-argument call sites working unchanged.

    Returns:
        A ``SessionSource`` on ``Platform.TELEGRAM`` with ``user_id="u1"``.
    """
    return SessionSource(
        platform=Platform.TELEGRAM,
        chat_id=chat_id,
        chat_type=chat_type,
        user_id="u1",
        thread_id=thread_id,
    )
@ -81,6 +88,15 @@ def make_restart_runner(
runner._handle_restart_command = GatewayRunner._handle_restart_command.__get__(
runner, GatewayRunner
)
runner._handle_set_home_command = GatewayRunner._handle_set_home_command.__get__(
runner, GatewayRunner
)
runner._send_restart_notification = GatewayRunner._send_restart_notification.__get__(
runner, GatewayRunner
)
runner._send_home_channel_startup_notifications = (
GatewayRunner._send_home_channel_startup_notifications.__get__(runner, GatewayRunner)
)
runner._status_action_label = GatewayRunner._status_action_label.__get__(
runner, GatewayRunner
)

View file

@ -8,7 +8,7 @@ to env vars nothing read on startup — the home channel appeared to set
successfully but was lost on every new gateway session.
"""
from gateway.run import _home_target_env_var
from gateway.run import _home_target_env_var, _home_thread_env_var
def test_matrix_home_target_env_var_uses_home_room():
@ -34,3 +34,9 @@ def test_unknown_platform_home_target_env_var_falls_back_to_home_channel():
def test_case_insensitive_platform_name():
    """Upper-case and mixed-case platform names resolve to the same env vars."""
    expected_by_name = (
        ("MATRIX", "MATRIX_HOME_ROOM"),
        ("Email", "EMAIL_HOME_ADDRESS"),
    )
    for platform_name, env_var in expected_by_name:
        assert _home_target_env_var(platform_name) == env_var
def test_home_thread_env_var_uses_home_target_name_plus_thread_id():
    """The thread env var is the home-target env var name plus ``_THREAD_ID``."""
    expected_by_platform = (
        ("discord", "DISCORD_HOME_CHANNEL_THREAD_ID"),
        ("matrix", "MATRIX_HOME_ROOM_THREAD_ID"),
        ("email", "EMAIL_HOME_ADDRESS_THREAD_ID"),
    )
    for platform_name, env_var in expected_by_platform:
        assert _home_thread_env_var(platform_name) == env_var

View file

@ -8,8 +8,8 @@ from unittest.mock import AsyncMock, MagicMock
import pytest
import gateway.run as gateway_run
from gateway.config import Platform
from gateway.platforms.base import MessageEvent, MessageType
from gateway.config import HomeChannel, Platform
from gateway.platforms.base import MessageEvent, MessageType, SendResult
from gateway.session import build_session_key
from tests.gateway.restart_test_helpers import (
make_restart_runner,
@ -17,6 +17,22 @@ from tests.gateway.restart_test_helpers import (
)
# ── restart marker helpers ───────────────────────────────────────────────
def test_restart_notification_pending_false_without_marker(tmp_path, monkeypatch):
    """With an empty Hermes home directory, no restart notification is pending."""
    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)

    pending = gateway_run._restart_notification_pending()

    assert pending is False
def test_restart_notification_pending_true_with_marker(tmp_path, monkeypatch):
    """A ``.restart_notify.json`` file in the Hermes home marks a pending notice."""
    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
    marker_file = tmp_path / ".restart_notify.json"
    marker_file.write_text("{}")

    pending = gateway_run._restart_notification_pending()

    assert pending is True
# ── _handle_restart_command writes .restart_notify.json ──────────────────
@ -143,6 +159,184 @@ async def test_restart_command_uses_atomic_json_writes_for_marker_files(tmp_path
assert calls[1][1]["platform"] == "telegram"
@pytest.mark.asyncio
async def test_sethome_updates_running_config_for_same_process_restart(tmp_path, monkeypatch):
    """/sethome writes the env value and refreshes the live config in one step."""
    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)

    # Record env writes in memory so the test can inspect what was persisted.
    persisted = {}

    def _record_env_value(key, value):
        persisted[key] = value

    monkeypatch.setattr("hermes_cli.config.save_env_value", _record_env_value)

    runner, _adapter = make_restart_runner()
    origin = make_restart_source(chat_id="home-42")
    origin.chat_name = "Ops Home"
    sethome_event = MessageEvent(
        text="/sethome",
        message_type=MessageType.TEXT,
        source=origin,
        message_id="m-home",
    )

    reply = await runner._handle_set_home_command(sethome_event)
    configured_home = runner.config.get_home_channel(Platform.TELEGRAM)

    assert "Home channel set" in reply
    assert persisted["TELEGRAM_HOME_CHANNEL"] == "home-42"
    assert configured_home is not None
    assert configured_home.chat_id == "home-42"
    assert configured_home.name == "Ops Home"
@pytest.mark.asyncio
async def test_sethome_preserves_thread_target_for_same_process_restart(tmp_path, monkeypatch):
    """/sethome issued inside a thread keeps both the chat id and the thread id."""
    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)

    # Record env writes in memory so the test can inspect what was persisted.
    persisted = {}

    def _record_env_value(key, value):
        persisted[key] = value

    monkeypatch.setattr("hermes_cli.config.save_env_value", _record_env_value)

    runner, _adapter = make_restart_runner()
    origin = make_restart_source(chat_id="parent-42", thread_id="topic-7")
    origin.chat_name = "Ops Topic"
    sethome_event = MessageEvent(
        text="/sethome",
        message_type=MessageType.TEXT,
        source=origin,
        message_id="m-home-thread",
    )

    reply = await runner._handle_set_home_command(sethome_event)
    configured_home = runner.config.get_home_channel(Platform.TELEGRAM)

    assert "Home channel set" in reply
    assert persisted["TELEGRAM_HOME_CHANNEL"] == "parent-42"
    assert persisted["TELEGRAM_HOME_CHANNEL_THREAD_ID"] == "topic-7"
    assert configured_home is not None
    assert configured_home.chat_id == "parent-42"
    assert configured_home.thread_id == "topic-7"
# ── home-channel startup notifications ─────────────────────────────────────
@pytest.mark.asyncio
async def test_send_home_channel_startup_notification_to_configured_home(tmp_path, monkeypatch):
    """A thread-less home channel receives the startup notice without metadata."""
    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
    runner, adapter = make_restart_runner()
    home_channel = HomeChannel(
        platform=Platform.TELEGRAM,
        chat_id="home-42",
        name="Ops Home",
    )
    runner.config.platforms[Platform.TELEGRAM].home_channel = home_channel
    adapter.send = AsyncMock()

    notified_targets = await runner._send_home_channel_startup_notifications()

    assert notified_targets == {("telegram", "home-42", None)}
    adapter.send.assert_called_once_with(
        "home-42",
        "♻️ Gateway online — Hermes is back and ready.",
    )
@pytest.mark.asyncio
async def test_send_home_channel_startup_notification_preserves_thread_metadata(
    tmp_path, monkeypatch
):
    """A home channel with a thread id forwards it as ``metadata`` on send."""
    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
    runner, adapter = make_restart_runner()
    threaded_home = HomeChannel(
        platform=Platform.TELEGRAM,
        chat_id="parent-42",
        name="Ops Topic",
        thread_id="topic-7",
    )
    runner.config.platforms[Platform.TELEGRAM].home_channel = threaded_home
    send_ok = SendResult(success=True, message_id="home")
    adapter.send = AsyncMock(return_value=send_ok)

    notified_targets = await runner._send_home_channel_startup_notifications()

    assert notified_targets == {("telegram", "parent-42", "topic-7")}
    adapter.send.assert_called_once_with(
        "parent-42",
        "♻️ Gateway online — Hermes is back and ready.",
        metadata={"thread_id": "topic-7"},
    )
@pytest.mark.asyncio
async def test_send_home_channel_startup_notification_skips_restart_target(
    tmp_path, monkeypatch
):
    """A home channel already listed in ``skip_targets`` is not messaged again."""
    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
    runner, adapter = make_restart_runner()
    home_channel = HomeChannel(
        platform=Platform.TELEGRAM,
        chat_id="42",
        name="Ops Home",
    )
    runner.config.platforms[Platform.TELEGRAM].home_channel = home_channel
    adapter.send = AsyncMock()

    # Same platform, chat and (absent) thread as the configured home channel.
    already_notified = {("telegram", "42", None)}
    notified_targets = await runner._send_home_channel_startup_notifications(
        skip_targets=already_notified
    )

    assert notified_targets == set()
    adapter.send.assert_not_called()
@pytest.mark.asyncio
async def test_send_home_channel_startup_notification_does_not_skip_different_thread(
    tmp_path, monkeypatch
):
    """A skip entry for another thread of the same chat does not suppress the notice."""
    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
    runner, adapter = make_restart_runner()
    home_channel = HomeChannel(
        platform=Platform.TELEGRAM,
        chat_id="42",
        name="Ops Home",
    )
    runner.config.platforms[Platform.TELEGRAM].home_channel = home_channel
    send_ok = SendResult(success=True, message_id="home")
    adapter.send = AsyncMock(return_value=send_ok)

    # Same chat id as the home channel, but the skip entry carries a thread id.
    other_thread = {("telegram", "42", "topic-7")}
    notified_targets = await runner._send_home_channel_startup_notifications(
        skip_targets=other_thread
    )

    assert notified_targets == {("telegram", "42", None)}
    adapter.send.assert_called_once()
@pytest.mark.asyncio
async def test_send_home_channel_startup_notification_ignores_false_send_result(
    tmp_path, monkeypatch
):
    """A send that returns ``success=False`` is attempted once but not counted as delivered."""
    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
    runner, adapter = make_restart_runner()
    home_channel = HomeChannel(
        platform=Platform.TELEGRAM,
        chat_id="home-42",
        name="Ops Home",
    )
    runner.config.platforms[Platform.TELEGRAM].home_channel = home_channel
    send_failed = SendResult(success=False, error="network down")
    adapter.send = AsyncMock(return_value=send_failed)

    notified_targets = await runner._send_home_channel_startup_notifications()

    assert notified_targets == set()
    adapter.send.assert_called_once()
# ── _send_restart_notification ───────────────────────────────────────────
@ -160,8 +354,9 @@ async def test_send_restart_notification_delivers_and_cleans_up(tmp_path, monkey
runner, adapter = make_restart_runner()
adapter.send = AsyncMock()
await runner._send_restart_notification()
delivered_target = await runner._send_restart_notification()
assert delivered_target == ("telegram", "42", None)
adapter.send.assert_called_once()
call_args = adapter.send.call_args
assert call_args[0][0] == "42" # chat_id
@ -185,8 +380,9 @@ async def test_send_restart_notification_with_thread(tmp_path, monkeypatch):
runner, adapter = make_restart_runner()
adapter.send = AsyncMock()
await runner._send_restart_notification()
delivered_target = await runner._send_restart_notification()
assert delivered_target == ("telegram", "99", "topic_7")
call_args = adapter.send.call_args
assert call_args[1]["metadata"] == {"thread_id": "topic_7"}
assert not notify_path.exists()
@ -240,9 +436,10 @@ async def test_send_restart_notification_cleans_up_on_send_failure(
runner, adapter = make_restart_runner()
adapter.send = AsyncMock(side_effect=RuntimeError("network down"))
await runner._send_restart_notification()
delivered_target = await runner._send_restart_notification()
# File cleaned up even though send raised.
assert delivered_target is None
assert not notify_path.exists()
@ -274,7 +471,7 @@ async def test_send_restart_notification_logs_warning_on_sendresult_failure(
)
with caplog.at_level("DEBUG", logger="gateway.run"):
await runner._send_restart_notification()
delivered_target = await runner._send_restart_notification()
success_lines = [
r for r in caplog.records
@ -286,6 +483,7 @@ async def test_send_restart_notification_logs_warning_on_sendresult_failure(
and "was not delivered" in r.getMessage()
and "Chat not found" in r.getMessage()
]
assert delivered_target is None
assert not success_lines, (
"Expected no INFO 'Sent restart notification' line when send failed, "
f"got: {[r.getMessage() for r in success_lines]}"
@ -317,12 +515,13 @@ async def test_send_restart_notification_logs_info_on_sendresult_success(
adapter.send = AsyncMock(return_value=SendResult(success=True, message_id="m-1"))
with caplog.at_level("DEBUG", logger="gateway.run"):
await runner._send_restart_notification()
delivered_target = await runner._send_restart_notification()
success_lines = [
r for r in caplog.records
if r.levelname == "INFO" and "Sent restart notification" in r.getMessage()
]
assert delivered_target == ("telegram", "42", None)
assert success_lines, (
"Expected INFO 'Sent restart notification' when send succeeded; "
f"got records: {[(r.levelname, r.getMessage()) for r in caplog.records]}"

View file

@ -32,7 +32,8 @@ from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from gateway.config import GatewayConfig, Platform, PlatformConfig
from gateway.config import GatewayConfig, HomeChannel, Platform, PlatformConfig
from gateway.platforms.base import SendResult
from gateway.run import (
_auto_continue_freshness_window,
_coerce_gateway_timestamp,
@ -931,6 +932,84 @@ async def test_restart_banner_uses_try_to_resume_wording():
assert "try to resume" in msg
@pytest.mark.asyncio
async def test_restart_notifies_home_channel_even_without_active_sessions():
    """With no running agents, the home channel still gets the restart banner."""
    runner, adapter = make_restart_runner()
    runner._restart_requested = True
    home_channel = HomeChannel(
        platform=Platform.TELEGRAM,
        chat_id="home-42",
        name="Ops Home",
    )
    runner.config.platforms[Platform.TELEGRAM].home_channel = home_channel

    await runner._notify_active_sessions_of_shutdown()

    expected_banner = (
        "⚠️ Gateway restarting — Your current task will be interrupted. "
        "Send any message after restart and I'll try to resume where you left off."
    )
    assert adapter.sent == [expected_banner]
@pytest.mark.asyncio
async def test_restart_home_channel_notification_dedupes_active_chat():
    """When the home channel is also an active chat, only one message is sent."""
    runner, adapter = make_restart_runner()
    runner._restart_requested = True
    active_session_key = "agent:main:telegram:dm:999"
    runner._running_agents[active_session_key] = MagicMock()
    # Home channel uses the same chat id as the running agent's session.
    home_channel = HomeChannel(
        platform=Platform.TELEGRAM,
        chat_id="999",
        name="Ops Home",
    )
    runner.config.platforms[Platform.TELEGRAM].home_channel = home_channel

    await runner._notify_active_sessions_of_shutdown()

    assert len(adapter.sent) == 1
@pytest.mark.asyncio
async def test_restart_home_channel_notification_not_deduped_across_threads():
    """An active thread and a thread-less home in the same chat are both notified."""
    runner, adapter = make_restart_runner()
    runner._restart_requested = True

    # Active session whose stored origin points at thread "topic-7" of chat 999.
    active_session_key = "agent:main:telegram:group:999"
    threaded_origin = SessionSource(
        platform=Platform.TELEGRAM,
        chat_id="999",
        chat_type="group",
        user_id="u1",
        thread_id="topic-7",
    )
    runner.session_store._entries[active_session_key] = MagicMock(origin=threaded_origin)
    runner._running_agents[active_session_key] = MagicMock()

    # Home channel is the same chat, but configured without a thread id.
    home_channel = HomeChannel(
        platform=Platform.TELEGRAM,
        chat_id="999",
        name="Ops Home",
    )
    runner.config.platforms[Platform.TELEGRAM].home_channel = home_channel

    await runner._notify_active_sessions_of_shutdown()

    assert len(adapter.sent) == 2
    # Third element of each recorded call is the metadata passed to send().
    first_metadata = adapter.sent_calls[0][2]
    assert first_metadata == {"thread_id": "topic-7"}
    second_metadata = adapter.sent_calls[1][2]
    assert second_metadata is None
@pytest.mark.asyncio
async def test_restart_home_channel_notification_ignores_false_send_result():
    """A ``success=False`` send result lets shutdown notification finish after one attempt."""
    runner, adapter = make_restart_runner()
    runner._restart_requested = True
    home_channel = HomeChannel(
        platform=Platform.TELEGRAM,
        chat_id="home-42",
        name="Ops Home",
    )
    runner.config.platforms[Platform.TELEGRAM].home_channel = home_channel
    send_failed = SendResult(success=False, error="network down")
    adapter.send = AsyncMock(return_value=send_failed)

    await runner._notify_active_sessions_of_shutdown()

    adapter.send.assert_called_once()
# ---------------------------------------------------------------------------
# Stuck-loop escalation integration
# ---------------------------------------------------------------------------