mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-11 08:42:11 +00:00
fix(gateway): keep pending /update completion notifications until the target platform reconnects
This commit is contained in:
parent
a6a0a5b1b0
commit
b7169f9bbb
3 changed files with 113 additions and 16 deletions
|
|
@ -15142,10 +15142,15 @@ class GatewayRunner:
|
|||
|
||||
if not adapter or not chat_id:
|
||||
logger.warning("Update watcher: cannot resolve adapter/chat_id, falling back to completion-only")
|
||||
# Fall back to old behavior: wait for exit code and send final notification
|
||||
# Fall back to completion-only: wait for the exit code and send the
|
||||
# final notification. _send_update_notification re-resolves the
|
||||
# adapter on every call, so when the target platform is still
|
||||
# reconnecting it returns False and keeps the markers. Keep polling
|
||||
# until it actually delivers (returns True) instead of giving up
|
||||
# after the first completion check — otherwise a platform that
|
||||
# reconnects a few seconds after completion never gets notified.
|
||||
while (pending_path.exists() or claimed_path.exists()) and loop.time() < deadline:
|
||||
if exit_code_path.exists():
|
||||
await self._send_update_notification()
|
||||
if exit_code_path.exists() and await self._send_update_notification():
|
||||
return
|
||||
await asyncio.sleep(poll_interval)
|
||||
if (pending_path.exists() or claimed_path.exists()) and not exit_code_path.exists():
|
||||
|
|
@ -15359,6 +15364,24 @@ class GatewayRunner:
|
|||
platform = Platform(platform_str)
|
||||
adapter = self.adapters.get(platform)
|
||||
|
||||
if not adapter and chat_id:
|
||||
# The update finished, but the target platform has not
|
||||
# reconnected yet (common right after the restart that
|
||||
# `hermes update` triggers). Treating "adapter missing" as a
|
||||
# definitive skip would delete the markers and silently lose the
|
||||
# completion notification — the user never learns whether the
|
||||
# update succeeded or timed out. Preserve the markers instead so
|
||||
# a later retry (the watcher poll loop, or the next gateway
|
||||
# startup) can deliver the result once the adapter is back.
|
||||
logger.info(
|
||||
"Update notification deferred: %s adapter not connected yet",
|
||||
platform_str,
|
||||
)
|
||||
cleanup = False
|
||||
active_pending_path = pending_path
|
||||
claimed_path.replace(pending_path)
|
||||
return False
|
||||
|
||||
if adapter and chat_id:
|
||||
metadata = self._thread_metadata_for_target(
|
||||
platform,
|
||||
|
|
|
|||
|
|
@ -661,8 +661,14 @@ class TestSendUpdateNotification:
|
|||
assert not pending_path.exists()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_no_adapter_for_platform(self, tmp_path):
|
||||
"""Does not crash if the platform adapter is not connected."""
|
||||
async def test_no_adapter_for_platform_preserves_markers(self, tmp_path):
|
||||
"""A finished update whose platform is offline keeps its markers.
|
||||
|
||||
When the target platform's adapter has not reconnected yet, dropping
|
||||
the completion markers would silently lose the notification. Instead the
|
||||
call defers (returns False) and leaves every marker on disk so a later
|
||||
retry can deliver once the platform is back.
|
||||
"""
|
||||
runner = _make_runner()
|
||||
hermes_home = tmp_path / "hermes"
|
||||
hermes_home.mkdir()
|
||||
|
|
@ -680,13 +686,62 @@ class TestSendUpdateNotification:
|
|||
runner.adapters = {Platform.TELEGRAM: mock_adapter}
|
||||
|
||||
with patch("gateway.run._hermes_home", hermes_home):
|
||||
await runner._send_update_notification()
|
||||
result = await runner._send_update_notification()
|
||||
|
||||
# send should not have been called (wrong platform)
|
||||
# No send (wrong platform offline) and the result is deferred.
|
||||
assert result is False
|
||||
mock_adapter.send.assert_not_called()
|
||||
# Files should still be cleaned up
|
||||
# Markers are preserved for a later retry — NOT cleaned up.
|
||||
assert pending_path.exists()
|
||||
assert output_path.exists()
|
||||
assert exit_code_path.exists()
|
||||
# The marker stays in its canonical pending location (claim restored).
|
||||
assert not (hermes_home / ".update_pending.claimed.json").exists()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_deferred_notification_delivers_after_reconnect(self, tmp_path):
|
||||
"""A deferred completion is delivered once the platform reconnects.
|
||||
|
||||
Regression for the late-reconnect /update bug: the update finishes while
|
||||
the target platform is offline, the markers survive the deferral, and
|
||||
the next call (after the adapter is registered) delivers the result and
|
||||
cleans up — exactly once.
|
||||
"""
|
||||
runner = _make_runner()
|
||||
hermes_home = tmp_path / "hermes"
|
||||
hermes_home.mkdir()
|
||||
|
||||
pending = {"platform": "discord", "chat_id": "111", "user_id": "222"}
|
||||
pending_path = hermes_home / ".update_pending.json"
|
||||
output_path = hermes_home / ".update_output.txt"
|
||||
exit_code_path = hermes_home / ".update_exit_code"
|
||||
pending_path.write_text(json.dumps(pending))
|
||||
output_path.write_text("✓ Update complete!")
|
||||
exit_code_path.write_text("0")
|
||||
|
||||
# First pass: target platform (discord) is still offline → defer.
|
||||
with patch("gateway.run._hermes_home", hermes_home):
|
||||
first = await runner._send_update_notification()
|
||||
|
||||
assert first is False
|
||||
assert pending_path.exists()
|
||||
|
||||
# Platform reconnects: the reconnect watcher adds the adapter back.
|
||||
mock_adapter = AsyncMock()
|
||||
runner.adapters = {Platform.DISCORD: mock_adapter}
|
||||
|
||||
with patch("gateway.run._hermes_home", hermes_home):
|
||||
second = await runner._send_update_notification()
|
||||
|
||||
assert second is True
|
||||
mock_adapter.send.assert_called_once()
|
||||
sent_text = mock_adapter.send.call_args[0][1]
|
||||
assert "Update complete" in sent_text
|
||||
# Now everything is cleaned up — no duplicate deliveries possible.
|
||||
assert not pending_path.exists()
|
||||
assert not output_path.exists()
|
||||
assert not exit_code_path.exists()
|
||||
assert not (hermes_home / ".update_pending.claimed.json").exists()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -439,30 +439,49 @@ class TestWatchUpdateProgress:
|
|||
assert "failed" in all_sent.lower()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_falls_back_when_adapter_unavailable(self, tmp_path):
|
||||
"""Falls back to legacy notification when adapter can't be resolved."""
|
||||
async def test_falls_back_and_delivers_after_reconnect(self, tmp_path):
|
||||
"""Completion-only fallback waits for the platform to reconnect.
|
||||
|
||||
When the target adapter isn't connected at watcher start, the watcher
|
||||
must keep the markers and retry until the platform reconnects, then
|
||||
deliver the completion notification — rather than dropping it on the
|
||||
first completion check (the late-reconnect /update bug).
|
||||
"""
|
||||
runner = _make_runner()
|
||||
hermes_home = tmp_path / "hermes"
|
||||
hermes_home.mkdir()
|
||||
|
||||
# Platform doesn't match any adapter
|
||||
# Target platform (discord) isn't connected yet; the update is finished.
|
||||
pending = {"platform": "discord", "chat_id": "111", "user_id": "222"}
|
||||
(hermes_home / ".update_pending.json").write_text(json.dumps(pending))
|
||||
pending_path = hermes_home / ".update_pending.json"
|
||||
pending_path.write_text(json.dumps(pending))
|
||||
(hermes_home / ".update_output.txt").write_text("done\n")
|
||||
(hermes_home / ".update_exit_code").write_text("0")
|
||||
|
||||
# Only telegram adapter available
|
||||
mock_adapter = AsyncMock()
|
||||
runner.adapters = {Platform.TELEGRAM: mock_adapter}
|
||||
# Only telegram is connected at first.
|
||||
runner.adapters = {Platform.TELEGRAM: AsyncMock()}
|
||||
|
||||
discord_adapter = AsyncMock()
|
||||
|
||||
async def reconnect_discord():
|
||||
# The platform reconnect watcher registers discord mid-poll.
|
||||
await asyncio.sleep(0.3)
|
||||
runner.adapters[Platform.DISCORD] = discord_adapter
|
||||
|
||||
with patch("gateway.run._hermes_home", hermes_home):
|
||||
task = asyncio.create_task(reconnect_discord())
|
||||
await runner._watch_update_progress(
|
||||
poll_interval=0.1,
|
||||
stream_interval=0.2,
|
||||
timeout=5.0,
|
||||
)
|
||||
await task
|
||||
|
||||
# Should not crash; legacy notification handles this case
|
||||
# The completion was delivered to discord once it reconnected...
|
||||
discord_adapter.send.assert_called_once()
|
||||
# ...and the markers are cleaned up after successful delivery.
|
||||
assert not pending_path.exists()
|
||||
assert not (hermes_home / ".update_exit_code").exists()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_prompt_forwarded_only_once(self, tmp_path):
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue