mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-07-01 12:02:05 +00:00
fix(gateway): restore MoA one-shot model override on failed turns
The MoA one-shot restore ran inside the try block after _handle_message_with_agent returned. When that call raised an exception (agent init failure, interpreter shutdown, OOM), the restore was skipped and the MoA model override stayed permanently on _session_model_overrides — silently routing all subsequent messages through the MoA reference fan-out with no user-visible indication. Move the restore to the finally block so it fires on every exit path (success, exception, interrupt). The restore data lives on the per-turn event object and would be lost if not consumed here.
This commit is contained in:
parent
17cb829991
commit
2f29e3cfc5
2 changed files with 140 additions and 17 deletions
|
|
@ -9020,16 +9020,6 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
|
|||
|
||||
try:
|
||||
_agent_result = await self._handle_message_with_agent(event, source, _quick_key, _run_generation)
|
||||
if getattr(event, "_moa_disable_after_turn", False):
|
||||
try:
|
||||
_restore = getattr(event, "_moa_restore_override", None)
|
||||
if _restore is None:
|
||||
self._session_model_overrides.pop(_quick_key, None)
|
||||
else:
|
||||
self._session_model_overrides[_quick_key] = _restore
|
||||
self._evict_cached_agent(_quick_key)
|
||||
except Exception:
|
||||
pass
|
||||
# Goal continuation: after the agent returns a final response
|
||||
# for this turn, check any standing /goal — the judge will
|
||||
# either mark it done, pause it (budget), or enqueue a
|
||||
|
|
@ -9060,13 +9050,16 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
|
|||
logger.debug("goal continuation hook failed: %s", _goal_exc)
|
||||
return _agent_result
|
||||
finally:
|
||||
# Unconditional release covers every exit path. _release_running_agent_state
|
||||
# is idempotent (pop-on-absent is harmless) and, called without a
|
||||
# run_generation guard, always clears the slot regardless of which
|
||||
# generation it holds. This evicts the zombie left when session_reset
|
||||
# bumps the generation (N -> N+1) mid-flight: gen-N's guarded release
|
||||
# inside _run_agent returns False, and the old sentinel-only check here
|
||||
# missed the leftover real agent — locking the session out forever (#28686).
|
||||
if getattr(event, "_moa_disable_after_turn", False):
|
||||
try:
|
||||
_restore = getattr(event, "_moa_restore_override", None)
|
||||
if _restore is None:
|
||||
self._session_model_overrides.pop(_quick_key, None)
|
||||
else:
|
||||
self._session_model_overrides[_quick_key] = _restore
|
||||
self._evict_cached_agent(_quick_key)
|
||||
except Exception:
|
||||
pass
|
||||
self._release_running_agent_state(_quick_key)
|
||||
|
||||
async def _prepare_inbound_message_text(
|
||||
|
|
|
|||
130
tests/gateway/test_moa_one_shot_restore.py
Normal file
130
tests/gateway/test_moa_one_shot_restore.py
Normal file
|
|
@ -0,0 +1,130 @@
|
|||
"""MoA one-shot model override must be restored on both success and failure."""
|
||||
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
def _make_runner():
|
||||
"""Build a minimal GatewayRunner-like object with the fields the
|
||||
MoA one-shot restore path reads/writes."""
|
||||
runner = MagicMock()
|
||||
runner._session_model_overrides = {}
|
||||
runner._release_running_agent_state = MagicMock()
|
||||
runner._evict_cached_agent = MagicMock()
|
||||
runner._begin_session_run_generation = MagicMock(return_value=1)
|
||||
return runner
|
||||
|
||||
|
||||
def _make_event(text="hello", moa_disable=False, moa_restore=None):
|
||||
event = SimpleNamespace()
|
||||
event.text = text
|
||||
if moa_disable:
|
||||
event._moa_disable_after_turn = True
|
||||
event._moa_restore_override = moa_restore
|
||||
return event
|
||||
|
||||
|
||||
class TestMoaOneShotRestore:
|
||||
"""The gateway's MoA one-shot restore must fire in the finally block
|
||||
so a failed turn still reverts the model override."""
|
||||
|
||||
def test_restore_fires_on_success(self):
|
||||
"""Normal successful turn restores the previous model override."""
|
||||
runner = _make_runner()
|
||||
key = "agent:main:telegram:dm:123"
|
||||
runner._session_model_overrides[key] = {
|
||||
"provider": "moa", "model": "default",
|
||||
}
|
||||
event = _make_event(
|
||||
moa_disable=True,
|
||||
moa_restore={"provider": "openrouter", "model": "gpt-4"},
|
||||
)
|
||||
|
||||
# Simulate: try block succeeds, finally runs
|
||||
try:
|
||||
pass # _handle_message_with_agent succeeds
|
||||
finally:
|
||||
if getattr(event, "_moa_disable_after_turn", False):
|
||||
_restore = getattr(event, "_moa_restore_override", None)
|
||||
if _restore is None:
|
||||
runner._session_model_overrides.pop(key, None)
|
||||
else:
|
||||
runner._session_model_overrides[key] = _restore
|
||||
|
||||
assert runner._session_model_overrides[key] == {
|
||||
"provider": "openrouter", "model": "gpt-4",
|
||||
}
|
||||
|
||||
def test_restore_fires_on_exception(self):
|
||||
"""A failed turn (exception) must still restore the previous model."""
|
||||
runner = _make_runner()
|
||||
key = "agent:main:telegram:dm:123"
|
||||
runner._session_model_overrides[key] = {
|
||||
"provider": "moa", "model": "default",
|
||||
}
|
||||
event = _make_event(
|
||||
moa_disable=True,
|
||||
moa_restore={"provider": "openrouter", "model": "gpt-4"},
|
||||
)
|
||||
|
||||
# Simulate: try block raises, finally still runs
|
||||
try:
|
||||
raise RuntimeError("provider error")
|
||||
except RuntimeError:
|
||||
pass
|
||||
finally:
|
||||
if getattr(event, "_moa_disable_after_turn", False):
|
||||
_restore = getattr(event, "_moa_restore_override", None)
|
||||
if _restore is None:
|
||||
runner._session_model_overrides.pop(key, None)
|
||||
else:
|
||||
runner._session_model_overrides[key] = _restore
|
||||
|
||||
assert runner._session_model_overrides[key] == {
|
||||
"provider": "openrouter", "model": "gpt-4",
|
||||
}
|
||||
|
||||
def test_restore_none_clears_override(self):
|
||||
"""When the user had no model override before /moa, the override
|
||||
should be removed (not left as MoA)."""
|
||||
runner = _make_runner()
|
||||
key = "agent:main:discord:guild:456"
|
||||
runner._session_model_overrides[key] = {
|
||||
"provider": "moa", "model": "default",
|
||||
}
|
||||
event = _make_event(moa_disable=True, moa_restore=None)
|
||||
|
||||
try:
|
||||
raise RuntimeError("timeout")
|
||||
except RuntimeError:
|
||||
pass
|
||||
finally:
|
||||
if getattr(event, "_moa_disable_after_turn", False):
|
||||
_restore = getattr(event, "_moa_restore_override", None)
|
||||
if _restore is None:
|
||||
runner._session_model_overrides.pop(key, None)
|
||||
else:
|
||||
runner._session_model_overrides[key] = _restore
|
||||
|
||||
assert key not in runner._session_model_overrides
|
||||
|
||||
def test_no_restore_when_not_one_shot(self):
|
||||
"""Normal (non-MoA) turns must not touch model overrides."""
|
||||
runner = _make_runner()
|
||||
key = "agent:main:slack:channel:789"
|
||||
runner._session_model_overrides[key] = {
|
||||
"provider": "openrouter", "model": "gpt-4",
|
||||
}
|
||||
event = _make_event() # no _moa_disable_after_turn
|
||||
|
||||
try:
|
||||
pass
|
||||
finally:
|
||||
if getattr(event, "_moa_disable_after_turn", False):
|
||||
runner._session_model_overrides.pop(key, None)
|
||||
|
||||
assert runner._session_model_overrides[key] == {
|
||||
"provider": "openrouter", "model": "gpt-4",
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue