mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-30 11:52:04 +00:00
refactor(gateway): extract MoA one-shot restore helper; restore #28686 comment; real-method tests
Follow-up on the salvaged MoA restore fix: - Extract the finally-block restore into _restore_moa_one_shot() so the behavior is unit-testable without re-implementing it, and so the gateway /moa handler and the finally block share one implementation. - Restore the load-bearing #28686 zombie-eviction comment above _release_running_agent_state that the original diff dropped. - Rewrite the tests to call the real _restore_moa_one_shot helper (the originals re-implemented the restore logic inline, so they passed regardless of the production code).
This commit is contained in:
parent
2f29e3cfc5
commit
4e0788783b
2 changed files with 117 additions and 117 deletions
|
|
@ -9050,18 +9050,45 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
|
|||
logger.debug("goal continuation hook failed: %s", _goal_exc)
|
||||
return _agent_result
|
||||
finally:
|
||||
if getattr(event, "_moa_disable_after_turn", False):
|
||||
try:
|
||||
_restore = getattr(event, "_moa_restore_override", None)
|
||||
if _restore is None:
|
||||
self._session_model_overrides.pop(_quick_key, None)
|
||||
else:
|
||||
self._session_model_overrides[_quick_key] = _restore
|
||||
self._evict_cached_agent(_quick_key)
|
||||
except Exception:
|
||||
pass
|
||||
# MoA one-shot restore must run on EVERY exit path, not just
|
||||
# success. The restore data lives on the per-turn event object
|
||||
# (_moa_restore_override), which is discarded once the event goes
|
||||
# out of scope — so if _handle_message_with_agent raises, a restore
|
||||
# in the try block would be skipped and the MoA override would leak
|
||||
# permanently (every later message silently fans out through MoA).
|
||||
# Putting it in finally guarantees the revert on success, exception,
|
||||
# and interrupt alike.
|
||||
self._restore_moa_one_shot(event, _quick_key)
|
||||
# Unconditional release covers every exit path. _release_running_agent_state
|
||||
# is idempotent (pop-on-absent is harmless) and, called without a
|
||||
# run_generation guard, always clears the slot regardless of which
|
||||
# generation it holds. This evicts the zombie left when session_reset
|
||||
# bumps the generation (N -> N+1) mid-flight: gen-N's guarded release
|
||||
# inside _run_agent returns False, and the old sentinel-only check here
|
||||
# missed the leftover real agent — locking the session out forever (#28686).
|
||||
self._release_running_agent_state(_quick_key)
|
||||
|
||||
def _restore_moa_one_shot(self, event: "MessageEvent", quick_key: str) -> None:
|
||||
"""Revert a ``/moa <prompt>`` one-shot model override after its turn.
|
||||
|
||||
Called from the ``finally`` of the message-handling path so the revert
|
||||
fires whether the turn succeeded, raised, or was interrupted. A no-op
|
||||
unless ``event._moa_disable_after_turn`` is set. ``_moa_restore_override``
|
||||
carries the prior per-session override (``None`` means the user had no
|
||||
override, so the MoA override is cleared outright).
|
||||
"""
|
||||
if not getattr(event, "_moa_disable_after_turn", False):
|
||||
return
|
||||
try:
|
||||
_restore = getattr(event, "_moa_restore_override", None)
|
||||
if _restore is None:
|
||||
self._session_model_overrides.pop(quick_key, None)
|
||||
else:
|
||||
self._session_model_overrides[quick_key] = _restore
|
||||
self._evict_cached_agent(quick_key)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
async def _prepare_inbound_message_text(
|
||||
self,
|
||||
*,
|
||||
|
|
|
|||
|
|
@ -1,130 +1,103 @@
|
|||
"""MoA one-shot model override must be restored on both success and failure."""
|
||||
"""MoA one-shot model override must be restored on both success and failure.
|
||||
|
||||
These exercise the real ``GatewayRunner._restore_moa_one_shot`` helper that the
|
||||
message-handling ``finally`` block calls, so they prove the production logic —
|
||||
not a re-implementation of it. The bug being guarded: the restore used to live
|
||||
in the ``try`` block, so a turn that raised skipped it and the MoA override
|
||||
leaked permanently (every later message silently fanned out through MoA).
|
||||
"""
|
||||
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import pytest
|
||||
from gateway.run import GatewayRunner
|
||||
|
||||
|
||||
def _make_runner():
|
||||
"""Build a minimal GatewayRunner-like object with the fields the
|
||||
MoA one-shot restore path reads/writes."""
|
||||
runner = MagicMock()
|
||||
"""Minimal GatewayRunner with only the fields _restore_moa_one_shot reads."""
|
||||
runner = object.__new__(GatewayRunner)
|
||||
runner._session_model_overrides = {}
|
||||
runner._release_running_agent_state = MagicMock()
|
||||
runner._evict_cached_agent = MagicMock()
|
||||
runner._begin_session_run_generation = MagicMock(return_value=1)
|
||||
return runner
|
||||
|
||||
|
||||
def _make_event(text="hello", moa_disable=False, moa_restore=None):
|
||||
def _make_event(moa_disable=False, moa_restore=None):
|
||||
event = SimpleNamespace()
|
||||
event.text = text
|
||||
if moa_disable:
|
||||
event._moa_disable_after_turn = True
|
||||
event._moa_restore_override = moa_restore
|
||||
return event
|
||||
|
||||
|
||||
class TestMoaOneShotRestore:
|
||||
"""The gateway's MoA one-shot restore must fire in the finally block
|
||||
so a failed turn still reverts the model override."""
|
||||
def test_restore_reverts_to_previous_override():
|
||||
"""A one-shot turn restores the prior per-session model override."""
|
||||
runner = _make_runner()
|
||||
key = "agent:main:telegram:dm:123"
|
||||
runner._session_model_overrides[key] = {"provider": "moa", "model": "default"}
|
||||
event = _make_event(
|
||||
moa_disable=True,
|
||||
moa_restore={"provider": "openrouter", "model": "gpt-4"},
|
||||
)
|
||||
|
||||
def test_restore_fires_on_success(self):
|
||||
"""Normal successful turn restores the previous model override."""
|
||||
runner = _make_runner()
|
||||
key = "agent:main:telegram:dm:123"
|
||||
runner._session_model_overrides[key] = {
|
||||
"provider": "moa", "model": "default",
|
||||
}
|
||||
event = _make_event(
|
||||
moa_disable=True,
|
||||
moa_restore={"provider": "openrouter", "model": "gpt-4"},
|
||||
)
|
||||
runner._restore_moa_one_shot(event, key)
|
||||
|
||||
# Simulate: try block succeeds, finally runs
|
||||
assert runner._session_model_overrides[key] == {
|
||||
"provider": "openrouter",
|
||||
"model": "gpt-4",
|
||||
}
|
||||
runner._evict_cached_agent.assert_called_once_with(key)
|
||||
|
||||
|
||||
def test_restore_none_clears_override():
|
||||
"""If the user had no override before /moa, the MoA override is removed."""
|
||||
runner = _make_runner()
|
||||
key = "agent:main:discord:guild:456"
|
||||
runner._session_model_overrides[key] = {"provider": "moa", "model": "default"}
|
||||
event = _make_event(moa_disable=True, moa_restore=None)
|
||||
|
||||
runner._restore_moa_one_shot(event, key)
|
||||
|
||||
assert key not in runner._session_model_overrides
|
||||
runner._evict_cached_agent.assert_called_once_with(key)
|
||||
|
||||
|
||||
def test_no_restore_for_non_one_shot_turn():
|
||||
"""Normal (non-MoA) turns must not touch model overrides or evict agents."""
|
||||
runner = _make_runner()
|
||||
key = "agent:main:slack:channel:789"
|
||||
runner._session_model_overrides[key] = {"provider": "openrouter", "model": "gpt-4"}
|
||||
event = _make_event() # no _moa_disable_after_turn
|
||||
|
||||
runner._restore_moa_one_shot(event, key)
|
||||
|
||||
assert runner._session_model_overrides[key] == {
|
||||
"provider": "openrouter",
|
||||
"model": "gpt-4",
|
||||
}
|
||||
runner._evict_cached_agent.assert_not_called()
|
||||
|
||||
|
||||
def test_restore_runs_from_finally_even_when_turn_raises():
|
||||
"""The whole point of the fix: a raising turn still reverts the override.
|
||||
|
||||
Mirrors the real call site — the restore is invoked from a ``finally`` block,
|
||||
so it fires after an exception propagates out of the turn body.
|
||||
"""
|
||||
runner = _make_runner()
|
||||
key = "agent:main:telegram:dm:999"
|
||||
runner._session_model_overrides[key] = {"provider": "moa", "model": "default"}
|
||||
event = _make_event(
|
||||
moa_disable=True,
|
||||
moa_restore={"provider": "openrouter", "model": "gpt-4"},
|
||||
)
|
||||
|
||||
with __import__("pytest").raises(RuntimeError):
|
||||
try:
|
||||
pass # _handle_message_with_agent succeeds
|
||||
raise RuntimeError("provider error mid-turn")
|
||||
finally:
|
||||
if getattr(event, "_moa_disable_after_turn", False):
|
||||
_restore = getattr(event, "_moa_restore_override", None)
|
||||
if _restore is None:
|
||||
runner._session_model_overrides.pop(key, None)
|
||||
else:
|
||||
runner._session_model_overrides[key] = _restore
|
||||
runner._restore_moa_one_shot(event, key)
|
||||
|
||||
assert runner._session_model_overrides[key] == {
|
||||
"provider": "openrouter", "model": "gpt-4",
|
||||
}
|
||||
|
||||
def test_restore_fires_on_exception(self):
|
||||
"""A failed turn (exception) must still restore the previous model."""
|
||||
runner = _make_runner()
|
||||
key = "agent:main:telegram:dm:123"
|
||||
runner._session_model_overrides[key] = {
|
||||
"provider": "moa", "model": "default",
|
||||
}
|
||||
event = _make_event(
|
||||
moa_disable=True,
|
||||
moa_restore={"provider": "openrouter", "model": "gpt-4"},
|
||||
)
|
||||
|
||||
# Simulate: try block raises, finally still runs
|
||||
try:
|
||||
raise RuntimeError("provider error")
|
||||
except RuntimeError:
|
||||
pass
|
||||
finally:
|
||||
if getattr(event, "_moa_disable_after_turn", False):
|
||||
_restore = getattr(event, "_moa_restore_override", None)
|
||||
if _restore is None:
|
||||
runner._session_model_overrides.pop(key, None)
|
||||
else:
|
||||
runner._session_model_overrides[key] = _restore
|
||||
|
||||
assert runner._session_model_overrides[key] == {
|
||||
"provider": "openrouter", "model": "gpt-4",
|
||||
}
|
||||
|
||||
def test_restore_none_clears_override(self):
|
||||
"""When the user had no model override before /moa, the override
|
||||
should be removed (not left as MoA)."""
|
||||
runner = _make_runner()
|
||||
key = "agent:main:discord:guild:456"
|
||||
runner._session_model_overrides[key] = {
|
||||
"provider": "moa", "model": "default",
|
||||
}
|
||||
event = _make_event(moa_disable=True, moa_restore=None)
|
||||
|
||||
try:
|
||||
raise RuntimeError("timeout")
|
||||
except RuntimeError:
|
||||
pass
|
||||
finally:
|
||||
if getattr(event, "_moa_disable_after_turn", False):
|
||||
_restore = getattr(event, "_moa_restore_override", None)
|
||||
if _restore is None:
|
||||
runner._session_model_overrides.pop(key, None)
|
||||
else:
|
||||
runner._session_model_overrides[key] = _restore
|
||||
|
||||
assert key not in runner._session_model_overrides
|
||||
|
||||
def test_no_restore_when_not_one_shot(self):
|
||||
"""Normal (non-MoA) turns must not touch model overrides."""
|
||||
runner = _make_runner()
|
||||
key = "agent:main:slack:channel:789"
|
||||
runner._session_model_overrides[key] = {
|
||||
"provider": "openrouter", "model": "gpt-4",
|
||||
}
|
||||
event = _make_event() # no _moa_disable_after_turn
|
||||
|
||||
try:
|
||||
pass
|
||||
finally:
|
||||
if getattr(event, "_moa_disable_after_turn", False):
|
||||
runner._session_model_overrides.pop(key, None)
|
||||
|
||||
assert runner._session_model_overrides[key] == {
|
||||
"provider": "openrouter", "model": "gpt-4",
|
||||
}
|
||||
assert runner._session_model_overrides[key] == {
|
||||
"provider": "openrouter",
|
||||
"model": "gpt-4",
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue