mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-28 11:32:22 +00:00
* feat(moa): expose MoA presets as selectable virtual models Reconstructed onto current main (PR #46081's base had diverged with no common ancestor, marking the PR dirty so CI never dispatched). MoA is now a virtual provider: each named preset is a selectable model under provider 'moa', and the preset's aggregator is the acting model that answers and calls tools. Reference models fan out in parallel via a bounded ThreadPoolExecutor (the same batch pattern delegate_task uses) — all references dispatched at once, collected when every one finishes, then handed to the aggregator. Output order is preserved, failures and the MoA-recursion guard stay isolated per reference. - Removed the old mixture_of_agents model tool and moa toolset. - Added moa as a virtual provider in the provider/model inventory. - /moa is shortcut behavior over model selection (default preset / named preset / one-shot prompt). - Dashboard + Desktop manage named presets; presets appear in model pickers. - Parallel reference fan-out in agent/moa_loop.py with regression test. * fix(moa): thread moa_config through _run_agent to _run_agent_inner The reconstructed gateway MoA wiring declared moa_config on _run_agent (the profile-scoping wrapper) and used it inside _run_agent_inner, but the wrapper never forwarded it — _run_agent_inner had no such parameter, so the runtime hit NameError: name 'moa_config' is not defined on the compression-failure session sync path. Add moa_config to _run_agent_inner's signature and forward it from both wrapper call sites (multiplex and non-multiplex). Caught by tests/gateway/test_compression_failure_session_sync.py on CI shard test(4). 
* fix(moa): classify moa as a virtual provider in the catalog The moa virtual provider has no PROVIDER_REGISTRY/ProviderProfile entry, so provider_catalog() fell through to the default auth_type="api_key" with no env vars — tripping two catalog invariants: - test_provider_catalog: api_key providers must expose a credential env var - test_provider_parity: every hermes-model provider must be desktop-configurable moa already declares auth_type="virtual" in HERMES_OVERLAYS; consult that overlay as an auth_type fallback so the catalog reports moa as virtual (no real credential, no network endpoint). Exempt virtual providers from the desktop parity union check the same way 'custom' is exempt — derived from the catalog, not a hardcoded slug, so future virtual providers are covered too.
278 lines
9.6 KiB
Python
278 lines
9.6 KiB
Python
"""Tests for /goal handling in tui_gateway.
|
|
|
|
The TUI routes ``/goal`` through ``command.dispatch`` (not ``slash.exec``)
|
|
because the CLI's ``_handle_goal_command`` queues the kickoff message onto
|
|
``_pending_input``, which the slash-worker subprocess has no reader for.
|
|
Instead we handle ``/goal`` directly in the server and return a
|
|
``{"type": "send", "notice": ..., "message": ...}`` payload the TUI client
|
|
uses to render a system line and fire the kickoff prompt.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import importlib
|
|
import threading
|
|
from pathlib import Path
|
|
from unittest.mock import MagicMock, patch
|
|
|
|
import pytest
|
|
|
|
|
|
@pytest.fixture()
def hermes_home(tmp_path, monkeypatch):
    """Yield an isolated ``.hermes`` directory created under ``tmp_path``.

    ``Path.home`` is patched to return ``tmp_path`` and the ``HERMES_HOME``
    environment variable is pointed at the new directory.  The
    ``hermes_cli.goals`` DB cache is emptied before the test body runs and
    again during teardown.
    """
    hermes_dir = tmp_path / ".hermes"
    hermes_dir.mkdir()

    monkeypatch.setattr(Path, "home", lambda: tmp_path)
    monkeypatch.setenv("HERMES_HOME", str(hermes_dir))

    # Imported only after the environment is patched; the cache is then
    # busted so the goal module re-resolves HERMES_HOME.
    from hermes_cli import goals

    goals._DB_CACHE.clear()
    yield hermes_dir
    goals._DB_CACHE.clear()
|
|
|
|
|
|
@pytest.fixture()
def server(hermes_home):
    """Yield the ``tui_gateway.server`` module with two CLI modules stubbed.

    ``hermes_cli.env_loader`` and ``hermes_cli.banner`` are replaced by
    ``MagicMock`` objects in ``sys.modules`` while the fixture is active.
    On teardown the module-level session dicts are emptied.
    """
    stubbed_modules = {
        "hermes_cli.env_loader": MagicMock(),
        "hermes_cli.banner": MagicMock(),
    }
    with patch.dict("sys.modules", stubbed_modules):
        gateway = importlib.import_module("tui_gateway.server")
        yield gateway
        # Reset module-level session state without re-importing.
        # importlib.reload would re-register the module's atexit hooks
        # (ThreadPoolExecutor shutdown, _shutdown_sessions); the duplicates
        # race the stderr buffer at interpreter shutdown and surface as
        # "Fatal Python error: _enter_buffered_busy".  Clearing the
        # per-session dicts gives the next test a clean slate; _methods is
        # NOT cleared because it's populated at module import time and
        # re-registration only happens via reload (which we don't do).
        for registry in (gateway._sessions, gateway._pending, gateway._answers):
            registry.clear()
|
|
|
|
|
|
@pytest.fixture()
def session(server):
    """Register a minimal, non-running session in ``server._sessions``.

    Returns:
        A ``(sid, session_key, state)`` tuple where ``state`` is the very
        dict stored under ``sid``, so tests can inspect later mutations.
    """
    sid, session_key = "sid-test", "tui-goal-session-1"
    state = {
        "session_key": session_key,
        "history": [],
        "history_lock": threading.Lock(),
        "history_version": 0,
        "running": False,
        "attached_images": [],
        "cols": 120,
    }
    server._sessions[sid] = state
    return sid, session_key, state
|
|
|
|
|
|
def _call(server, method, **params):
|
|
handler = server._methods[method]
|
|
return handler(1, params)
|
|
|
|
|
|
# ── command.dispatch /goal ────────────────────────────────────────────
|
|
|
|
|
|
def test_goal_bare_shows_status_when_none_set(server, session):
    """``/goal`` with an empty argument reports that no goal is active."""
    sid = session[0]
    payload = _call(server, "command.dispatch", name="goal", arg="", session_id=sid)["result"]
    assert payload["type"] == "exec"
    assert "No active goal" in payload["output"]
|
|
|
|
|
|
def test_goal_whitespace_only_shows_status(server, session):
    """A whitespace-only argument is treated like a bare ``/goal``."""
    sid = session[0]
    payload = _call(server, "command.dispatch", name="goal", arg=" ", session_id=sid)["result"]
    assert payload["type"] == "exec"
    assert "No active goal" in payload["output"]
|
|
|
|
|
|
def test_goal_status_alias_shows_status(server, session):
    """``/goal status`` returns the same status output as a bare ``/goal``."""
    sid = session[0]
    payload = _call(server, "command.dispatch", name="goal", arg="status", session_id=sid)["result"]
    assert payload["type"] == "exec"
    assert "No active goal" in payload["output"]
|
|
|
|
|
|
def test_goal_set_returns_send_with_notice(server, session):
    """Setting a goal returns a ``send`` payload and persists an active goal."""
    sid, session_key, _ = session
    goal_text = "build a rocket"

    response = _call(server, "command.dispatch", name="goal", arg=goal_text, session_id=sid)
    payload = response["result"]
    assert payload["type"] == "send"
    assert payload["message"] == goal_text
    assert "notice" in payload
    for fragment in ("Goal set", "20-turn budget"):
        assert fragment in payload["notice"]

    # Persisted in SessionDB
    from hermes_cli.goals import GoalManager

    manager = GoalManager(session_key)
    assert manager.state is not None
    assert manager.state.goal == goal_text
    assert manager.state.status == "active"
|
|
|
|
|
|
def test_goal_pause_after_set(server, session):
    """``/goal pause`` after setting a goal stores status ``paused``."""
    sid, session_key, _ = session

    def dispatch(arg):
        return _call(server, "command.dispatch", name="goal", arg=arg, session_id=sid)

    dispatch("write a story")
    payload = dispatch("pause")["result"]
    assert payload["type"] == "exec"
    assert "paused" in payload["output"].lower()

    from hermes_cli.goals import GoalManager

    assert GoalManager(session_key).state.status == "paused"
|
|
|
|
|
|
def test_goal_resume_reactivates(server, session):
    """``/goal resume`` after a pause puts the stored goal back to ``active``."""
    sid, session_key, _ = session

    def dispatch(arg):
        return _call(server, "command.dispatch", name="goal", arg=arg, session_id=sid)

    dispatch("write a story")
    dispatch("pause")
    payload = dispatch("resume")["result"]
    assert payload["type"] == "exec"
    assert "resumed" in payload["output"].lower()

    from hermes_cli.goals import GoalManager

    assert GoalManager(session_key).state.status == "active"
|
|
|
|
|
|
def test_goal_clear_removes_active_goal(server, session):
    """``/goal clear`` leaves the session with no active goal."""
    sid, session_key, _ = session

    def dispatch(arg):
        return _call(server, "command.dispatch", name="goal", arg=arg, session_id=sid)

    dispatch("write a story")
    payload = dispatch("clear")["result"]
    assert payload["type"] == "exec"
    assert "cleared" in payload["output"].lower()

    from hermes_cli.goals import GoalManager

    # After clear the row is marked status=cleared (kept for audit);
    # ``has_goal()`` / ``is_active()`` return False so the goal loop
    # stays off and ``status`` reports "No active goal".
    manager = GoalManager(session_key)
    assert not manager.has_goal()
    assert not manager.is_active()
    assert "No active goal" in manager.status_line()
|
|
|
|
|
|
def test_goal_stop_and_done_are_clear_aliases(server, session):
    """``stop`` and ``done`` each produce the same "cleared" output as ``clear``."""
    sid = session[0]
    # Each round sets a fresh goal, then clears it through one alias.
    for goal_text, alias in (("first goal", "stop"), ("second goal", "done")):
        _call(server, "command.dispatch", name="goal", arg=goal_text, session_id=sid)
        response = _call(server, "command.dispatch", name="goal", arg=alias, session_id=sid)
        assert "cleared" in response["result"]["output"].lower()
|
|
|
|
|
|
def test_goal_requires_session(server):
    """Dispatching ``/goal`` for an unregistered session id yields error 4001."""
    response = _call(
        server,
        "command.dispatch",
        name="goal",
        arg="nope",
        session_id="unknown",
    )
    assert "error" in response
    assert response["error"]["code"] == 4001
|
|
|
|
|
|
# ── slash.exec /goal routing ──────────────────────────────────────────
|
|
|
|
|
|
def test_slash_exec_routes_goal_to_command_dispatch(server, session):
    """``slash.exec`` handles ``/goal`` itself rather than returning an error.

    Previously the 4018 error required the TUI client to retry via
    ``command.dispatch``, but some clients failed the fallback, leaving the
    command empty ("empty command").
    """
    sid = session[0]
    response = _call(server, "slash.exec", command="goal status", session_id=sid)

    # Should succeed by routing to command.dispatch internally
    assert "result" in response
    payload = response["result"]
    assert payload["type"] == "exec"
    assert "No active goal" in payload["output"]
|
|
|
|
|
|
def test_pending_input_commands_includes_goal(server):
    """Guard: ``_PENDING_INPUT_COMMANDS`` must list ``goal``.

    Removing it would silently re-break the TUI.
    """
    registered = server._PENDING_INPUT_COMMANDS
    assert "goal" in registered
|
|
|
|
|
|
# ── command.dispatch /moa ────────────────────────────────────────────
|
|
|
|
def _write_moa_config(home, text):
|
|
cfg_path = home / "config.yaml"
|
|
cfg_path.write_text(text)
|
|
|
|
|
|
def test_moa_bare_switches_to_default_preset_model(server, session, hermes_home):
    """A bare ``/moa`` switches the session to the configured default preset."""
    # NOTE(review): YAML nesting below is reconstructed from key order —
    # confirm against the upstream file.
    config_yaml = """
moa:
  default_preset: default
  presets:
    default:
      reference_models:
        - provider: openai-codex
          model: gpt-5.5
      aggregator:
        provider: openrouter
        model: anthropic/claude-opus-4.8
"""
    _write_moa_config(hermes_home, config_yaml)

    sid, _, state = session
    payload = _call(server, "command.dispatch", name="moa", arg="", session_id=sid)["result"]
    assert payload["type"] == "exec"
    assert "Model switched to MoA preset: default" in payload["output"]
    assert state["model_override"]["provider"] == "moa"
    assert state["model_override"]["model"] == "default"
|
|
|
|
|
|
def test_moa_exact_preset_switches_to_named_preset_model(server, session, hermes_home):
    """``/moa <preset>`` with an exact preset name selects that preset."""
    # NOTE(review): YAML nesting below is reconstructed from key order —
    # confirm against the upstream file.
    config_yaml = """
moa:
  default_preset: default
  presets:
    default: {}
    review:
      reference_models:
        - provider: openrouter
          model: deepseek/deepseek-v4-pro
      aggregator:
        provider: openrouter
        model: anthropic/claude-opus-4.8
"""
    _write_moa_config(hermes_home, config_yaml)

    sid, _, state = session
    response = _call(server, "command.dispatch", name="moa", arg="review", session_id=sid)
    assert response["result"]["type"] == "exec"
    assert state["model_override"]["provider"] == "moa"
    assert state["model_override"]["model"] == "review"
|
|
|
|
|
|
def test_moa_non_preset_returns_one_shot_send(server, session, hermes_home):
    """An argument that is not a preset name is sent as a one-shot prompt."""
    # NOTE(review): YAML nesting below is reconstructed from key order —
    # confirm against the upstream file.
    config_yaml = """
moa:
  default_preset: default
  presets:
    default:
      reference_models:
        - provider: openai-codex
          model: gpt-5.5
      aggregator:
        provider: openrouter
        model: anthropic/claude-opus-4.8
"""
    _write_moa_config(hermes_home, config_yaml)

    sid = session[0]
    prompt = "inspect this project"
    payload = _call(server, "command.dispatch", name="moa", arg=prompt, session_id=sid)["result"]
    assert payload["type"] == "send"
    assert payload["message"] == prompt
    assert "one-shot" in payload["notice"]
|
|
|
|
|
|
def test_pending_input_commands_includes_moa(server):
    """Guard: ``_PENDING_INPUT_COMMANDS`` must list ``moa``."""
    registered = server._PENDING_INPUT_COMMANDS
    assert "moa" in registered
|