hermes-agent/tests/tui_gateway/test_goal_command.py
teknium1 50f6855217 feat(moa): make /moa one-shot only; route preset switching through the model picker
/moa no longer does a sticky model switch. It now always runs a single
prompt through the default MoA preset and restores the prior model
afterward; the whole argument is the prompt (no preset-name matching).
To switch to a MoA preset for the session, select it from the model
picker, where presets already surface under a virtual Mixture of Agents
provider on every model-selection surface.

Also fixes #53444: the TUI one-shot only set session[model_override],
which the already-built cached agent ignored, so MoA silently never ran
and the turn used the original model. The TUI now does a real in-place
agent.switch_model() via _apply_model_switch() when a live agent exists
(with a proper restore after the turn), and falls back to a model_override
for lazy/unbuilt sessions.

Removes the redundant sticky-switch branch from the CLI, gateway, and TUI
/moa handlers; updates the command description, usage string, and docs.
2026-06-27 03:09:09 -07:00

284 lines
9.9 KiB
Python

"""Tests for /goal handling in tui_gateway.
The TUI routes ``/goal`` through ``command.dispatch`` (not ``slash.exec``)
because the CLI's ``_handle_goal_command`` queues the kickoff message onto
``_pending_input``, which the slash-worker subprocess has no reader for.
Instead we handle ``/goal`` directly in the server and return a
``{"type": "send", "notice": ..., "message": ...}`` payload the TUI client
uses to render a system line and fire the kickoff prompt.
"""
from __future__ import annotations
import importlib
import threading
from pathlib import Path
from unittest.mock import MagicMock, patch
import pytest
@pytest.fixture()
def hermes_home(tmp_path, monkeypatch):
    """Yield an isolated ``.hermes`` directory rooted under ``tmp_path``.

    ``Path.home`` is patched to return ``tmp_path`` and ``HERMES_HOME`` is
    pointed at the new directory.  The goal module's DB cache is emptied both
    before the test runs and again during teardown.
    """
    hermes_dir = tmp_path / ".hermes"
    hermes_dir.mkdir()
    monkeypatch.setattr(Path, "home", lambda: tmp_path)
    monkeypatch.setenv("HERMES_HOME", str(hermes_dir))

    # Drop any cached goal DB handle so it is resolved again against the
    # HERMES_HOME value set just above.
    from hermes_cli import goals

    goals._DB_CACHE.clear()
    yield hermes_dir
    goals._DB_CACHE.clear()
@pytest.fixture()
def server(hermes_home):
    """Yield the imported ``tui_gateway.server`` module with stubbed imports.

    ``hermes_cli.env_loader`` and ``hermes_cli.banner`` are replaced by
    ``MagicMock`` entries in ``sys.modules`` while the module is imported.
    After the test, the module's per-session dictionaries are emptied.
    """
    stubbed_modules = {
        "hermes_cli.env_loader": MagicMock(),
        "hermes_cli.banner": MagicMock(),
    }
    # NOTE(review): indentation was lost in the copy this was rebuilt from;
    # the yield and the cleanup are assumed to live inside the patch.dict
    # block -- confirm against the repository.
    with patch.dict("sys.modules", stubbed_modules):
        gateway = importlib.import_module("tui_gateway.server")
        yield gateway
        # Reset module-level session state without re-importing.
        # importlib.reload would re-register the module's atexit hooks
        # (ThreadPoolExecutor shutdown, _shutdown_sessions); the duplicates
        # race the stderr buffer at interpreter shutdown and surface as
        # "Fatal Python error: _enter_buffered_busy".  Clearing the
        # per-session dicts gives the next test a clean slate.  _methods is
        # deliberately NOT cleared: it is populated at module import time and
        # would only be re-registered by a reload, which never happens here.
        for registry_name in ("_sessions", "_pending", "_answers"):
            getattr(gateway, registry_name).clear()
@pytest.fixture()
def session(server):
    """Register a minimal idle session and return ``(sid, session_key, state)``.

    The state dict is stored in ``server._sessions`` under the returned
    session id, so handlers looked up through ``server._methods`` can find it.
    """
    sid, session_key = "sid-test", "tui-goal-session-1"
    state = {
        "session_key": session_key,
        "history": [],
        "history_lock": threading.Lock(),
        "history_version": 0,
        "running": False,
        "attached_images": [],
        "cols": 120,
    }
    server._sessions[sid] = state
    return sid, session_key, state
def _call(server, method, **params):
    """Invoke the handler registered as ``method`` and return its response.

    The handler is looked up in ``server._methods`` and called with a fixed
    request id of ``1`` plus the keyword arguments packed into a dict.
    """
    return server._methods[method](1, params)
# ── command.dispatch /goal ────────────────────────────────────────────
def test_goal_bare_shows_status_when_none_set(server, session):
    """An empty ``/goal`` argument yields an exec reply with the empty status."""
    sid = session[0]
    reply = _call(server, "command.dispatch", name="goal", arg="", session_id=sid)
    payload = reply["result"]
    assert payload["type"] == "exec"
    assert "No active goal" in payload["output"]
def test_goal_whitespace_only_shows_status(server, session):
    """A whitespace-only ``/goal`` argument is answered like a bare ``/goal``."""
    sid = session[0]
    reply = _call(server, "command.dispatch", name="goal", arg=" ", session_id=sid)
    payload = reply["result"]
    assert payload["type"] == "exec"
    assert "No active goal" in payload["output"]
def test_goal_status_alias_shows_status(server, session):
    """``/goal status`` yields the same exec status reply as a bare ``/goal``."""
    sid = session[0]
    reply = _call(
        server, "command.dispatch", name="goal", arg="status", session_id=sid
    )
    payload = reply["result"]
    assert payload["type"] == "exec"
    assert "No active goal" in payload["output"]
def test_goal_set_returns_send_with_notice(server, session):
    """Setting a goal returns a ``send`` payload and stores an active goal."""
    sid, session_key, _ = session
    reply = _call(
        server,
        "command.dispatch",
        name="goal",
        arg="build a rocket",
        session_id=sid,
    )
    payload = reply["result"]
    assert payload["type"] == "send"
    assert payload["message"] == "build a rocket"
    assert "notice" in payload
    for fragment in ("Goal set", "20-turn budget"):
        assert fragment in payload["notice"]

    # Persisted in SessionDB
    from hermes_cli.goals import GoalManager

    manager = GoalManager(session_key)
    assert manager.state is not None
    assert manager.state.goal == "build a rocket"
    assert manager.state.status == "active"
def test_goal_pause_after_set(server, session):
    """``/goal pause`` after setting a goal reports and stores a paused goal."""
    sid, session_key, _ = session
    _call(server, "command.dispatch", name="goal", arg="write a story", session_id=sid)
    reply = _call(server, "command.dispatch", name="goal", arg="pause", session_id=sid)
    payload = reply["result"]
    assert payload["type"] == "exec"
    assert "paused" in payload["output"].lower()

    from hermes_cli.goals import GoalManager

    stored = GoalManager(session_key).state
    assert stored.status == "paused"
def test_goal_resume_reactivates(server, session):
    """``/goal resume`` after a pause reports and stores an active goal."""
    sid, session_key, _ = session
    # Set a goal, then pause it, so there is something to resume.
    for setup_arg in ("write a story", "pause"):
        _call(server, "command.dispatch", name="goal", arg=setup_arg, session_id=sid)

    reply = _call(server, "command.dispatch", name="goal", arg="resume", session_id=sid)
    payload = reply["result"]
    assert payload["type"] == "exec"
    assert "resumed" in payload["output"].lower()

    from hermes_cli.goals import GoalManager

    stored = GoalManager(session_key).state
    assert stored.status == "active"
def test_goal_clear_removes_active_goal(server, session):
    """``/goal clear`` reports the goal cleared and leaves no active goal."""
    sid, session_key, _ = session
    _call(server, "command.dispatch", name="goal", arg="write a story", session_id=sid)
    reply = _call(server, "command.dispatch", name="goal", arg="clear", session_id=sid)
    payload = reply["result"]
    assert payload["type"] == "exec"
    assert "cleared" in payload["output"].lower()

    from hermes_cli.goals import GoalManager

    # After clear the row is marked status=cleared (kept for audit);
    # ``has_goal()`` / ``is_active()`` return False so the goal loop
    # stays off and ``status`` reports "No active goal".
    manager = GoalManager(session_key)
    assert not manager.has_goal()
    assert not manager.is_active()
    assert "No active goal" in manager.status_line()
def test_goal_stop_and_done_are_clear_aliases(server, session):
    """``/goal stop`` and ``/goal done`` both report the goal as cleared."""
    sid = session[0]
    # Each alias gets its own freshly set goal before it is exercised.
    for goal_text, alias in (("first goal", "stop"), ("second goal", "done")):
        _call(server, "command.dispatch", name="goal", arg=goal_text, session_id=sid)
        reply = _call(server, "command.dispatch", name="goal", arg=alias, session_id=sid)
        assert "cleared" in reply["result"]["output"].lower()
def test_goal_requires_session(server):
    """Dispatching ``/goal`` for an unregistered session id returns error 4001."""
    reply = _call(
        server, "command.dispatch", name="goal", arg="nope", session_id="unknown"
    )
    assert "error" in reply
    failure = reply["error"]
    assert failure["code"] == 4001
# ── slash.exec /goal routing ──────────────────────────────────────────
def test_slash_exec_routes_goal_to_command_dispatch(server, session):
    """``slash.exec`` must answer ``/goal`` itself rather than return an error.

    Previously a 4018 error made the TUI client retry through
    ``command.dispatch``; some clients failed that fallback and ended up
    sending an empty command ("empty command").
    """
    sid = session[0]
    reply = _call(server, "slash.exec", command="goal status", session_id=sid)

    # A successful reply means the request was routed internally.
    assert "result" in reply
    payload = reply["result"]
    assert payload["type"] == "exec"
    assert "No active goal" in payload["output"]
def test_pending_input_commands_includes_goal(server):
    """Guard: ``_PENDING_INPUT_COMMANDS`` must keep listing ``goal``.

    Removing the entry would silently re-break the TUI.
    """
    pending_commands = server._PENDING_INPUT_COMMANDS
    assert "goal" in pending_commands
# ── command.dispatch /moa ────────────────────────────────────────────
def _write_moa_config(home, text):
    """Write ``text`` to ``config.yaml`` inside the ``home`` directory."""
    (home / "config.yaml").write_text(text)
def test_moa_bare_returns_usage(server, session, hermes_home):
    """Bare ``/moa`` returns an error and does not set a model override."""
    # The YAML nesting below is significant: ``presets`` lives under ``moa``
    # and each preset holds its own ``reference_models`` / ``aggregator``.
    _write_moa_config(hermes_home, """
moa:
  default_preset: default
  presets:
    default:
      reference_models:
        - provider: openai-codex
          model: gpt-5.5
      aggregator:
        provider: openrouter
        model: anthropic/claude-opus-4.8
""")
    sid, _, state = session
    reply = _call(server, "command.dispatch", name="moa", arg="", session_id=sid)
    # Bare /moa is usage-only now; switching to a preset is via the model picker.
    assert "error" in reply
    assert "model_override" not in state
def test_moa_arg_is_always_one_shot(server, session, hermes_home):
    """A ``/moa`` argument naming a preset is still sent as a one-shot prompt."""
    # Any arg (even a preset name) is a one-shot prompt through the DEFAULT
    # preset; /moa never does a sticky switch anymore.
    # The YAML nesting below is significant: ``default`` and ``review`` are
    # sibling presets under ``moa.presets``.
    _write_moa_config(hermes_home, """
moa:
  default_preset: default
  presets:
    default: {}
    review:
      reference_models:
        - provider: openrouter
          model: deepseek/deepseek-v4-pro
      aggregator:
        provider: openrouter
        model: anthropic/claude-opus-4.8
""")
    sid, _, state = session
    reply = _call(server, "command.dispatch", name="moa", arg="review", session_id=sid)
    payload = reply["result"]
    assert payload["type"] == "send"
    assert payload["message"] == "review"
    assert "one-shot" in payload["notice"]
    # Lazy session (no live agent) → MoA preset pinned via model_override for
    # the build, and it is the DEFAULT preset, not the "review" arg.
    override = state["model_override"]
    assert override["provider"] == "moa"
    assert override["model"] == "default"
def test_moa_non_preset_returns_one_shot_send(server, session, hermes_home):
    """Free-text ``/moa`` input comes back as a one-shot ``send`` payload."""
    # The YAML nesting below is significant: ``presets`` lives under ``moa``
    # and each preset holds its own ``reference_models`` / ``aggregator``.
    _write_moa_config(hermes_home, """
moa:
  default_preset: default
  presets:
    default:
      reference_models:
        - provider: openai-codex
          model: gpt-5.5
      aggregator:
        provider: openrouter
        model: anthropic/claude-opus-4.8
""")
    sid = session[0]
    reply = _call(
        server,
        "command.dispatch",
        name="moa",
        arg="inspect this project",
        session_id=sid,
    )
    payload = reply["result"]
    assert payload["type"] == "send"
    assert payload["message"] == "inspect this project"
    assert "one-shot" in payload["notice"]
def test_pending_input_commands_includes_moa(server):
    """Guard: ``_PENDING_INPUT_COMMANDS`` must list ``moa``."""
    pending_commands = server._PENDING_INPUT_COMMANDS
    assert "moa" in pending_commands