From 50f685521734237faf0d902fa7d347492d9ea96a Mon Sep 17 00:00:00 2001
From: teknium1 <127238744+teknium1@users.noreply.github.com>
Date: Sat, 27 Jun 2026 02:57:27 -0700
Subject: [PATCH] feat(moa): make /moa one-shot only; route preset switching
 through the model picker

/moa no longer does a sticky model switch. It now always runs a single
prompt through the default MoA preset and restores the prior model
afterward; the whole argument is the prompt (no preset-name matching).
To switch to a MoA preset for the session, select it from the model
picker, where presets already surface under a virtual Mixture of Agents
provider on every model-selection surface.

Also fixes #53444: the TUI one-shot set only session["model_override"],
which the already-built cached agent ignored, so MoA silently never ran
and the turn used the original model. The TUI now does a real in-place
agent.switch_model() via _apply_model_switch() when a live agent exists
(with a proper restore after the turn), and falls back to a model_override
for lazy/unbuilt sessions.

Removes the redundant sticky-switch branch from the CLI, gateway, and TUI
/moa handlers; updates the command description, usage string, and docs.
---
 cli.py                                        | 61 +++++-------
 gateway/run.py                                | 21 ++--
 hermes_cli/commands.py                        |  2 +-
 hermes_cli/moa_config.py                      |  2 +-
 tests/cli/test_moa_command.py                 | 29 ++++--
 tests/tui_gateway/test_goal_command.py        | 22 +++--
 tui_gateway/server.py                         | 98 ++++++++++++++-----
 website/docs/reference/cli-commands.md        |  4 +-
 .../user-guide/features/mixture-of-agents.md  | 24 ++---
 9 files changed, 153 insertions(+), 110 deletions(-)

diff --git a/cli.py b/cli.py
index 1a92ae93778..001d7f34bab 100644
--- a/cli.py
+++ b/cli.py
@@ -8419,50 +8419,41 @@ class HermesCLI(CLIAgentSetupMixin, CLICommandsMixin):
         elif canonical == "goal":
             self._handle_goal_command(cmd_original)
         elif canonical == "moa":
+            # /moa is one-shot sugar only: run a single prompt through the
+            # default MoA preset, then restore the prior model. To *switch* to a
+            # MoA preset for the session, pick it from the model picker (MoA
+            # presets surface as a virtual "Mixture of Agents" provider).
             from hermes_cli.moa_config import (
-                exact_moa_preset_name,
                 moa_usage,
                 normalize_moa_config,
-                resolve_moa_preset,
             )
 
             parts = cmd_original.split(None, 1)
             payload = parts[1].strip() if len(parts) > 1 else ""
+            if not payload:
+                _cprint(f"  {moa_usage()}")
+                return True
             moa_cfg = self.config.get("moa") if isinstance(self.config, dict) else {}
             normalized = normalize_moa_config(moa_cfg)
-            matched_preset = exact_moa_preset_name(normalized, payload) if payload else normalized["default_preset"]
-            if matched_preset:
-                self.requested_provider = "moa"
-                self.provider = "moa"
-                self.model = matched_preset
-                self.api_key = "moa-virtual-provider"
-                self.base_url = "moa://local"
-                self.api_mode = "chat_completions"
-                self.agent = None
-                _cprint(f"  Model switched to MoA preset: {matched_preset}.")
-            else:
-                if not payload:
-                    _cprint(f"  {moa_usage()}")
-                    return True
-                preset = normalized["default_preset"]
-                self._pending_moa_restore_model = {
-                    "requested_provider": getattr(self, "requested_provider", None),
-                    "provider": getattr(self, "provider", None),
-                    "model": getattr(self, "model", None),
-                    "api_key": getattr(self, "api_key", None),
-                    "base_url": getattr(self, "base_url", None),
-                    "api_mode": getattr(self, "api_mode", None),
-                }
-                self.requested_provider = "moa"
-                self.provider = "moa"
-                self.model = preset
-                self.api_key = "moa-virtual-provider"
-                self.base_url = "moa://local"
-                self.api_mode = "chat_completions"
-                self.agent = None
-                self._pending_moa_disable_after_turn = True
-                self._pending_agent_seed = payload
-                _cprint(f"  MoA one-shot queued with preset {preset}; previous model will be restored after this turn.")
+            preset = normalized["default_preset"]
+            self._pending_moa_restore_model = {
+                "requested_provider": getattr(self, "requested_provider", None),
+                "provider": getattr(self, "provider", None),
+                "model": getattr(self, "model", None),
+                "api_key": getattr(self, "api_key", None),
+                "base_url": getattr(self, "base_url", None),
+                "api_mode": getattr(self, "api_mode", None),
+            }
+            self.requested_provider = "moa"
+            self.provider = "moa"
+            self.model = preset
+            self.api_key = "moa-virtual-provider"
+            self.base_url = "moa://local"
+            self.api_mode = "chat_completions"
+            self.agent = None
+            self._pending_moa_disable_after_turn = True
+            self._pending_agent_seed = payload
+            _cprint(f"  MoA one-shot queued with preset {preset}; previous model will be restored after this turn.")
         elif canonical == "subgoal":
             self._handle_subgoal_command(cmd_original)
         elif canonical == "skin":
diff --git a/gateway/run.py b/gateway/run.py
index ea0ac5c5153..e013e102941 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -8756,33 +8756,24 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
             return await self._handle_goal_command(event)
 
         if canonical == "moa":
+            # /moa is one-shot sugar only: run a single prompt through the
+            # default MoA preset, then restore the prior model. To *switch* to a
+            # MoA preset for the session, pick it from the model picker (MoA
+            # presets surface as a virtual "Mixture of Agents" provider).
             from hermes_cli.moa_config import (
-                exact_moa_preset_name,
                 moa_usage,
                 normalize_moa_config,
-                resolve_moa_preset,
             )
             from hermes_cli.config import load_config
 
             moa_payload = event.get_command_args().strip()
+            if not moa_payload:
+                return moa_usage()
             try:
                 cfg = load_config()
                 moa_cfg = normalize_moa_config(cfg.get("moa") if isinstance(cfg, dict) else {})
             except Exception:
                 moa_cfg = normalize_moa_config({})
-            matched_preset = exact_moa_preset_name(moa_cfg, moa_payload) if moa_payload else moa_cfg["default_preset"]
-            if matched_preset:
-                self._session_model_overrides[_quick_key] = {
-                    "provider": "moa",
-                    "model": matched_preset,
-                    "base_url": "moa://local",
-                    "api_key": "moa-virtual-provider",
-                    "api_mode": "chat_completions",
-                }
-                self._evict_cached_agent(_quick_key)
-                return f"Model switched to MoA preset: {matched_preset}."
-            if not moa_payload:
-                return moa_usage()
             preset = moa_cfg["default_preset"]
             try:
                 event.text = moa_payload
diff --git a/hermes_cli/commands.py b/hermes_cli/commands.py
index 39cf526d2cc..bdba0af1cfa 100644
--- a/hermes_cli/commands.py
+++ b/hermes_cli/commands.py
@@ -109,7 +109,7 @@ COMMAND_REGISTRY: list[CommandDef] = [
                args_hint="<prompt>"),
     CommandDef("goal", "Set a standing goal Hermes works on across turns until achieved", "Session",
                args_hint="[text | draft <text> | show | pause | resume | clear | status | wait <pid> | unwait]"),
-    CommandDef("moa", "Run one prompt through configured Mixture of Agents models", "Session",
+    CommandDef("moa", "Run one prompt through the default Mixture of Agents preset, then restore your model", "Session",
                args_hint="<prompt>"),
     CommandDef("subgoal", "Add or manage extra criteria on the active goal", "Session",
                args_hint="[text | remove N | clear]"),
diff --git a/hermes_cli/moa_config.py b/hermes_cli/moa_config.py
index 40828cbae5a..bef4b472cee 100644
--- a/hermes_cli/moa_config.py
+++ b/hermes_cli/moa_config.py
@@ -199,4 +199,4 @@ def build_moa_turn_prompt(user_prompt: str, config: Any = None, preset: str | No
 
 
 def moa_usage() -> str:
-    return "Usage: /moa [preset-name | prompt]  (bare /moa toggles the default preset)"
+    return "Usage: /moa <prompt>  (runs one prompt through the default MoA preset, then restores your model; pick a preset from the model picker to switch for the session)"
diff --git a/tests/cli/test_moa_command.py b/tests/cli/test_moa_command.py
index 0302067f501..c526a0f37af 100644
--- a/tests/cli/test_moa_command.py
+++ b/tests/cli/test_moa_command.py
@@ -25,28 +25,41 @@ def _make_cli():
     cli._pending_input = queue.Queue()
     cli._pending_agent_seed = None
     cli._pending_moa_config = None
+    cli._pending_moa_disable_after_turn = False
+    cli._pending_moa_restore_model = None
     cli._agent_running = False
     cli.agent = None
+    cli.provider = "openrouter"
+    cli.requested_provider = "openrouter"
+    cli.model = "anthropic/claude-opus-4.8"
+    cli.api_key = "test-key"
+    cli.base_url = "https://openrouter.ai/api/v1"
+    cli.api_mode = "chat_completions"
     return cli
 
 
-def test_moa_bare_switches_to_default_preset_model():
+def test_moa_bare_shows_usage_no_switch():
+    # /moa with no prompt is usage-only now; switching to a preset for the
+    # session is done via the model picker, not /moa.
     cli = _make_cli()
+    cli._pending_moa_disable_after_turn = False
     with patch("cli._cprint"):
         assert cli.process_command("/moa") is True
-    assert cli.provider == "moa"
-    assert cli.requested_provider == "moa"
-    assert cli.model == "default"
-    assert cli.agent is None
+    assert cli.provider != "moa"
+    assert cli._pending_agent_seed is None
+    assert cli._pending_moa_disable_after_turn is False
 
 
-def test_moa_exact_preset_switches_to_named_preset_model():
+def test_moa_arg_is_always_one_shot_prompt():
+    # Any argument (even a string that matches a preset name) is treated as a
+    # one-shot prompt through the DEFAULT preset, then the model is restored.
     cli = _make_cli()
     with patch("cli._cprint"):
         cli.process_command("/moa review")
+    assert cli._pending_agent_seed == "review"
+    assert cli._pending_moa_disable_after_turn is True
     assert cli.provider == "moa"
-    assert cli.model == "review"
-    assert cli.agent is None
+    assert cli.model == "default"
 
 
 def test_moa_non_preset_is_one_shot_prompt():
diff --git a/tests/tui_gateway/test_goal_command.py b/tests/tui_gateway/test_goal_command.py
index 58d5f4bc6d4..11ceadb58af 100644
--- a/tests/tui_gateway/test_goal_command.py
+++ b/tests/tui_gateway/test_goal_command.py
@@ -211,7 +211,7 @@ def _write_moa_config(home, text):
     cfg_path.write_text(text)
 
 
-def test_moa_bare_switches_to_default_preset_model(server, session, hermes_home):
+def test_moa_bare_returns_usage(server, session, hermes_home):
     _write_moa_config(hermes_home, """
 moa:
   default_preset: default
@@ -226,13 +226,14 @@ moa:
 """)
     sid, _, s = session
     r = _call(server, "command.dispatch", name="moa", arg="", session_id=sid)
-    assert r["result"]["type"] == "exec"
-    assert "Model switched to MoA preset: default" in r["result"]["output"]
-    assert s["model_override"]["provider"] == "moa"
-    assert s["model_override"]["model"] == "default"
+    # Bare /moa is usage-only now; switching to a preset is via the model picker.
+    assert "error" in r
+    assert "model_override" not in s
 
 
-def test_moa_exact_preset_switches_to_named_preset_model(server, session, hermes_home):
+def test_moa_arg_is_always_one_shot(server, session, hermes_home):
+    # Any arg (even a preset name) is a one-shot prompt through the DEFAULT
+    # preset; /moa never does a sticky switch anymore.
     _write_moa_config(hermes_home, """
 moa:
   default_preset: default
@@ -248,9 +249,14 @@ moa:
 """)
     sid, _, s = session
     r = _call(server, "command.dispatch", name="moa", arg="review", session_id=sid)
-    assert r["result"]["type"] == "exec"
+    result = r["result"]
+    assert result["type"] == "send"
+    assert result["message"] == "review"
+    assert "one-shot" in result["notice"]
+    # Lazy session (no live agent) → MoA preset pinned via model_override for
+    # the build, and it is the DEFAULT preset, not the "review" arg.
     assert s["model_override"]["provider"] == "moa"
-    assert s["model_override"]["model"] == "review"
+    assert s["model_override"]["model"] == "default"
 
 
 def test_moa_non_preset_returns_one_shot_send(server, session, hermes_home):
diff --git a/tui_gateway/server.py b/tui_gateway/server.py
index 826efc9faa2..ec61aed6d57 100644
--- a/tui_gateway/server.py
+++ b/tui_gateway/server.py
@@ -8426,7 +8426,39 @@ def _run_prompt_submit(rid, sid: str, session: dict, text: Any) -> None:
             result = agent.run_conversation(run_message, **run_kwargs)
             if "moa_one_shot_restore" in session:
                 _restore = session.pop("moa_one_shot_restore", None)
-                if _restore is None:
+                # Restore the model the user was on before the /moa one-shot.
+                # The one-shot did a real in-place agent.switch_model() to MoA
+                # (#53444), so undoing it must go back through the switch path —
+                # resetting session["model_override"] alone would leave the live
+                # agent's client pinned to MoA for the next turn.
+                if isinstance(_restore, dict):
+                    _prev_override = _restore.get("override")
+                    _prev_model = _restore.get("model")
+                    _prev_provider = _restore.get("provider")
+                    if _prev_override is None:
+                        session.pop("model_override", None)
+                    else:
+                        session["model_override"] = _prev_override
+                    if _prev_model:
+                        _raw = (
+                            f"{_prev_model} --provider {_prev_provider}"
+                            if _prev_provider
+                            else _prev_model
+                        )
+                        try:
+                            _apply_model_switch(
+                                sid,
+                                session,
+                                _raw,
+                                confirm_expensive_model=False,
+                                pin_session_override=bool(_prev_override),
+                            )
+                        except Exception as _moa_restore_exc:
+                            logger.warning(
+                                "MoA one-shot model restore failed: %s",
+                                _moa_restore_exc,
+                            )
+                elif _restore is None:
                     session.pop("model_override", None)
                 else:
                     session["model_override"] = _restore
@@ -11205,38 +11237,54 @@ def _(rid, params: dict) -> dict:
 
         return _ok(rid, {"type": "send", "message": build_learn_prompt(arg)})
     if name == "moa":
+        # /moa is one-shot sugar only: run a single prompt through the default
+        # MoA preset, then restore the prior model. To *switch* to a MoA preset
+        # for the rest of the session, pick it from the model picker (MoA
+        # presets surface as a virtual "Mixture of Agents" provider).
         try:
-            from hermes_cli.moa_config import (
-                build_moa_turn_prompt, exact_moa_preset_name, moa_usage, normalize_moa_config
-            )
+            from hermes_cli.moa_config import moa_usage, normalize_moa_config
 
-            moa_cfg = normalize_moa_config(_load_cfg().get("moa") or {})
-            matched = exact_moa_preset_name(moa_cfg, arg) if arg else moa_cfg["default_preset"]
-            if matched:
-                if not session:
-                    return _err(rid, 4001, "no active session")
-                session["model_override"] = {
-                    "model": matched,
-                    "provider": "moa",
-                    "base_url": "moa://local",
-                    "api_key": "moa-virtual-provider",
-                    "api_mode": "chat_completions",
-                }
-                session["moa_active_preset"] = matched
-                return _ok(rid, {"type": "exec", "output": f"Model switched to MoA preset: {matched}."})
             if not arg:
                 return _err(rid, 4004, moa_usage())
             if not session:
                 return _err(rid, 4001, "no active session")
+            sid = params.get("session_id", "")
+            moa_cfg = normalize_moa_config(_load_cfg().get("moa") or {})
             preset = moa_cfg["default_preset"]
-            session["moa_one_shot_restore"] = session.get("model_override")
-            session["model_override"] = {
-                "model": preset,
-                "provider": "moa",
-                "base_url": "moa://local",
-                "api_key": "moa-virtual-provider",
-                "api_mode": "chat_completions",
+            # Record the live model identity so it can be restored after the
+            # one-shot turn, then swap the agent's client in place (#53444:
+            # setting session["model_override"] alone never switched the
+            # already-built agent, so the turn silently ran on the old model).
+            agent = session.get("agent")
+            session["moa_one_shot_restore"] = {
+                "override": session.get("model_override"),
+                "model": getattr(agent, "model", None) if agent else None,
+                "provider": getattr(agent, "provider", None) if agent else None,
             }
+            if agent is not None:
+                # Live agent: swap its client in place so THIS turn runs MoA.
+                try:
+                    _apply_model_switch(
+                        sid,
+                        session,
+                        f"{preset} --provider moa",
+                        confirm_expensive_model=False,
+                        pin_session_override=True,
+                    )
+                except Exception as exc:
+                    session.pop("moa_one_shot_restore", None)
+                    return _err(rid, 5030, f"moa unavailable: {exc}")
+            else:
+                # No agent built yet (lazy/fresh session): the override is
+                # consumed by the first build, so the turn runs MoA without an
+                # in-place switch.
+                session["model_override"] = {
+                    "provider": "moa",
+                    "model": preset,
+                    "base_url": "moa://local",
+                    "api_key": "moa-virtual-provider",
+                    "api_mode": "chat_completions",
+                }
             return _ok(
                 rid,
                 {
diff --git a/website/docs/reference/cli-commands.md b/website/docs/reference/cli-commands.md
index 9224844076e..d9ae7ac90c1 100644
--- a/website/docs/reference/cli-commands.md
+++ b/website/docs/reference/cli-commands.md
@@ -39,7 +39,7 @@ hermes [global-options] <command> [subcommand/options]
 |---------|---------|
 | `hermes chat` | Interactive or one-shot chat with the agent. |
 | `hermes model` | Interactively choose the default provider and model. |
-| `hermes moa` | Configure named Mixture of Agents presets used by `/moa`. |
+| `hermes moa` | Configure named Mixture of Agents presets selectable from the model picker. |
 | `hermes fallback` | Manage fallback providers tried when the primary model errors. |
 | `hermes gateway` | Run or manage the messaging gateway service. |
 | `hermes proxy` | Local OpenAI-compatible proxy that attaches OAuth provider credentials. See [Subscription Proxy](../user-guide/features/subscription-proxy.md). |
@@ -1122,7 +1122,7 @@ See [Curator](../user-guide/features/curator.md) for behavior and config.
 
 ## `hermes moa`
 
-Configure named Mixture of Agents presets used by the `/moa` slash command.
+Configure named Mixture of Agents presets. Presets appear as selectable models under a `Mixture of Agents` provider in every model picker; `/moa <prompt>` runs one prompt through the default preset.
 
 ```bash
 hermes moa list
diff --git a/website/docs/user-guide/features/mixture-of-agents.md b/website/docs/user-guide/features/mixture-of-agents.md
index 213c7e5dcaf..ca60d2db357 100644
--- a/website/docs/user-guide/features/mixture-of-agents.md
+++ b/website/docs/user-guide/features/mixture-of-agents.md
@@ -31,27 +31,21 @@ Configured presets therefore show up wherever you would pick any other model.
 
 ## Slash command shortcut
 
-`/moa` is convenience sugar over model selection:
-
-```bash
-/moa
-```
-
-Switches the current session to the default MoA preset.
-
-```bash
-/moa review
-```
-
-If `review` exactly matches a preset name, switches the current session to provider `moa`, model `review`.
+`/moa` is one-shot convenience sugar. It runs a single prompt through the **default** MoA preset, then restores whatever model you were on:
 
 ```bash
 /moa design and implement a migration plan for this flaky test cluster
 ```
 
-If the text does not exactly match a preset name, Hermes treats it as a one-shot prompt. It temporarily switches to the default MoA preset for that turn, sends the prompt, then restores the previous model afterward.
+Hermes temporarily switches to the default MoA preset for that one turn, sends the prompt, then restores your previous model afterward. The whole argument is the prompt — `/moa` no longer interprets it as a preset name.
 
-Preset matching is exact on purpose. Hermes does not fuzzy-match preset names, so normal prompts cannot accidentally become model switches.
+```bash
+/moa
+```
+
+Bare `/moa` (no prompt) just prints usage.
+
+To **switch** to a MoA preset for the rest of the session, select it from the model picker — MoA presets appear under a `Mixture of Agents` provider in every model-selection surface (see above). `/moa` is deliberately not a model switch, so a normal prompt can never accidentally change your model.
 
 ## How it works in the agent loop