fix(gateway): surface unknown /commands instead of leaking them to the LLM

Previously, typing a /command that isn't a built-in, plugin, or skill would silently fall through to the LLM as plain text. The model often interprets it as a loose instruction and invents unrelated tool calls — e.g. a stray /claude_code slipped through and the model fabricated a delegate_task invocation that got stuck in an OAuth loop. Now we check GATEWAY_KNOWN_COMMANDS after the skill / plugin / unavailable-skill lookups and return an actionable message pointing the user at /commands. The user gets feedback, and the agent doesn't waste a round-trip guessing what /foo-bar was supposed to mean.
2026-05-01 01:51:44 +00:00 · 2026-04-05 10:09:01 +00:00 · 2026-04-05 10:09:01 +00:00 · e8053e8b93
commit e8053e8b93
parent 4a75aec433
2 changed files with 187 additions and 0 deletions
--- a/tests/gateway/test_unknown_command.py
+++ b/tests/gateway/test_unknown_command.py
@ -0,0 +1,166 @@
+"""Tests for gateway warning when an unrecognized /command is dispatched.
+
+Without this warning, unknown slash commands get forwarded to the LLM as plain
+text, which often leads to silent failure (e.g. the model inventing a bogus
+delegate_task call instead of telling the user the command doesn't exist).
+"""
+
+from datetime import datetime
+from types import SimpleNamespace
+from unittest.mock import AsyncMock, MagicMock
+
+import pytest
+
+from gateway.config import GatewayConfig, Platform, PlatformConfig
+from gateway.platforms.base import MessageEvent
+from gateway.session import SessionEntry, SessionSource, build_session_key
+
+
+def _make_source() -> SessionSource:
+    """Build the fixed Telegram DM session source shared by this module's tests."""
+    source_fields = {
+        "platform": Platform.TELEGRAM,
+        "user_id": "u1",
+        "chat_id": "c1",
+        "user_name": "tester",
+        "chat_type": "dm",
+    }
+    return SessionSource(**source_fields)
+
+
+def _make_event(text: str) -> MessageEvent:
+    """Wrap ``text`` in a MessageEvent (id ``"m1"``) from the shared test source."""
+    origin = _make_source()
+    return MessageEvent(text=text, source=origin, message_id="m1")
+
+
+def _make_runner():
+    """Build a GatewayRunner whose collaborators are all stubbed out.
+
+    The instance is allocated with ``object.__new__`` so ``__init__`` never
+    runs; every attribute the tests rely on is assigned by hand below. The
+    Telegram adapter, the hooks object, and the session store are mocks.
+    """
+    from gateway.run import GatewayRunner
+
+    # Session store mock: always hands back the same entry and an empty
+    # transcript.
+    entry = SessionEntry(
+        session_key=build_session_key(_make_source()),
+        session_id="sess-1",
+        created_at=datetime.now(),
+        updated_at=datetime.now(),
+        platform=Platform.TELEGRAM,
+        chat_type="dm",
+    )
+    store = MagicMock()
+    store.get_or_create_session.return_value = entry
+    store.load_transcript.return_value = []
+    store.has_any_sessions.return_value = True
+    store.append_to_transcript = MagicMock()
+    store.rewrite_transcript = MagicMock()
+    store.update_session = MagicMock()
+
+    telegram_adapter = MagicMock()
+    telegram_adapter.send = AsyncMock()
+
+    telegram_config = PlatformConfig(enabled=True, token="***")
+
+    # Attribute name -> value, applied to the bare instance in one pass.
+    stubbed_state = {
+        "config": GatewayConfig(platforms={Platform.TELEGRAM: telegram_config}),
+        "adapters": {Platform.TELEGRAM: telegram_adapter},
+        "_voice_mode": {},
+        "hooks": SimpleNamespace(emit=AsyncMock(), loaded_hooks=False),
+        "session_store": store,
+        "_running_agents": {},
+        "_pending_messages": {},
+        "_pending_approvals": {},
+        "_session_db": None,
+        "_reasoning_config": None,
+        "_provider_routing": {},
+        "_fallback_model": None,
+        "_show_reasoning": False,
+        "_is_user_authorized": lambda _source: True,
+        "_set_session_env": lambda _context: None,
+        "_should_send_voice_reply": lambda *_args, **_kwargs: False,
+        "_send_voice_reply": AsyncMock(),
+        "_capture_gateway_honcho_if_configured": lambda *args, **kwargs: None,
+        "_emit_gateway_run_progress": AsyncMock(),
+    }
+
+    runner = object.__new__(GatewayRunner)
+    for attr_name, attr_value in stubbed_state.items():
+        setattr(runner, attr_name, attr_value)
+    return runner
+
+
+@pytest.mark.asyncio
+async def test_unknown_slash_command_returns_guidance(monkeypatch):
+    """An unrecognized /command must yield user-facing guidance instead of
+    being passed along to the LLM."""
+    import gateway.run as gateway_run
+
+    # Reaching the agent means the guard did not short-circuit, so any call
+    # to _run_agent raises.
+    leak = AssertionError("unknown slash command leaked through to the agent")
+    runner = _make_runner()
+    runner._run_agent = AsyncMock(side_effect=leak)
+
+    monkeypatch.setattr(
+        gateway_run, "_resolve_runtime_agent_kwargs", lambda: {"api_key": "***"}
+    )
+
+    event = _make_event("/definitely-not-a-command")
+    result = await runner._handle_message(event)
+
+    assert result is not None
+    expected_fragments = (
+        "Unknown command",
+        "/definitely-not-a-command",
+        "/commands",
+    )
+    for fragment in expected_fragments:
+        assert fragment in result
+    runner._run_agent.assert_not_called()
+
+
+@pytest.mark.asyncio
+async def test_unknown_slash_command_underscored_form_also_guarded(monkeypatch):
+    """The guard must also fire for an underscored command such as /foo_bar
+    (a form Telegram may send) when it does not resolve to a known name."""
+    import gateway.run as gateway_run
+
+    # Any call into the agent is a failure: the guard should return first.
+    leak = AssertionError("unknown slash command leaked through to the agent")
+    runner = _make_runner()
+    runner._run_agent = AsyncMock(side_effect=leak)
+
+    monkeypatch.setattr(
+        gateway_run, "_resolve_runtime_agent_kwargs", lambda: {"api_key": "***"}
+    )
+
+    event = _make_event("/made_up_thing")
+    result = await runner._handle_message(event)
+
+    assert result is not None
+    for fragment in ("Unknown command", "/made_up_thing"):
+        assert fragment in result
+    runner._run_agent.assert_not_called()
+
+
+@pytest.mark.asyncio
+async def test_known_slash_command_not_flagged_as_unknown(monkeypatch):
+    """Dispatching the built-in /status must not return the unknown-command
+    reply."""
+    runner = _make_runner()
+    # Register a stand-in agent under this session's key, then dispatch for
+    # real. Had the guard fired, the reply would contain "Unknown command".
+    session_key = build_session_key(_make_source())
+    runner._running_agents[session_key] = MagicMock()
+
+    status_event = _make_event("/status")
+    result = await runner._handle_message(status_event)
+
+    assert result is not None
+    assert "Unknown command" not in result
+
+
+@pytest.mark.asyncio
+async def test_underscored_alias_for_hyphenated_builtin_not_flagged(monkeypatch):
+    """/reload_mcp, the underscored form Telegram autocomplete sends for the
+    /reload-mcp built-in, must NOT get the unknown-command reply."""
+    import gateway.run as gateway_run
+
+    # Replace the handler with a stub so no real MCP work happens; only the
+    # absence of the unknown-command guard matters here.
+    async def _fake_reload(*_ignored, **_ignored_kw):
+        return "mcp reloaded"
+
+    runner = _make_runner()
+    runner._handle_reload_mcp_command = _fake_reload  # type: ignore[attr-defined]
+
+    monkeypatch.setattr(
+        gateway_run, "_resolve_runtime_agent_kwargs", lambda: {"api_key": "***"}
+    )
+
+    event = _make_event("/reload_mcp")
+    result = await runner._handle_message(event)
+
+    # A None reply is tolerated; any string reply must not be the guard text.
+    assert result is None or "Unknown command" not in result