mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-26 01:01:40 +00:00
* feat(steer): /steer <prompt> injects a mid-run note after the next tool call Adds a new slash command that sits between /queue (turn boundary) and interrupt. /steer <text> stashes the message on the running agent and the agent loop appends it to the LAST tool result's content once the current tool batch finishes. The model sees it as part of the tool output on its next iteration. No interrupt is fired, no new user turn is inserted, and no prompt cache invalidation happens beyond the normal per-turn tool-result churn. Message-role alternation is preserved — we only modify an existing role:"tool" message's content. Wiring ------ - hermes_cli/commands.py: register /steer + add to ACTIVE_SESSION_BYPASS_COMMANDS. - run_agent.py: add _pending_steer state, AIAgent.steer(), _drain_pending_steer(), _apply_pending_steer_to_tool_results(); drain at end of both parallel and sequential tool executors; clear on interrupt; return leftover as result['pending_steer'] if the agent exits before another tool batch. - cli.py: /steer handler — route to agent.steer() when running, fall back to the regular queue otherwise; deliver result['pending_steer'] as next turn. - gateway/run.py: running-agent intercept calls running_agent.steer(); idle-agent path strips the prefix and forwards as a regular user message. - tui_gateway/server.py: new session.steer JSON-RPC method. - ui-tui: SessionSteerResponse type + local /steer slash command that calls session.steer when ui.busy, otherwise enqueues for the next turn. Fallbacks --------- - Agent exits mid-steer → surfaces in run_conversation result as pending_steer so CLI/gateway deliver it as the next user turn instead of silently dropping it. - All tools skipped after interrupt → re-stashes pending_steer for the caller. - No active agent → /steer reduces to sending the text as a normal message. 
Tests ----- - tests/run_agent/test_steer.py — accept/reject, concatenation, drain, last-tool-result injection, multimodal list content, thread safety, cleared-on-interrupt, registry membership, bypass-set membership. - tests/gateway/test_steer_command.py — running agent, pending sentinel, missing steer() method, rejected payload, empty payload. - tests/gateway/test_command_bypass_active_session.py — /steer bypasses the Level-1 base adapter guard. - tests/test_tui_gateway_server.py — session.steer RPC paths. 72/72 targeted tests pass under scripts/run_tests.sh. * feat(steer): register /steer in Discord's native slash tree Discord's app_commands tree is a curated subset of slash commands (not derived from COMMAND_REGISTRY like Telegram/Slack). /steer already works there as plain text (routes through handle_message → base adapter bypass → runner), but registering it here adds Discord's native autocomplete + argument hint UI so users can discover and type it like any other first-class command.
420 lines
15 KiB
Python
420 lines
15 KiB
Python
"""Regression tests: slash commands must bypass the base adapter's active-session guard.
|
|
|
|
When an agent is running, the base adapter's Level 1 guard in
handle_message() intercepts all incoming messages and queues them as
pending. Certain commands (e.g. /stop, /new, /reset, /approve, /deny,
/status) must bypass this guard and be dispatched directly to the gateway
runner — otherwise they are queued as user text and either:

- leak into the conversation as agent input (/stop, /new), or
- deadlock (/approve, /deny — agent blocks on Event.wait)

These tests verify that the bypass works at the adapter level and that
the safety net in _run_agent discards leaked command text.
|
|
"""
|
|
|
|
import asyncio
|
|
from unittest.mock import AsyncMock, MagicMock
|
|
|
|
import pytest
|
|
|
|
from gateway.config import Platform, PlatformConfig
|
|
from gateway.platforms.base import BasePlatformAdapter, MessageEvent, MessageType
|
|
from gateway.session import SessionSource, build_session_key
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Helpers
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class _StubAdapter(BasePlatformAdapter):
    """Bare-bones concrete adapter used only by these tests.

    Each method below is a do-nothing stand-in so the base class can be
    instantiated; none of them performs any I/O.
    """

    async def connect(self):
        """No-op connect."""
        return None

    async def disconnect(self):
        """No-op disconnect."""
        return None

    async def send(self, chat_id, text, **kwargs):
        """Accept the arguments and do nothing with them."""
        return None

    async def get_chat_info(self, chat_id):
        """Return an empty mapping regardless of *chat_id*."""
        return dict()
|
|
|
|
|
|
def _make_adapter():
    """Build a stub Telegram adapter instrumented for guard tests.

    The adapter's message handler answers with ``handled:<command>`` for
    slash commands and ``handled:text:<text>`` otherwise, and its
    ``_send_with_retry`` is replaced by a recorder that appends outgoing
    content to ``adapter.sent_responses``.
    """
    adapter = _StubAdapter(
        PlatformConfig(enabled=True, token="test-token"),
        Platform.TELEGRAM,
    )
    adapter.sent_responses = []

    async def _mock_handler(event):
        command = event.get_command()
        if command:
            return f"handled:{command}"
        return f"handled:text:{event.text}"

    async def _mock_send_retry(chat_id, content, **kwargs):
        # Record instead of sending so tests can inspect what was replied.
        adapter.sent_responses.append(content)

    adapter._message_handler = _mock_handler
    adapter._send_with_retry = _mock_send_retry
    return adapter
|
|
|
|
|
|
def _make_event(text="/stop", chat_id="12345"):
    """Return a TEXT ``MessageEvent`` from a Telegram DM with the given text."""
    dm_source = SessionSource(
        platform=Platform.TELEGRAM,
        chat_id=chat_id,
        chat_type="dm",
    )
    event = MessageEvent(
        text=text,
        message_type=MessageType.TEXT,
        source=dm_source,
    )
    return event
|
|
|
|
|
|
def _session_key(chat_id="12345"):
    """Return the session key for a Telegram DM source with *chat_id*."""
    return build_session_key(
        SessionSource(
            platform=Platform.TELEGRAM,
            chat_id=chat_id,
            chat_type="dm",
        )
    )
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Tests: commands bypass Level 1 when session is active
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestCommandBypassActiveSession:
    """Commands that must skip the active-session guard and be dispatched."""

    @staticmethod
    async def _dispatch_while_active(text):
        """Send *text* to a fresh adapter whose session is marked active.

        Returns ``(adapter, session_key)`` so the caller can inspect the
        pending queue and the recorded responses.
        """
        adapter = _make_adapter()
        key = _session_key()
        adapter._active_sessions[key] = asyncio.Event()
        await adapter.handle_message(_make_event(text))
        return adapter, key

    @pytest.mark.asyncio
    async def test_stop_bypasses_guard(self):
        """/stop is dispatched straight away rather than queued."""
        adapter, key = await self._dispatch_while_active("/stop")

        assert key not in adapter._pending_messages, (
            "/stop was queued as a pending message instead of being dispatched"
        )
        assert any("handled:stop" in reply for reply in adapter.sent_responses), (
            "/stop response was not sent back to the user"
        )

    @pytest.mark.asyncio
    async def test_new_bypasses_guard(self):
        """/new is dispatched straight away rather than queued."""
        adapter, key = await self._dispatch_while_active("/new")

        assert key not in adapter._pending_messages
        assert any("handled:new" in reply for reply in adapter.sent_responses)

    @pytest.mark.asyncio
    async def test_reset_bypasses_guard(self):
        """/reset (an alias for /new) is dispatched straight away."""
        adapter, key = await self._dispatch_while_active("/reset")

        assert key not in adapter._pending_messages
        assert any("handled:reset" in reply for reply in adapter.sent_responses)

    @pytest.mark.asyncio
    async def test_approve_bypasses_guard(self):
        """/approve bypasses the guard (deadlock prevention)."""
        adapter, key = await self._dispatch_while_active("/approve")

        assert key not in adapter._pending_messages
        assert any("handled:approve" in reply for reply in adapter.sent_responses)

    @pytest.mark.asyncio
    async def test_deny_bypasses_guard(self):
        """/deny bypasses the guard (deadlock prevention)."""
        adapter, key = await self._dispatch_while_active("/deny")

        assert key not in adapter._pending_messages
        assert any("handled:deny" in reply for reply in adapter.sent_responses)

    @pytest.mark.asyncio
    async def test_status_bypasses_guard(self):
        """/status bypasses the guard so it yields a system response."""
        adapter, key = await self._dispatch_while_active("/status")

        assert key not in adapter._pending_messages
        assert any("handled:status" in reply for reply in adapter.sent_responses)

    @pytest.mark.asyncio
    async def test_agents_bypasses_guard(self):
        """/agents bypasses the guard so active-task queries don't interrupt runs."""
        adapter, key = await self._dispatch_while_active("/agents")

        assert key not in adapter._pending_messages
        assert any("handled:agents" in reply for reply in adapter.sent_responses)

    @pytest.mark.asyncio
    async def test_tasks_alias_bypasses_guard(self):
        """The /tasks alias bypasses the active-session guard as well."""
        adapter, key = await self._dispatch_while_active("/tasks")

        assert key not in adapter._pending_messages
        assert any("handled:tasks" in reply for reply in adapter.sent_responses)

    @pytest.mark.asyncio
    async def test_background_bypasses_guard(self):
        """/background bypasses so it spawns a parallel task, not an interrupt."""
        adapter, key = await self._dispatch_while_active("/background summarize HN")

        assert key not in adapter._pending_messages, (
            "/background was queued as a pending message instead of being dispatched"
        )
        assert any("handled:background" in reply for reply in adapter.sent_responses), (
            "/background response was not sent back to the user"
        )

    @pytest.mark.asyncio
    async def test_steer_bypasses_guard(self):
        """/steer bypasses the Level-1 active-session guard.

        It has to reach the gateway runner's /steer handler and inject into
        the running agent, rather than being queued as user text for the
        next turn.
        """
        adapter, key = await self._dispatch_while_active("/steer also check auth.log")

        assert key not in adapter._pending_messages, (
            "/steer was queued as a pending message instead of being dispatched"
        )
        assert any("handled:steer" in reply for reply in adapter.sent_responses), (
            "/steer response was not sent back to the user"
        )

    @pytest.mark.asyncio
    async def test_help_bypasses_guard(self):
        """/help bypasses so it is not silently dropped as pending slash text."""
        adapter, key = await self._dispatch_while_active("/help")

        assert key not in adapter._pending_messages, (
            "/help was queued as a pending message instead of being dispatched"
        )
        assert any("handled:help" in reply for reply in adapter.sent_responses), (
            "/help response was not sent back to the user"
        )

    @pytest.mark.asyncio
    async def test_update_bypasses_guard(self):
        """/update bypasses so the pending-command safety net does not discard it."""
        adapter, key = await self._dispatch_while_active("/update")

        assert key not in adapter._pending_messages, (
            "/update was queued as a pending message instead of being dispatched"
        )
        assert any("handled:update" in reply for reply in adapter.sent_responses), (
            "/update response was not sent back to the user"
        )

    @pytest.mark.asyncio
    async def test_queue_bypasses_guard(self):
        """/queue bypasses so it can queue without interrupting."""
        adapter, key = await self._dispatch_while_active("/queue follow up")

        assert key not in adapter._pending_messages, (
            "/queue was queued as a pending message instead of being dispatched"
        )
        assert any("handled:queue" in reply for reply in adapter.sent_responses), (
            "/queue response was not sent back to the user"
        )
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Tests: non-bypass messages still get queued
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestNonBypassStillQueued:
    """Ordinary messages and unrecognised commands are queued, not dispatched."""

    @pytest.mark.asyncio
    async def test_regular_text_queued(self):
        """Plain text arriving while the agent runs is stored as pending."""
        stub = _make_adapter()
        key = _session_key()
        stub._active_sessions[key] = asyncio.Event()
        incoming = _make_event("hello world")

        await stub.handle_message(incoming)

        assert key in stub._pending_messages, (
            "Regular text was not queued — it should be pending"
        )
        assert not stub.sent_responses, (
            "Regular text should not produce a direct response"
        )

    @pytest.mark.asyncio
    async def test_unknown_command_queued(self):
        """An unknown /command is queued rather than dispatched."""
        stub = _make_adapter()
        key = _session_key()
        stub._active_sessions[key] = asyncio.Event()
        incoming = _make_event("/foobar")

        await stub.handle_message(incoming)

        assert key in stub._pending_messages
        assert not stub.sent_responses

    @pytest.mark.asyncio
    async def test_file_path_not_treated_as_command(self):
        """Text shaped like '/path/to/file' does not bypass the guard."""
        stub = _make_adapter()
        key = _session_key()
        stub._active_sessions[key] = asyncio.Event()
        incoming = _make_event("/path/to/file.py")

        await stub.handle_message(incoming)

        assert key in stub._pending_messages
        assert not stub.sent_responses
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Tests: no active session — commands go through normally
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestNoActiveSessionNormalDispatch:
    """With no agent running, messages take the normal background-task path."""

    @pytest.mark.asyncio
    async def test_stop_when_no_session_active(self):
        """/stop with no active session is not parked in the pending queue.

        It spawns a background task instead (the Level 2 handler will
        return 'No active task').
        """
        stub = _make_adapter()
        key = _session_key()

        # Precondition: nothing has registered this session as active.
        assert key not in stub._active_sessions

        await stub.handle_message(_make_event("/stop"))

        # The normal path (background task spawned) was taken, so nothing
        # landed in _pending_messages — that is the queued-during-active path.
        assert key not in stub._pending_messages
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Tests: safety net in _run_agent discards command text from pending queue
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestPendingCommandSafetyNet:
    """Checks on ``resolve_command``, which the _run_agent safety net relies on.

    The safety net in gateway/run.py _run_agent must discard command text
    that leaks into the pending queue via interrupt_message fallback.
    """

    def test_stop_command_detected(self):
        """resolve_command recognises 'stop' so the safety net can discard it."""
        from hermes_cli.commands import resolve_command

        assert resolve_command("stop") is not None
        assert resolve_command("stop").name == "stop"

    def test_new_command_detected(self):
        """resolve_command recognises 'new'."""
        from hermes_cli.commands import resolve_command

        assert resolve_command("new") is not None
        assert resolve_command("new").name == "new"

    def test_reset_alias_detected(self):
        """'reset' resolves to the command named 'new'."""
        from hermes_cli.commands import resolve_command

        assert resolve_command("reset") is not None
        # 'reset' is an alias, so the resolved name is the canonical one.
        assert resolve_command("reset").name == "new"

    def test_unknown_command_not_detected(self):
        """An unregistered name resolves to None."""
        from hermes_cli.commands import resolve_command

        assert resolve_command("foobar") is None

    def test_file_path_not_detected_as_command(self):
        """'/path/to/file' should not resolve as a command."""
        from hermes_cli.commands import resolve_command

        # The safety net splits on whitespace and keeps the first word with
        # the leading '/' stripped; for '/path/to/file' that is 'path/to/file'.
        assert resolve_command("path/to/file") is None
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Tests: bypass with @botname suffix (Telegram-style)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestBypassWithBotnameSuffix:
    """Telegram appends @botname to commands; the bypass must still apply."""

    @pytest.mark.asyncio
    async def test_stop_with_botname(self):
        """/stop@MyHermesBot bypasses the guard."""
        stub = _make_adapter()
        key = _session_key()
        stub._active_sessions[key] = asyncio.Event()
        incoming = _make_event("/stop@MyHermesBot")

        await stub.handle_message(incoming)

        assert key not in stub._pending_messages, (
            "/stop@MyHermesBot was queued instead of bypassing"
        )
        assert any("handled:stop" in reply for reply in stub.sent_responses)

    @pytest.mark.asyncio
    async def test_new_with_botname(self):
        """/new@MyHermesBot bypasses the guard."""
        stub = _make_adapter()
        key = _session_key()
        stub._active_sessions[key] = asyncio.Event()
        incoming = _make_event("/new@MyHermesBot")

        await stub.handle_message(incoming)

        assert key not in stub._pending_messages
        assert any("handled:new" in reply for reply in stub.sent_responses)
|