mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-30 06:41:51 +00:00
#33151 flipped THREE Telegram display defaults to false:

- tool_progress: new -> off (kept: per-tool stream is too chatty)
- interim_assistant_messages: T -> F (REVERTED here)
- long_running_notifications: T -> F (REVERTED here)
- busy_ack_detail: T -> F (kept: verbose iteration counter)

The two flips reverted here were wrong.

interim_assistant_messages = the model's REAL words mid-turn ("I'll inspect the repo first.", "Let me check both files in parallel"). That is signal, not noise. Suppressing it left Telegram users staring at "typing..." for the entire turn duration with no feedback.

long_running_notifications = the periodic heartbeat. A silent agent for 30 minutes is worse than one bubble updating every 3 minutes.

Changes:

- gateway/display_config.py: Telegram tier-1 inbox keeps both defaults on (only tool_progress and busy_ack_detail stay off).
- gateway/run.py _notify_long_running(): edit a single heartbeat message in place (where the adapter supports it) instead of posting a new "Still working..." bubble each interval. Telegram, Discord, Slack, Matrix all qualify. Falls back to send-new when edit fails.
- gateway/run.py: tighten heartbeat text. "⏳ Still working... (12 min elapsed — iteration 21/60, running: terminal)" -> "⏳ Working — 12 min, terminal". Verbose iteration detail moves behind busy_ack_detail (one knob now controls both busy acks AND heartbeat verbosity).
- tests/, cli-config.yaml.example, website/docs/user-guide/messaging: updated to reflect the corrected story.
367 lines
13 KiB
Python
367 lines
13 KiB
Python
"""Tests for opt-in cleanup of temporary progress bubbles.
|
|
|
|
When ``display.platforms.<plat>.cleanup_progress: true`` is set for a
|
|
platform whose adapter supports message deletion (e.g. Telegram), the
|
|
tool-progress bubble, "⏳ Working — N min" heartbeats, and status-callback
|
|
messages sent during a run are deleted after the final response is
|
|
delivered.
|
|
|
|
Failed runs skip cleanup so the bubbles remain as breadcrumbs.
|
|
Adapters without ``delete_message`` silently no-op.
|
|
"""
|
|
|
|
import asyncio
|
|
import importlib
|
|
import sys
|
|
import time
|
|
import types
|
|
from types import SimpleNamespace
|
|
|
|
import pytest
|
|
|
|
from gateway.config import Platform, PlatformConfig
|
|
from gateway.platforms.base import BasePlatformAdapter, SendResult
|
|
from gateway.session import SessionSource
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Test fakes — mirror those in test_run_progress_topics.py but add a
|
|
# delete_message implementation that records ids instead of hitting a bot.
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class CleanupCaptureAdapter(BasePlatformAdapter):
    """In-memory adapter that logs sends, edits and deletions.

    Nothing is transmitted anywhere: each operation appends a record to
    ``sent``, ``edits`` or ``deleted`` so a test can inspect what the
    gateway asked the adapter to do.
    """

    # Class-wide counter backing _mint_id; shared by every instance (and
    # subclass) because it is always accessed through this class by name.
    _next_mid = 100

    def __init__(self, platform=Platform.TELEGRAM):
        config = PlatformConfig(enabled=True, token="***")
        super().__init__(config, platform)
        self.sent = []
        self.edits = []
        self.deleted = []

    async def connect(self) -> bool:
        """Report a successful connection without doing any I/O."""
        return True

    async def disconnect(self) -> None:
        """No-op."""
        return None

    def _mint_id(self) -> str:
        """Advance the shared counter and return its new value as a string."""
        fresh = CleanupCaptureAdapter._next_mid + 1
        CleanupCaptureAdapter._next_mid = fresh
        return str(fresh)

    async def send(self, chat_id, content, reply_to=None, metadata=None) -> SendResult:
        """Record an outgoing message and return a success result with a new id.

        ``reply_to`` is accepted but not recorded.
        """
        message_id = self._mint_id()
        record = {
            "chat_id": chat_id,
            "content": content,
            "message_id": message_id,
            "metadata": metadata,
        }
        self.sent.append(record)
        return SendResult(success=True, message_id=message_id)

    async def edit_message(self, chat_id, message_id, content) -> SendResult:
        """Record an edit request and echo the message id back as a success."""
        record = {"chat_id": chat_id, "message_id": message_id, "content": content}
        self.edits.append(record)
        return SendResult(success=True, message_id=message_id)

    async def delete_message(self, chat_id, message_id) -> bool:
        """Record a deletion request (id normalised to ``str``); always True."""
        record = {"chat_id": chat_id, "message_id": str(message_id)}
        self.deleted.append(record)
        return True

    async def send_typing(self, chat_id, metadata=None) -> None:
        """No-op."""
        return None

    async def stop_typing(self, chat_id) -> None:
        """No-op."""
        return None

    async def get_chat_info(self, chat_id: str):
        """Return a minimal chat-info dict containing only the id."""
        return {"id": chat_id}
|
|
|
|
|
|
class NoDeleteAdapter(CleanupCaptureAdapter):
    """Adapter standing in for a platform that cannot delete messages.

    Used to prove the cleanup path skips adapters without deletion support.
    gateway/run.py is expected to detect such adapters with
    ``type(adapter).delete_message is BasePlatformAdapter.delete_message``
    (NOTE(review): that check lives outside this file — confirm), so the
    attribute here has to be the very same function object as the base
    class's.  An ordinary ``def delete_message`` override, even one that
    only raised, would have a different identity and make this adapter look
    deletion-capable.
    """

    # Bind the base implementation directly in the class body.  This shadows
    # the recording delete_message inherited from CleanupCaptureAdapter and
    # means the class never exists, even briefly, with any other
    # delete_message than the base one.
    delete_message = BasePlatformAdapter.delete_message
|
|
|
|
|
|
class ProgressAgent:
    """Fake agent that reports two terminal tool starts and then succeeds."""

    def __init__(self, **kwargs):
        # Only the progress callback is read; every other keyword is ignored.
        self.tool_progress_callback = kwargs.get("tool_progress_callback")
        self.tools = []

    def run_conversation(self, message, conversation_history=None, task_id=None):
        """Emit ``tool.started`` for ``pwd`` then ``ls`` and return a final answer.

        Each event is followed by a 0.25 s pause.  When no callback was
        supplied, nothing is emitted and the result is returned at once.
        """
        notify = self.tool_progress_callback
        if notify is not None:
            for command in ("pwd", "ls"):
                notify("tool.started", "terminal", command, {})
                time.sleep(0.25)
        return {"final_response": "done", "messages": [], "api_calls": 1}
|
|
|
|
|
|
class FailingAgent:
    """Fake agent that reports one terminal tool start and then fails."""

    def __init__(self, **kwargs):
        # Only the progress callback is read; every other keyword is ignored.
        self.tool_progress_callback = kwargs.get("tool_progress_callback")
        self.tools = []

    def run_conversation(self, message, conversation_history=None, task_id=None):
        """Emit a single ``tool.started`` event and return a failed result."""
        notify = self.tool_progress_callback
        if notify is not None:
            notify("tool.started", "terminal", "pwd", {})
            time.sleep(0.25)

        # An empty final_response together with failed=True is the shape the
        # gateway actually returns on provider errors (see gateway/run.py,
        # where failed keys are only propagated when final_response is
        # empty).
        failure = {
            "final_response": "",
            "messages": [],
            "api_calls": 1,
            "failed": True,
            "error": "simulated provider failure",
        }
        return failure
|
|
|
|
|
|
def _make_runner(adapter):
    """Return a bare ``GatewayRunner`` wired to *adapter*.

    The instance is allocated with ``object.__new__`` so ``__init__`` never
    runs; the attributes below are then set by hand.
    """
    runner_cls = importlib.import_module("gateway.run").GatewayRunner
    runner = object.__new__(runner_cls)

    attributes = {
        "adapters": {adapter.platform: adapter},
        "_voice_mode": {},
        "_prefill_messages": [],
        "_ephemeral_system_prompt": "",
        "_reasoning_config": None,
        "_provider_routing": {},
        "_fallback_model": None,
        "_session_db": None,
        "_running_agents": {},
        "_session_run_generation": {},
        "hooks": SimpleNamespace(loaded_hooks=False),
        "config": SimpleNamespace(
            thread_sessions_per_user=False,
            group_sessions_per_user=False,
            stt_enabled=False,
        ),
    }
    # Dicts preserve insertion order, so attributes are assigned in the
    # order listed above.
    for attr_name, attr_value in attributes.items():
        setattr(runner, attr_name, attr_value)
    return runner
|
|
|
|
|
|
def _install_fakes(monkeypatch, agent_cls, *, cleanup_on: bool):
    """Install the module stubs each ``_run_agent`` test relies on.

    Args:
        monkeypatch: pytest's ``monkeypatch`` fixture; every patch goes
            through it.
        agent_cls: class exposed as ``run_agent.AIAgent`` by the stub module.
        cleanup_on: when true, the patched config loader returns a config
            with ``display.platforms.telegram.cleanup_progress`` set to
            ``True``; otherwise it returns an empty dict.

    Returns:
        The imported ``gateway.run`` module, with its patches applied.
    """
    monkeypatch.setenv("HERMES_TOOL_PROGRESS_MODE", "all")

    def _noop_load_dotenv(*args, **kwargs):
        return None

    dotenv_stub = types.ModuleType("dotenv")
    dotenv_stub.load_dotenv = _noop_load_dotenv
    monkeypatch.setitem(sys.modules, "dotenv", dotenv_stub)

    run_agent_stub = types.ModuleType("run_agent")
    run_agent_stub.AIAgent = agent_cls
    monkeypatch.setitem(sys.modules, "run_agent", run_agent_stub)
    # Imported only after the stubs are in place; the import itself is what
    # matters (it registers the tool emoji).
    import tools.terminal_tool  # noqa: F401

    gateway_run = importlib.import_module("gateway.run")
    monkeypatch.setattr(
        gateway_run,
        "_resolve_runtime_agent_kwargs",
        lambda: {"api_key": "fake"},
    )

    # Feed the per-platform cleanup_progress flag through the config loader
    # the gateway actually reads (``_load_gateway_config`` returns the user
    # config).
    if cleanup_on:
        cfg = {"display": {"platforms": {"telegram": {"cleanup_progress": True}}}}
    else:
        cfg = {}
    monkeypatch.setattr(gateway_run, "_load_gateway_config", lambda: cfg)
    return gateway_run
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Tests
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_cleanup_off_by_default_leaves_bubbles(monkeypatch, tmp_path):
    """With no ``cleanup_progress: true`` in the config, firing whichever
    post-delivery callback is registered never results in a deletion."""
    capture = CleanupCaptureAdapter()
    gateway_runner = _make_runner(capture)
    gateway_module = _install_fakes(monkeypatch, ProgressAgent, cleanup_on=False)
    monkeypatch.setattr(gateway_module, "_hermes_home", tmp_path)

    session_key = "agent:main:telegram:group:-1001"
    run_kwargs = {
        "message": "hello",
        "context_prompt": "",
        "history": [],
        "source": SessionSource(platform=Platform.TELEGRAM, chat_id="-1001"),
        "session_id": "sess-1",
        "session_key": session_key,
    }
    outcome = await gateway_runner._run_agent(**run_kwargs)
    assert outcome["final_response"] == "done"

    # Some unrelated callback may occupy the slot (the background-review
    # release uses the same one).  Firing it must still not delete anything
    # while cleanup is off.
    pending = capture.pop_post_delivery_callback(session_key)
    if pending is not None:
        pending()

    # Yield to the event loop a few times so anything scheduled can run.
    for _tick in range(10):
        await asyncio.sleep(0.01)
    assert capture.deleted == []
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_cleanup_registers_callback_and_deletes_on_success(monkeypatch, tmp_path):
    """Flag on + successful run: the registered callback deletes the bubble."""
    capture = CleanupCaptureAdapter()
    gateway_runner = _make_runner(capture)
    gateway_module = _install_fakes(monkeypatch, ProgressAgent, cleanup_on=True)
    monkeypatch.setattr(gateway_module, "_hermes_home", tmp_path)

    session_key = "agent:main:telegram:group:-1001"
    run_kwargs = {
        "message": "hello",
        "context_prompt": "",
        "history": [],
        "source": SessionSource(platform=Platform.TELEGRAM, chat_id="-1001"),
        "session_id": "sess-1",
        "session_key": session_key,
    }
    outcome = await gateway_runner._run_agent(**run_kwargs)
    assert outcome["final_response"] == "done"

    # A cleanup callback must have been registered for this session.
    cleanup = capture.pop_post_delivery_callback(session_key)
    assert callable(cleanup)

    # In production base.py fires this from the ``finally`` of
    # _process_message_background; here it is fired by hand.
    cleanup()

    # The deletions are scheduled via run_coroutine_threadsafe, so poll the
    # loop for a bounded number of ticks until the first one lands.
    for _tick in range(20):
        await asyncio.sleep(0.01)
        if capture.deleted:
            break

    # At minimum the first tool-progress bubble should be gone.
    assert len(capture.deleted) >= 1, f"deleted={capture.deleted} sent={capture.sent}"
    for deletion in capture.deleted:
        assert deletion["chat_id"] == "-1001"
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_cleanup_skipped_on_failed_run(monkeypatch, tmp_path):
    """A failed run registers no cleanup, so the bubbles stay as breadcrumbs."""
    capture = CleanupCaptureAdapter()
    gateway_runner = _make_runner(capture)
    gateway_module = _install_fakes(monkeypatch, FailingAgent, cleanup_on=True)
    monkeypatch.setattr(gateway_module, "_hermes_home", tmp_path)

    session_key = "agent:main:telegram:group:-1001"
    run_kwargs = {
        "message": "hello",
        "context_prompt": "",
        "history": [],
        "source": SessionSource(platform=Platform.TELEGRAM, chat_id="-1001"),
        "session_id": "sess-1",
        "session_key": session_key,
    }
    outcome = await gateway_runner._run_agent(**run_kwargs)
    assert outcome.get("failed") is True

    # Whatever callback is present must not lead to a deletion: cleanup is
    # skipped for failed runs.
    pending = capture.pop_post_delivery_callback(session_key)
    if pending is not None:
        pending()

    # Yield to the event loop a few times so anything scheduled can run.
    for _tick in range(10):
        await asyncio.sleep(0.01)
    assert capture.deleted == []
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_cleanup_noop_on_adapter_without_delete_support(monkeypatch, tmp_path):
    """An adapter exposing only the base-class ``delete_message`` is meant to
    be detected up front, so the cleanup path never registers its callback
    and a stray bg-review callback (if any) can still fire harmlessly."""
    no_delete = NoDeleteAdapter()
    gateway_runner = _make_runner(no_delete)
    gateway_module = _install_fakes(monkeypatch, ProgressAgent, cleanup_on=True)
    monkeypatch.setattr(gateway_module, "_hermes_home", tmp_path)

    session_key = "agent:main:telegram:group:-1001"
    run_kwargs = {
        "message": "hello",
        "context_prompt": "",
        "history": [],
        "source": SessionSource(platform=Platform.TELEGRAM, chat_id="-1001"),
        "session_id": "sess-1",
        "session_key": session_key,
    }
    outcome = await gateway_runner._run_agent(**run_kwargs)
    assert outcome["final_response"] == "done"

    # The run completed and no deletion was recorded on the adapter.
    # NOTE(review): NoDeleteAdapter.delete_message is the base-class
    # function, which presumably never appends to ``deleted`` — so this
    # assertion may be unable to observe a stray call; confirm.
    assert no_delete.deleted == []
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_cleanup_chains_with_existing_callback(monkeypatch, tmp_path):
    """If a bg-review-style callback already occupies the slot, cleanup
    chains onto it: both run and neither replaces the other."""
    capture = CleanupCaptureAdapter()
    gateway_runner = _make_runner(capture)
    gateway_module = _install_fakes(monkeypatch, ProgressAgent, cleanup_on=True)
    monkeypatch.setattr(gateway_module, "_hermes_home", tmp_path)

    session_source = SessionSource(platform=Platform.TELEGRAM, chat_id="-1001")
    session_key = "agent:main:telegram:group:-1001"

    earlier_fires = []

    def _record_fire() -> None:
        earlier_fires.append(True)

    # Register before the run, with no explicit generation — per the
    # original note, the run in this test path uses run_generation=None, so
    # both land in the same default slot.
    capture.register_post_delivery_callback(session_key, _record_fire)

    run_kwargs = {
        "message": "hello",
        "context_prompt": "",
        "history": [],
        "source": session_source,
        "session_id": "sess-1",
        "session_key": session_key,
    }
    outcome = await gateway_runner._run_agent(**run_kwargs)
    assert outcome["final_response"] == "done"

    chained = capture.pop_post_delivery_callback(session_key)
    assert callable(chained)
    chained()

    # Poll the loop for a bounded number of ticks until a deletion lands.
    for _tick in range(20):
        await asyncio.sleep(0.01)
        if capture.deleted:
            break

    # Both effects must be visible: the earlier callback ran exactly once
    # and cleanup removed at least one progress bubble.
    assert earlier_fires == [True]
    assert len(capture.deleted) >= 1
|