mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-26 01:01:40 +00:00
fix(tools): chunk long messages in send_message_tool before dispatch (#1552)
* fix: prevent infinite 400 failure loop on context overflow (#1630) When a gateway session exceeds the model's context window, Anthropic may return a generic 400 invalid_request_error with just 'Error' as the message. This bypassed the phrase-based context-length detection, causing the agent to treat it as a non-retryable client error. Worse, the failed user message was still persisted to the transcript, making the session even larger on each attempt — creating an infinite loop. Three-layer fix: 1. run_agent.py — Fallback heuristic: when a 400 error has a very short generic message AND the session is large (>40% of context or >80 messages), treat it as a probable context overflow and trigger compression instead of aborting. 2. run_agent.py + gateway/run.py — Don't persist failed messages: when the agent returns failed=True before generating any response, skip writing the user's message to the transcript/DB. This prevents the session from growing on each failure. 3. gateway/run.py — Smarter error messages: detect context-overflow failures and suggest /compact or /reset specifically, instead of a generic 'try again' that will fail identically. * fix(skills): detect prompt injection patterns and block cache file reads Adds two security layers to prevent prompt injection via skills hub cache files (#1558): 1. read_file: blocks direct reads of ~/.hermes/skills/.hub/ directory (index-cache, catalog files). The 3.5MB clawhub_catalog_v1.json was the original injection vector — untrusted skill descriptions in the catalog contained adversarial text that the model executed. 2. skill_view: warns when skills are loaded from outside the trusted ~/.hermes/skills/ directory, and detects common injection patterns in skill content ("ignore previous instructions", "<system>", etc.). Cherry-picked from PR #1562 by ygd58. * fix(tools): chunk long messages in send_message_tool before dispatch (#1552) Long messages sent via send_message tool or cron delivery silently failed when exceeding platform limits. Gateway adapters handle this via truncate_message(), but the standalone senders in send_message_tool bypassed that entirely. - Apply truncate_message() chunking in _send_to_platform() before dispatching to individual platform senders - Remove naive message[i:i+2000] character split in _send_discord() in favor of centralized smart splitting - Attach media files to last chunk only for Telegram - Add regression tests for chunking and media placement Cherry-picked from PR #1557 by llbn. --------- Co-authored-by: buray <ygd58@users.noreply.github.com> Co-authored-by: lbn <llbn@users.noreply.github.com>
This commit is contained in:
parent
81f76111b0
commit
12afccd9ca
3 changed files with 122 additions and 35 deletions
|
|
@ -9,7 +9,7 @@ from types import SimpleNamespace
|
|||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
from gateway.config import Platform
|
||||
from tools.send_message_tool import _send_telegram, send_message_tool
|
||||
from tools.send_message_tool import _send_telegram, _send_to_platform, send_message_tool
|
||||
|
||||
|
||||
def _run_async_immediately(coro):
|
||||
|
|
@ -345,3 +345,49 @@ class TestSendTelegramMediaDelivery:
|
|||
assert "error" in result
|
||||
assert "No deliverable text or media remained" in result["error"]
|
||||
bot.send_message.assert_not_awaited()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Regression: long messages are chunked before platform dispatch
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestSendToPlatformChunking:
|
||||
def test_long_message_is_chunked(self):
|
||||
"""Messages exceeding the platform limit are split into multiple sends."""
|
||||
send = AsyncMock(return_value={"success": True, "message_id": "1"})
|
||||
long_msg = "word " * 1000 # ~5000 chars, well over Discord's 2000 limit
|
||||
with patch("tools.send_message_tool._send_discord", send):
|
||||
result = asyncio.run(
|
||||
_send_to_platform(
|
||||
Platform.DISCORD,
|
||||
SimpleNamespace(enabled=True, token="tok", extra={}),
|
||||
"ch", long_msg,
|
||||
)
|
||||
)
|
||||
assert result["success"] is True
|
||||
assert send.await_count >= 3
|
||||
for call in send.await_args_list:
|
||||
assert len(call.args[2]) <= 2020 # each chunk fits the limit
|
||||
|
||||
def test_telegram_media_attaches_to_last_chunk(self):
|
||||
"""When chunked, media files are sent only with the last chunk."""
|
||||
sent_calls = []
|
||||
|
||||
async def fake_send(token, chat_id, message, media_files=None, thread_id=None):
|
||||
sent_calls.append(media_files or [])
|
||||
return {"success": True, "platform": "telegram", "chat_id": chat_id, "message_id": str(len(sent_calls))}
|
||||
|
||||
long_msg = "word " * 2000 # ~10000 chars, well over 4096
|
||||
media = [("/tmp/photo.png", False)]
|
||||
with patch("tools.send_message_tool._send_telegram", fake_send):
|
||||
asyncio.run(
|
||||
_send_to_platform(
|
||||
Platform.TELEGRAM,
|
||||
SimpleNamespace(enabled=True, token="tok", extra={}),
|
||||
"123", long_msg, media_files=media,
|
||||
)
|
||||
)
|
||||
assert len(sent_calls) >= 3
|
||||
assert all(call == [] for call in sent_calls[:-1])
|
||||
assert sent_calls[-1] == media
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue