mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-23 10:42:00 +00:00
fix(delivery): drop env-var knob, flag all chunking adapters
Follow-up to ScotterMonk's cron-truncation fix: - Remove HERMES_DELIVERY_MAX_PLATFORM_OUTPUT env var. Behavioral config belongs in config.yaml, not a new HERMES_* env var (.env is secrets only). The actual bug is fixed entirely by the adapter-aware skip; the configurable cap was unneeded scope. MAX_PLATFORM_OUTPUT is a constant again, collapsing the max_output=0 disable branch and the audit-vs-truncation threshold divergence. - Flag the remaining verified-chunking adapters (slack, matrix, feishu, mattermost, teams, whatsapp, whatsapp_cloud, weixin, bluebubbles, yuanbao) with splits_long_messages=True so the fix covers the whole bug class, not just Discord/Telegram. Each verified to chunk in its own send() via truncate_message(). - SMS deliberately left False: it chunks for normal replies but a multi-segment cron blast is cost-bearing; the 4000-cap + file save is the safer default there. - Update tests: drop the two env-override tests, add a test asserting a save failure during truncation (non-chunking) propagates.
This commit is contained in:
parent
86e4521cb1
commit
e9cd8c5bf3
12 changed files with 46 additions and 117 deletions
|
|
@ -20,34 +20,13 @@ from hermes_cli.config import get_hermes_home
|
|||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Default cap before gateway-level truncation of cron output for platform
|
||||
# delivery. Telegram's hard API limit is 4096; the 200-char headroom covers
|
||||
# the "full output saved to …" footer appended on truncation. Override via
|
||||
# the HERMES_DELIVERY_MAX_PLATFORM_OUTPUT env var. Adapters that split long
|
||||
# Cap before gateway-level truncation of cron output for non-chunking platform
|
||||
# delivery. Telegram's hard API limit is 4096; the headroom covers the "full
|
||||
# output saved to …" footer appended on truncation. Adapters that split long
|
||||
# messages natively (BasePlatformAdapter.splits_long_messages) bypass this
|
||||
# entirely — the adapter chunks in its own send() and the full output is
|
||||
# preserved.
|
||||
_DEFAULT_MAX_PLATFORM_OUTPUT = 4000
|
||||
|
||||
|
||||
def _max_platform_output() -> int:
|
||||
"""Max chars before gateway-level truncation of cron output.
|
||||
|
||||
``HERMES_DELIVERY_MAX_PLATFORM_OUTPUT`` env var overrides the default
|
||||
(4000). Non-int or negative values fall back to the default with a
|
||||
warning.
|
||||
"""
|
||||
env = os.getenv("HERMES_DELIVERY_MAX_PLATFORM_OUTPUT")
|
||||
if env is not None:
|
||||
try:
|
||||
return max(0, int(env.strip()))
|
||||
except ValueError:
|
||||
logger.warning(
|
||||
"HERMES_DELIVERY_MAX_PLATFORM_OUTPUT=%r is not an int; "
|
||||
"using default %d",
|
||||
env, _DEFAULT_MAX_PLATFORM_OUTPUT,
|
||||
)
|
||||
return _DEFAULT_MAX_PLATFORM_OUTPUT
|
||||
MAX_PLATFORM_OUTPUT = 4000
|
||||
|
||||
# Matches strings that are *only* a "silence" narration with optional markdown
|
||||
# wrappers. Covers: *(silent)*, _silent_, `silent`, ~silent~, (silent), silent,
|
||||
|
|
@ -345,28 +324,21 @@ class DeliveryRouter:
|
|||
# Guard: handle oversized cron output.
|
||||
#
|
||||
# Two independent decisions:
|
||||
# 1. AUDIT SAVE — when content exceeds the audit threshold (4000
|
||||
# chars, the historical default), the full output is always
|
||||
# written to disk as a recoverable audit trail. This fires
|
||||
# regardless of truncation setting or adapter capability.
|
||||
# 2. TRUNCATION — for non-chunking adapters, content above
|
||||
# max_output is truncated with a footer pointing to the saved
|
||||
# file. Chunking-capable adapters (splits_long_messages=True)
|
||||
# receive the full payload and split natively in their send().
|
||||
# Setting HERMES_DELIVERY_MAX_PLATFORM_OUTPUT=0 disables
|
||||
# truncation entirely (the user takes responsibility for platform
|
||||
# API limits), but the audit save in step 1 still fires.
|
||||
max_output = _max_platform_output()
|
||||
# 1. AUDIT SAVE — when content exceeds MAX_PLATFORM_OUTPUT, the full
|
||||
# output is always written to disk as a recoverable audit trail.
|
||||
# This fires regardless of adapter capability (best-effort).
|
||||
# 2. TRUNCATION — for non-chunking adapters, content above the cap is
|
||||
# truncated with a footer pointing to the saved file. Chunking-
|
||||
# capable adapters (splits_long_messages=True) receive the full
|
||||
# payload and split natively in their send().
|
||||
job_id = (metadata or {}).get("job_id", "unknown")
|
||||
saved_path: Optional[Path] = None
|
||||
|
||||
# Step 1 — audit save (independent of truncation, best-effort).
|
||||
# The save is a side-effect audit trail, not essential to delivery.
|
||||
# If it fails (full disk, permissions), delivery proceeds — the
|
||||
# content reaches the adapter regardless. The truncation path's
|
||||
# fallback save below is NOT best-effort: the footer needs a valid
|
||||
# path, so a failure there is a real delivery problem.
|
||||
if len(content) > _DEFAULT_MAX_PLATFORM_OUTPUT:
|
||||
if len(content) > MAX_PLATFORM_OUTPUT:
|
||||
# Step 1 — audit save (best-effort). The save is a side-effect
|
||||
# audit trail, not essential to delivery. If it fails (full disk,
|
||||
# permissions), delivery proceeds — the content reaches the adapter
|
||||
# regardless.
|
||||
try:
|
||||
saved_path = self._save_full_output(content, job_id)
|
||||
except OSError as exc:
|
||||
|
|
@ -376,9 +348,8 @@ class DeliveryRouter:
|
|||
len(content), job_id, exc,
|
||||
)
|
||||
|
||||
# Step 2 — truncation (only for non-chunking adapters).
|
||||
if max_output > 0 and len(content) > max_output:
|
||||
if adapter and getattr(adapter, "splits_long_messages", False):
|
||||
# Step 2 — truncation (only for non-chunking adapters).
|
||||
if getattr(adapter, "splits_long_messages", False):
|
||||
# Adapter chunks natively — deliver full payload.
|
||||
if saved_path:
|
||||
logger.info(
|
||||
|
|
@ -387,27 +358,18 @@ class DeliveryRouter:
|
|||
len(content), saved_path,
|
||||
)
|
||||
else:
|
||||
# Non-chunking adapter — truncate with footer.
|
||||
# Non-chunking adapter — truncate with footer. The footer
|
||||
# needs a valid path, so if the best-effort save above failed,
|
||||
# retry it here (a failure now is a real delivery problem).
|
||||
if saved_path is None:
|
||||
# Content exceeded max_output but not the audit threshold
|
||||
# (e.g. HERMES_DELIVERY_MAX_PLATFORM_OUTPUT=200). Save
|
||||
# anyway since we're about to truncate.
|
||||
saved_path = self._save_full_output(content, job_id)
|
||||
footer = f"\n\n... [truncated, full output saved to {saved_path}]"
|
||||
visible = max(0, max_output - len(footer))
|
||||
visible = max(0, MAX_PLATFORM_OUTPUT - len(footer))
|
||||
logger.info(
|
||||
"Cron output truncated (%d chars) — full output: %s",
|
||||
len(content), saved_path,
|
||||
)
|
||||
content = content[:visible] + footer
|
||||
elif saved_path:
|
||||
# Truncation disabled (max_output=0) but content was large enough
|
||||
# to warrant an audit copy.
|
||||
logger.info(
|
||||
"Cron output delivered untruncated (%d chars, truncation "
|
||||
"disabled) — audit copy saved to %s",
|
||||
len(content), saved_path,
|
||||
)
|
||||
|
||||
# Substrate-level anti-loop guard: drop hallucinated "silence narration"
|
||||
# (*(silent)*, 🔇, a bare ".", etc.) before it ever reaches the adapter.
|
||||
|
|
|
|||
|
|
@ -113,6 +113,7 @@ class BlueBubblesAdapter(BasePlatformAdapter):
|
|||
platform = Platform.BLUEBUBBLES
|
||||
SUPPORTS_MESSAGE_EDITING = False
|
||||
MAX_MESSAGE_LENGTH = MAX_TEXT_LENGTH
|
||||
splits_long_messages = True # send() chunks via truncate_message(MAX_MESSAGE_LENGTH)
|
||||
|
||||
def __init__(self, config: PlatformConfig):
|
||||
super().__init__(config, Platform.BLUEBUBBLES)
|
||||
|
|
|
|||
|
|
@ -1139,6 +1139,7 @@ class WeixinAdapter(BasePlatformAdapter):
|
|||
"""Native Hermes adapter for Weixin personal accounts."""
|
||||
|
||||
supports_code_blocks = True # Weixin renders fenced code blocks
|
||||
splits_long_messages = True # send() chunks via _split_text()
|
||||
|
||||
MAX_MESSAGE_LENGTH = 2000
|
||||
|
||||
|
|
|
|||
|
|
@ -187,6 +187,8 @@ class WhatsAppCloudAdapter(WhatsAppBehaviorMixin, BasePlatformAdapter):
|
|||
syntax). The Baileys adapter does the same.
|
||||
"""
|
||||
|
||||
splits_long_messages = True # send() chunks via truncate_message()
|
||||
|
||||
def __init__(self, config: PlatformConfig):
|
||||
super().__init__(config, Platform.WHATSAPP_CLOUD)
|
||||
extra = config.extra or {}
|
||||
|
|
|
|||
|
|
@ -4983,6 +4983,7 @@ class YuanbaoAdapter(BasePlatformAdapter):
|
|||
|
||||
PLATFORM = Platform.YUANBAO
|
||||
MAX_TEXT_CHUNK: int = 4000 # Yuanbao single message character limit
|
||||
splits_long_messages = True # send() auto-chunks via truncate_message(MAX_TEXT_CHUNK)
|
||||
MEDIA_MAX_SIZE_MB: int = 50 # Max media file size in MB for upload validation
|
||||
REPLY_REF_MAX_ENTRIES: ClassVar[int] = 500 # Max capacity of reference dedup dict
|
||||
|
||||
|
|
|
|||
|
|
@ -1410,6 +1410,7 @@ class FeishuAdapter(BasePlatformAdapter):
|
|||
"""Feishu/Lark bot adapter."""
|
||||
|
||||
supports_code_blocks = True # Feishu renders fenced code blocks
|
||||
splits_long_messages = True # send() chunks via truncate_message(MAX_MESSAGE_LENGTH)
|
||||
|
||||
MAX_MESSAGE_LENGTH = 8000
|
||||
# Max distinct chat IDs retained in _chat_locks before LRU eviction kicks in.
|
||||
|
|
|
|||
|
|
@ -775,6 +775,7 @@ class MatrixAdapter(BasePlatformAdapter):
|
|||
"""Gateway adapter for Matrix (any homeserver)."""
|
||||
|
||||
supports_code_blocks = True # Matrix renders fenced code blocks (HTML/markdown)
|
||||
splits_long_messages = True # send() chunks via truncate_message(MAX_MESSAGE_LENGTH)
|
||||
|
||||
# Matrix clients commonly reserve typed "/" for client-local commands;
|
||||
# the adapter accepts "!command" as the alias that always reaches Hermes
|
||||
|
|
|
|||
|
|
@ -71,6 +71,8 @@ def check_mattermost_requirements() -> bool:
|
|||
class MattermostAdapter(BasePlatformAdapter):
|
||||
"""Gateway adapter for Mattermost (self-hosted or cloud)."""
|
||||
|
||||
splits_long_messages = True # send() chunks via truncate_message(MAX_POST_LENGTH)
|
||||
|
||||
def __init__(self, config: PlatformConfig):
|
||||
super().__init__(config, Platform.MATTERMOST)
|
||||
|
||||
|
|
|
|||
|
|
@ -321,6 +321,7 @@ class SlackAdapter(BasePlatformAdapter):
|
|||
|
||||
MAX_MESSAGE_LENGTH = 39000 # Slack API allows 40,000 chars; leave margin
|
||||
supports_code_blocks = True # Slack mrkdwn renders fenced code blocks
|
||||
splits_long_messages = True # send() chunks via truncate_message(MAX_MESSAGE_LENGTH)
|
||||
# Slack blocks typed native slash commands inside threads ("/approve is
|
||||
# not supported in threads. Sorry!"). The adapter rewrites a leading
|
||||
# "!" to "/" for known commands (see _handle_slack_message), so "!" is
|
||||
|
|
|
|||
|
|
@ -691,6 +691,7 @@ class TeamsAdapter(BasePlatformAdapter):
|
|||
"""Microsoft Teams adapter using the microsoft-teams-apps SDK."""
|
||||
|
||||
MAX_MESSAGE_LENGTH = 28000 # Teams text message limit (~28 KB)
|
||||
splits_long_messages = True # send() chunks via truncate_message()
|
||||
|
||||
def __init__(self, config: PlatformConfig):
|
||||
super().__init__(config, Platform("teams"))
|
||||
|
|
|
|||
|
|
@ -337,6 +337,7 @@ class WhatsAppAdapter(WhatsAppBehaviorMixin, BasePlatformAdapter):
|
|||
|
||||
# Default bridge location resolved via shared helper
|
||||
_DEFAULT_BRIDGE_DIR = None # resolved in __init__
|
||||
splits_long_messages = True # send() chunks via truncate_message()
|
||||
|
||||
def __init__(self, config: PlatformConfig):
|
||||
super().__init__(config, Platform.WHATSAPP)
|
||||
|
|
|
|||
|
|
@ -367,50 +367,6 @@ async def test_short_output_never_truncated(tmp_path, monkeypatch):
|
|||
assert not list(tmp_path.glob("cron/output/*.txt"))
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_env_override_changes_truncation_threshold(tmp_path, monkeypatch):
|
||||
"""HERMES_DELIVERY_MAX_PLATFORM_OUTPUT env var overrides the default 4000."""
|
||||
monkeypatch.setattr("gateway.delivery.get_hermes_home", lambda: tmp_path)
|
||||
monkeypatch.setenv("HERMES_DELIVERY_MAX_PLATFORM_OUTPUT", "200")
|
||||
adapter = NonChunkingAdapter()
|
||||
router = DeliveryRouter(GatewayConfig(), adapters={Platform.DISCORD: adapter})
|
||||
target = DeliveryTarget.parse("discord:123")
|
||||
|
||||
content = "x" * 300 # over the env-override threshold of 200
|
||||
await router._deliver_to_platform(target, content, metadata={"job_id": "job4"})
|
||||
|
||||
delivered = adapter.calls[0]["content"]
|
||||
assert len(delivered) < 300 # truncated because env lowered the bar
|
||||
assert "truncated" in delivered.lower()
|
||||
# Audit file saved (truncation path always saves when it truncates)
|
||||
saved_files = list(tmp_path.glob("cron/output/job4_*.txt"))
|
||||
assert len(saved_files) == 1
|
||||
assert saved_files[0].read_text() == content
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_env_override_disable_truncation(tmp_path, monkeypatch):
|
||||
"""Setting HERMES_DELIVERY_MAX_PLATFORM_OUTPUT=0 disables truncation entirely."""
|
||||
monkeypatch.setattr("gateway.delivery.get_hermes_home", lambda: tmp_path)
|
||||
monkeypatch.setenv("HERMES_DELIVERY_MAX_PLATFORM_OUTPUT", "0")
|
||||
adapter = NonChunkingAdapter()
|
||||
router = DeliveryRouter(GatewayConfig(), adapters={Platform.DISCORD: adapter})
|
||||
target = DeliveryTarget.parse("discord:123")
|
||||
|
||||
content = "x" * 10000
|
||||
await router._deliver_to_platform(target, content, metadata={"job_id": "job5"})
|
||||
|
||||
# With max_output=0, truncation is disabled — even non-chunking adapters
|
||||
# receive the full content (they may error at the platform API level, but
|
||||
# that's the user's explicit choice).
|
||||
assert adapter.calls[0]["content"] == content
|
||||
# Audit file STILL saved — the audit threshold (4000) is independent of
|
||||
# the truncation setting. Content (10000) exceeds it.
|
||||
saved_files = list(tmp_path.glob("cron/output/job5_*.txt"))
|
||||
assert len(saved_files) == 1
|
||||
assert saved_files[0].read_text() == content
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_audit_save_failure_does_not_break_chunking_delivery(tmp_path, monkeypatch):
|
||||
"""If the audit save fails (disk full, permissions), chunking adapters
|
||||
|
|
@ -431,24 +387,21 @@ async def test_audit_save_failure_does_not_break_chunking_delivery(tmp_path, mon
|
|||
|
||||
monkeypatch.setattr(router, "_save_full_output", failing_save)
|
||||
|
||||
# Should NOT raise — audit failure is caught
|
||||
# Should NOT raise — audit failure is caught for chunking adapters
|
||||
await router._deliver_to_platform(target, long_content, metadata={"job_id": "job6"})
|
||||
|
||||
# Adapter still got the full content
|
||||
assert adapter.calls[0]["content"] == long_content
|
||||
# Save was attempted
|
||||
# Save was attempted (best-effort, swallowed)
|
||||
assert call_count["n"] == 1
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_audit_save_failure_does_not_break_non_chunking_delivery(tmp_path, monkeypatch):
|
||||
"""If the audit save fails AND truncation is needed, the fallback save
|
||||
in Step 2 is NOT caught — the footer needs a valid path, so this is a
|
||||
real failure. But if content exceeds the audit threshold AND truncation
|
||||
is disabled (max_output=0), the caught Step 1 failure lets delivery
|
||||
proceed."""
|
||||
async def test_save_failure_during_truncation_raises_for_non_chunking_adapter(tmp_path, monkeypatch):
|
||||
"""For a non-chunking adapter, the truncation footer needs a valid saved
|
||||
path. If the save fails there, that is a real delivery problem and the
|
||||
error propagates (not swallowed like the chunking best-effort save)."""
|
||||
monkeypatch.setattr("gateway.delivery.get_hermes_home", lambda: tmp_path)
|
||||
monkeypatch.setenv("HERMES_DELIVERY_MAX_PLATFORM_OUTPUT", "0")
|
||||
|
||||
adapter = NonChunkingAdapter()
|
||||
router = DeliveryRouter(GatewayConfig(), adapters={Platform.DISCORD: adapter})
|
||||
|
|
@ -461,8 +414,10 @@ async def test_audit_save_failure_does_not_break_non_chunking_delivery(tmp_path,
|
|||
|
||||
monkeypatch.setattr(router, "_save_full_output", failing_save)
|
||||
|
||||
# max_output=0 → no truncation → Step 1 failure is caught → delivery proceeds
|
||||
await router._deliver_to_platform(target, long_content, metadata={"job_id": "job7"})
|
||||
# Non-chunking adapter must truncate → needs a valid saved path → the
|
||||
# Step 1 best-effort catch swallows the first attempt, but the Step 2
|
||||
# retry (footer needs the path) re-raises.
|
||||
with pytest.raises(OSError, match="No space left on device"):
|
||||
await router._deliver_to_platform(target, long_content, metadata={"job_id": "job7"})
|
||||
|
||||
|
||||
# Non-chunking adapter still got the full content (truncation disabled)
|
||||
assert adapter.calls[0]["content"] == long_content
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue