fix(delivery): drop env-var knob, flag all chunking adapters

Follow-up to ScotterMonk's cron-truncation fix: - Remove HERMES_DELIVERY_MAX_PLATFORM_OUTPUT env var. Behavioral config belongs in config.yaml, not a new HERMES_* env var (.env is secrets only). The actual bug is fixed entirely by the adapter-aware skip; the configurable cap was unneeded scope. MAX_PLATFORM_OUTPUT is a constant again, collapsing the max_output=0 disable branch and the audit-vs-truncation threshold divergence. - Flag the remaining verified-chunking adapters (slack, matrix, feishu, mattermost, teams, whatsapp, whatsapp_cloud, weixin, bluebubbles, yuanbao) with splits_long_messages=True so the fix covers the whole bug class, not just Discord/Telegram. Each verified to chunk in its own send() via truncate_message(). - SMS deliberately left False: it chunks for normal replies but a multi-segment cron blast is cost-bearing; the 4000-cap + file save is the safer default there. - Update tests: drop the two env-override tests, add a test asserting a save failure during truncation (non-chunking) propagates.
2026-06-23 10:42:00 +00:00 · 2026-06-22 04:35:23 -07:00 · 2026-06-22 04:35:23 -07:00 · e9cd8c5bf3
commit e9cd8c5bf3
parent 86e4521cb1
12 changed files with 46 additions and 117 deletions
--- a/gateway/delivery.py
+++ b/gateway/delivery.py
@ -20,34 +20,13 @@ from hermes_cli.config import get_hermes_home

 logger = logging.getLogger(__name__)

-# Default cap before gateway-level truncation of cron output for platform
-# delivery.  Telegram's hard API limit is 4096; the 200-char headroom covers
-# the "full output saved to …" footer appended on truncation.  Override via
-# the HERMES_DELIVERY_MAX_PLATFORM_OUTPUT env var.  Adapters that split long
+# Cap before gateway-level truncation of cron output for non-chunking platform
+# delivery.  Telegram's hard API limit is 4096; the headroom covers the "full
+# output saved to …" footer appended on truncation.  Adapters that split long
 # messages natively (BasePlatformAdapter.splits_long_messages) bypass this
 # entirely — the adapter chunks in its own send() and the full output is
 # preserved.
-_DEFAULT_MAX_PLATFORM_OUTPUT = 4000
-
-
-def _max_platform_output() -> int:
-    """Max chars before gateway-level truncation of cron output.
-
-    ``HERMES_DELIVERY_MAX_PLATFORM_OUTPUT`` env var overrides the default
-    (4000).  Non-int or negative values fall back to the default with a
-    warning.
-    """
-    env = os.getenv("HERMES_DELIVERY_MAX_PLATFORM_OUTPUT")
-    if env is not None:
-        try:
-            return max(0, int(env.strip()))
-        except ValueError:
-            logger.warning(
-                "HERMES_DELIVERY_MAX_PLATFORM_OUTPUT=%r is not an int; "
-                "using default %d",
-                env, _DEFAULT_MAX_PLATFORM_OUTPUT,
-            )
-    return _DEFAULT_MAX_PLATFORM_OUTPUT
+MAX_PLATFORM_OUTPUT = 4000

 # Matches strings that are *only* a "silence" narration with optional markdown
 # wrappers. Covers: *(silent)*, _silent_, `silent`, ~silent~, (silent), silent,
@ -345,28 +324,21 @@ class DeliveryRouter:
        # Guard: handle oversized cron output.
        #
        # Two independent decisions:
-        #   1. AUDIT SAVE — when content exceeds the audit threshold (4000
-        #      chars, the historical default), the full output is always
-        #      written to disk as a recoverable audit trail.  This fires
-        #      regardless of truncation setting or adapter capability.
-        #   2. TRUNCATION — for non-chunking adapters, content above
-        #      max_output is truncated with a footer pointing to the saved
-        #      file.  Chunking-capable adapters (splits_long_messages=True)
-        #      receive the full payload and split natively in their send().
-        #      Setting HERMES_DELIVERY_MAX_PLATFORM_OUTPUT=0 disables
-        #      truncation entirely (the user takes responsibility for platform
-        #      API limits), but the audit save in step 1 still fires.
-        max_output = _max_platform_output()
+        #   1. AUDIT SAVE — when content exceeds MAX_PLATFORM_OUTPUT, the full
+        #      output is always written to disk as a recoverable audit trail.
+        #      This fires regardless of adapter capability (best-effort).
+        #   2. TRUNCATION — for non-chunking adapters, content above the cap is
+        #      truncated with a footer pointing to the saved file.  Chunking-
+        #      capable adapters (splits_long_messages=True) receive the full
+        #      payload and split natively in their send().
        job_id = (metadata or {}).get("job_id", "unknown")
        saved_path: Optional[Path] = None

-        # Step 1 — audit save (independent of truncation, best-effort).
-        # The save is a side-effect audit trail, not essential to delivery.
-        # If it fails (full disk, permissions), delivery proceeds — the
-        # content reaches the adapter regardless.  The truncation path's
-        # fallback save below is NOT best-effort: the footer needs a valid
-        # path, so a failure there is a real delivery problem.
-        if len(content) > _DEFAULT_MAX_PLATFORM_OUTPUT:
+        if len(content) > MAX_PLATFORM_OUTPUT:
+            # Step 1 — audit save (best-effort).  The save is a side-effect
+            # audit trail, not essential to delivery.  If it fails (full disk,
+            # permissions), delivery proceeds — the content reaches the adapter
+            # regardless.
            try:
                saved_path = self._save_full_output(content, job_id)
            except OSError as exc:
@ -376,9 +348,8 @@ class DeliveryRouter:
                    len(content), job_id, exc,
                )

-        # Step 2 — truncation (only for non-chunking adapters).
-        if max_output > 0 and len(content) > max_output:
-            if adapter and getattr(adapter, "splits_long_messages", False):
+            # Step 2 — truncation (only for non-chunking adapters).
+            if getattr(adapter, "splits_long_messages", False):
                # Adapter chunks natively — deliver full payload.
                if saved_path:
                    logger.info(
@ -387,27 +358,18 @@ class DeliveryRouter:
                        len(content), saved_path,
                    )
            else:
-                # Non-chunking adapter — truncate with footer.
+                # Non-chunking adapter — truncate with footer.  The footer
+                # needs a valid path, so if the best-effort save above failed,
+                # retry it here (a failure now is a real delivery problem).
                if saved_path is None:
-                    # Content exceeded max_output but not the audit threshold
-                    # (e.g. HERMES_DELIVERY_MAX_PLATFORM_OUTPUT=200).  Save
-                    # anyway since we're about to truncate.
                    saved_path = self._save_full_output(content, job_id)
                footer = f"\n\n... [truncated, full output saved to {saved_path}]"
-                visible = max(0, max_output - len(footer))
+                visible = max(0, MAX_PLATFORM_OUTPUT - len(footer))
                logger.info(
                    "Cron output truncated (%d chars) — full output: %s",
                    len(content), saved_path,
                )
                content = content[:visible] + footer
-        elif saved_path:
-            # Truncation disabled (max_output=0) but content was large enough
-            # to warrant an audit copy.
-            logger.info(
-                "Cron output delivered untruncated (%d chars, truncation "
-                "disabled) — audit copy saved to %s",
-                len(content), saved_path,
-            )
        
        # Substrate-level anti-loop guard: drop hallucinated "silence narration"
        # (*(silent)*, 🔇, a bare ".", etc.) before it ever reaches the adapter.
--- a/gateway/platforms/bluebubbles.py
+++ b/gateway/platforms/bluebubbles.py
@ -113,6 +113,7 @@ class BlueBubblesAdapter(BasePlatformAdapter):
    platform = Platform.BLUEBUBBLES
    SUPPORTS_MESSAGE_EDITING = False
    MAX_MESSAGE_LENGTH = MAX_TEXT_LENGTH
+    splits_long_messages = True  # send() chunks via truncate_message(MAX_MESSAGE_LENGTH)

    def __init__(self, config: PlatformConfig):
        super().__init__(config, Platform.BLUEBUBBLES)
--- a/gateway/platforms/weixin.py
+++ b/gateway/platforms/weixin.py
@ -1139,6 +1139,7 @@ class WeixinAdapter(BasePlatformAdapter):
    """Native Hermes adapter for Weixin personal accounts."""

    supports_code_blocks = True  # Weixin renders fenced code blocks
+    splits_long_messages = True  # send() chunks via _split_text()

    MAX_MESSAGE_LENGTH = 2000

--- a/gateway/platforms/whatsapp_cloud.py
+++ b/gateway/platforms/whatsapp_cloud.py
@ -187,6 +187,8 @@ class WhatsAppCloudAdapter(WhatsAppBehaviorMixin, BasePlatformAdapter):
    syntax). The Baileys adapter does the same.
    """

+    splits_long_messages = True  # send() chunks via truncate_message()
+
    def __init__(self, config: PlatformConfig):
        super().__init__(config, Platform.WHATSAPP_CLOUD)
        extra = config.extra or {}
--- a/gateway/platforms/yuanbao.py
+++ b/gateway/platforms/yuanbao.py
@ -4983,6 +4983,7 @@ class YuanbaoAdapter(BasePlatformAdapter):

    PLATFORM = Platform.YUANBAO
    MAX_TEXT_CHUNK: int = 4000  # Yuanbao single message character limit
+    splits_long_messages = True  # send() auto-chunks via truncate_message(MAX_TEXT_CHUNK)
    MEDIA_MAX_SIZE_MB: int = 50  # Max media file size in MB for upload validation
    REPLY_REF_MAX_ENTRIES: ClassVar[int] = 500  # Max capacity of reference dedup dict

--- a/plugins/platforms/feishu/adapter.py
+++ b/plugins/platforms/feishu/adapter.py
@ -1410,6 +1410,7 @@ class FeishuAdapter(BasePlatformAdapter):
    """Feishu/Lark bot adapter."""

    supports_code_blocks = True  # Feishu renders fenced code blocks
+    splits_long_messages = True  # send() chunks via truncate_message(MAX_MESSAGE_LENGTH)

    MAX_MESSAGE_LENGTH = 8000
    # Max distinct chat IDs retained in _chat_locks before LRU eviction kicks in.
--- a/plugins/platforms/matrix/adapter.py
+++ b/plugins/platforms/matrix/adapter.py
@ -775,6 +775,7 @@ class MatrixAdapter(BasePlatformAdapter):
    """Gateway adapter for Matrix (any homeserver)."""

    supports_code_blocks = True  # Matrix renders fenced code blocks (HTML/markdown)
+    splits_long_messages = True  # send() chunks via truncate_message(MAX_MESSAGE_LENGTH)

    # Matrix clients commonly reserve typed "/" for client-local commands;
    # the adapter accepts "!command" as the alias that always reaches Hermes
--- a/plugins/platforms/mattermost/adapter.py
+++ b/plugins/platforms/mattermost/adapter.py
@ -71,6 +71,8 @@ def check_mattermost_requirements() -> bool:
 class MattermostAdapter(BasePlatformAdapter):
    """Gateway adapter for Mattermost (self-hosted or cloud)."""

+    splits_long_messages = True  # send() chunks via truncate_message(MAX_POST_LENGTH)
+
    def __init__(self, config: PlatformConfig):
        super().__init__(config, Platform.MATTERMOST)

--- a/plugins/platforms/slack/adapter.py
+++ b/plugins/platforms/slack/adapter.py
@ -321,6 +321,7 @@ class SlackAdapter(BasePlatformAdapter):

    MAX_MESSAGE_LENGTH = 39000  # Slack API allows 40,000 chars; leave margin
    supports_code_blocks = True  # Slack mrkdwn renders fenced code blocks
+    splits_long_messages = True  # send() chunks via truncate_message(MAX_MESSAGE_LENGTH)
    # Slack blocks typed native slash commands inside threads ("/approve is
    # not supported in threads. Sorry!").  The adapter rewrites a leading
    # "!" to "/" for known commands (see _handle_slack_message), so "!" is
--- a/plugins/platforms/teams/adapter.py
+++ b/plugins/platforms/teams/adapter.py
@ -691,6 +691,7 @@ class TeamsAdapter(BasePlatformAdapter):
    """Microsoft Teams adapter using the microsoft-teams-apps SDK."""

    MAX_MESSAGE_LENGTH = 28000  # Teams text message limit (~28 KB)
+    splits_long_messages = True  # send() chunks via truncate_message()

    def __init__(self, config: PlatformConfig):
        super().__init__(config, Platform("teams"))
--- a/plugins/platforms/whatsapp/adapter.py
+++ b/plugins/platforms/whatsapp/adapter.py
@ -337,6 +337,7 @@ class WhatsAppAdapter(WhatsAppBehaviorMixin, BasePlatformAdapter):

    # Default bridge location resolved via shared helper
    _DEFAULT_BRIDGE_DIR = None  # resolved in __init__
+    splits_long_messages = True  # send() chunks via truncate_message()

    def __init__(self, config: PlatformConfig):
        super().__init__(config, Platform.WHATSAPP)
--- a/tests/gateway/test_delivery.py
+++ b/tests/gateway/test_delivery.py
@ -367,50 +367,6 @@ async def test_short_output_never_truncated(tmp_path, monkeypatch):
    assert not list(tmp_path.glob("cron/output/*.txt"))


-@pytest.mark.asyncio
-async def test_env_override_changes_truncation_threshold(tmp_path, monkeypatch):
-    """HERMES_DELIVERY_MAX_PLATFORM_OUTPUT env var overrides the default 4000."""
-    monkeypatch.setattr("gateway.delivery.get_hermes_home", lambda: tmp_path)
-    monkeypatch.setenv("HERMES_DELIVERY_MAX_PLATFORM_OUTPUT", "200")
-    adapter = NonChunkingAdapter()
-    router = DeliveryRouter(GatewayConfig(), adapters={Platform.DISCORD: adapter})
-    target = DeliveryTarget.parse("discord:123")
-
-    content = "x" * 300  # over the env-override threshold of 200
-    await router._deliver_to_platform(target, content, metadata={"job_id": "job4"})
-
-    delivered = adapter.calls[0]["content"]
-    assert len(delivered) < 300  # truncated because env lowered the bar
-    assert "truncated" in delivered.lower()
-    # Audit file saved (truncation path always saves when it truncates)
-    saved_files = list(tmp_path.glob("cron/output/job4_*.txt"))
-    assert len(saved_files) == 1
-    assert saved_files[0].read_text() == content
-
-
-@pytest.mark.asyncio
-async def test_env_override_disable_truncation(tmp_path, monkeypatch):
-    """Setting HERMES_DELIVERY_MAX_PLATFORM_OUTPUT=0 disables truncation entirely."""
-    monkeypatch.setattr("gateway.delivery.get_hermes_home", lambda: tmp_path)
-    monkeypatch.setenv("HERMES_DELIVERY_MAX_PLATFORM_OUTPUT", "0")
-    adapter = NonChunkingAdapter()
-    router = DeliveryRouter(GatewayConfig(), adapters={Platform.DISCORD: adapter})
-    target = DeliveryTarget.parse("discord:123")
-
-    content = "x" * 10000
-    await router._deliver_to_platform(target, content, metadata={"job_id": "job5"})
-
-    # With max_output=0, truncation is disabled — even non-chunking adapters
-    # receive the full content (they may error at the platform API level, but
-    # that's the user's explicit choice).
-    assert adapter.calls[0]["content"] == content
-    # Audit file STILL saved — the audit threshold (4000) is independent of
-    # the truncation setting.  Content (10000) exceeds it.
-    saved_files = list(tmp_path.glob("cron/output/job5_*.txt"))
-    assert len(saved_files) == 1
-    assert saved_files[0].read_text() == content
-
-
@pytest.mark.asyncio
 async def test_audit_save_failure_does_not_break_chunking_delivery(tmp_path, monkeypatch):
    """If the audit save fails (disk full, permissions), chunking adapters
@ -431,24 +387,21 @@ async def test_audit_save_failure_does_not_break_chunking_delivery(tmp_path, mon

    monkeypatch.setattr(router, "_save_full_output", failing_save)

-    # Should NOT raise — audit failure is caught
+    # Should NOT raise — audit failure is caught for chunking adapters
    await router._deliver_to_platform(target, long_content, metadata={"job_id": "job6"})

    # Adapter still got the full content
    assert adapter.calls[0]["content"] == long_content
-    # Save was attempted
+    # Save was attempted (best-effort, swallowed)
    assert call_count["n"] == 1


@pytest.mark.asyncio
-async def test_audit_save_failure_does_not_break_non_chunking_delivery(tmp_path, monkeypatch):
-    """If the audit save fails AND truncation is needed, the fallback save
-    in Step 2 is NOT caught — the footer needs a valid path, so this is a
-    real failure. But if content exceeds the audit threshold AND truncation
-    is disabled (max_output=0), the caught Step 1 failure lets delivery
-    proceed."""
+async def test_save_failure_during_truncation_raises_for_non_chunking_adapter(tmp_path, monkeypatch):
+    """For a non-chunking adapter, the truncation footer needs a valid saved
+    path. If the save fails there, that is a real delivery problem and the
+    error propagates (not swallowed like the chunking best-effort save)."""
    monkeypatch.setattr("gateway.delivery.get_hermes_home", lambda: tmp_path)
-    monkeypatch.setenv("HERMES_DELIVERY_MAX_PLATFORM_OUTPUT", "0")

    adapter = NonChunkingAdapter()
    router = DeliveryRouter(GatewayConfig(), adapters={Platform.DISCORD: adapter})
@ -461,8 +414,10 @@ async def test_audit_save_failure_does_not_break_non_chunking_delivery(tmp_path,

    monkeypatch.setattr(router, "_save_full_output", failing_save)

-    # max_output=0 → no truncation → Step 1 failure is caught → delivery proceeds
-    await router._deliver_to_platform(target, long_content, metadata={"job_id": "job7"})
+    # Non-chunking adapter must truncate → needs a valid saved path → the
+    # Step 1 best-effort catch swallows the first attempt, but the Step 2
+    # retry (footer needs the path) re-raises.
+    with pytest.raises(OSError, match="No space left on device"):
+        await router._deliver_to_platform(target, long_content, metadata={"job_id": "job7"})
+

-    # Non-chunking adapter still got the full content (truncation disabled)
-    assert adapter.calls[0]["content"] == long_content