From 54e7b74f7f476b7d0de31e5f3a97376647e68e64 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Sat, 6 Jun 2026 09:00:10 -0700 Subject: [PATCH] fix(gateway): plain text while busy interrupts by default again (#40590) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: respect disabled auto-compaction on context overflow Port from anomalyco/opencode#30749. When compression.enabled is false, NO automatic compaction trigger may fire. The proactive token-threshold paths (preflight + post-response should_compress gate) already honoured the setting, but the three provider-overflow recovery paths in the agent loop — long-context-tier 429, 413 payload-too-large, and context-overflow — called _compress_context() unconditionally, silently compressing and rotating the session against the user's explicit choice. Add a single guard at the top of the overflow-recovery dispatch: when compression is disabled and the error is one of those three overflow classes, surface a terminal error (compaction_disabled: True) telling the user to /compress manually, /new, switch to a larger-context model, or reduce attachments. Manual /compress (force=True) is unaffected — it never enters this loop. Tests: new TestOverflowWithCompactionDisabled (413 + 400 overflow don't compress when disabled; control case still compresses when enabled). Existing overflow-recovery tests updated to enable compaction explicitly (they verify the recovery fires); fixture defaults flipped to True to match production (compression.enabled defaults to True). * fix(gateway): plain text while busy interrupts by default again busy_input_mode (default 'interrupt') was advertised as the busy-behavior knob, but a second knob added in 7abd62719 — busy_text_mode, defaulting to 'queue' — short-circuited every plain TEXT message before busy_input_mode was consulted. Result: plain follow-ups silently queued instead of interrupting, even with busy_input_mode left at its 'interrupt' default (regression #38390, silent-queue #31588). Collapse to one source of truth: busy_input_mode drives text handling. busy_text_mode is kept only as a legacy explicit override for back-compat (existing queue setups keep working); when unset it follows busy_input_mode. All default fallbacks flipped queue->interrupt. The debounce mechanism is preserved and now keyed off the resolved mode. Fixes #38390, #31588. --- gateway/platforms/base.py | 11 +++++-- gateway/run.py | 27 ++++++++++++----- .../gateway/test_active_session_text_merge.py | 9 +++--- tests/gateway/test_restart_drain.py | 29 ++++++++++++++----- 4 files changed, 55 insertions(+), 21 deletions(-) diff --git a/gateway/platforms/base.py b/gateway/platforms/base.py index 76084b3cb68..0ddcc1e8cb6 100644 --- a/gateway/platforms/base.py +++ b/gateway/platforms/base.py @@ -1817,9 +1817,14 @@ class BasePlatformAdapter(ABC): self._active_sessions: Dict[str, asyncio.Event] = {} self._pending_messages: Dict[str, MessageEvent] = {} self._session_tasks: Dict[str, asyncio.Task] = {} + # Legacy busy_text_mode env var; when unset the runner syncs the + # resolved value (driven by busy_input_mode) onto the adapter after + # construction (gateway/run.py). Default to "interrupt" so a stray + # pre-sync read matches the single-knob default rather than silently + # queueing. self._busy_text_mode: str = ( - os.environ.get("HERMES_GATEWAY_BUSY_TEXT_MODE", "queue").strip().lower() - or "queue" + os.environ.get("HERMES_GATEWAY_BUSY_TEXT_MODE", "interrupt").strip().lower() + or "interrupt" ) self._busy_text_debounce_seconds: float = _float_env( "HERMES_GATEWAY_BUSY_TEXT_DEBOUNCE_SECONDS", 0.35 @@ -3406,7 +3411,7 @@ class BasePlatformAdapter(ABC): def _is_queue_text_debounce_candidate(self, event: MessageEvent) -> bool: """Return True for normal text eligible for queue-mode debounce.""" result = ( - getattr(self, "_busy_text_mode", "queue") == "queue" + getattr(self, "_busy_text_mode", "interrupt") == "queue" and event.message_type == MessageType.TEXT and not getattr(event, "internal", False) and not event.is_command() diff --git a/gateway/run.py b/gateway/run.py index 18aa5ef175f..8ec8eefb549 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -3234,14 +3234,27 @@ class GatewayRunner: @staticmethod def _load_busy_text_mode() -> str: - """Load normal busy TEXT follow-up behavior from config/env.""" - mode = os.getenv("HERMES_GATEWAY_BUSY_TEXT_MODE", "").strip().lower() - if not mode: + """Resolve normal busy TEXT follow-up behavior. + + ``busy_input_mode`` is the single source of truth (default + ``interrupt``). The legacy ``busy_text_mode`` knob is honored only + when a user explicitly set it, so existing queue setups keep + working; new installs follow ``busy_input_mode``. Returns one of + ``interrupt`` | ``queue`` (``steer`` is handled upstream by + ``busy_input_mode`` and maps to non-queue text handling here). + """ + # Legacy explicit override wins for backward compat. + legacy = os.getenv("HERMES_GATEWAY_BUSY_TEXT_MODE", "").strip().lower() + if not legacy: cfg = _load_gateway_runtime_config() - mode = str(cfg_get(cfg, "display", "busy_text_mode", default="") or "").strip().lower() - if mode == "interrupt": + legacy = str(cfg_get(cfg, "display", "busy_text_mode", default="") or "").strip().lower() + if legacy == "interrupt": return "interrupt" - return "queue" + if legacy == "queue": + return "queue" + # No explicit legacy knob → follow busy_input_mode. + input_mode = GatewayRunner._load_busy_input_mode() + return "queue" if input_mode == "queue" else "interrupt" @staticmethod def _load_restart_drain_timeout() -> float: @@ -3429,7 +3442,7 @@ class GatewayRunner: running_agent = self._running_agents.get(session_key) effective_mode = self._busy_input_mode - busy_text_mode = getattr(self, "_busy_text_mode", "queue") + busy_text_mode = getattr(self, "_busy_text_mode", "interrupt") if ( event.message_type == MessageType.TEXT and busy_text_mode == "queue" diff --git a/tests/gateway/test_active_session_text_merge.py b/tests/gateway/test_active_session_text_merge.py index 05e7a36fd6b..16d40815ba2 100644 --- a/tests/gateway/test_active_session_text_merge.py +++ b/tests/gateway/test_active_session_text_merge.py @@ -352,14 +352,15 @@ async def test_single_followup_is_stored_as_is(): assert not adapter._active_sessions[session_key].is_set() -def test_adapter_defaults_to_queue_mode(monkeypatch): +def test_adapter_defaults_to_interrupt_mode(monkeypatch): monkeypatch.delenv("HERMES_GATEWAY_BUSY_TEXT_MODE", raising=False) adapter = _make_initialized_adapter() - assert adapter._busy_text_mode == "queue" - assert adapter._is_queue_text_debounce_candidate(_make_event("hello")) + assert adapter._busy_text_mode == "interrupt" + assert not adapter._is_queue_text_debounce_candidate(_make_event("hello")) -def test_adapter_is_queue_text_debounce_candidate_by_default(): +def test_adapter_is_queue_text_debounce_candidate_when_queue_set(): + # _make_adapter() pins _busy_text_mode="queue" to exercise debounce. adapter = _make_adapter() assert adapter._is_queue_text_debounce_candidate(_make_event("hello world")) diff --git a/tests/gateway/test_restart_drain.py b/tests/gateway/test_restart_drain.py index c1578e3617a..a48e5f73781 100644 --- a/tests/gateway/test_restart_drain.py +++ b/tests/gateway/test_restart_drain.py @@ -116,22 +116,37 @@ def test_load_busy_input_mode_prefers_env_then_config_then_default(tmp_path, mon assert gateway_run.GatewayRunner._load_busy_input_mode() == "interrupt" -def test_load_busy_text_mode_defaults_to_queue_and_allows_interrupt(tmp_path, monkeypatch): +def test_load_busy_text_mode_follows_input_mode_and_honors_legacy(tmp_path, monkeypatch): monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path) monkeypatch.delenv("HERMES_GATEWAY_BUSY_TEXT_MODE", raising=False) + monkeypatch.delenv("HERMES_GATEWAY_BUSY_INPUT_MODE", raising=False) - assert gateway_run.GatewayRunner._load_busy_text_mode() == "queue" - - (tmp_path / "config.yaml").write_text( - "display:\n busy_text_mode: interrupt\n", encoding="utf-8" - ) + # No knobs set → follows busy_input_mode, which defaults to interrupt. assert gateway_run.GatewayRunner._load_busy_text_mode() == "interrupt" + # busy_input_mode=queue propagates to text handling (single source of truth). + (tmp_path / "config.yaml").write_text( + "display:\n busy_input_mode: queue\n", encoding="utf-8" + ) + assert gateway_run.GatewayRunner._load_busy_text_mode() == "queue" + + # Legacy explicit busy_text_mode still wins for backward compat. + (tmp_path / "config.yaml").write_text( + "display:\n busy_input_mode: interrupt\n busy_text_mode: queue\n", + encoding="utf-8", + ) + assert gateway_run.GatewayRunner._load_busy_text_mode() == "queue" + + # Legacy env override wins too. + (tmp_path / "config.yaml").write_text( + "display:\n busy_input_mode: interrupt\n", encoding="utf-8" + ) monkeypatch.setenv("HERMES_GATEWAY_BUSY_TEXT_MODE", "queue") assert gateway_run.GatewayRunner._load_busy_text_mode() == "queue" + # Bogus legacy value is ignored → falls through to busy_input_mode (interrupt). monkeypatch.setenv("HERMES_GATEWAY_BUSY_TEXT_MODE", "bogus") - assert gateway_run.GatewayRunner._load_busy_text_mode() == "queue" + assert gateway_run.GatewayRunner._load_busy_text_mode() == "interrupt" def test_load_restart_drain_timeout_prefers_env_then_config_then_default(