fix(gateway): plain text while busy interrupts by default again (#40590)

* fix: respect disabled auto-compaction on context overflow

Port from anomalyco/opencode#30749.

When compression.enabled is false, NO automatic compaction trigger may
fire. The proactive token-threshold paths (preflight + post-response
should_compress gate) already honoured the setting, but the three
provider-overflow recovery paths in the agent loop — long-context-tier
429, 413 payload-too-large, and context-overflow — called
_compress_context() unconditionally, silently compressing and rotating
the session against the user's explicit choice.

Add a single guard at the top of the overflow-recovery dispatch: when
compression is disabled and the error is one of those three overflow
classes, surface a terminal error (compaction_disabled: True) telling the
user to /compress manually, /new, switch to a larger-context model, or
reduce attachments. Manual /compress (force=True) is unaffected — it never
enters this loop.

Tests: add TestOverflowWithCompactionDisabled (413 and 400 overflow errors
do not compress when compaction is disabled; a control case still
compresses when it is enabled). Existing overflow-recovery tests now
enable compaction explicitly (they verify that the recovery fires), and
the fixture defaults are flipped to True to match production, where
compression.enabled defaults to True.

* fix(gateway): plain text while busy interrupts by default again

busy_input_mode (default 'interrupt') was advertised as the busy-behavior
knob, but a second knob added in 7abd62719 — busy_text_mode, defaulting to
'queue' — short-circuited every plain TEXT message before busy_input_mode
was consulted. Result: plain follow-ups silently queued instead of
interrupting, even with busy_input_mode left at its 'interrupt' default
(regression #38390, silent-queue #31588).

Collapse to one source of truth: busy_input_mode drives text handling.
busy_text_mode is kept only as a legacy explicit override for backward
compatibility (existing queue setups keep working); when it is unset, or
set to an unrecognized value, the resolved mode follows busy_input_mode.
All default fallbacks are flipped from 'queue' to 'interrupt'. The
debounce mechanism is preserved and is now keyed off the resolved mode.

Fixes #38390, #31588.
This commit is contained in:
Teknium 2026-06-06 09:00:10 -07:00 committed by GitHub
parent 1c2189839d
commit 54e7b74f7f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 55 additions and 21 deletions

View file

@ -1817,9 +1817,14 @@ class BasePlatformAdapter(ABC):
self._active_sessions: Dict[str, asyncio.Event] = {}
self._pending_messages: Dict[str, MessageEvent] = {}
self._session_tasks: Dict[str, asyncio.Task] = {}
# Legacy busy_text_mode env var; when unset the runner syncs the
# resolved value (driven by busy_input_mode) onto the adapter after
# construction (gateway/run.py). Default to "interrupt" so a stray
# pre-sync read matches the single-knob default rather than silently
# queueing.
self._busy_text_mode: str = (
os.environ.get("HERMES_GATEWAY_BUSY_TEXT_MODE", "queue").strip().lower()
or "queue"
os.environ.get("HERMES_GATEWAY_BUSY_TEXT_MODE", "interrupt").strip().lower()
or "interrupt"
)
self._busy_text_debounce_seconds: float = _float_env(
"HERMES_GATEWAY_BUSY_TEXT_DEBOUNCE_SECONDS", 0.35
@ -3406,7 +3411,7 @@ class BasePlatformAdapter(ABC):
def _is_queue_text_debounce_candidate(self, event: MessageEvent) -> bool:
"""Return True for normal text eligible for queue-mode debounce."""
result = (
getattr(self, "_busy_text_mode", "queue") == "queue"
getattr(self, "_busy_text_mode", "interrupt") == "queue"
and event.message_type == MessageType.TEXT
and not getattr(event, "internal", False)
and not event.is_command()

View file

@ -3234,14 +3234,27 @@ class GatewayRunner:
@staticmethod
def _load_busy_text_mode() -> str:
"""Load normal busy TEXT follow-up behavior from config/env."""
mode = os.getenv("HERMES_GATEWAY_BUSY_TEXT_MODE", "").strip().lower()
if not mode:
"""Resolve normal busy TEXT follow-up behavior.
``busy_input_mode`` is the single source of truth (default
``interrupt``). The legacy ``busy_text_mode`` knob is honored only
when a user explicitly set it, so existing queue setups keep
working; new installs follow ``busy_input_mode``. Returns one of
``interrupt`` | ``queue`` (``steer`` is handled upstream by
``busy_input_mode`` and maps to non-queue text handling here).
"""
# Legacy explicit override wins for backward compat.
legacy = os.getenv("HERMES_GATEWAY_BUSY_TEXT_MODE", "").strip().lower()
if not legacy:
cfg = _load_gateway_runtime_config()
mode = str(cfg_get(cfg, "display", "busy_text_mode", default="") or "").strip().lower()
if mode == "interrupt":
legacy = str(cfg_get(cfg, "display", "busy_text_mode", default="") or "").strip().lower()
if legacy == "interrupt":
return "interrupt"
return "queue"
if legacy == "queue":
return "queue"
# No explicit legacy knob → follow busy_input_mode.
input_mode = GatewayRunner._load_busy_input_mode()
return "queue" if input_mode == "queue" else "interrupt"
@staticmethod
def _load_restart_drain_timeout() -> float:
@ -3429,7 +3442,7 @@ class GatewayRunner:
running_agent = self._running_agents.get(session_key)
effective_mode = self._busy_input_mode
busy_text_mode = getattr(self, "_busy_text_mode", "queue")
busy_text_mode = getattr(self, "_busy_text_mode", "interrupt")
if (
event.message_type == MessageType.TEXT
and busy_text_mode == "queue"

View file

@ -352,14 +352,15 @@ async def test_single_followup_is_stored_as_is():
assert not adapter._active_sessions[session_key].is_set()
def test_adapter_defaults_to_queue_mode(monkeypatch):
def test_adapter_defaults_to_interrupt_mode(monkeypatch):
monkeypatch.delenv("HERMES_GATEWAY_BUSY_TEXT_MODE", raising=False)
adapter = _make_initialized_adapter()
assert adapter._busy_text_mode == "queue"
assert adapter._is_queue_text_debounce_candidate(_make_event("hello"))
assert adapter._busy_text_mode == "interrupt"
assert not adapter._is_queue_text_debounce_candidate(_make_event("hello"))
def test_adapter_is_queue_text_debounce_candidate_by_default():
def test_adapter_is_queue_text_debounce_candidate_when_queue_set():
# _make_adapter() pins _busy_text_mode="queue" to exercise debounce.
adapter = _make_adapter()
assert adapter._is_queue_text_debounce_candidate(_make_event("hello world"))

View file

@ -116,22 +116,37 @@ def test_load_busy_input_mode_prefers_env_then_config_then_default(tmp_path, mon
assert gateway_run.GatewayRunner._load_busy_input_mode() == "interrupt"
def test_load_busy_text_mode_defaults_to_queue_and_allows_interrupt(tmp_path, monkeypatch):
def test_load_busy_text_mode_follows_input_mode_and_honors_legacy(tmp_path, monkeypatch):
monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
monkeypatch.delenv("HERMES_GATEWAY_BUSY_TEXT_MODE", raising=False)
monkeypatch.delenv("HERMES_GATEWAY_BUSY_INPUT_MODE", raising=False)
assert gateway_run.GatewayRunner._load_busy_text_mode() == "queue"
(tmp_path / "config.yaml").write_text(
"display:\n busy_text_mode: interrupt\n", encoding="utf-8"
)
# No knobs set → follows busy_input_mode, which defaults to interrupt.
assert gateway_run.GatewayRunner._load_busy_text_mode() == "interrupt"
# busy_input_mode=queue propagates to text handling (single source of truth).
(tmp_path / "config.yaml").write_text(
"display:\n busy_input_mode: queue\n", encoding="utf-8"
)
assert gateway_run.GatewayRunner._load_busy_text_mode() == "queue"
# Legacy explicit busy_text_mode still wins for backward compat.
(tmp_path / "config.yaml").write_text(
"display:\n busy_input_mode: interrupt\n busy_text_mode: queue\n",
encoding="utf-8",
)
assert gateway_run.GatewayRunner._load_busy_text_mode() == "queue"
# Legacy env override wins too.
(tmp_path / "config.yaml").write_text(
"display:\n busy_input_mode: interrupt\n", encoding="utf-8"
)
monkeypatch.setenv("HERMES_GATEWAY_BUSY_TEXT_MODE", "queue")
assert gateway_run.GatewayRunner._load_busy_text_mode() == "queue"
# Bogus legacy value is ignored → falls through to busy_input_mode (interrupt).
monkeypatch.setenv("HERMES_GATEWAY_BUSY_TEXT_MODE", "bogus")
assert gateway_run.GatewayRunner._load_busy_text_mode() == "queue"
assert gateway_run.GatewayRunner._load_busy_text_mode() == "interrupt"
def test_load_restart_drain_timeout_prefers_env_then_config_then_default(