From e0272cfef28f8ef86904a5adf0e34b575b63b109 Mon Sep 17 00:00:00 2001
From: kshitijk4poor <82637225+kshitijk4poor@users.noreply.github.com>
Date: Thu, 25 Jun 2026 01:04:44 +0530
Subject: [PATCH] Revert "fix(compression): make minimum context floor
 configurable (#31600)"

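This reverts commit cae1ee44a7afb462a9fe11863d30feffa0736966.

The revert removes the `compression.minimum_context_floor` config option
along with `get_configurable_minimum_context()` and
`_MINIMUM_CONTEXT_FLOOR_HARD_LIMIT`. The compression threshold floor and
the auxiliary compression model context check use the fixed
`MINIMUM_CONTEXT_LENGTH` (64K) again, and the option is no longer one of
the gateway's cache-busting config keys. The tests covering the
configurable floor are removed.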
---
 agent/agent_init.py                           | 12 ---
 agent/context_compressor.py                   | 27 ++-----
 agent/conversation_compression.py             | 16 ++--
 agent/model_metadata.py                       | 19 -----
 gateway/run.py                                |  1 -
 tests/gateway/test_agent_cache.py             | 23 +-----
 .../run_agent/test_compression_feasibility.py | 26 -------
 .../test_compression_minimum_floor.py         | 75 -------------------
 8 files changed, 12 insertions(+), 187 deletions(-)
 delete mode 100644 tests/run_agent/test_compression_minimum_floor.py

diff --git a/agent/agent_init.py b/agent/agent_init.py
index 9054c6f2528..e7f2ed9eac3 100644
--- a/agent/agent_init.py
+++ b/agent/agent_init.py
@@ -1358,17 +1358,6 @@ def init_agent(
     compression_in_place = is_truthy_value(
         _compression_cfg.get("in_place"), default=False
     )
-    # Allow users to lower the compression floor for models whose
-    # structured output degrades well before the default 64K tokens.
-    # Clamped to a hard-coded safety limit of 16K by the compressor.
-    _raw_floor = _compression_cfg.get("minimum_context_floor", None)
-    if _raw_floor is not None:
-        try:
-            compression_minimum_context_floor = int(_raw_floor)
-        except (TypeError, ValueError):
-            compression_minimum_context_floor = None
-    else:
-        compression_minimum_context_floor = None
 
     # Read optional explicit context_length override for the auxiliary
     # compression model. Custom endpoints often cannot report this via
@@ -1587,7 +1576,6 @@ def init_agent(
             api_mode=agent.api_mode,
             abort_on_summary_failure=compression_abort_on_summary_failure,
             max_tokens=agent.max_tokens,
-            minimum_context_floor=compression_minimum_context_floor,
         )
     agent.compression_enabled = compression_enabled
     agent.compression_in_place = compression_in_place
diff --git a/agent/context_compressor.py b/agent/context_compressor.py
index 5b446516939..fbde99bda5f 100644
--- a/agent/context_compressor.py
+++ b/agent/context_compressor.py
@@ -27,7 +27,6 @@ from agent.auxiliary_client import call_llm, _is_connection_error, aux_interrupt
 from agent.context_engine import ContextEngine
 from agent.model_metadata import (
     MINIMUM_CONTEXT_LENGTH,
-    get_configurable_minimum_context,
     get_model_context_length,
     estimate_messages_tokens_rough,
 )
@@ -684,7 +683,6 @@ class ContextCompressor(ContextEngine):
             self.max_tokens = self._coerce_max_tokens(max_tokens)
         self.threshold_tokens = self._compute_threshold_tokens(
             context_length, self.threshold_percent, self.max_tokens,
-            minimum_floor=self._minimum_context_floor,
         )
         # Recalculate token budgets for the new context length so the
         # compressor stays calibrated after a model switch (e.g. 200K → 32K).
@@ -743,7 +741,6 @@ class ContextCompressor(ContextEngine):
     @staticmethod
     def _compute_threshold_tokens(
         context_length: int, threshold_percent: float, max_tokens: int | None = None,
-        minimum_floor: int = MINIMUM_CONTEXT_LENGTH,
     ) -> int:
         """Compute the compaction trigger threshold in tokens.
 
@@ -773,7 +770,7 @@ class ContextCompressor(ContextEngine):
         if effective_window <= 0:
             effective_window = context_length
         pct_value = int(effective_window * threshold_percent)
-        floored = max(pct_value, minimum_floor)
+        floored = max(pct_value, MINIMUM_CONTEXT_LENGTH)
         # If flooring pushed the threshold to/over the effective window it can
         # never be reached. Trigger at 85% of the effective input budget so a
         # minimum-context model rides most of its budget before compacting
@@ -799,7 +796,6 @@ class ContextCompressor(ContextEngine):
         api_mode: str = "",
         abort_on_summary_failure: bool = False,
         max_tokens: int | None = None,
-        minimum_context_floor: int | None = None,
     ):
         self.model = model
         self.base_url = base_url
@@ -824,30 +820,19 @@ class ContextCompressor(ContextEngine):
         # deterministic "summary unavailable" handoff and drop the middle window.
         self.abort_on_summary_failure = abort_on_summary_failure
 
-        # Configurable compression floor — allows users with large-context
-        # models that degrade before 64K tokens to lower the bar (never below
-        # the hard-coded safety limit of 16K).  When None, the default
-        # MINIMUM_CONTEXT_LENGTH (64K) applies.
-        self._minimum_context_floor = get_configurable_minimum_context(
-            minimum_context_floor
-        )
-
         self.context_length = get_model_context_length(
             model, base_url=base_url, api_key=api_key,
             config_context_length=config_context_length,
             provider=provider,
         )
-        # Floor: never compress below the configured minimum context floor
-        # even if the percentage would suggest a lower value.  This prevents
-        # premature compression on large-context models at 50% while keeping
-        # the % sane for models right at the minimum. _compute_threshold_tokens
-        # also guards the degenerate case where the floor would equal/exceed the
+        # Floor: never compress below MINIMUM_CONTEXT_LENGTH tokens even if
+        # the percentage would suggest a lower value.  This prevents premature
+        # compression on large-context models at 50% while keeping the % sane
+        # for models right at the minimum. _compute_threshold_tokens also
+        # guards the degenerate case where the floor would equal/exceed the
         # window (small models), so auto-compression can still fire (#14690).
-        # The floor is configurable via compression.minimum_context_floor for
-        # models whose structured output degrades well below 64K tokens.
         self.threshold_tokens = self._compute_threshold_tokens(
             self.context_length, threshold_percent, self.max_tokens,
-            minimum_floor=self._minimum_context_floor,
         )
         self.compression_count = 0
 
diff --git a/agent/conversation_compression.py b/agent/conversation_compression.py
index 24f3e1e691b..ba67f036954 100644
--- a/agent/conversation_compression.py
+++ b/agent/conversation_compression.py
@@ -94,15 +94,9 @@ def check_compression_model_feasibility(agent: Any) -> None:
         )
         from agent.model_metadata import (
             MINIMUM_CONTEXT_LENGTH,
-            get_configurable_minimum_context,
             get_model_context_length,
         )
 
-        # Configurable compression floor from the compressor instance
-        _compression_floor = getattr(
-            agent.context_compressor, "_minimum_context_floor", MINIMUM_CONTEXT_LENGTH
-        )
-
         client, aux_model = get_text_auxiliary_client(
             "compression",
             main_runtime=agent._current_main_runtime(),
@@ -162,18 +156,18 @@ def check_compression_model_feasibility(agent: Any) -> None:
         )
 
         # Hard floor: the auxiliary compression model must have at least
-        # the configured compression floor's worth of context.  The main model
-        # is already required to meet its floor (checked earlier in
+        # MINIMUM_CONTEXT_LENGTH (64K) tokens of context.  The main model
+        # is already required to meet this floor (checked earlier in
         # __init__), so the compression model must too — otherwise it
         # cannot summarise a full threshold-sized window of main-model
         # content.  Mirrors the main-model rejection pattern.
-        if aux_context and aux_context < _compression_floor:
+        if aux_context and aux_context < MINIMUM_CONTEXT_LENGTH:
             raise ValueError(
                 f"Auxiliary compression model {aux_model} has a context "
                 f"window of {aux_context:,} tokens, which is below the "
-                f"minimum {_compression_floor:,} required by Hermes "
+                f"minimum {MINIMUM_CONTEXT_LENGTH:,} required by Hermes "
                 f"Agent.  Choose a compression model with at least "
-                f"{_compression_floor // 1000}K context (set "
+                f"{MINIMUM_CONTEXT_LENGTH // 1000}K context (set "
                 f"auxiliary.compression.model in config.yaml), or set "
                 f"auxiliary.compression.context_length to override the "
                 f"detected value if it is wrong."
diff --git a/agent/model_metadata.py b/agent/model_metadata.py
index cba5b0ac423..4493eae5f1f 100644
--- a/agent/model_metadata.py
+++ b/agent/model_metadata.py
@@ -184,25 +184,6 @@ DEFAULT_FALLBACK_CONTEXT = CONTEXT_PROBE_TIERS[0]
 # Sessions, model switches, and cron jobs should reject models below this.
 MINIMUM_CONTEXT_LENGTH = 64_000
 
-# Lower bound for user-configured compression floor overrides.
-# Users with large-context models that degrade before 64K tokens
-# (e.g. Gemini Flash via proxies) can use ``compression.minimum_context_floor``
-# in config.yaml to lower this, but never below this hard safety limit.
-_MINIMUM_CONTEXT_FLOOR_HARD_LIMIT = 16_000
-
-
-def get_configurable_minimum_context(config_floor: int | None = None) -> int:
-    """Return the effective minimum context floor for compression.
-
-    When *config_floor* is provided (from ``compression.minimum_context_floor``),
-    it is clamped to the hard limit and returned, allowing users to lower the
-    default 64K compression floor for models that degrade earlier.  When
-    *config_floor* is ``None`` the default ``MINIMUM_CONTEXT_LENGTH`` is used.
-    """
-    if config_floor is None:
-        return MINIMUM_CONTEXT_LENGTH
-    return max(config_floor, _MINIMUM_CONTEXT_FLOOR_HARD_LIMIT)
-
 # Thin fallback defaults — only broad model family patterns.
 # These fire only when provider is unknown AND models.dev/OpenRouter/Anthropic
 # all miss. Replaced the previous 80+ entry dict.
diff --git a/gateway/run.py b/gateway/run.py
index 03695956b7c..4b285287b22 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -13490,7 +13490,6 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
         ("model", "max_tokens"),
         ("compression", "enabled"),
         ("compression", "threshold"),
-        ("compression", "minimum_context_floor"),
         ("compression", "target_ratio"),
         ("compression", "protect_last_n"),
         ("agent", "disabled_toolsets"),
diff --git a/tests/gateway/test_agent_cache.py b/tests/gateway/test_agent_cache.py
index ce84d0a5945..559e1c0e96c 100644
--- a/tests/gateway/test_agent_cache.py
+++ b/tests/gateway/test_agent_cache.py
@@ -226,7 +226,6 @@ class TestExtractCacheBustingConfig:
                 "compression": {
                     "enabled": False,
                     "threshold": 0.6,
-                    "minimum_context_floor": 32_000,
                     "target_ratio": 0.3,
                     "protect_last_n": 25,
                     "some_other_key": "ignored",
@@ -235,7 +234,6 @@ class TestExtractCacheBustingConfig:
         )
         assert out["compression.enabled"] is False
         assert out["compression.threshold"] == 0.6
-        assert out["compression.minimum_context_floor"] == 32_000
         assert out["compression.target_ratio"] == 0.3
         assert out["compression.protect_last_n"] == 25
 
@@ -388,7 +386,7 @@ class TestExtractCacheBustingConfig:
         extracted cache_keys change produces a new signature."""
         from gateway.run import GatewayRunner
 
-        runtime = {"api_key": "***", "base_url": "u", "provider": "p"}
+        runtime = {"api_key": "k", "base_url": "u", "provider": "p"}
         cfg_before = {
             "model": {"context_length": 200_000},
             "compression": {"threshold": 0.50, "enabled": True},
@@ -411,25 +409,6 @@ class TestExtractCacheBustingConfig:
             "gateway's cached agent so the new threshold takes effect."
         )
 
-    def test_minimum_context_floor_edit_busts_cache(self):
-        """Gateway sessions must rebuild the agent when the new floor changes."""
-        from gateway.run import GatewayRunner
-
-        runtime = {"api_key": "***", "base_url": "u", "provider": "p"}
-        cfg_before = {"compression": {"minimum_context_floor": 64_000}}
-        cfg_after = {"compression": {"minimum_context_floor": 32_000}}
-
-        sig_before = GatewayRunner._agent_config_signature(
-            "m", runtime, [], "",
-            cache_keys=GatewayRunner._extract_cache_busting_config(cfg_before),
-        )
-        sig_after = GatewayRunner._agent_config_signature(
-            "m", runtime, [], "",
-            cache_keys=GatewayRunner._extract_cache_busting_config(cfg_after),
-        )
-
-        assert sig_before != sig_after
-
 
 class TestAgentCacheLifecycle:
     """End-to-end cache behavior with real AIAgent construction."""
diff --git a/tests/run_agent/test_compression_feasibility.py b/tests/run_agent/test_compression_feasibility.py
index ba9dc8a3649..3be0f0235a3 100644
--- a/tests/run_agent/test_compression_feasibility.py
+++ b/tests/run_agent/test_compression_feasibility.py
@@ -31,7 +31,6 @@ def _make_agent(
     compression_enabled: bool = True,
     threshold_percent: float = 0.50,
     main_context: int = 200_000,
-    minimum_context_floor: int = 64_000,
 ) -> AIAgent:
     """Build a minimal AIAgent with a compressor, skipping __init__."""
     agent = AIAgent.__new__(AIAgent)
@@ -58,7 +57,6 @@ def _make_agent(
     compressor = MagicMock(spec=ContextCompressor)
     compressor.context_length = main_context
     compressor.threshold_tokens = int(main_context * threshold_percent)
-    compressor._minimum_context_floor = minimum_context_floor
     agent.context_compressor = compressor
 
     return agent
@@ -123,30 +121,6 @@ def test_rejects_aux_below_minimum_context(mock_get_client, mock_ctx_len):
     assert "below the minimum" in err
 
 
-@patch("agent.model_metadata.get_model_context_length", return_value=32_768)
-@patch("agent.auxiliary_client.get_text_auxiliary_client")
-def test_configured_floor_allows_smaller_aux_model(mock_get_client, mock_ctx_len):
-    """A lowered compression floor also lowers the aux-model hard floor."""
-    agent = _make_agent(
-        main_context=96_000,
-        threshold_percent=0.50,
-        minimum_context_floor=32_000,
-    )
-    mock_client = MagicMock()
-    mock_client.base_url = "https://openrouter.ai/api/v1"
-    mock_client.api_key = "sk-aux"
-    mock_get_client.return_value = (mock_client, "small-aux-model")
-
-    messages = []
-    agent._emit_status = lambda msg: messages.append(msg)
-
-    agent._check_compression_model_feasibility()
-
-    assert messages
-    assert "Auto-lowered" in messages[0]
-    assert agent.context_compressor.threshold_tokens == 32_768
-
-
 @patch("agent.model_metadata.get_model_context_length", return_value=200_000)
 @patch("agent.auxiliary_client.get_text_auxiliary_client")
 def test_no_warning_when_aux_context_sufficient(mock_get_client, mock_ctx_len):
diff --git a/tests/run_agent/test_compression_minimum_floor.py b/tests/run_agent/test_compression_minimum_floor.py
deleted file mode 100644
index b056a5a40ac..00000000000
--- a/tests/run_agent/test_compression_minimum_floor.py
+++ /dev/null
@@ -1,75 +0,0 @@
-"""Configurable minimum context compression floor (#31600)."""
-
-from unittest.mock import patch
-
-from agent.model_metadata import (
-    MINIMUM_CONTEXT_LENGTH,
-    _MINIMUM_CONTEXT_FLOOR_HARD_LIMIT,
-    get_configurable_minimum_context,
-)
-
-
-class TestConfigurableMinimumContext:
-    """Unit tests for ``get_configurable_minimum_context``."""
-
-    def test_default_returns_minimum_context_length(self):
-        assert get_configurable_minimum_context(None) == MINIMUM_CONTEXT_LENGTH
-        assert get_configurable_minimum_context() == MINIMUM_CONTEXT_LENGTH
-
-    def test_config_floor_respected(self):
-        assert get_configurable_minimum_context(32_000) == 32_000
-        assert get_configurable_minimum_context(65_536) == 65_536
-
-    def test_config_floor_clamped_to_hard_limit(self):
-        assert get_configurable_minimum_context(1_000) == _MINIMUM_CONTEXT_FLOOR_HARD_LIMIT
-        assert get_configurable_minimum_context(0) == _MINIMUM_CONTEXT_FLOOR_HARD_LIMIT
-        assert get_configurable_minimum_context(-1) == _MINIMUM_CONTEXT_FLOOR_HARD_LIMIT
-        assert get_configurable_minimum_context(15_999) == _MINIMUM_CONTEXT_FLOOR_HARD_LIMIT
-
-
-class TestContextCompressorFloor:
-    """Verify ContextCompressor uses the configurable floor in real paths."""
-
-    def test_default_floor_in_threshold(self):
-        from agent.context_compressor import ContextCompressor
-
-        with patch("agent.context_compressor.get_model_context_length", return_value=100_000):
-            cc = ContextCompressor(model="test", quiet_mode=True)
-
-        assert cc._minimum_context_floor == MINIMUM_CONTEXT_LENGTH
-        assert cc.threshold_tokens == MINIMUM_CONTEXT_LENGTH
-
-    def test_small_model_with_lowered_floor(self):
-        from agent.context_compressor import ContextCompressor
-
-        with patch("agent.context_compressor.get_model_context_length", return_value=48_000):
-            cc = ContextCompressor(
-                model="test", quiet_mode=True, minimum_context_floor=24_000
-            )
-
-        assert cc._minimum_context_floor == 24_000
-        assert cc.threshold_tokens == 24_000
-
-    def test_floor_dominates_on_large_models_too(self):
-        from agent.context_compressor import ContextCompressor
-
-        with patch("agent.context_compressor.get_model_context_length", return_value=1_000_000):
-            cc = ContextCompressor(
-                model="test",
-                quiet_mode=True,
-                threshold_percent=0.02,
-                minimum_context_floor=32_000,
-            )
-
-        assert cc.threshold_tokens == 32_000
-
-    def test_update_model_uses_floor(self):
-        from agent.context_compressor import ContextCompressor
-
-        with patch("agent.context_compressor.get_model_context_length", return_value=200_000):
-            cc = ContextCompressor(
-                model="test", quiet_mode=True, minimum_context_floor=40_000
-            )
-
-        cc.update_model("switched", context_length=64_000)
-        assert cc.threshold_tokens == 40_000