From b2c84a16267245dfb34b2c497113b425542ef446 Mon Sep 17 00:00:00 2001 From: kshitijk4poor <82637225+kshitijk4poor@users.noreply.github.com> Date: Mon, 22 Jun 2026 16:33:18 +0530 Subject: [PATCH] fix(agent): defer preflight compaction until real usage after a compaction (#23767, #36718) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After a compaction, the post-compression path parks last_prompt_tokens=-1 and sets awaiting_real_usage_after_compression=True, but last_real_prompt_tokens still holds the stale pre-compression value (above threshold). should_defer_preflight_to_real_usage() hit the 'last_real_prompt_tokens >= threshold => False' short-circuit and let preflight fire a SECOND compaction before the provider reported real post-compaction usage. Add an early-return on the awaiting flag so deferral holds for exactly one turn; update_from_response() clears it. The flag-setting half (#36718) already landed on main via the in-place compaction path (conversation_compression.py); this adds the missing should_defer guard that consumes it. Credit: - @ashishpatel26 (#38133) — diagnosis + the should_defer early-return design - @Tranquil-Flow (#36769) — same #36718 fix, identical guard placement Closes #36718. 
--- agent/context_compressor.py | 12 ++++++++++++ tests/agent/test_context_compressor.py | 22 ++++++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/agent/context_compressor.py b/agent/context_compressor.py index a521fb12117..f1c6fca6f6e 100644 --- a/agent/context_compressor.py +++ b/agent/context_compressor.py @@ -878,6 +878,18 @@ class ContextCompressor(ContextEngine): """ if rough_tokens < self.threshold_tokens: return False + # Immediately after a compaction the post-compression path sets + # ``awaiting_real_usage_after_compression`` and parks + # ``last_prompt_tokens = -1``, but ``last_real_prompt_tokens`` still + # holds the STALE pre-compression value (above threshold — that's why + # compaction fired). Without this guard that stale value defeats the + # ``last_real_prompt_tokens >= threshold_tokens`` check below, so + # preflight fires a SECOND compaction before the provider has reported + # real token usage for the now-shorter conversation. Defer for exactly + # one turn; update_from_response() clears the flag when real usage + # arrives. (#36718) + if self.awaiting_real_usage_after_compression: + return True if self.last_real_prompt_tokens <= 0: return False if self.last_real_prompt_tokens >= self.threshold_tokens: diff --git a/tests/agent/test_context_compressor.py b/tests/agent/test_context_compressor.py index cef5f66da81..79e89b457bd 100644 --- a/tests/agent/test_context_compressor.py +++ b/tests/agent/test_context_compressor.py @@ -86,6 +86,28 @@ class TestPreflightDeferral: assert compressor.should_defer_preflight_to_real_usage(93_000) is False + def test_defers_immediately_after_compaction_with_stale_real_prompt(self, compressor): + """#36718: right after a compaction, last_real_prompt_tokens still holds + the stale pre-compression value (above threshold). 
The awaiting flag + must force deferral so preflight doesn't fire a SECOND compaction before + real post-compaction usage arrives.""" + compressor.threshold_tokens = 85_000 + # Stale pre-compression value — would hit the `>= threshold => False` + # short-circuit and defeat deferral without the flag guard. + compressor.last_real_prompt_tokens = 120_000 + compressor.awaiting_real_usage_after_compression = True + assert compressor.should_defer_preflight_to_real_usage(95_000) is True + + def test_resumes_normal_deferral_after_flag_cleared(self, compressor): + """Once update_from_response() clears the flag, the normal baseline/ + growth deferral logic governs again (no permanent deferral).""" + compressor.threshold_tokens = 85_000 + compressor.last_real_prompt_tokens = 120_000 + compressor.awaiting_real_usage_after_compression = False + # Stale-high real prompt with the flag cleared => the >= threshold + # short-circuit applies => no deferral. + assert compressor.should_defer_preflight_to_real_usage(95_000) is False + class TestCompress: