From 1f874dfe4467f1d74ac6dbcb585b075100f6c576 Mon Sep 17 00:00:00 2001 From: teknium1 <127238744+teknium1@users.noreply.github.com> Date: Sat, 20 Jun 2026 22:59:37 -0700 Subject: [PATCH] fix(compression): stop fallback summary triplicating the latest user ask MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When LLM summarization fails, the deterministic fallback summary rendered the latest user ask (active_task = "User asked: '<ask>'") verbatim under THREE headings — Historical Task Snapshot, Historical In-Progress State, and Historical Pending User Asks. Re-presenting an already-handled ask as unresolved in-progress/pending work made the model re-answer it AND treat the resurrected ask as the active turn, burying the genuinely-new post-compaction user message (#49307: answer repetition + new-instruction loss, P1). Keep the latest ask once, under Task Snapshot, as historical context only. The In-Progress and Pending-Asks sections now say 'Unknown / None recoverable from deterministic fallback' (consistent with the Active State / Key Decisions / Resolved Questions sections) and explicitly note the ask is historical, not outstanding. The raw turn text still appears in the verbatim 'Last Dropped Turns' transcript — that's the dropped-turn record, not a re-labeled instruction. Note: the separate role=assistant standalone-summary regurgitation (#33256) is left as-is — that role choice is constrained by strict message alternation (user collides with a user-ending head) and is already mitigated by the summary end-marker; forcing the role would risk the alternation invariant. 
Co-authored-by: r266-tech Co-authored-by: kyssta-exe --- agent/context_compressor.py | 9 +++++-- tests/agent/test_context_compressor.py | 34 ++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 2 deletions(-) diff --git a/agent/context_compressor.py b/agent/context_compressor.py index 88c0a61e922..c98cee16c2a 100644 --- a/agent/context_compressor.py +++ b/agent/context_compressor.py @@ -1253,7 +1253,10 @@ Recovered from a deterministic fallback because the LLM context summarizer was u Unknown from deterministic fallback. Inspect current repository/session state if needed. {HISTORICAL_IN_PROGRESS_HEADING} -{active_task} +Unknown from deterministic fallback — the latest user ask is recorded once under +"{HISTORICAL_TASK_HEADING}" above as historical context only. Do NOT treat it as an +unfulfilled instruction to re-answer; verify current state and continue from the +protected recent messages after this summary. ## Blocked {_bullets(blockers, limit=5)} @@ -1265,7 +1268,9 @@ None recoverable from deterministic fallback. None recoverable from deterministic fallback. {HISTORICAL_PENDING_ASKS_HEADING} -{active_task} +None recoverable from deterministic fallback. (The latest user ask is preserved once +under "{HISTORICAL_TASK_HEADING}" as historical context — it is NOT necessarily +outstanding.) ## Relevant Files {_bullets(relevant_files, limit=12)} diff --git a/tests/agent/test_context_compressor.py b/tests/agent/test_context_compressor.py index c1188562998..8f430a9d7b9 100644 --- a/tests/agent/test_context_compressor.py +++ b/tests/agent/test_context_compressor.py @@ -170,6 +170,40 @@ class TestCompress: assert c._last_summary_fallback_used is True assert c._last_summary_dropped_count == 3 + def test_fallback_summary_does_not_triplicate_latest_user_ask(self): + """Regression for #49307: the deterministic fallback summary used to + render the latest user ask verbatim under THREE headings (Task + Snapshot, In-Progress, Pending Asks). 
The model then re-answered it + and buried the genuinely-new post-compaction turn (answer repetition + + new-instruction loss). The latest ask must appear ONCE, as historical + context only — never re-presented as unfulfilled in-progress/pending + work. + """ + with patch("agent.context_compressor.get_model_context_length", return_value=100000): + c = ContextCompressor(model="test/model", quiet_mode=True) + + unique_ask = "PLEASE_COMPUTE_THE_ARITHMETIC_CHAIN_XYZ" + turns = [ + {"role": "user", "content": unique_ask}, + {"role": "assistant", "content": "working on it"}, + ] + summary = c._build_static_fallback_summary(turns, reason="provider down") + + # The triplication bug rendered the SAME ``active_task`` line — + # formatted as ``User asked: ''`` — verbatim under three + # headings (Task Snapshot, In-Progress, Pending Asks), making the + # model treat an already-handled ask as unresolved work and re-answer + # it. That exact formatted line must now appear at most ONCE (only as + # the historical Task Snapshot record). The raw ask text may still + # appear elsewhere (e.g. the "Last Dropped Turns" verbatim transcript), + # but never re-labeled as in-progress/pending work. + active_task_line = f"User asked: {unique_ask!r}" + count = summary.count(active_task_line) + assert count <= 1, ( + f"active_task line should appear at most once (was triplicated in " + f"#49307), found {count}x:\n{summary}" + ) + def test_compression_increments_count(self, compressor): msgs = self._make_messages(10) # Default config (abort_on_summary_failure=False) — fallback path