diff --git a/agent/context_compressor.py b/agent/context_compressor.py
index 88c0a61e922..c98cee16c2a 100644
--- a/agent/context_compressor.py
+++ b/agent/context_compressor.py
@@ -1253,7 +1253,10 @@ Recovered from a deterministic fallback because the LLM context summarizer was u
 Unknown from deterministic fallback. Inspect current repository/session state if needed.
 
 {HISTORICAL_IN_PROGRESS_HEADING}
-{active_task}
+Unknown from deterministic fallback — the latest user ask is recorded once under
+"{HISTORICAL_TASK_HEADING}" above as historical context only. Do NOT treat it as an
+unfulfilled instruction to re-answer; verify current state and continue from the
+protected recent messages after this summary.
 
 ## Blocked
 {_bullets(blockers, limit=5)}
@@ -1265,7 +1268,9 @@ None recoverable from deterministic fallback.
 None recoverable from deterministic fallback.
 
 {HISTORICAL_PENDING_ASKS_HEADING}
-{active_task}
+None recoverable from deterministic fallback. (The latest user ask is preserved once
+under "{HISTORICAL_TASK_HEADING}" as historical context — it is NOT necessarily
+outstanding.)
 
 ## Relevant Files
 {_bullets(relevant_files, limit=12)}
diff --git a/tests/agent/test_context_compressor.py b/tests/agent/test_context_compressor.py
index c1188562998..8f430a9d7b9 100644
--- a/tests/agent/test_context_compressor.py
+++ b/tests/agent/test_context_compressor.py
@@ -170,6 +170,58 @@ class TestCompress:
         assert c._last_summary_fallback_used is True
         assert c._last_summary_dropped_count == 3
 
+    def test_fallback_summary_does_not_triplicate_latest_user_ask(self):
+        """Regression for #49307: fallback summary must not re-issue the latest ask.
+
+        The deterministic fallback summary used to render the latest user ask
+        verbatim under THREE headings (Task Snapshot, In-Progress, Pending
+        Asks). The model then re-answered it and buried the genuinely-new
+        post-compaction turn (answer repetition + new-instruction loss). The
+        formatted ask line must appear at most once, and the raw ask text must
+        never be re-presented under the In-Progress or Pending Asks headings.
+        """
+        from agent.context_compressor import (
+            HISTORICAL_IN_PROGRESS_HEADING,
+            HISTORICAL_PENDING_ASKS_HEADING,
+        )
+
+        with patch("agent.context_compressor.get_model_context_length", return_value=100000):
+            c = ContextCompressor(model="test/model", quiet_mode=True)
+
+        unique_ask = "PLEASE_COMPUTE_THE_ARITHMETIC_CHAIN_XYZ"
+        turns = [
+            {"role": "user", "content": unique_ask},
+            {"role": "assistant", "content": "working on it"},
+        ]
+        summary = c._build_static_fallback_summary(turns, reason="provider down")
+
+        # The triplication bug rendered the SAME ``active_task`` line —
+        # formatted as ``User asked: {ask!r}`` — verbatim under three
+        # headings (Task Snapshot, In-Progress, Pending Asks), making the
+        # model treat an already-handled ask as unresolved work and re-answer
+        # it. That exact formatted line must now appear at most ONCE (only as
+        # the historical Task Snapshot record).
+        active_task_line = f"User asked: {unique_ask!r}"
+        count = summary.count(active_task_line)
+        assert count <= 1, (
+            f"active_task line should appear at most once (was triplicated in "
+            f"#49307), found {count}x:\n{summary}"
+        )
+
+        # The count check alone passes vacuously if the ``active_task`` format
+        # ever drifts, so also pin the raw ask text. It may still appear
+        # elsewhere (e.g. the "Last Dropped Turns" verbatim transcript), but
+        # never inside the sections that read as in-progress/pending work.
+        # Both sections are followed by a "## " heading in the fallback
+        # template ("## Blocked" / "## Relevant Files"), so cut there.
+        for heading in (HISTORICAL_IN_PROGRESS_HEADING, HISTORICAL_PENDING_ASKS_HEADING):
+            _, found, tail = summary.partition(f"{heading}\n")
+            assert found, f"heading {heading!r} missing from fallback summary:\n{summary}"
+            section = tail.partition("\n## ")[0]
+            assert unique_ask not in section, (
+                f"latest ask re-presented under {heading!r}:\n{summary}"
+            )
+
     def test_compression_increments_count(self, compressor):
         msgs = self._make_messages(10)
         # Default config (abort_on_summary_failure=False) — fallback path