diff --git a/agent/context_compressor.py b/agent/context_compressor.py index 11b5c5b80..359b05c42 100644 --- a/agent/context_compressor.py +++ b/agent/context_compressor.py @@ -253,18 +253,24 @@ Write only the summary body. Do not include any preamble or prefix; the system w """Pull a compress-end boundary backward to avoid splitting a tool_call / result group. - If the message just before ``idx`` is an assistant message with - tool_calls, those tool results will start at ``idx`` and would be - separated from their parent. Move backwards to include the whole - group in the summarised region. + If the boundary falls in the middle of a tool-result group (i.e. + there are consecutive tool messages before ``idx``), walk backward + past all of them to find the parent assistant message. If found, + move the boundary before the assistant so the entire + assistant + tool_results group is included in the summarised region + rather than being split (which causes silent data loss when + ``_sanitize_tool_pairs`` removes the orphaned tail results). """ if idx <= 0 or idx >= len(messages): return idx - prev = messages[idx - 1] - if prev.get("role") == "assistant" and prev.get("tool_calls"): - # The results for this assistant turn sit at idx..idx+k. - # Include the assistant message in the summarised region too. - idx -= 1 + # Walk backward past consecutive tool results + check = idx - 1 + while check >= 0 and messages[check].get("role") == "tool": + check -= 1 + # If we landed on the parent assistant with tool_calls, pull the + # boundary before it so the whole group gets summarised together. + if check >= 0 and messages[check].get("role") == "assistant" and messages[check].get("tool_calls"): + idx = check return idx def compress(self, messages: List[Dict[str, Any]], current_tokens: int = None) -> List[Dict[str, Any]]: diff --git a/tests/test_compression_boundary.py b/tests/test_compression_boundary.py new file mode 100644 index 000000000..db7bb67b8 --- /dev/null +++ b/tests/test_compression_boundary.py @@ -0,0 +1,199 @@ +"""Tests for context compression boundary alignment. + +Verifies that _align_boundary_backward correctly handles tool result groups +so that parallel tool calls are never split during compression. +""" + +import pytest +from unittest.mock import patch, MagicMock + +from agent.context_compressor import ContextCompressor + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +def _tc(call_id: str) -> dict: + """Create a minimal tool_call dict.""" + return {"id": call_id, "type": "function", "function": {"name": "test", "arguments": "{}"}} + + +def _tool_result(call_id: str, content: str = "result") -> dict: + """Create a tool result message.""" + return {"role": "tool", "tool_call_id": call_id, "content": content} + + +def _assistant_with_tools(*call_ids: str) -> dict: + """Create an assistant message with tool_calls.""" + return {"role": "assistant", "tool_calls": [_tc(cid) for cid in call_ids], "content": None} + + +def _make_compressor(**kwargs) -> ContextCompressor: + defaults = dict( + model="test-model", + threshold_percent=0.75, + protect_first_n=3, + protect_last_n=4, + quiet_mode=True, + ) + defaults.update(kwargs) + with patch("agent.context_compressor.get_model_context_length", return_value=8000): + return ContextCompressor(**defaults) + + +# --------------------------------------------------------------------------- +# _align_boundary_backward tests +# --------------------------------------------------------------------------- + +class TestAlignBoundaryBackward: + """Test that compress-end boundary never splits a tool_call/result group.""" + + def test_boundary_at_clean_position(self): + """Boundary after a user message — no adjustment needed.""" + comp = _make_compressor() + messages = [ + {"role": "system", "content": "sys"}, + {"role": "user", "content": "hello"}, + {"role": "assistant", "content": "hi"}, + {"role": "user", "content": "do something"}, + _assistant_with_tools("tc_1"), + _tool_result("tc_1", "done"), + {"role": "user", "content": "thanks"}, # idx=6 + {"role": "assistant", "content": "np"}, + ] + # Boundary at 7, messages[6] = user — no adjustment + assert comp._align_boundary_backward(messages, 7) == 7 + + def test_boundary_after_assistant_with_tools(self): + """Original case: boundary right after assistant with tool_calls.""" + comp = _make_compressor() + messages = [ + {"role": "system", "content": "sys"}, + {"role": "user", "content": "hello"}, + {"role": "assistant", "content": "hi"}, + _assistant_with_tools("tc_1", "tc_2"), # idx=3 + _tool_result("tc_1"), # idx=4 + _tool_result("tc_2"), # idx=5 + {"role": "user", "content": "next"}, + ] + # Boundary at 4, messages[3] = assistant with tool_calls → pull back to 3 + assert comp._align_boundary_backward(messages, 4) == 3 + + def test_boundary_in_middle_of_tool_results(self): + """THE BUG: boundary falls between tool results of the same group.""" + comp = _make_compressor() + messages = [ + {"role": "system", "content": "sys"}, + {"role": "user", "content": "hello"}, + {"role": "assistant", "content": "hi"}, + {"role": "user", "content": "do 5 things"}, + _assistant_with_tools("tc_A", "tc_B", "tc_C", "tc_D", "tc_E"), # idx=4 + _tool_result("tc_A", "result A"), # idx=5 + _tool_result("tc_B", "result B"), # idx=6 + _tool_result("tc_C", "result C"), # idx=7 + _tool_result("tc_D", "result D"), # idx=8 + _tool_result("tc_E", "result E"), # idx=9 + {"role": "user", "content": "ok"}, + {"role": "assistant", "content": "done"}, + ] + # Boundary at 8 — in middle of tool results. messages[7] = tool result. + # Must walk back to idx=4 (the parent assistant). + assert comp._align_boundary_backward(messages, 8) == 4 + + def test_boundary_at_last_tool_result(self): + """Boundary right after last tool result — messages[idx-1] is tool.""" + comp = _make_compressor() + messages = [ + {"role": "system", "content": "sys"}, + {"role": "user", "content": "hello"}, + {"role": "assistant", "content": "hi"}, + _assistant_with_tools("tc_1", "tc_2", "tc_3"), # idx=3 + _tool_result("tc_1"), # idx=4 + _tool_result("tc_2"), # idx=5 + _tool_result("tc_3"), # idx=6 + {"role": "user", "content": "next"}, + ] + # Boundary at 7 — messages[6] is last tool result. + # Walk back: [6]=tool, [5]=tool, [4]=tool, [3]=assistant with tools → idx=3 + assert comp._align_boundary_backward(messages, 7) == 3 + + def test_boundary_with_consecutive_tool_groups(self): + """Two consecutive tool groups — only walk back to the nearest parent.""" + comp = _make_compressor() + messages = [ + {"role": "system", "content": "sys"}, + {"role": "user", "content": "hello"}, + _assistant_with_tools("tc_1"), # idx=2 + _tool_result("tc_1"), # idx=3 + {"role": "user", "content": "more"}, + _assistant_with_tools("tc_2", "tc_3"), # idx=5 + _tool_result("tc_2"), # idx=6 + _tool_result("tc_3"), # idx=7 + {"role": "user", "content": "done"}, + ] + # Boundary at 7 — messages[6] = tool result for tc_2 group + # Walk back: [6]=tool, [5]=assistant with tools → idx=5 + assert comp._align_boundary_backward(messages, 7) == 5 + + +# --------------------------------------------------------------------------- +# End-to-end: compression must not lose tool results +# --------------------------------------------------------------------------- + +class TestCompressionToolResultPreservation: + """Verify that compress() never silently drops tool results.""" + + def test_parallel_tool_results_not_lost(self): + """The exact scenario that triggered silent data loss before the fix.""" + comp = _make_compressor(protect_first_n=3, protect_last_n=4) + + messages = [ + {"role": "system", "content": "You are helpful."}, # 0 + {"role": "user", "content": "Hello"}, # 1 + {"role": "assistant", "content": "Hi there!"}, # 2 (end of head) + {"role": "user", "content": "Read 7 files for me"}, # 3 + _assistant_with_tools("tc_A", "tc_B", "tc_C", "tc_D", "tc_E", "tc_F", "tc_G"), # 4 + _tool_result("tc_A", "content of file A"), # 5 + _tool_result("tc_B", "content of file B"), # 6 + _tool_result("tc_C", "content of file C"), # 7 + _tool_result("tc_D", "content of file D"), # 8 + _tool_result("tc_E", "content of file E"), # 9 + _tool_result("tc_F", "content of file F"), # 10 + _tool_result("tc_G", "CRITICAL DATA in file G"), # 11 ← compress_end=15-4=11 + {"role": "user", "content": "Now summarize them"}, # 12 + {"role": "assistant", "content": "Here is the summary..."}, # 13 + {"role": "user", "content": "Thanks"}, # 14 + ] + # 15 messages. compress_end = 15 - 4 = 11 (before fix: splits tool group) + + fake_summary = "[Summary of earlier conversation]" + with patch.object(comp, "_generate_summary", return_value=fake_summary): + result = comp.compress(messages, current_tokens=7000) + + # After compression, no tool results should be orphaned/lost. + # All tool results in the result must have a matching assistant tool_call. + assistant_call_ids = set() + for msg in result: + if msg.get("role") == "assistant": + for tc in msg.get("tool_calls") or []: + cid = tc.get("id", "") + if cid: + assistant_call_ids.add(cid) + + tool_result_ids = set() + for msg in result: + if msg.get("role") == "tool": + cid = msg.get("tool_call_id") + if cid: + tool_result_ids.add(cid) + + # Every tool result must have a parent — no orphans + orphaned = tool_result_ids - assistant_call_ids + assert not orphaned, f"Orphaned tool results found (data loss!): {orphaned}" + + # Every assistant tool_call must have a real result (not a stub) + for msg in result: + if msg.get("role") == "tool": + assert msg["content"] != "[Result from earlier conversation — see context summary above]", \ + f"Stub result found for {msg.get('tool_call_id')} — real result was lost"