From 7e46533d9f3ae879b87e6561d61394d89ba9c231 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Fri, 12 Jun 2026 16:32:52 -0700 Subject: [PATCH] test: compressed-summary metadata flag set in-process, stripped on wire --- .../agent/test_compressed_summary_metadata.py | 93 +++++++++++++++++++ 1 file changed, 93 insertions(+) create mode 100644 tests/agent/test_compressed_summary_metadata.py diff --git a/tests/agent/test_compressed_summary_metadata.py b/tests/agent/test_compressed_summary_metadata.py new file mode 100644 index 00000000000..fba47767596 --- /dev/null +++ b/tests/agent/test_compressed_summary_metadata.py @@ -0,0 +1,93 @@ +"""Regression tests for the compressed-summary metadata flag (#38389). + +The compressor marks summary messages with ``COMPRESSED_SUMMARY_METADATA_KEY`` +so frontends (CLI, Desktop, gateway, TUI) can distinguish them from real +assistant/user messages without content-prefix heuristics. + +Two invariants: +1. The flag is present on exactly the summary-bearing message after compress() + (standalone insertion AND merge-into-tail). +2. The key is underscore-prefixed so the chat-completions wire sanitizer + strips it — strict gateways (Fireworks, Mistral, Moonshot/Kimi, + opencode-go) reject unknown message keys with "Extra inputs are not + permitted", poisoning the session. +""" +from unittest.mock import MagicMock, patch + +import pytest + +from agent.context_compressor import ( + COMPRESSED_SUMMARY_METADATA_KEY, + ContextCompressor, +) + + +def _make_compressor(): + with patch( + "agent.context_compressor.get_model_context_length", return_value=8000 + ): + return ContextCompressor( + model="test-model", quiet_mode=True, config_context_length=8000 + ) + + +def _make_messages(n_turns=30): + msgs = [{"role": "system", "content": "sys"}] + for i in range(n_turns): + msgs.append({"role": "user", "content": f"question {i} " + "x" * 400}) + msgs.append({"role": "assistant", "content": f"answer {i} " + "y" * 400}) + return msgs + + +def _compress(cc, msgs): + resp = MagicMock() + resp.choices[0].message.content = "## Active Task\nstuff" + with patch("agent.context_compressor.call_llm", return_value=resp): + return cc.compress(msgs, current_tokens=100_000, force=True) + + +class TestMetadataFlagSet: + def test_exactly_one_flagged_message_after_compress(self): + cc = _make_compressor() + out = _compress(cc, _make_messages()) + flagged = [ + m for m in out + if isinstance(m, dict) and m.get(COMPRESSED_SUMMARY_METADATA_KEY) + ] + assert len(flagged) == 1 + # The flagged message is the one carrying the compaction handoff. + assert "[CONTEXT COMPACTION" in flagged[0]["content"] + + def test_helper_detects_flag(self): + assert ContextCompressor._has_compressed_summary_metadata( + {COMPRESSED_SUMMARY_METADATA_KEY: True} + ) + assert not ContextCompressor._has_compressed_summary_metadata( + {"role": "assistant", "content": "hi"} + ) + assert not ContextCompressor._has_compressed_summary_metadata("not a dict") + assert not ContextCompressor._has_compressed_summary_metadata(None) + + +class TestMetadataFlagNeverReachesWire: + def test_key_is_underscore_prefixed(self): + """The wire sanitizers strip every top-level message key starting + with '_'. A bare key would reach strict gateways (Fireworks etc.) + and 400 with 'Extra inputs are not permitted'.""" + assert COMPRESSED_SUMMARY_METADATA_KEY.startswith("_") + + def test_chat_completions_transport_strips_flag(self): + from agent.transports.chat_completions import ChatCompletionsTransport + + cc = _make_compressor() + out = _compress(cc, _make_messages()) + wire = ChatCompletionsTransport().convert_messages(out, model="some-model") + assert not any( + isinstance(m, dict) and COMPRESSED_SUMMARY_METADATA_KEY in m + for m in wire + ) + # Sanitization must not destroy the in-process flag on the originals. + assert any( + isinstance(m, dict) and m.get(COMPRESSED_SUMMARY_METADATA_KEY) + for m in out + )