diff --git a/agent/context_compressor.py b/agent/context_compressor.py index 01aa2af804..16234b180a 100644 --- a/agent/context_compressor.py +++ b/agent/context_compressor.py @@ -5,6 +5,7 @@ Uses Gemini Flash (cheap/fast) to summarize middle turns while protecting head and tail context. """ +import json import logging import os from typing import Any, Dict, List, Optional @@ -82,6 +83,41 @@ class ContextCompressor: "compression_count": self.compression_count, } + @staticmethod + def _content_to_text(content: Any) -> str: + """Convert message content to plain text for summarization. + + Handles: + - str → returned as-is + - None → empty string + - list (multimodal) → text parts joined, images replaced with [image] + - other → JSON serialization or str() fallback + """ + if isinstance(content, str): + return content + if content is None: + return "" + if isinstance(content, list): + parts = [] + for item in content: + if isinstance(item, dict): + item_type = item.get("type") + if item_type == "text": + parts.append(item.get("text", "")) + elif item_type == "image_url": + parts.append("[image]") + elif item_type: + parts.append(f"[{item_type}]") + else: + parts.append(str(item)) + else: + parts.append(str(item)) + return "\n".join(part for part in parts if part) + try: + return json.dumps(content, ensure_ascii=False, sort_keys=True) + except TypeError: + return str(content) + def _generate_summary(self, turns_to_summarize: List[Dict[str, Any]]) -> Optional[str]: """Generate a concise summary of conversation turns. 
class TestContentToText:
    """Verify that _content_to_text copes with every supported content shape."""

    def test_string_passthrough(self, compressor):
        # Plain strings must come back untouched.
        rendered = compressor._content_to_text("hello")
        assert rendered == "hello"

    def test_none_returns_empty(self, compressor):
        # Tool-call-only messages carry content=None.
        rendered = compressor._content_to_text(None)
        assert rendered == ""

    def test_multimodal_text_parts(self, compressor):
        text_part = {"type": "text", "text": "describe this image"}
        image_part = {
            "type": "image_url",
            "image_url": {"url": "data:image/png;base64,AAAA"},
        }
        rendered = compressor._content_to_text([text_part, image_part])
        for fragment in ("describe this image", "[image]"):
            assert fragment in rendered

    def test_multimodal_mixed_types(self, compressor):
        payload = [
            {"type": "text", "text": "first part"},
            {"type": "audio", "audio": {"data": "..."}},
            {"type": "text", "text": "second part"},
        ]
        rendered = compressor._content_to_text(payload)
        for fragment in ("first part", "[audio]", "second part"):
            assert fragment in rendered

    def test_dict_content_json_serialized(self, compressor):
        rendered = compressor._content_to_text({"key": "value"})
        for fragment in ("key", "value"):
            assert fragment in rendered

    def test_multimodal_in_generate_summary(self):
        """Multimodal user messages should not crash _generate_summary."""
        fake_response = MagicMock()
        fake_response.choices = [MagicMock()]
        fake_response.choices[0].message.content = "[CONTEXT SUMMARY]: image was discussed"
        fake_client = MagicMock()
        fake_client.chat.completions.create.return_value = fake_response

        conversation = [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "What is in this image?"},
                    {"type": "image_url", "image_url": {"url": "data:image/png;base64,AAAA"}},
                ],
            },
            {"role": "assistant", "content": "I see a cat."},
            {"role": "user", "content": "thanks"},
        ]

        with patch("agent.context_compressor.get_model_context_length", return_value=100000):
            with patch(
                "agent.context_compressor.get_text_auxiliary_client",
                return_value=(fake_client, "test-model"),
            ):
                subject = ContextCompressor(model="test", quiet_mode=True)
                summary = subject._generate_summary(conversation)

        assert isinstance(summary, str)
        # The prompt sent to the model should contain the text, not raw list
        sent_kwargs = fake_client.chat.completions.create.call_args.kwargs
        prompt = sent_kwargs["messages"][0]["content"]
        assert "What is in this image?" in prompt
        assert "[image]" in prompt