mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
fix(compression): use extract_content_or_reasoning for reasoning model summaries
Reasoning models (DeepSeek-R1, Qwen-QwQ, glm-5-turbo) sometimes put all output inside think/reasoning blocks with an empty content field. The compressor was reading raw response.choices[0].message.content directly, getting an empty string, and silently dropping middle turns without a meaningful summary. Use the existing extract_content_or_reasoning() helper (from auxiliary_client) which already handles: - Empty content + structured reasoning field fallback - XML-style think/thinking/reasoning tag stripping Also normalize dict content (llama.cpp) before extraction to prevent type errors. Tests: 3 new cases covering reasoning-only, think-tag, and normal content extraction paths.
This commit is contained in:
parent
624ad582a5
commit
834d146f08
2 changed files with 72 additions and 2 deletions
|
|
@@ -352,8 +352,16 @@ Write only the summary body. Do not include any preamble or prefix."""
|
|||
if self.summary_model:
|
||||
call_kwargs["model"] = self.summary_model
|
||||
response = call_llm(**call_kwargs)
|
||||
content = response.choices[0].message.content
|
||||
# Handle cases where content is not a string (e.g., dict from llama.cpp)
|
||||
# Normalize dict content (e.g. llama.cpp tool calls) before
|
||||
# extracting, then use extract_content_or_reasoning to handle
|
||||
# models that put all output inside think/reasoning blocks with
|
||||
# empty content field (e.g. DeepSeek-R1, Qwen-QwQ, glm-5-turbo).
|
||||
raw_content = response.choices[0].message.content
|
||||
if isinstance(raw_content, dict):
|
||||
raw_content = str(raw_content) if raw_content else ""
|
||||
response.choices[0].message.content = raw_content
|
||||
from agent.auxiliary_client import extract_content_or_reasoning
|
||||
content = extract_content_or_reasoning(response)
|
||||
if not isinstance(content, str):
|
||||
content = str(content) if content else ""
|
||||
summary = content.strip()
|
||||
|
|
|
|||
|
|
@@ -198,6 +198,68 @@ class TestNonStringContent:
|
|||
assert summary == SUMMARY_PREFIX
|
||||
|
||||
|
||||
class TestReasoningOnlyExtraction:
    """Regression: reasoning models (DeepSeek-R1, QwQ, glm-5-turbo) that put
    all output in reasoning/think blocks with empty content must still produce
    a valid summary via extract_content_or_reasoning."""

    @staticmethod
    def _chat_response(content, reasoning):
        # Shape a MagicMock like an OpenAI-style chat completion so the
        # compressor can read choices[0].message.{content,reasoning}.
        resp = MagicMock()
        resp.choices = [MagicMock()]
        resp.choices[0].message.content = content
        resp.choices[0].message.reasoning = reasoning
        return resp

    @staticmethod
    def _summarize(compressor, response, messages):
        # Run _generate_summary with call_llm patched to return `response`.
        with patch("agent.context_compressor.call_llm", return_value=response):
            return compressor._generate_summary(messages)

    @pytest.fixture
    def _compressor(self):
        with patch("agent.context_compressor.get_model_context_length", return_value=100000):
            return ContextCompressor(model="test", quiet_mode=True)

    def test_reasoning_content_extracted_as_summary(self, _compressor):
        # Empty content, everything in the structured reasoning field.
        response = self._chat_response(
            "",
            "The user was working on feature X and completed steps 1-3.",
        )
        convo = [
            {"role": "user", "content": "do something"},
            {"role": "assistant", "content": "ok"},
        ]

        summary = self._summarize(_compressor, response, convo)
        assert isinstance(summary, str)
        assert "feature X" in summary

    def test_think_blocks_stripped_and_content_used(self, _compressor):
        # Normal content path: no reasoning field, plain string content.
        response = self._chat_response(
            "Let me analyze this.\n\n## Summary\nWorked on the API module.",
            None,
        )
        convo = [
            {"role": "user", "content": "summarize"},
            {"role": "assistant", "content": "done"},
        ]

        summary = self._summarize(_compressor, response, convo)
        assert isinstance(summary, str)
        assert "API module" in summary

    def test_inline_think_tags_stripped(self, _compressor):
        # extract_content_or_reasoning strips <think/> XML tags but not emoji blocks
        response = self._chat_response(
            "<thinking>Internal reasoning here.</thinking>\n\nThe actual summary content.",
            None,
        )
        convo = [
            {"role": "user", "content": "summarize"},
            {"role": "assistant", "content": "done"},
        ]

        summary = self._summarize(_compressor, response, convo)
        assert isinstance(summary, str)
        assert "actual summary" in summary
        # Think block content should not leak into summary
        assert "Internal reasoning" not in summary
|
||||
|
||||
|
||||
class TestSummaryPrefixNormalization:
|
||||
def test_legacy_prefix_is_replaced(self):
|
||||
summary = ContextCompressor._with_summary_prefix("[CONTEXT SUMMARY]: did work")
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue