diff --git a/agent/context_compressor.py b/agent/context_compressor.py index 2eb896a9934..19bc0e5f0f1 100644 --- a/agent/context_compressor.py +++ b/agent/context_compressor.py @@ -1591,6 +1591,22 @@ This compaction should PRIORITISE preserving all information related to the focu # Handle cases where content is not a string (e.g., dict from llama.cpp) if not isinstance(content, str): content = str(content) if content else "" + # Some OpenAI-compatible proxies (e.g. cmkey.cn, one-api channels) + # return a well-formed HTTP 200 with an empty or whitespace-only + # ``content`` instead of an error or empty ``choices``. That payload + # passes ``_validate_llm_response`` (a ``message`` exists), so it + # reaches here and would otherwise be stored as a prefix-only + # summary with no body — silently wiping the compacted turns and + # making the model forget the in-progress task (#11978, #11914). + # Treat empty content as a failure so it routes through the same + # main-model fallback + cooldown machinery as a transport error, + # rather than replacing real context with an empty summary. + if not content.strip(): + raise RuntimeError( + "Context compression LLM returned empty content " + f"(provider={self.provider or 'auto'} " + f"model={self.summary_model or self.model})" + ) # Redact the summary output as well — the summarizer LLM may # ignore prompt instructions and echo back secrets verbatim. summary = redact_sensitive_text(content.strip()) @@ -1601,16 +1617,27 @@ This compaction should PRIORITISE preserving all information related to the focu self._last_summary_error = None self._last_summary_auth_failure = False return self._with_summary_prefix(summary) - except RuntimeError: - # No provider configured — long cooldown, unlikely to self-resolve - self._summary_failure_cooldown_until = time.monotonic() + _SUMMARY_FAILURE_COOLDOWN_SECONDS - self._last_summary_error = "no auxiliary LLM provider configured" - logger.warning("Context compression: no provider available for " - "summary. Middle turns will be dropped without summary " - "for %d seconds.", - _SUMMARY_FAILURE_COOLDOWN_SECONDS) - return None except Exception as e: + # ``call_llm`` raises ``RuntimeError`` for two very different cases: + # 1. No provider configured ("No LLM provider configured ...") — + # a permanent misconfiguration, long cooldown is correct. + # 2. An empty/invalid response from a configured provider + # (``_validate_llm_response`` empty-``choices``/``None``, or our + # empty-``content`` guard above) — a transient/proxy fault that + # should fall back to the main model first, exactly like the + # transport errors handled below. + # Only (1) belongs in the long no-provider cooldown; (2) and every + # other exception flow into the generic fallback logic so they get + # a main-model retry before any cooldown. (#11978, #11914) + if isinstance(e, RuntimeError) and "no llm provider configured" in str(e).lower(): + # No provider configured — long cooldown, unlikely to self-resolve + self._summary_failure_cooldown_until = time.monotonic() + _SUMMARY_FAILURE_COOLDOWN_SECONDS + self._last_summary_error = "no auxiliary LLM provider configured" + logger.warning("Context compression: no provider available for " + "summary. Middle turns will be dropped without summary " + "for %d seconds.", + _SUMMARY_FAILURE_COOLDOWN_SECONDS) + return None # If the summary model is different from the main model and the # error looks permanent (model not found, 503, 404), fall back to # using the main model instead of entering cooldown that leaves diff --git a/tests/agent/test_context_compressor.py b/tests/agent/test_context_compressor.py index 084cb446b4d..cef5f66da81 100644 --- a/tests/agent/test_context_compressor.py +++ b/tests/agent/test_context_compressor.py @@ -357,11 +357,41 @@ class TestNonStringContent: assert isinstance(summary, str) assert summary.startswith(SUMMARY_PREFIX) - def test_none_content_coerced_to_empty(self): + def test_none_content_treated_as_failure_not_empty_summary(self): + """Regression #11978/#11914: a well-formed response with ``content=None`` + (some OpenAI-compatible proxies, e.g. cmkey.cn, return HTTP 200 with + null/empty content) must NOT be stored as a prefix-only summary that + silently wipes the compacted turns. It is treated as a summary failure + and routed through cooldown so the turns are dropped without a summary + rather than replaced by an empty one.""" mock_response = MagicMock() mock_response.choices = [MagicMock()] mock_response.choices[0].message.content = None + with patch("agent.context_compressor.get_model_context_length", return_value=100000): + # summary_model == model here, so no fallback path: straight to cooldown. + c = ContextCompressor(model="test", quiet_mode=True) + + messages = [ + {"role": "user", "content": "do something"}, + {"role": "assistant", "content": "ok"}, + ] + + with patch("agent.context_compressor.call_llm", return_value=mock_response): + summary = c._generate_summary(messages) + # Empty content → failure → None (drop turns), NOT a prefix-only summary. + assert summary is None + assert summary != SUMMARY_PREFIX + # Transient cooldown engaged so we don't immediately retry the bad proxy. + assert c._summary_failure_cooldown_until > 0 + + def test_empty_string_content_treated_as_failure(self): + """An empty-string (or whitespace-only) ``content`` is handled the same + as ``None`` — failure, not an empty summary (#11978).""" + mock_response = MagicMock() + mock_response.choices = [MagicMock()] + mock_response.choices[0].message.content = " \n " + with patch("agent.context_compressor.get_model_context_length", return_value=100000): c = ContextCompressor(model="test", quiet_mode=True) @@ -372,9 +402,36 @@ class TestNonStringContent: with patch("agent.context_compressor.call_llm", return_value=mock_response): summary = c._generate_summary(messages) - # None content → empty string → standardized compaction handoff prefix added - assert summary is not None - assert summary == SUMMARY_PREFIX + assert summary is None + assert c._summary_failure_cooldown_until > 0 + + def test_empty_content_falls_back_to_main_model(self): + """When the auxiliary summary model returns empty content and a distinct + main model is configured, compression falls back to the main model + before entering cooldown (#11978 glm-5.1 → glm-5 path).""" + mock_response = MagicMock() + mock_response.choices = [MagicMock()] + mock_response.choices[0].message.content = "" + + with patch("agent.context_compressor.get_model_context_length", return_value=100000): + c = ContextCompressor( + model="glm-5", + summary_model_override="glm-5.1", + quiet_mode=True, + ) + + messages = [ + {"role": "user", "content": "do something"}, + {"role": "assistant", "content": "ok"}, + ] + + with patch("agent.context_compressor.call_llm", return_value=mock_response) as mock_call: + summary = c._generate_summary(messages) + # Two calls: aux model (glm-5.1) then fallback to main (glm-5). + assert mock_call.call_count == 2 + assert c._summary_model_fallen_back is True + assert summary is None + assert c._summary_failure_cooldown_until > 0 def test_summary_call_does_not_force_temperature(self): mock_response = MagicMock()