diff --git a/agent/context_compressor.py b/agent/context_compressor.py index dfb93b30aa3..5f0792be882 100644 --- a/agent/context_compressor.py +++ b/agent/context_compressor.py @@ -763,6 +763,33 @@ class ContextCompressor(ContextEngine): return "\n\n".join(parts) + def _fallback_to_main_for_compression(self, e: Exception, reason: str) -> None: + """Switch from a separate ``summary_model`` back to the main model. + + Centralises the bookkeeping shared by every fallback branch in + :meth:`_generate_summary` (model-not-found, timeout, JSON decode, + unknown error): record the aux-model failure for ``/usage``-style + callers, clear the summary model so the next call uses the main one, + and clear the cooldown so the immediate retry can run. + + ``reason`` is a short human-readable phrase ("unavailable", + "timed out", "returned invalid JSON", "failed") that is interpolated + into the warning log. + """ + self._summary_model_fallen_back = True + logging.warning( + "Summary model '%s' %s (%s). " + "Falling back to main model '%s' for compression.", + self.summary_model, reason, e, self.model, + ) + _err_text = str(e).strip() or e.__class__.__name__ + if len(_err_text) > 220: + _err_text = _err_text[:217].rstrip() + "..." + self._last_aux_model_failure_error = _err_text + self._last_aux_model_failure_model = self.summary_model + self.summary_model = "" # empty = use main model + self._summary_failure_cooldown_until = 0.0 # no cooldown — retry immediately + def _generate_summary(self, turns_to_summarize: List[Dict[str, Any]], focus_topic: str = None) -> Optional[str]: """Generate a structured summary of conversation turns. @@ -961,28 +988,42 @@ The user has requested that this compaction PRIORITISE preserving all informatio _status in (408, 429, 502, 504) or "timeout" in _err_str ) + # Non-JSON / malformed-body responses from misconfigured providers + # or proxies (e.g. an HTML 502 page returned with + # ``Content-Type: application/json``) bubble up as + # ``json.JSONDecodeError`` from the OpenAI SDK's ``response.json()``, + # or as a wrapping ``APIResponseValidationError`` whose message + # carries the substring "expecting value". Treat these like a + # transient provider failure: one retry on the main model, then a + # short cooldown. Issue #22244. + _is_json_decode = ( + isinstance(e, json.JSONDecodeError) + or "expecting value" in _err_str + ) + if _is_json_decode and not _is_model_not_found and not _is_timeout: + logger.error( + "Context compression failed: auxiliary LLM returned a " + "non-JSON response. provider=%s summary_model=%s " + "main_model=%s base_url=%s err=%s", + self.provider or "auto", + self.summary_model or "(main)", + self.model, + self.base_url or "default", + e, + ) if ( - (_is_model_not_found or _is_timeout) + (_is_model_not_found or _is_timeout or _is_json_decode) and self.summary_model and self.summary_model != self.model and not getattr(self, "_summary_model_fallen_back", False) ): - self._summary_model_fallen_back = True - logging.warning( - "Summary model '%s' unavailable (%s). " - "Falling back to main model '%s' for compression.", - self.summary_model, e, self.model, - ) - # Record the aux-model failure so callers can warn the user - # even if the retry-on-main succeeds — a misconfigured aux - # model is something the user needs to fix. - _err_text = str(e).strip() or e.__class__.__name__ - if len(_err_text) > 220: - _err_text = _err_text[:217].rstrip() + "..." - self._last_aux_model_failure_error = _err_text - self._last_aux_model_failure_model = self.summary_model - self.summary_model = "" # empty = use main model - self._summary_failure_cooldown_until = 0.0 # no cooldown + if _is_json_decode: + _reason = "returned invalid JSON" + elif _is_model_not_found: + _reason = "unavailable" + else: + _reason = "timed out" + self._fallback_to_main_for_compression(e, _reason) return self._generate_summary(turns_to_summarize, focus_topic=focus_topic) # retry immediately # Unknown-error best-effort retry on main model. Losing N turns of @@ -999,26 +1040,13 @@ The user has requested that this compaction PRIORITISE preserving all informatio and self.summary_model != self.model and not getattr(self, "_summary_model_fallen_back", False) ): - self._summary_model_fallen_back = True - logging.warning( - "Summary model '%s' failed (%s). " - "Retrying on main model '%s' before giving up.", - self.summary_model, e, self.model, - ) - # Record the aux-model failure (see 404 branch above) — user - # should know their configured model is broken even if main - # recovers the call. - _err_text = str(e).strip() or e.__class__.__name__ - if len(_err_text) > 220: - _err_text = _err_text[:217].rstrip() + "..." - self._last_aux_model_failure_error = _err_text - self._last_aux_model_failure_model = self.summary_model - self.summary_model = "" # empty = use main model - self._summary_failure_cooldown_until = 0.0 + self._fallback_to_main_for_compression(e, "failed") return self._generate_summary(turns_to_summarize, focus_topic=focus_topic) - # Transient errors (timeout, rate limit, network) — shorter cooldown - _transient_cooldown = 60 + # Transient errors (timeout, rate limit, network, JSON decode) — + # shorter cooldown for JSON decode since the body shape can flip + # back to valid quickly when an upstream proxy recovers. + _transient_cooldown = 30 if _is_json_decode else 60 self._summary_failure_cooldown_until = time.monotonic() + _transient_cooldown err_text = str(e).strip() or e.__class__.__name__ if len(err_text) > 220: diff --git a/tests/agent/test_context_compressor.py b/tests/agent/test_context_compressor.py index 572ebce12fa..7817930851e 100644 --- a/tests/agent/test_context_compressor.py +++ b/tests/agent/test_context_compressor.py @@ -400,6 +400,104 @@ class TestSummaryFallbackToMainModel: assert result is None assert c._summary_model_fallen_back is True + def test_json_decode_error_falls_back_to_main_and_succeeds(self): + """JSONDecodeError from the OpenAI SDK's ``response.json()`` (raised + when a misconfigured proxy returns HTML/plain-text with + ``Content-Type: application/json``) should trigger the same + retry-on-main path as 404/timeout. Issue #22244.""" + import json as _json + + mock_ok = MagicMock() + mock_ok.choices = [MagicMock()] + mock_ok.choices[0].message.content = "summary via main model" + + # Simulate the SDK raising a raw JSONDecodeError with a realistic + # error message ("Expecting value: line X column Y char Z"). + err_json = _json.JSONDecodeError( + "Expecting value", "...", 0 + ) + + with patch("agent.context_compressor.get_model_context_length", return_value=100000): + c = ContextCompressor( + model="main-model", + summary_model_override="aux-via-broken-proxy", + quiet_mode=True, + ) + + with patch( + "agent.context_compressor.call_llm", + side_effect=[err_json, mock_ok], + ) as mock_call: + result = c._generate_summary(self._msgs()) + + assert mock_call.call_count == 2 + assert mock_call.call_args_list[0].kwargs.get("model") == "aux-via-broken-proxy" + assert "model" not in mock_call.call_args_list[1].kwargs + assert result is not None + assert "summary via main model" in result + # Aux-model failure recorded so /usage / gateway warnings can surface it + assert c._last_aux_model_failure_model == "aux-via-broken-proxy" + assert c._last_aux_model_failure_error is not None + # The 220-char cap is shared with other fallback branches + assert len(c._last_aux_model_failure_error) <= 220 + + def test_json_decode_error_substring_match_in_wrapped_exception(self): + """When the OpenAI SDK wraps the raw JSONDecodeError inside its own + ``APIResponseValidationError`` (or similar), ``isinstance`` no longer + matches but the substring "expecting value" still appears in + ``str(e)``. We detect this case by string match and fall back the + same way.""" + mock_ok = MagicMock() + mock_ok.choices = [MagicMock()] + mock_ok.choices[0].message.content = "summary via main model" + + # A plain Exception with the canonical JSON decode error text — what + # the SDK's APIResponseValidationError looks like at str() time. + err_wrapped = Exception("Expecting value: line 1 column 1 (char 0)") + + with patch("agent.context_compressor.get_model_context_length", return_value=100000): + c = ContextCompressor( + model="main-model", + summary_model_override="aux-model", + quiet_mode=True, + ) + + with patch( + "agent.context_compressor.call_llm", + side_effect=[err_wrapped, mock_ok], + ) as mock_call: + result = c._generate_summary(self._msgs()) + + assert mock_call.call_count == 2 + assert result is not None + assert "summary via main model" in result + + def test_json_decode_error_on_main_uses_short_cooldown(self): + """When already on the main model (no separate summary_model, or + fallback already happened), a JSONDecodeError should set the short + 30s cooldown, not the default 60s — provider bodies tend to + recover quickly when an upstream proxy comes back online.""" + import json as _json + + err_json = _json.JSONDecodeError("Expecting value", "", 0) + + with patch("agent.context_compressor.get_model_context_length", return_value=100000): + c = ContextCompressor( + model="main-model", + # No summary_model_override → already on main, no fallback path. + quiet_mode=True, + ) + + with patch( + "agent.context_compressor.call_llm", + side_effect=err_json, + ), patch("agent.context_compressor.time.monotonic", return_value=1000.0): + result = c._generate_summary(self._msgs()) + + assert result is None + # Short JSON-decode cooldown is 30s, not the default 60s. + assert c._summary_failure_cooldown_until == 1030.0 + class TestAuxModelFallbackSurfacedToCallers: """When summary_model fails but retry-on-main succeeds, compress() must