diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py
index 8ea986f816..1e1931d0d6 100644
--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@@ -1349,21 +1349,27 @@ def _is_auth_error(exc: Exception) -> bool:
     return "error code: 401" in err_lower or "authenticationerror" in type(exc).__name__.lower()
 
 
-def _is_unsupported_temperature_error(exc: Exception) -> bool:
-    """Detect API errors where the selected model rejects `temperature`.
+def _is_unsupported_parameter_error(exc: Exception, param: str) -> bool:
+    """Detect provider 400s for an unsupported request parameter.
 
-    Triggered by provider responses like:
-      * OpenAI / Codex Responses — ``Unsupported parameter: temperature``
-      * Copilot reasoning models — ``unsupported_parameter`` with temperature
-      * OpenRouter reasoning models — ``does not support temperature``
-      * Anthropic Opus 4.7+ via OpenAI-compat — ``temperature is not supported``
+    Different OpenAI-compatible endpoints phrase the same class of error a few
+    ways: ``Unsupported parameter: X``, ``unsupported_parameter`` with a
+    ``param`` field, ``X is not supported``, ``unknown parameter: X``,
+    ``unrecognized request argument: X``. We match on both the parameter
+    name and a generic "unsupported/unknown/unrecognized parameter" marker so
+    call sites can reactively retry without the offending key instead of
+    surfacing a noisy auxiliary failure.
 
-    The same backend can accept temperature for some models and reject it for
-    others (e.g. gpt-5.4 accepts, gpt-5.5 rejects on the same endpoint), so we
-    react to the concrete error rather than maintaining a model allowlist.
+    Generalizes the temperature-specific detector that originally shipped
+    with PR #15621 so the same retry strategy can cover ``max_tokens``,
+    ``seed``, ``top_p``, and any future quirk. Credit @nicholasrae (PR #15416)
+    for the generalization pattern.
     """
+    param_lower = (param or "").lower()
+    if not param_lower:
+        return False
     err_lower = str(exc).lower()
-    if "temperature" not in err_lower:
+    if param_lower not in err_lower:
         return False
     return any(marker in err_lower for marker in (
         "unsupported parameter",
@@ -1372,9 +1378,20 @@ def _is_unsupported_temperature_error(exc: Exception) -> bool:
         "does not support",
         "unknown parameter",
         "unrecognized request argument",
+        "unrecognized parameter",
+        "invalid parameter",
     ))
 
 
+def _is_unsupported_temperature_error(exc: Exception) -> bool:
+    """Back-compat wrapper: detect API errors where the model rejects ``temperature``.
+
+    Delegates to :func:`_is_unsupported_parameter_error`; kept as a separate
+    public symbol because existing tests and call sites import it by name.
+ """ + return _is_unsupported_parameter_error(exc, "temperature") + + def _evict_cached_clients(provider: str) -> None: """Drop cached auxiliary clients for a provider so fresh creds are used.""" normalized = _normalize_aux_provider(provider) @@ -3012,7 +3029,11 @@ def call_llm( kwargs = retry_kwargs err_str = str(first_err) - if "max_tokens" in err_str or "unsupported_parameter" in err_str: + if max_tokens is not None and ( + "max_tokens" in err_str + or "unsupported_parameter" in err_str + or _is_unsupported_parameter_error(first_err, "max_tokens") + ): kwargs.pop("max_tokens", None) kwargs["max_completion_tokens"] = max_tokens try: @@ -3299,7 +3320,11 @@ async def async_call_llm( kwargs = retry_kwargs err_str = str(first_err) - if "max_tokens" in err_str or "unsupported_parameter" in err_str: + if max_tokens is not None and ( + "max_tokens" in err_str + or "unsupported_parameter" in err_str + or _is_unsupported_parameter_error(first_err, "max_tokens") + ): kwargs.pop("max_tokens", None) kwargs["max_completion_tokens"] = max_tokens try: diff --git a/tests/agent/test_unsupported_parameter_retry.py b/tests/agent/test_unsupported_parameter_retry.py new file mode 100644 index 0000000000..99745dc120 --- /dev/null +++ b/tests/agent/test_unsupported_parameter_retry.py @@ -0,0 +1,201 @@ +"""Regression tests for the generic unsupported-parameter detector in +``agent.auxiliary_client``. + +The original temperature-specific detector (PR #15621) was generalized so the +same reactive-retry strategy covers any provider that rejects an arbitrary +request parameter — ``max_tokens``, ``seed``, ``top_p``, future quirks — not +just ``temperature``. Credit @nicholasrae (PR #15416) for the generalization +pattern. + +These tests lock in: + * ``_is_unsupported_parameter_error(exc, param)`` across common phrasings + * the back-compat wrapper ``_is_unsupported_temperature_error`` still works + * the max_tokens retry branch no longer pops a key that was never set + (``max_tokens is None`` gate) + * the max_tokens retry branch matches via the generic helper on top of the + legacy ``"max_tokens"`` / ``"unsupported_parameter"`` substring checks +""" + +from unittest.mock import patch, MagicMock, AsyncMock + +import pytest + +from agent.auxiliary_client import ( + call_llm, + async_call_llm, + _is_unsupported_parameter_error, + _is_unsupported_temperature_error, +) + + +class TestIsUnsupportedParameterError: + """The generic detector must match real provider phrasings for any param.""" + + @pytest.mark.parametrize("param,message", [ + # temperature phrasings (regression coverage via the generic API) + ("temperature", "HTTP 400: Unsupported parameter: temperature"), + ("temperature", "Error code: 400 - {'error': {'code': 'unsupported_parameter', 'param': 'temperature'}}"), + ("temperature", "this model does not support temperature"), + # max_tokens phrasings + ("max_tokens", "HTTP 400: Unsupported parameter: max_tokens"), + ("max_tokens", "Unknown parameter: max_tokens — use max_completion_tokens"), + ("max_tokens", "Invalid parameter: max_tokens is not supported"), + # arbitrary future params + ("seed", "HTTP 400: unrecognized parameter: seed"), + ("top_p", "Error: top_p is not supported for this model"), + ]) + def test_matches_real_provider_messages(self, param, message): + assert _is_unsupported_parameter_error(RuntimeError(message), param) is True + + @pytest.mark.parametrize("param,message", [ + # Param not mentioned at all + ("temperature", "HTTP 400: max_tokens is too large"), + # Param mentioned but not 
+        ("temperature", "temperature must be between 0 and 2"),
+        # Totally unrelated 400
+        ("max_tokens", "Rate limit exceeded"),
+        # Connection-level errors
+        ("temperature", "Connection reset by peer"),
+    ])
+    def test_does_not_match_unrelated_errors(self, param, message):
+        assert _is_unsupported_parameter_error(RuntimeError(message), param) is False
+
+    def test_empty_param_returns_false(self):
+        assert _is_unsupported_parameter_error(
+            RuntimeError("HTTP 400: Unsupported parameter: temperature"), ""
+        ) is False
+
+    def test_temperature_wrapper_delegates_to_generic(self):
+        """Back-compat: ``_is_unsupported_temperature_error`` still routes through."""
+        msg = "HTTP 400: Unsupported parameter: temperature"
+        assert _is_unsupported_temperature_error(RuntimeError(msg)) is True
+        # And the unrelated case still holds
+        assert _is_unsupported_temperature_error(
+            RuntimeError("max_tokens is too large")) is False
+
+
+def _dummy_response():
+    """Sentinel — real code calls ``_validate_llm_response`` which we patch out."""
+    return {"ok": True}
+
+
+class TestMaxTokensRetryHardening:
+    """The max_tokens retry branch now (a) gates on ``max_tokens is not None``
+    and (b) also matches the generic phrasings via the helper.
+    """
+
+    def test_sync_max_tokens_retry_skipped_when_max_tokens_is_none(self):
+        """No max_tokens kwarg → must not pop/retry even if the error mentions it.
+
+        Before the hardening, ``kwargs.pop("max_tokens", None)`` was safe but
+        ``kwargs["max_completion_tokens"] = max_tokens`` would set a None
+        value and hit the provider again. The gate skips the whole branch.
+        """
+        client = MagicMock()
+        client.base_url = "https://api.openai.com/v1"
+        err = RuntimeError("HTTP 400: Unsupported parameter: max_tokens")
+        client.chat.completions.create.side_effect = err
+
+        with (
+            patch("agent.auxiliary_client._resolve_task_provider_model",
+                  return_value=("openai-codex", "gpt-5.5", None, None, None)),
+            patch("agent.auxiliary_client._get_cached_client",
+                  return_value=(client, "gpt-5.5")),
+            patch("agent.auxiliary_client._validate_llm_response",
+                  side_effect=lambda resp, _task: resp),
+        ):
+            with pytest.raises(RuntimeError):
+                call_llm(
+                    task="session_search",
+                    messages=[{"role": "user", "content": "hi"}],
+                    temperature=0.3,
+                    # max_tokens omitted on purpose
+                )
+
+        # Only the initial attempt — no retry because the gate blocked it
+        assert client.chat.completions.create.call_count == 1
+
+    def test_sync_max_tokens_retry_matches_generic_phrasing(self):
+        """A 400 saying "Unknown parameter: max_tokens" (no ``unsupported_parameter``
+        token) still triggers the retry: the message matches the legacy
+        ``"max_tokens"`` substring check and, independently, the generic helper.
+ """ + client = MagicMock() + client.base_url = "https://api.openai.com/v1" + err = RuntimeError("Unknown parameter: max_tokens") + response = _dummy_response() + client.chat.completions.create.side_effect = [err, response] + + with ( + patch("agent.auxiliary_client._resolve_task_provider_model", + return_value=("openai-codex", "gpt-5.5", None, None, None)), + patch("agent.auxiliary_client._get_cached_client", + return_value=(client, "gpt-5.5")), + patch("agent.auxiliary_client._validate_llm_response", + side_effect=lambda resp, _task: resp), + ): + result = call_llm( + task="session_search", + messages=[{"role": "user", "content": "hi"}], + temperature=0.3, + max_tokens=512, + ) + + assert result is response + assert client.chat.completions.create.call_count == 2 + second_call = client.chat.completions.create.call_args_list[1] + assert "max_tokens" not in second_call.kwargs + assert second_call.kwargs["max_completion_tokens"] == 512 + + @pytest.mark.asyncio + async def test_async_max_tokens_retry_skipped_when_max_tokens_is_none(self): + client = MagicMock() + client.base_url = "https://api.openai.com/v1" + err = RuntimeError("HTTP 400: Unsupported parameter: max_tokens") + client.chat.completions.create = AsyncMock(side_effect=err) + + with ( + patch("agent.auxiliary_client._resolve_task_provider_model", + return_value=("openai-codex", "gpt-5.5", None, None, None)), + patch("agent.auxiliary_client._get_cached_client", + return_value=(client, "gpt-5.5")), + patch("agent.auxiliary_client._validate_llm_response", + side_effect=lambda resp, _task: resp), + ): + with pytest.raises(RuntimeError): + await async_call_llm( + task="session_search", + messages=[{"role": "user", "content": "hi"}], + temperature=0.3, + ) + + assert client.chat.completions.create.call_count == 1 + + @pytest.mark.asyncio + async def test_async_max_tokens_retry_matches_generic_phrasing(self): + client = MagicMock() + client.base_url = "https://api.openai.com/v1" + err = RuntimeError("Unknown parameter: max_tokens") + response = _dummy_response() + client.chat.completions.create = AsyncMock(side_effect=[err, response]) + + with ( + patch("agent.auxiliary_client._resolve_task_provider_model", + return_value=("openai-codex", "gpt-5.5", None, None, None)), + patch("agent.auxiliary_client._get_cached_client", + return_value=(client, "gpt-5.5")), + patch("agent.auxiliary_client._validate_llm_response", + side_effect=lambda resp, _task: resp), + ): + result = await async_call_llm( + task="session_search", + messages=[{"role": "user", "content": "hi"}], + temperature=0.3, + max_tokens=512, + ) + + assert result is response + assert client.chat.completions.create.await_count == 2 + second_call = client.chat.completions.create.call_args_list[1] + assert "max_tokens" not in second_call.kwargs + assert second_call.kwargs["max_completion_tokens"] == 512