diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index 5e8a60e76..59c91828a 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -1349,6 +1349,27 @@ def _is_auth_error(exc: Exception) -> bool: return "error code: 401" in err_lower or "authenticationerror" in type(exc).__name__.lower() +def _is_unsupported_parameter_error(exc: Exception, param: str) -> bool: + """Detect provider 400s for unsupported request parameters. + + Different OpenAI-compatible endpoints phrase this a few ways: + ``Unsupported parameter: temperature``, ``unsupported_parameter``, + ``Unknown parameter: max_tokens``, etc. Match by both the parameter name + and a generic unsupported/unknown-parameter marker so we can retry with a + safer request instead of surfacing a noisy auxiliary failure. + """ + err_lower = str(exc).lower().replace("_", " ") + param_lower = (param or "").lower().replace("_", " ") + if not param_lower or param_lower not in err_lower: + return False + return any(marker in err_lower for marker in ( + "unsupported parameter", + "unknown parameter", + "unrecognized parameter", + "invalid parameter", + )) + + def _evict_cached_clients(provider: str) -> None: """Drop cached auxiliary clients for a provider so fresh creds are used.""" normalized = _normalize_aux_provider(provider) @@ -2952,13 +2973,26 @@ def call_llm( if _is_anthropic_compat_endpoint(resolved_provider, _client_base): kwargs["messages"] = _convert_openai_images_to_anthropic(kwargs["messages"]) - # Handle max_tokens vs max_completion_tokens retry, then payment fallback. + # Handle unsupported parameter retries, then payment/auth fallback. 
try: return _validate_llm_response( client.chat.completions.create(**kwargs), task) except Exception as first_err: err_str = str(first_err) - if "max_tokens" in err_str or "unsupported_parameter" in err_str: + if _is_unsupported_parameter_error(first_err, "temperature") and "temperature" in kwargs: + kwargs.pop("temperature", None) + try: + return _validate_llm_response( + client.chat.completions.create(**kwargs), task) + except Exception as retry_err: + # Keep processing the new error below. Providers sometimes + # reject temperature first, then reveal max_tokens on retry. + first_err = retry_err + err_str = str(first_err) + + if max_tokens is not None and ( + "max_tokens" in err_str or _is_unsupported_parameter_error(first_err, "max_tokens") + ): kwargs.pop("max_tokens", None) kwargs["max_completion_tokens"] = max_tokens try: @@ -3222,7 +3256,20 @@ async def async_call_llm( await client.chat.completions.create(**kwargs), task) except Exception as first_err: err_str = str(first_err) - if "max_tokens" in err_str or "unsupported_parameter" in err_str: + if _is_unsupported_parameter_error(first_err, "temperature") and "temperature" in kwargs: + kwargs.pop("temperature", None) + try: + return _validate_llm_response( + await client.chat.completions.create(**kwargs), task) + except Exception as retry_err: + # Keep processing the new error below. Providers sometimes + # reject temperature first, then reveal max_tokens on retry. 
+ first_err = retry_err + err_str = str(first_err) + + if max_tokens is not None and ( + "max_tokens" in err_str or _is_unsupported_parameter_error(first_err, "max_tokens") + ): kwargs.pop("max_tokens", None) kwargs["max_completion_tokens"] = max_tokens try: diff --git a/tests/agent/test_auxiliary_client.py b/tests/agent/test_auxiliary_client.py index 5ee0f1265..9685c47b9 100644 --- a/tests/agent/test_auxiliary_client.py +++ b/tests/agent/test_auxiliary_client.py @@ -1032,6 +1032,81 @@ class TestStaleBaseUrlWarning: assert mod._stale_base_url_warned is True +class TestAuxiliaryUnsupportedParameterRetry: + def test_sync_call_retries_without_temperature_when_endpoint_rejects_it(self): + client = MagicMock() + client.base_url = "https://chatgpt.com/backend-api/codex/" + response = MagicMock() + response.choices = [MagicMock(message=MagicMock(content="ok"))] + calls = [] + + def create(**kwargs): + calls.append(dict(kwargs)) + if len(calls) == 1: + raise Exception( + "HTTP 400: {'detail': 'Unsupported parameter: temperature'}" + ) + return response + + client.chat.completions.create.side_effect = create + + with patch( + "agent.auxiliary_client._get_cached_client", + return_value=(client, "gpt-5.5"), + ), patch( + "agent.auxiliary_client._resolve_task_provider_model", + return_value=("openai-codex", "gpt-5.5", None, None, None), + ): + result = call_llm( + task="flush_memories", + messages=[{"role": "user", "content": "hi"}], + temperature=0.3, + max_tokens=32, + ) + + assert result is response + assert calls[0]["temperature"] == 0.3 + assert "temperature" not in calls[1] + assert calls[1]["max_tokens"] == 32 + + @pytest.mark.asyncio + async def test_async_call_retries_without_temperature_when_endpoint_rejects_it(self): + client = MagicMock() + client.base_url = "https://chatgpt.com/backend-api/codex/" + response = MagicMock() + response.choices = [MagicMock(message=MagicMock(content="ok"))] + calls = [] + + async def create(**kwargs): + calls.append(dict(kwargs)) + if 
len(calls) == 1: + raise Exception( + "HTTP 400: {'detail': 'Unsupported parameter: temperature'}" + ) + return response + + client.chat.completions.create = AsyncMock(side_effect=create) + + with patch( + "agent.auxiliary_client._get_cached_client", + return_value=(client, "gpt-5.5"), + ), patch( + "agent.auxiliary_client._resolve_task_provider_model", + return_value=("openai-codex", "gpt-5.5", None, None, None), + ): + result = await async_call_llm( + task="flush_memories", + messages=[{"role": "user", "content": "hi"}], + temperature=0.3, + max_tokens=32, + ) + + assert result is response + assert calls[0]["temperature"] == 0.3 + assert "temperature" not in calls[1] + assert calls[1]["max_tokens"] == 32 + + class TestAuxiliaryTaskExtraBody: def test_sync_call_merges_task_extra_body_from_config(self): client = MagicMock()