fix: retry auxiliary calls without unsupported temperature

This commit is contained in:
Nick Rae 2026-04-24 16:17:46 -07:00
parent 00c3d848d8
commit 8821b9d674
2 changed files with 125 additions and 3 deletions

View file

@@ -1349,6 +1349,27 @@ def _is_auth_error(exc: Exception) -> bool:
return "error code: 401" in err_lower or "authenticationerror" in type(exc).__name__.lower()
def _is_unsupported_parameter_error(exc: Exception, param: str) -> bool:
"""Detect provider 400s for unsupported request parameters.
Different OpenAI-compatible endpoints phrase this a few ways:
``Unsupported parameter: temperature``, ``unsupported_parameter``,
``Unknown parameter: max_tokens``, etc. Match by both the parameter name
and a generic unsupported/unknown-parameter marker so we can retry with a
safer request instead of surfacing a noisy auxiliary failure.
"""
err_lower = str(exc).lower().replace("_", " ")
param_lower = (param or "").lower()
if not param_lower or param_lower not in err_lower:
return False
return any(marker in err_lower for marker in (
"unsupported parameter",
"unknown parameter",
"unrecognized parameter",
"invalid parameter",
))
def _evict_cached_clients(provider: str) -> None:
"""Drop cached auxiliary clients for a provider so fresh creds are used."""
normalized = _normalize_aux_provider(provider)
@@ -2952,13 +2973,26 @@ def call_llm(
if _is_anthropic_compat_endpoint(resolved_provider, _client_base):
kwargs["messages"] = _convert_openai_images_to_anthropic(kwargs["messages"])
# Handle max_tokens vs max_completion_tokens retry, then payment fallback.
# Handle unsupported parameter retries, then payment/auth fallback.
try:
return _validate_llm_response(
client.chat.completions.create(**kwargs), task)
except Exception as first_err:
err_str = str(first_err)
if "max_tokens" in err_str or "unsupported_parameter" in err_str:
if _is_unsupported_parameter_error(first_err, "temperature") and "temperature" in kwargs:
kwargs.pop("temperature", None)
try:
return _validate_llm_response(
client.chat.completions.create(**kwargs), task)
except Exception as retry_err:
# Keep processing the new error below. Providers sometimes
# reject temperature first, then reveal max_tokens on retry.
first_err = retry_err
err_str = str(first_err)
if max_tokens is not None and (
"max_tokens" in err_str or _is_unsupported_parameter_error(first_err, "max_tokens")
):
kwargs.pop("max_tokens", None)
kwargs["max_completion_tokens"] = max_tokens
try:
@@ -3222,7 +3256,20 @@ async def async_call_llm(
await client.chat.completions.create(**kwargs), task)
except Exception as first_err:
err_str = str(first_err)
if "max_tokens" in err_str or "unsupported_parameter" in err_str:
if _is_unsupported_parameter_error(first_err, "temperature") and "temperature" in kwargs:
kwargs.pop("temperature", None)
try:
return _validate_llm_response(
await client.chat.completions.create(**kwargs), task)
except Exception as retry_err:
# Keep processing the new error below. Providers sometimes
# reject temperature first, then reveal max_tokens on retry.
first_err = retry_err
err_str = str(first_err)
if max_tokens is not None and (
"max_tokens" in err_str or _is_unsupported_parameter_error(first_err, "max_tokens")
):
kwargs.pop("max_tokens", None)
kwargs["max_completion_tokens"] = max_tokens
try:

View file

@@ -1032,6 +1032,81 @@ class TestStaleBaseUrlWarning:
assert mod._stale_base_url_warned is True
class TestAuxiliaryUnsupportedParameterRetry:
    """Endpoints that reject ``temperature`` must be retried without it.

    Both tests stub the cached client so the first create() call fails with
    a provider-style 400 and the second succeeds, then verify the retry
    dropped ``temperature`` while leaving ``max_tokens`` untouched.
    """

    def test_sync_call_retries_without_temperature_when_endpoint_rejects_it(self):
        stub_client = MagicMock()
        stub_client.base_url = "https://chatgpt.com/backend-api/codex/"
        ok_response = MagicMock()
        ok_response.choices = [MagicMock(message=MagicMock(content="ok"))]
        recorded = []

        def fake_create(**kwargs):
            # Reject the first attempt with the provider's 400, accept the retry.
            recorded.append(dict(kwargs))
            if len(recorded) == 1:
                raise Exception(
                    "HTTP 400: {'detail': 'Unsupported parameter: temperature'}"
                )
            return ok_response

        stub_client.chat.completions.create.side_effect = fake_create
        with patch(
            "agent.auxiliary_client._get_cached_client",
            return_value=(stub_client, "gpt-5.5"),
        ), patch(
            "agent.auxiliary_client._resolve_task_provider_model",
            return_value=("openai-codex", "gpt-5.5", None, None, None),
        ):
            result = call_llm(
                task="flush_memories",
                messages=[{"role": "user", "content": "hi"}],
                temperature=0.3,
                max_tokens=32,
            )
        assert result is ok_response
        # First attempt carried temperature; the retry dropped only that key.
        assert recorded[0]["temperature"] == 0.3
        assert "temperature" not in recorded[1]
        assert recorded[1]["max_tokens"] == 32

    @pytest.mark.asyncio
    async def test_async_call_retries_without_temperature_when_endpoint_rejects_it(self):
        stub_client = MagicMock()
        stub_client.base_url = "https://chatgpt.com/backend-api/codex/"
        ok_response = MagicMock()
        ok_response.choices = [MagicMock(message=MagicMock(content="ok"))]
        recorded = []

        async def fake_create(**kwargs):
            # Reject the first attempt with the provider's 400, accept the retry.
            recorded.append(dict(kwargs))
            if len(recorded) == 1:
                raise Exception(
                    "HTTP 400: {'detail': 'Unsupported parameter: temperature'}"
                )
            return ok_response

        stub_client.chat.completions.create = AsyncMock(side_effect=fake_create)
        with patch(
            "agent.auxiliary_client._get_cached_client",
            return_value=(stub_client, "gpt-5.5"),
        ), patch(
            "agent.auxiliary_client._resolve_task_provider_model",
            return_value=("openai-codex", "gpt-5.5", None, None, None),
        ):
            result = await async_call_llm(
                task="flush_memories",
                messages=[{"role": "user", "content": "hi"}],
                temperature=0.3,
                max_tokens=32,
            )
        assert result is ok_response
        # First attempt carried temperature; the retry dropped only that key.
        assert recorded[0]["temperature"] == 0.3
        assert "temperature" not in recorded[1]
        assert recorded[1]["max_tokens"] == 32
class TestAuxiliaryTaskExtraBody:
def test_sync_call_merges_task_extra_body_from_config(self):
client = MagicMock()