mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-29 01:31:41 +00:00
Universal reactive fix for 'HTTP 400: Unsupported parameter: temperature' across all providers/models — not just Codex Responses.

The same backend can accept temperature for some models and reject it for others (e.g. gpt-5.4 accepts but gpt-5.5 rejects on the same OpenAI endpoint; similar patterns on Copilot, OpenRouter reasoning routes, and Anthropic Opus 4.7+ via OAI-compat). An allow/deny-list by model name does not scale.

call_llm / async_call_llm now detect the concrete 'unsupported parameter: temperature' 400 and transparently retry once without temperature. Kimi's server-managed omission and Opus 4.7+'s proactive strip stay in place — this is the safety net for everything else.

Changes:
- agent/auxiliary_client.py: add _is_unsupported_temperature_error helper; wire into both sync and async call_llm paths before the existing max_tokens/payment/auth retry ladder
- tests/agent/test_unsupported_temperature_retry.py: 19 tests covering detector phrasings, sync + async retry, no-retry-without-temperature, and non-temperature 400s not triggering the retry

Builds on PR #15620 (codex_responses fallback) which stripped temperature up front for that one api_mode. This PR closes the gap for every other provider/model combo via reactive retry.

Credit: retry approach and detector originate from @BlueBirdBack's PR #15578.

Co-authored-by: BlueBirdBack <BlueBirdBack@users.noreply.github.com>
237 lines
10 KiB
Python
237 lines
10 KiB
Python
"""Regression tests for the universal "unsupported temperature" retry in
``agent.auxiliary_client``.

Auxiliary callers (``flush_memories``, context compression, session search,
web extract summarisation, etc.) hardcode ``temperature=0.3`` for historical
reasons. Several provider/model combinations reject ``temperature`` with a
400:

* OpenAI Responses (gpt-5/o-series reasoning models)
* Copilot Responses (reasoning models)
* OpenRouter reasoning models (gpt-5.5, some anthropic via OAI-compat)
* Anthropic Opus 4.7+ via OpenAI-compat endpoints
* Kimi/Moonshot (server-managed)

``_fixed_temperature_for_model`` catches Kimi up front, and
``build_chat_completion_kwargs`` drops temperature for Anthropic Opus 4.7+,
but the same backend can accept ``temperature`` for some models and reject
it for others (for example gpt-5.4 accepts but gpt-5.5 rejects on the same
endpoint). An allow/deny-list is not maintainable across providers.

The universal fix is reactive: when a call returns an
``Unsupported parameter: temperature`` 400, retry once without temperature.
These tests lock in that behaviour for both sync and async paths.
"""
|
|
|
|
from unittest.mock import patch, MagicMock, AsyncMock
|
|
|
|
import pytest
|
|
|
|
from agent.auxiliary_client import (
|
|
call_llm,
|
|
async_call_llm,
|
|
_is_unsupported_temperature_error,
|
|
)
|
|
|
|
|
|
class TestIsUnsupportedTemperatureError:
    """Check ``_is_unsupported_temperature_error`` against sample messages.

    Each sample string is wrapped in a ``RuntimeError`` and handed to the
    detector; the first table must yield ``True`` and the second ``False``.
    """

    # Messages that must be recognised as "temperature is unsupported".
    _UNSUPPORTED_MESSAGES = (
        # OpenAI / Codex Responses
        "HTTP 400: Unsupported parameter: temperature",
        "Error code: 400 - {'error': {'message': \"Unsupported parameter: 'temperature'\"}}",
        # Copilot / OpenAI error-code form
        "Error code: 400 - {'error': {'code': 'unsupported_parameter', 'param': 'temperature'}}",
        # OpenRouter-style
        "Provider returned error: temperature is not supported for this model",
        "this model does not support temperature",
        # Anthropic-style via OAI-compat
        "temperature: unknown parameter",
        # Some gateways
        "unrecognized request argument supplied: temperature",
    )

    # Messages that must be left alone by the detector.
    _UNRELATED_MESSAGES = (
        # Unrelated 400s must NOT trigger a silent-retry
        "HTTP 400: Invalid value: 'tool'. Supported values are: 'assistant'...",
        "max_tokens is too large for this model",
        "Rate limit exceeded",
        "Connection reset by peer",
        # Temperature value error is a different class of problem
        "temperature must be between 0 and 2",
    )

    @pytest.mark.parametrize("message", _UNSUPPORTED_MESSAGES)
    def test_matches_real_provider_messages(self, message):
        """Every known "unsupported temperature" phrasing is detected."""
        error = RuntimeError(message)
        verdict = _is_unsupported_temperature_error(error)
        assert verdict is True

    @pytest.mark.parametrize("message", _UNRELATED_MESSAGES)
    def test_does_not_match_unrelated_errors(self, message):
        """Unrelated failures, including bad temperature values, are ignored."""
        error = RuntimeError(message)
        verdict = _is_unsupported_temperature_error(error)
        assert verdict is False
|
|
|
|
|
|
def _dummy_response():
|
|
# The real code calls _validate_llm_response which inspects
|
|
# response.choices[0].message. The tests here patch that out, so
|
|
# any sentinel object is fine.
|
|
return {"ok": True}
|
|
|
|
|
|
class TestCallLlmUnsupportedTemperatureRetry:
    """Sync ``call_llm``: one temperature-less retry, then the result is returned."""

    def _setup(self, first_exc):
        """Build a mock client whose first ``create`` raises ``first_exc``.

        The second ``create`` call returns the ``_dummy_response()`` sentinel.
        """
        fake_client = MagicMock()
        fake_client.base_url = "https://api.openai.com/v1"
        outcomes = [first_exc, _dummy_response()]
        fake_client.chat.completions.create.side_effect = outcomes
        return fake_client

    @pytest.mark.parametrize("error_message", [
        "HTTP 400: Unsupported parameter: temperature",
        "Error code: 400 - {'error': {'code': 'unsupported_parameter', 'param': 'temperature'}}",
        "Provider error: this model does not support temperature",
    ])
    def test_retries_once_without_temperature(self, error_message):
        """A temperature rejection triggers exactly one retry without it."""
        client = self._setup(RuntimeError(error_message))
        create = client.chat.completions.create

        resolve_patch = patch(
            "agent.auxiliary_client._resolve_task_provider_model",
            return_value=("openai-codex", "gpt-5.5", None, None, None),
        )
        client_patch = patch(
            "agent.auxiliary_client._get_cached_client",
            return_value=(client, "gpt-5.5"),
        )
        validate_patch = patch(
            "agent.auxiliary_client._validate_llm_response",
            side_effect=lambda resp, _task: resp,
        )
        with resolve_patch, client_patch, validate_patch:
            result = call_llm(
                task="flush_memories",
                messages=[{"role": "user", "content": "remember this"}],
                temperature=0.3,
                max_tokens=500,
            )

        assert result == {"ok": True}
        assert create.call_count == 2
        first_kwargs = create.call_args_list[0].kwargs
        retry_kwargs = create.call_args_list[1].kwargs
        assert first_kwargs["temperature"] == 0.3
        assert "temperature" not in retry_kwargs
        # Everything other than temperature must survive the retry.
        assert retry_kwargs["max_tokens"] == 500

    def test_non_temperature_400_does_not_retry_as_temperature(self):
        """Unrelated 400s (e.g. bad tool role) must not silently drop temp."""
        client = MagicMock()
        client.base_url = "https://api.openai.com/v1"
        create = client.chat.completions.create
        create.side_effect = RuntimeError(
            "HTTP 400: Invalid value: 'tool'. Supported values are: 'assistant'..."
        )

        resolve_patch = patch(
            "agent.auxiliary_client._resolve_task_provider_model",
            return_value=("openai-codex", "gpt-5.5", None, None, None),
        )
        client_patch = patch(
            "agent.auxiliary_client._get_cached_client",
            return_value=(client, "gpt-5.5"),
        )
        validate_patch = patch(
            "agent.auxiliary_client._validate_llm_response",
            side_effect=lambda resp, _task: resp,
        )
        fallback_patch = patch(
            "agent.auxiliary_client._try_payment_fallback",
            return_value=None,
        )
        with resolve_patch, client_patch, validate_patch, fallback_patch:
            with pytest.raises(RuntimeError, match="Invalid value"):
                call_llm(
                    task="flush_memories",
                    messages=[{"role": "user", "content": "x"}],
                    temperature=0.3,
                    max_tokens=500,
                )

        # The error does not mention unsupported temperature, so only the
        # original request should have been sent.
        assert create.call_count == 1

    def test_no_retry_when_temperature_not_in_kwargs(self):
        """If caller didn't send temperature, don't invent a temperature-retry."""
        client = MagicMock()
        client.base_url = "https://api.openai.com/v1"
        create = client.chat.completions.create
        # The provider blames temperature although none is passed below
        # (pathological, but misleading error text does happen). This test
        # expects the call not to be repeated in that situation.
        create.side_effect = RuntimeError(
            "HTTP 400: Unsupported parameter: temperature"
        )

        resolve_patch = patch(
            "agent.auxiliary_client._resolve_task_provider_model",
            return_value=("openai-codex", "gpt-5.5", None, None, None),
        )
        client_patch = patch(
            "agent.auxiliary_client._get_cached_client",
            return_value=(client, "gpt-5.5"),
        )
        validate_patch = patch(
            "agent.auxiliary_client._validate_llm_response",
            side_effect=lambda resp, _task: resp,
        )
        fallback_patch = patch(
            "agent.auxiliary_client._try_payment_fallback",
            return_value=None,
        )
        with resolve_patch, client_patch, validate_patch, fallback_patch:
            with pytest.raises(RuntimeError):
                call_llm(
                    task="flush_memories",
                    messages=[{"role": "user", "content": "x"}],
                    temperature=None,  # explicit: no temperature sent
                    max_tokens=500,
                )

        assert create.call_count == 1
|
|
|
|
|
|
class TestAsyncCallLlmUnsupportedTemperatureRetry:
    """Async counterpart: ``async_call_llm`` follows the same retry rules."""

    @pytest.mark.asyncio
    async def test_async_retries_once_without_temperature(self):
        """A temperature rejection triggers exactly one awaited retry without it."""
        create = AsyncMock(side_effect=[
            RuntimeError("HTTP 400: Unsupported parameter: temperature"),
            _dummy_response(),
        ])
        client = MagicMock()
        client.base_url = "https://api.openai.com/v1"
        client.chat.completions.create = create

        resolve_patch = patch(
            "agent.auxiliary_client._resolve_task_provider_model",
            return_value=("openai-codex", "gpt-5.5", None, None, None),
        )
        client_patch = patch(
            "agent.auxiliary_client._get_cached_client",
            return_value=(client, "gpt-5.5"),
        )
        validate_patch = patch(
            "agent.auxiliary_client._validate_llm_response",
            side_effect=lambda resp, _task: resp,
        )
        with resolve_patch, client_patch, validate_patch:
            result = await async_call_llm(
                task="session_search",
                messages=[{"role": "user", "content": "query"}],
                temperature=0.3,
                max_tokens=500,
            )

        assert result == {"ok": True}
        assert create.await_count == 2
        first_kwargs = create.call_args_list[0].kwargs
        retry_kwargs = create.call_args_list[1].kwargs
        assert first_kwargs["temperature"] == 0.3
        assert "temperature" not in retry_kwargs
        assert retry_kwargs["max_tokens"] == 500

    @pytest.mark.asyncio
    async def test_async_non_temperature_400_does_not_retry(self):
        """An unrelated 400 propagates after a single awaited attempt."""
        create = AsyncMock(
            side_effect=RuntimeError("HTTP 400: Invalid value: 'tool'"),
        )
        client = MagicMock()
        client.base_url = "https://api.openai.com/v1"
        client.chat.completions.create = create

        resolve_patch = patch(
            "agent.auxiliary_client._resolve_task_provider_model",
            return_value=("openai-codex", "gpt-5.5", None, None, None),
        )
        client_patch = patch(
            "agent.auxiliary_client._get_cached_client",
            return_value=(client, "gpt-5.5"),
        )
        validate_patch = patch(
            "agent.auxiliary_client._validate_llm_response",
            side_effect=lambda resp, _task: resp,
        )
        fallback_patch = patch(
            "agent.auxiliary_client._try_payment_fallback",
            return_value=None,
        )
        with resolve_patch, client_patch, validate_patch, fallback_patch:
            with pytest.raises(RuntimeError, match="Invalid value"):
                await async_call_llm(
                    task="session_search",
                    messages=[{"role": "user", "content": "x"}],
                    temperature=0.3,
                    max_tokens=500,
                )

        assert create.await_count == 1
|