mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
fix: retry auxiliary calls without unsupported temperature
This commit is contained in:
parent
00c3d848d8
commit
8821b9d674
2 changed files with 125 additions and 3 deletions
|
|
@ -1349,6 +1349,27 @@ def _is_auth_error(exc: Exception) -> bool:
|
|||
return "error code: 401" in err_lower or "authenticationerror" in type(exc).__name__.lower()
|
||||
|
||||
|
||||
def _is_unsupported_parameter_error(exc: Exception, param: str) -> bool:
|
||||
"""Detect provider 400s for unsupported request parameters.
|
||||
|
||||
Different OpenAI-compatible endpoints phrase this a few ways:
|
||||
``Unsupported parameter: temperature``, ``unsupported_parameter``,
|
||||
``Unknown parameter: max_tokens``, etc. Match by both the parameter name
|
||||
and a generic unsupported/unknown-parameter marker so we can retry with a
|
||||
safer request instead of surfacing a noisy auxiliary failure.
|
||||
"""
|
||||
err_lower = str(exc).lower().replace("_", " ")
|
||||
param_lower = (param or "").lower()
|
||||
if not param_lower or param_lower not in err_lower:
|
||||
return False
|
||||
return any(marker in err_lower for marker in (
|
||||
"unsupported parameter",
|
||||
"unknown parameter",
|
||||
"unrecognized parameter",
|
||||
"invalid parameter",
|
||||
))
|
||||
|
||||
|
||||
def _evict_cached_clients(provider: str) -> None:
|
||||
"""Drop cached auxiliary clients for a provider so fresh creds are used."""
|
||||
normalized = _normalize_aux_provider(provider)
|
||||
|
|
@ -2952,13 +2973,26 @@ def call_llm(
|
|||
if _is_anthropic_compat_endpoint(resolved_provider, _client_base):
|
||||
kwargs["messages"] = _convert_openai_images_to_anthropic(kwargs["messages"])
|
||||
|
||||
# Handle max_tokens vs max_completion_tokens retry, then payment fallback.
|
||||
# Handle unsupported parameter retries, then payment/auth fallback.
|
||||
try:
|
||||
return _validate_llm_response(
|
||||
client.chat.completions.create(**kwargs), task)
|
||||
except Exception as first_err:
|
||||
err_str = str(first_err)
|
||||
if "max_tokens" in err_str or "unsupported_parameter" in err_str:
|
||||
if _is_unsupported_parameter_error(first_err, "temperature") and "temperature" in kwargs:
|
||||
kwargs.pop("temperature", None)
|
||||
try:
|
||||
return _validate_llm_response(
|
||||
client.chat.completions.create(**kwargs), task)
|
||||
except Exception as retry_err:
|
||||
# Keep processing the new error below. Providers sometimes
|
||||
# reject temperature first, then reveal max_tokens on retry.
|
||||
first_err = retry_err
|
||||
err_str = str(first_err)
|
||||
|
||||
if max_tokens is not None and (
|
||||
"max_tokens" in err_str or _is_unsupported_parameter_error(first_err, "max_tokens")
|
||||
):
|
||||
kwargs.pop("max_tokens", None)
|
||||
kwargs["max_completion_tokens"] = max_tokens
|
||||
try:
|
||||
|
|
@ -3222,7 +3256,20 @@ async def async_call_llm(
|
|||
await client.chat.completions.create(**kwargs), task)
|
||||
except Exception as first_err:
|
||||
err_str = str(first_err)
|
||||
if "max_tokens" in err_str or "unsupported_parameter" in err_str:
|
||||
if _is_unsupported_parameter_error(first_err, "temperature") and "temperature" in kwargs:
|
||||
kwargs.pop("temperature", None)
|
||||
try:
|
||||
return _validate_llm_response(
|
||||
await client.chat.completions.create(**kwargs), task)
|
||||
except Exception as retry_err:
|
||||
# Keep processing the new error below. Providers sometimes
|
||||
# reject temperature first, then reveal max_tokens on retry.
|
||||
first_err = retry_err
|
||||
err_str = str(first_err)
|
||||
|
||||
if max_tokens is not None and (
|
||||
"max_tokens" in err_str or _is_unsupported_parameter_error(first_err, "max_tokens")
|
||||
):
|
||||
kwargs.pop("max_tokens", None)
|
||||
kwargs["max_completion_tokens"] = max_tokens
|
||||
try:
|
||||
|
|
|
|||
|
|
@ -1032,6 +1032,81 @@ class TestStaleBaseUrlWarning:
|
|||
assert mod._stale_base_url_warned is True
|
||||
|
||||
|
||||
class TestAuxiliaryUnsupportedParameterRetry:
    """Auxiliary LLM calls retry without ``temperature`` when the provider
    rejects it with an unsupported-parameter 400, both sync and async."""

    def test_sync_call_retries_without_temperature_when_endpoint_rejects_it(self):
        mock_client = MagicMock()
        mock_client.base_url = "https://chatgpt.com/backend-api/codex/"
        ok_response = MagicMock()
        ok_response.choices = [MagicMock(message=MagicMock(content="ok"))]
        seen_kwargs = []

        def fake_create(**kwargs):
            seen_kwargs.append(dict(kwargs))
            # First attempt: provider 400s on the temperature parameter.
            if len(seen_kwargs) == 1:
                raise Exception(
                    "HTTP 400: {'detail': 'Unsupported parameter: temperature'}"
                )
            return ok_response

        mock_client.chat.completions.create.side_effect = fake_create

        patch_client = patch(
            "agent.auxiliary_client._get_cached_client",
            return_value=(mock_client, "gpt-5.5"),
        )
        patch_resolver = patch(
            "agent.auxiliary_client._resolve_task_provider_model",
            return_value=("openai-codex", "gpt-5.5", None, None, None),
        )
        with patch_client, patch_resolver:
            result = call_llm(
                task="flush_memories",
                messages=[{"role": "user", "content": "hi"}],
                temperature=0.3,
                max_tokens=32,
            )

        # Retry succeeds once temperature is stripped; max_tokens survives.
        assert result is ok_response
        assert seen_kwargs[0]["temperature"] == 0.3
        assert "temperature" not in seen_kwargs[1]
        assert seen_kwargs[1]["max_tokens"] == 32

    @pytest.mark.asyncio
    async def test_async_call_retries_without_temperature_when_endpoint_rejects_it(self):
        mock_client = MagicMock()
        mock_client.base_url = "https://chatgpt.com/backend-api/codex/"
        ok_response = MagicMock()
        ok_response.choices = [MagicMock(message=MagicMock(content="ok"))]
        seen_kwargs = []

        async def fake_create(**kwargs):
            seen_kwargs.append(dict(kwargs))
            # First attempt: provider 400s on the temperature parameter.
            if len(seen_kwargs) == 1:
                raise Exception(
                    "HTTP 400: {'detail': 'Unsupported parameter: temperature'}"
                )
            return ok_response

        mock_client.chat.completions.create = AsyncMock(side_effect=fake_create)

        patch_client = patch(
            "agent.auxiliary_client._get_cached_client",
            return_value=(mock_client, "gpt-5.5"),
        )
        patch_resolver = patch(
            "agent.auxiliary_client._resolve_task_provider_model",
            return_value=("openai-codex", "gpt-5.5", None, None, None),
        )
        with patch_client, patch_resolver:
            result = await async_call_llm(
                task="flush_memories",
                messages=[{"role": "user", "content": "hi"}],
                temperature=0.3,
                max_tokens=32,
            )

        # Retry succeeds once temperature is stripped; max_tokens survives.
        assert result is ok_response
        assert seen_kwargs[0]["temperature"] == 0.3
        assert "temperature" not in seen_kwargs[1]
        assert seen_kwargs[1]["max_tokens"] == 32
|
||||
|
||||
|
||||
class TestAuxiliaryTaskExtraBody:
|
||||
def test_sync_call_merges_task_extra_body_from_config(self):
|
||||
client = MagicMock()
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue