mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-26 01:01:40 +00:00
fix: retry auxiliary calls without unsupported temperature
This commit is contained in:
parent
00c3d848d8
commit
8821b9d674
2 changed files with 125 additions and 3 deletions
|
|
@ -1349,6 +1349,27 @@ def _is_auth_error(exc: Exception) -> bool:
|
||||||
return "error code: 401" in err_lower or "authenticationerror" in type(exc).__name__.lower()
|
return "error code: 401" in err_lower or "authenticationerror" in type(exc).__name__.lower()
|
||||||
|
|
||||||
|
|
||||||
|
def _is_unsupported_parameter_error(exc: Exception, param: str) -> bool:
|
||||||
|
"""Detect provider 400s for unsupported request parameters.
|
||||||
|
|
||||||
|
Different OpenAI-compatible endpoints phrase this a few ways:
|
||||||
|
``Unsupported parameter: temperature``, ``unsupported_parameter``,
|
||||||
|
``Unknown parameter: max_tokens``, etc. Match by both the parameter name
|
||||||
|
and a generic unsupported/unknown-parameter marker so we can retry with a
|
||||||
|
safer request instead of surfacing a noisy auxiliary failure.
|
||||||
|
"""
|
||||||
|
err_lower = str(exc).lower().replace("_", " ")
|
||||||
|
param_lower = (param or "").lower()
|
||||||
|
if not param_lower or param_lower not in err_lower:
|
||||||
|
return False
|
||||||
|
return any(marker in err_lower for marker in (
|
||||||
|
"unsupported parameter",
|
||||||
|
"unknown parameter",
|
||||||
|
"unrecognized parameter",
|
||||||
|
"invalid parameter",
|
||||||
|
))
|
||||||
|
|
||||||
|
|
||||||
def _evict_cached_clients(provider: str) -> None:
|
def _evict_cached_clients(provider: str) -> None:
|
||||||
"""Drop cached auxiliary clients for a provider so fresh creds are used."""
|
"""Drop cached auxiliary clients for a provider so fresh creds are used."""
|
||||||
normalized = _normalize_aux_provider(provider)
|
normalized = _normalize_aux_provider(provider)
|
||||||
|
|
@ -2952,13 +2973,26 @@ def call_llm(
|
||||||
if _is_anthropic_compat_endpoint(resolved_provider, _client_base):
|
if _is_anthropic_compat_endpoint(resolved_provider, _client_base):
|
||||||
kwargs["messages"] = _convert_openai_images_to_anthropic(kwargs["messages"])
|
kwargs["messages"] = _convert_openai_images_to_anthropic(kwargs["messages"])
|
||||||
|
|
||||||
# Handle max_tokens vs max_completion_tokens retry, then payment fallback.
|
# Handle unsupported parameter retries, then payment/auth fallback.
|
||||||
try:
|
try:
|
||||||
return _validate_llm_response(
|
return _validate_llm_response(
|
||||||
client.chat.completions.create(**kwargs), task)
|
client.chat.completions.create(**kwargs), task)
|
||||||
except Exception as first_err:
|
except Exception as first_err:
|
||||||
err_str = str(first_err)
|
err_str = str(first_err)
|
||||||
if "max_tokens" in err_str or "unsupported_parameter" in err_str:
|
if _is_unsupported_parameter_error(first_err, "temperature") and "temperature" in kwargs:
|
||||||
|
kwargs.pop("temperature", None)
|
||||||
|
try:
|
||||||
|
return _validate_llm_response(
|
||||||
|
client.chat.completions.create(**kwargs), task)
|
||||||
|
except Exception as retry_err:
|
||||||
|
# Keep processing the new error below. Providers sometimes
|
||||||
|
# reject temperature first, then reveal max_tokens on retry.
|
||||||
|
first_err = retry_err
|
||||||
|
err_str = str(first_err)
|
||||||
|
|
||||||
|
if max_tokens is not None and (
|
||||||
|
"max_tokens" in err_str or _is_unsupported_parameter_error(first_err, "max_tokens")
|
||||||
|
):
|
||||||
kwargs.pop("max_tokens", None)
|
kwargs.pop("max_tokens", None)
|
||||||
kwargs["max_completion_tokens"] = max_tokens
|
kwargs["max_completion_tokens"] = max_tokens
|
||||||
try:
|
try:
|
||||||
|
|
@ -3222,7 +3256,20 @@ async def async_call_llm(
|
||||||
await client.chat.completions.create(**kwargs), task)
|
await client.chat.completions.create(**kwargs), task)
|
||||||
except Exception as first_err:
|
except Exception as first_err:
|
||||||
err_str = str(first_err)
|
err_str = str(first_err)
|
||||||
if "max_tokens" in err_str or "unsupported_parameter" in err_str:
|
if _is_unsupported_parameter_error(first_err, "temperature") and "temperature" in kwargs:
|
||||||
|
kwargs.pop("temperature", None)
|
||||||
|
try:
|
||||||
|
return _validate_llm_response(
|
||||||
|
await client.chat.completions.create(**kwargs), task)
|
||||||
|
except Exception as retry_err:
|
||||||
|
# Keep processing the new error below. Providers sometimes
|
||||||
|
# reject temperature first, then reveal max_tokens on retry.
|
||||||
|
first_err = retry_err
|
||||||
|
err_str = str(first_err)
|
||||||
|
|
||||||
|
if max_tokens is not None and (
|
||||||
|
"max_tokens" in err_str or _is_unsupported_parameter_error(first_err, "max_tokens")
|
||||||
|
):
|
||||||
kwargs.pop("max_tokens", None)
|
kwargs.pop("max_tokens", None)
|
||||||
kwargs["max_completion_tokens"] = max_tokens
|
kwargs["max_completion_tokens"] = max_tokens
|
||||||
try:
|
try:
|
||||||
|
|
|
||||||
|
|
@ -1032,6 +1032,81 @@ class TestStaleBaseUrlWarning:
|
||||||
assert mod._stale_base_url_warned is True
|
assert mod._stale_base_url_warned is True
|
||||||
|
|
||||||
|
|
||||||
|
class TestAuxiliaryUnsupportedParameterRetry:
    """Auxiliary LLM calls retry without ``temperature`` when the provider
    rejects it as an unsupported parameter (sync and async paths)."""

    def test_sync_call_retries_without_temperature_when_endpoint_rejects_it(self):
        fake_client = MagicMock()
        fake_client.base_url = "https://chatgpt.com/backend-api/codex/"
        ok_response = MagicMock()
        ok_response.choices = [MagicMock(message=MagicMock(content="ok"))]
        recorded = []

        def fake_create(**kwargs):
            # Record every request; reject only the first one.
            recorded.append(dict(kwargs))
            if len(recorded) == 1:
                raise Exception(
                    "HTTP 400: {'detail': 'Unsupported parameter: temperature'}"
                )
            return ok_response

        fake_client.chat.completions.create.side_effect = fake_create

        with patch(
            "agent.auxiliary_client._get_cached_client",
            return_value=(fake_client, "gpt-5.5"),
        ), patch(
            "agent.auxiliary_client._resolve_task_provider_model",
            return_value=("openai-codex", "gpt-5.5", None, None, None),
        ):
            result = call_llm(
                task="flush_memories",
                messages=[{"role": "user", "content": "hi"}],
                temperature=0.3,
                max_tokens=32,
            )

        # First attempt carried temperature; the retry dropped only that key.
        assert result is ok_response
        assert recorded[0]["temperature"] == 0.3
        assert "temperature" not in recorded[1]
        assert recorded[1]["max_tokens"] == 32

    @pytest.mark.asyncio
    async def test_async_call_retries_without_temperature_when_endpoint_rejects_it(self):
        fake_client = MagicMock()
        fake_client.base_url = "https://chatgpt.com/backend-api/codex/"
        ok_response = MagicMock()
        ok_response.choices = [MagicMock(message=MagicMock(content="ok"))]
        recorded = []

        async def fake_create(**kwargs):
            # Record every request; reject only the first one.
            recorded.append(dict(kwargs))
            if len(recorded) == 1:
                raise Exception(
                    "HTTP 400: {'detail': 'Unsupported parameter: temperature'}"
                )
            return ok_response

        fake_client.chat.completions.create = AsyncMock(side_effect=fake_create)

        with patch(
            "agent.auxiliary_client._get_cached_client",
            return_value=(fake_client, "gpt-5.5"),
        ), patch(
            "agent.auxiliary_client._resolve_task_provider_model",
            return_value=("openai-codex", "gpt-5.5", None, None, None),
        ):
            result = await async_call_llm(
                task="flush_memories",
                messages=[{"role": "user", "content": "hi"}],
                temperature=0.3,
                max_tokens=32,
            )

        # First attempt carried temperature; the retry dropped only that key.
        assert result is ok_response
        assert recorded[0]["temperature"] == 0.3
        assert "temperature" not in recorded[1]
        assert recorded[1]["max_tokens"] == 32
||||||
class TestAuxiliaryTaskExtraBody:
|
class TestAuxiliaryTaskExtraBody:
|
||||||
def test_sync_call_merges_task_extra_body_from_config(self):
|
def test_sync_call_merges_task_extra_body_from_config(self):
|
||||||
client = MagicMock()
|
client = MagicMock()
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue