mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
fix(auxiliary): validate response shape in call_llm/async_call_llm (#7264)
async_call_llm (and call_llm) can return non-OpenAI objects from
custom providers or adapter shims, crashing downstream consumers
with misleading AttributeError ('str' has no attribute 'choices').
Add _validate_llm_response() that checks the response has the
expected .choices[0].message shape before returning. Wraps all
return paths in call_llm, async_call_llm, and fallback paths.
Fails fast with a clear RuntimeError identifying the task, response
type, and a preview of the malformed payload.
Closes #7264
This commit is contained in:
parent
4f5e8b22a7
commit
d3c5d65563
1 changed file with 43 additions and 6 deletions
|
|
@ -2078,6 +2078,37 @@ def _build_call_kwargs(
|
|||
return kwargs
|
||||
|
||||
|
||||
def _validate_llm_response(response: Any, task: str = None) -> Any:
|
||||
"""Validate that an LLM response has the expected .choices[0].message shape.
|
||||
|
||||
Fails fast with a clear error instead of letting malformed payloads
|
||||
propagate to downstream consumers where they crash with misleading
|
||||
AttributeError (e.g. "'str' object has no attribute 'choices'").
|
||||
|
||||
See #7264.
|
||||
"""
|
||||
if response is None:
|
||||
raise RuntimeError(
|
||||
f"Auxiliary {task or 'call'}: LLM returned None response"
|
||||
)
|
||||
# Allow SimpleNamespace responses from adapters (CodexAuxiliaryClient,
|
||||
# AnthropicAuxiliaryClient) — they have .choices[0].message.
|
||||
try:
|
||||
choices = response.choices
|
||||
if not choices or not hasattr(choices[0], "message"):
|
||||
raise AttributeError("missing choices[0].message")
|
||||
except (AttributeError, TypeError, IndexError) as exc:
|
||||
response_type = type(response).__name__
|
||||
response_preview = str(response)[:120]
|
||||
raise RuntimeError(
|
||||
f"Auxiliary {task or 'call'}: LLM returned invalid response "
|
||||
f"(type={response_type}): {response_preview!r}. "
|
||||
f"Expected object with .choices[0].message — check provider "
|
||||
f"adapter or custom endpoint compatibility."
|
||||
) from exc
|
||||
return response
|
||||
|
||||
|
||||
def call_llm(
|
||||
task: str = None,
|
||||
*,
|
||||
|
|
@ -2193,14 +2224,16 @@ def call_llm(
|
|||
|
||||
# Handle max_tokens vs max_completion_tokens retry, then payment fallback.
|
||||
try:
|
||||
return client.chat.completions.create(**kwargs)
|
||||
return _validate_llm_response(
|
||||
client.chat.completions.create(**kwargs), task)
|
||||
except Exception as first_err:
|
||||
err_str = str(first_err)
|
||||
if "max_tokens" in err_str or "unsupported_parameter" in err_str:
|
||||
kwargs.pop("max_tokens", None)
|
||||
kwargs["max_completion_tokens"] = max_tokens
|
||||
try:
|
||||
return client.chat.completions.create(**kwargs)
|
||||
return _validate_llm_response(
|
||||
client.chat.completions.create(**kwargs), task)
|
||||
except Exception as retry_err:
|
||||
# If the max_tokens retry also hits a payment or connection
|
||||
# error, fall through to the fallback chain below.
|
||||
|
|
@ -2237,7 +2270,8 @@ def call_llm(
|
|||
temperature=temperature, max_tokens=max_tokens,
|
||||
tools=tools, timeout=effective_timeout,
|
||||
extra_body=extra_body)
|
||||
return fb_client.chat.completions.create(**fb_kwargs)
|
||||
return _validate_llm_response(
|
||||
fb_client.chat.completions.create(**fb_kwargs), task)
|
||||
raise
|
||||
|
||||
|
||||
|
|
@ -2377,14 +2411,16 @@ async def async_call_llm(
|
|||
base_url=resolved_base_url)
|
||||
|
||||
try:
|
||||
return await client.chat.completions.create(**kwargs)
|
||||
return _validate_llm_response(
|
||||
await client.chat.completions.create(**kwargs), task)
|
||||
except Exception as first_err:
|
||||
err_str = str(first_err)
|
||||
if "max_tokens" in err_str or "unsupported_parameter" in err_str:
|
||||
kwargs.pop("max_tokens", None)
|
||||
kwargs["max_completion_tokens"] = max_tokens
|
||||
try:
|
||||
return await client.chat.completions.create(**kwargs)
|
||||
return _validate_llm_response(
|
||||
await client.chat.completions.create(**kwargs), task)
|
||||
except Exception as retry_err:
|
||||
# If the max_tokens retry also hits a payment or connection
|
||||
# error, fall through to the fallback chain below.
|
||||
|
|
@ -2411,5 +2447,6 @@ async def async_call_llm(
|
|||
async_fb, async_fb_model = _to_async_client(fb_client, fb_model or "")
|
||||
if async_fb_model and async_fb_model != fb_kwargs.get("model"):
|
||||
fb_kwargs["model"] = async_fb_model
|
||||
return await async_fb.chat.completions.create(**fb_kwargs)
|
||||
return _validate_llm_response(
|
||||
await async_fb.chat.completions.create(**fb_kwargs), task)
|
||||
raise
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue