mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-07-01 12:02:05 +00:00
fix(agent,cli): surface empty-body API errors and fail oneshot exit code
When an LLM API call returns HTTP 4xx with an empty parsed SDK `body` ({}),
`_summarize_api_error` fell through to a bare `str(error)`, so users saw only
"HTTP 400" with no provider detail (reported on Windows in #36109). The SDK
leaves `body` empty in this case, but the httpx `response` still carries the
payload in `.text`.
- run_agent.py `_summarize_api_error`: when `body` is empty, fall back to
`response.text` — parse a JSON `error.message`/`message` when present, else
surface the raw (truncated) body. Platform-agnostic diagnostics.
- hermes_cli/oneshot.py: `hermes -z` now runs via `run_conversation` and returns
exit code 2 when the run is failed/partial with no usable final response, so
scripts can detect LLM failures (still 0 when a response — incl. an error
summary as output — is produced).
Tests: new tests/run_agent/test_summarize_api_error.py (empty-body JSON + raw
text, RED/GREEN verified) + oneshot exit-code/`run_conversation` wiring tests.
NOTE: #36109's original root cause (Windows "all providers return empty 400")
is not reproducible on current main (heavy provider-transport churn since
v0.15.1). This change does not claim to fix that root cause — it makes any
empty-body API error LEGIBLE so a future occurrence shows the real provider
message instead of a bare HTTP 400. Relates to #36109 (does not close it).
This commit is contained in:
parent
c0b4a3438a
commit
093f567f0d
4 changed files with 125 additions and 14 deletions
|
|
@ -178,11 +178,12 @@ def run_oneshot(
|
|||
devnull = open(os.devnull, "w", encoding="utf-8")
|
||||
|
||||
response: Optional[str] = None
|
||||
result: dict = {}
|
||||
failure: BaseException | None = None
|
||||
try:
|
||||
with redirect_stdout(devnull), redirect_stderr(devnull):
|
||||
try:
|
||||
response = _run_agent(
|
||||
response, result = _run_agent(
|
||||
prompt,
|
||||
model=model,
|
||||
provider=provider,
|
||||
|
|
@ -213,16 +214,20 @@ def run_oneshot(
|
|||
real_stderr.flush()
|
||||
return 1
|
||||
|
||||
if response:
|
||||
real_stdout.write(response)
|
||||
if not response.endswith("\n"):
|
||||
real_stdout.write("\n")
|
||||
real_stdout.flush()
|
||||
|
||||
if (result.get("failed") or result.get("partial")) and not (response or "").strip():
|
||||
return 2
|
||||
|
||||
if not (response or "").strip():
|
||||
real_stderr.write("hermes -z: no final response was produced; treating the run as failed.\n")
|
||||
real_stderr.flush()
|
||||
return 1
|
||||
|
||||
assert response is not None # narrowed by the empty-response guard above
|
||||
real_stdout.write(response)
|
||||
if not response.endswith("\n"):
|
||||
real_stdout.write("\n")
|
||||
real_stdout.flush()
|
||||
return 0
|
||||
|
||||
|
||||
|
|
@ -248,9 +253,9 @@ def _run_agent(
|
|||
provider: Optional[str] = None,
|
||||
toolsets: object = None,
|
||||
use_config_toolsets: bool = True,
|
||||
) -> str:
|
||||
) -> tuple[str, dict]:
|
||||
"""Build an AIAgent exactly like a normal CLI chat turn would, then
|
||||
run a single conversation. Returns the final response string."""
|
||||
run a single conversation. Returns ``(final_response, run_result)``."""
|
||||
# Imports are local so they don't run when hermes is invoked for
|
||||
# other commands (keeps top-level CLI startup cheap).
|
||||
from hermes_cli.config import load_config
|
||||
|
|
@ -364,7 +369,8 @@ def _run_agent(
|
|||
agent.stream_delta_callback = None
|
||||
agent.tool_gen_callback = None
|
||||
|
||||
return agent.chat(prompt) or ""
|
||||
result = agent.run_conversation(prompt)
|
||||
return (result.get("final_response") or "", result)
|
||||
|
||||
|
||||
def _oneshot_clarify_callback(question: str, choices=None) -> str:
|
||||
|
|
|
|||
23
run_agent.py
23
run_agent.py
|
|
@ -1953,6 +1953,29 @@ class AIAgent:
|
|||
msg = AIAgent._coerce_api_error_detail(msg)
|
||||
return AIAgent._decorate_xai_entitlement_error(f"{prefix}{msg[:300]}")
|
||||
|
||||
# SDK may leave body empty while httpx still has the payload (#36109).
|
||||
# Redact before returning: the raw provider/proxy error body is
|
||||
# attacker-influenced and may echo Authorization / x-api-key / request
|
||||
# JSON, which would otherwise leak into final_response + logs (this path
|
||||
# widens exposure vs the old empty-body "HTTP 400" string).
|
||||
response = getattr(error, "response", None)
|
||||
if response is not None:
|
||||
snippet = (getattr(response, "text", None) or "").strip()
|
||||
if snippet:
|
||||
status_code = getattr(error, "status_code", None)
|
||||
prefix = f"HTTP {status_code}: " if status_code else ""
|
||||
try:
|
||||
payload = json.loads(snippet)
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
payload = None
|
||||
if isinstance(payload, dict):
|
||||
err = payload.get("error")
|
||||
if isinstance(err, dict) and err.get("message"):
|
||||
return redact_sensitive_text(f"{prefix}{str(err['message'])[:300]}")
|
||||
if payload.get("message"):
|
||||
return redact_sensitive_text(f"{prefix}{str(payload['message'])[:300]}")
|
||||
return redact_sensitive_text(f"{prefix}{snippet[:300]}")
|
||||
|
||||
# Fallback: truncate the raw string but give more room than 200 chars
|
||||
status_code = getattr(error, "status_code", None)
|
||||
prefix = f"HTTP {status_code}: " if status_code else ""
|
||||
|
|
|
|||
|
|
@ -642,7 +642,7 @@ def test_oneshot_fails_closed_on_empty_final_response(monkeypatch, capsys):
|
|||
_stub_plugin_discovery(monkeypatch)
|
||||
import hermes_cli.oneshot as oneshot_mod
|
||||
|
||||
monkeypatch.setattr(oneshot_mod, "_run_agent", lambda *_args, **_kwargs: "")
|
||||
monkeypatch.setattr(oneshot_mod, "_run_agent", lambda *_args, **_kwargs: ("", {}))
|
||||
|
||||
assert oneshot_mod.run_oneshot("hello") == 1
|
||||
captured = capsys.readouterr()
|
||||
|
|
@ -654,7 +654,7 @@ def test_oneshot_prints_nonempty_final_response(monkeypatch, capsys):
|
|||
_stub_plugin_discovery(monkeypatch)
|
||||
import hermes_cli.oneshot as oneshot_mod
|
||||
|
||||
monkeypatch.setattr(oneshot_mod, "_run_agent", lambda *_args, **_kwargs: "done")
|
||||
monkeypatch.setattr(oneshot_mod, "_run_agent", lambda *_args, **_kwargs: ("done", {}))
|
||||
|
||||
assert oneshot_mod.run_oneshot("hello") == 0
|
||||
captured = capsys.readouterr()
|
||||
|
|
@ -678,6 +678,30 @@ def test_oneshot_fails_closed_on_agent_exception(monkeypatch, capsys):
|
|||
assert "not a TTY" in captured.err
|
||||
|
||||
|
||||
def test_oneshot_exit_code_when_failed_without_response(monkeypatch):
|
||||
from hermes_cli.oneshot import run_oneshot
|
||||
|
||||
monkeypatch.setattr(
|
||||
"hermes_cli.oneshot._run_agent",
|
||||
lambda *_a, **_k: ("", {"failed": True, "partial": False}),
|
||||
)
|
||||
assert run_oneshot("hi") == 2
|
||||
|
||||
|
||||
def test_oneshot_exit_code_zero_when_failed_with_error_text(monkeypatch, capsys):
|
||||
from hermes_cli.oneshot import run_oneshot
|
||||
|
||||
monkeypatch.setattr(
|
||||
"hermes_cli.oneshot._run_agent",
|
||||
lambda *_a, **_k: (
|
||||
"API call failed after 3 retries: HTTP 404: model not found",
|
||||
{"failed": True, "partial": False},
|
||||
),
|
||||
)
|
||||
assert run_oneshot("hi") == 0
|
||||
assert "HTTP 404" in capsys.readouterr().out
|
||||
|
||||
|
||||
def test_oneshot_reraises_keyboard_interrupt(monkeypatch):
|
||||
_stub_plugin_discovery(monkeypatch)
|
||||
import hermes_cli.oneshot as oneshot_mod
|
||||
|
|
@ -806,9 +830,9 @@ def test_oneshot_wires_session_db_for_recall(monkeypatch):
|
|||
self.stream_delta_callback = object()
|
||||
self.tool_gen_callback = object()
|
||||
|
||||
def chat(self, prompt):
|
||||
def run_conversation(self, prompt, **_kwargs):
|
||||
captured["prompt"] = prompt
|
||||
return "ok"
|
||||
return {"final_response": "ok", "failed": False, "partial": False}
|
||||
|
||||
class FakeSessionDB:
|
||||
def __new__(cls):
|
||||
|
|
@ -852,7 +876,9 @@ def test_oneshot_wires_session_db_for_recall(monkeypatch):
|
|||
mod("hermes_cli.tools_config", _get_platform_tools=lambda *_args, **_kwargs: {"session_search"}),
|
||||
)
|
||||
|
||||
assert _run_agent("recall this") == "ok"
|
||||
text, result = _run_agent("recall this")
|
||||
assert text == "ok"
|
||||
assert not result.get("failed")
|
||||
assert captured["session_db"] is sentinel_db
|
||||
assert captured["enabled_toolsets"] == ["session_search"]
|
||||
assert captured["prompt"] == "recall this"
|
||||
|
|
|
|||
56
tests/run_agent/test_summarize_api_error.py
Normal file
56
tests/run_agent/test_summarize_api_error.py
Normal file
|
|
@ -0,0 +1,56 @@
|
|||
"""Regression: empty-body HTTP 4xx errors must still surface a real provider message.
|
||||
|
||||
Reported on Windows (#36109): an LLM API call returned HTTP 400 with an *empty*
|
||||
parsed SDK ``body`` ({}), so ``_summarize_api_error`` fell through to the bare
|
||||
``str(error)`` path and the user saw only "HTTP 400" with no provider detail.
|
||||
The SDK leaves ``body`` empty in this case, but the underlying httpx
|
||||
``response`` still carries the real payload in ``.text``. These tests lock the
|
||||
contract: when ``body`` is empty, fall back to ``response.text`` (parsing a JSON
|
||||
``error.message`` / ``message`` when present) so logs and CLI show the real
|
||||
provider error. This is a diagnostic improvement and is platform-agnostic.
|
||||
"""
|
||||
|
||||
from types import SimpleNamespace
|
||||
|
||||
from run_agent import AIAgent
|
||||
|
||||
|
||||
def _make_empty_body_error(response_text: str, status_code: int = 400) -> Exception:
|
||||
"""Mimic an OpenAI-SDK error whose parsed body is empty but whose httpx
|
||||
response still holds the payload text."""
|
||||
err = Exception("") # str(error) is empty/uninformative on this path
|
||||
err.status_code = status_code
|
||||
err.body = {} # empty dict — the #36109 trigger
|
||||
err.response = SimpleNamespace(text=response_text)
|
||||
return err
|
||||
|
||||
|
||||
def test_empty_body_falls_back_to_response_json_error_message():
|
||||
"""A JSON payload with error.message is surfaced (not a bare HTTP 400)."""
|
||||
err = _make_empty_body_error(
|
||||
'{"error": {"message": "model `foo` does not exist", "type": "invalid_request_error"}}'
|
||||
)
|
||||
summary = AIAgent._summarize_api_error(err)
|
||||
assert "HTTP 400" in summary
|
||||
assert "model `foo` does not exist" in summary
|
||||
|
||||
|
||||
def test_empty_body_falls_back_to_raw_response_text_when_not_json():
|
||||
"""A non-JSON response body is surfaced verbatim (truncated), not dropped."""
|
||||
err = _make_empty_body_error("upstream connect error or disconnect/reset before headers")
|
||||
summary = AIAgent._summarize_api_error(err)
|
||||
assert "HTTP 400" in summary
|
||||
assert "upstream connect error" in summary
|
||||
|
||||
|
||||
def test_empty_body_fallback_redacts_secrets(monkeypatch):
|
||||
"""The surfaced provider/proxy error body must pass through the secret
|
||||
redactor — a proxy echoing an API key in the error must not leak it into
|
||||
final_response/logs (the empty-body path previously hid it as bare HTTP 400)."""
|
||||
monkeypatch.setenv("HERMES_REDACT_SECRETS", "true")
|
||||
err = _make_empty_body_error(
|
||||
'{"error": {"message": "bad key: sk-proj-ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdef"}}'
|
||||
)
|
||||
summary = AIAgent._summarize_api_error(err)
|
||||
assert "sk-proj-ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdef" not in summary
|
||||
|
||||
Loading…
Add table
Add a link
Reference in a new issue