fix(agent,cli): surface empty-body API errors and fail oneshot exit code

When an LLM API call returns HTTP 4xx with an empty parsed SDK `body` ({}),
`_summarize_api_error` fell through to a bare `str(error)`, so users saw only
"HTTP 400" with no provider detail (reported on Windows in #36109). The SDK
leaves `body` empty in this case, but the httpx `response` still carries the
payload in `.text`.

- run_agent.py `_summarize_api_error`: when `body` is empty, fall back to
  `response.text` — parse a JSON `error.message`/`message` when present, else
  surface the raw (truncated) body. Platform-agnostic diagnostics.
- hermes_cli/oneshot.py: `hermes -z` now runs via `run_conversation` and returns
  exit code 2 when the run is failed/partial with no usable final response, so
  scripts can detect LLM failures (still 0 when a response — incl. an error
  summary as output — is produced).

Tests: new tests/run_agent/test_summarize_api_error.py (empty-body JSON + raw
text, RED/GREEN verified) + oneshot exit-code/`run_conversation` wiring tests.

NOTE: #36109's original root cause (Windows "all providers return empty 400")
is not reproducible on current main (heavy provider-transport churn since
v0.15.1). This change does not claim to fix that root cause — it makes any
empty-body API error LEGIBLE so a future occurrence shows the real provider
message instead of a bare HTTP 400. Relates to #36109 (does not close it).
This commit is contained in:
xxxigm 2026-06-28 12:16:52 +05:30 committed by Teknium
parent c0b4a3438a
commit 093f567f0d
4 changed files with 125 additions and 14 deletions

View file

@ -178,11 +178,12 @@ def run_oneshot(
devnull = open(os.devnull, "w", encoding="utf-8")
response: Optional[str] = None
result: dict = {}
failure: BaseException | None = None
try:
with redirect_stdout(devnull), redirect_stderr(devnull):
try:
response = _run_agent(
response, result = _run_agent(
prompt,
model=model,
provider=provider,
@ -213,16 +214,20 @@ def run_oneshot(
real_stderr.flush()
return 1
if response:
real_stdout.write(response)
if not response.endswith("\n"):
real_stdout.write("\n")
real_stdout.flush()
if (result.get("failed") or result.get("partial")) and not (response or "").strip():
return 2
if not (response or "").strip():
real_stderr.write("hermes -z: no final response was produced; treating the run as failed.\n")
real_stderr.flush()
return 1
assert response is not None # narrowed by the empty-response guard above
real_stdout.write(response)
if not response.endswith("\n"):
real_stdout.write("\n")
real_stdout.flush()
return 0
@ -248,9 +253,9 @@ def _run_agent(
provider: Optional[str] = None,
toolsets: object = None,
use_config_toolsets: bool = True,
) -> str:
) -> tuple[str, dict]:
"""Build an AIAgent exactly like a normal CLI chat turn would, then
run a single conversation. Returns the final response string."""
run a single conversation. Returns ``(final_response, run_result)``."""
# Imports are local so they don't run when hermes is invoked for
# other commands (keeps top-level CLI startup cheap).
from hermes_cli.config import load_config
@ -364,7 +369,8 @@ def _run_agent(
agent.stream_delta_callback = None
agent.tool_gen_callback = None
return agent.chat(prompt) or ""
result = agent.run_conversation(prompt)
return (result.get("final_response") or "", result)
def _oneshot_clarify_callback(question: str, choices=None) -> str:

View file

@ -1953,6 +1953,29 @@ class AIAgent:
msg = AIAgent._coerce_api_error_detail(msg)
return AIAgent._decorate_xai_entitlement_error(f"{prefix}{msg[:300]}")
# SDK may leave body empty while httpx still has the payload (#36109).
# Redact before returning: the raw provider/proxy error body is
# attacker-influenced and may echo Authorization / x-api-key / request
# JSON, which would otherwise leak into final_response + logs (this path
# widens exposure vs the old empty-body "HTTP 400" string).
response = getattr(error, "response", None)
if response is not None:
snippet = (getattr(response, "text", None) or "").strip()
if snippet:
status_code = getattr(error, "status_code", None)
prefix = f"HTTP {status_code}: " if status_code else ""
try:
payload = json.loads(snippet)
except (json.JSONDecodeError, TypeError):
payload = None
if isinstance(payload, dict):
err = payload.get("error")
if isinstance(err, dict) and err.get("message"):
return redact_sensitive_text(f"{prefix}{str(err['message'])[:300]}")
if payload.get("message"):
return redact_sensitive_text(f"{prefix}{str(payload['message'])[:300]}")
return redact_sensitive_text(f"{prefix}{snippet[:300]}")
# Fallback: truncate the raw string but give more room than 200 chars
status_code = getattr(error, "status_code", None)
prefix = f"HTTP {status_code}: " if status_code else ""

View file

@ -642,7 +642,7 @@ def test_oneshot_fails_closed_on_empty_final_response(monkeypatch, capsys):
_stub_plugin_discovery(monkeypatch)
import hermes_cli.oneshot as oneshot_mod
monkeypatch.setattr(oneshot_mod, "_run_agent", lambda *_args, **_kwargs: "")
monkeypatch.setattr(oneshot_mod, "_run_agent", lambda *_args, **_kwargs: ("", {}))
assert oneshot_mod.run_oneshot("hello") == 1
captured = capsys.readouterr()
@ -654,7 +654,7 @@ def test_oneshot_prints_nonempty_final_response(monkeypatch, capsys):
_stub_plugin_discovery(monkeypatch)
import hermes_cli.oneshot as oneshot_mod
monkeypatch.setattr(oneshot_mod, "_run_agent", lambda *_args, **_kwargs: "done")
monkeypatch.setattr(oneshot_mod, "_run_agent", lambda *_args, **_kwargs: ("done", {}))
assert oneshot_mod.run_oneshot("hello") == 0
captured = capsys.readouterr()
@ -678,6 +678,30 @@ def test_oneshot_fails_closed_on_agent_exception(monkeypatch, capsys):
assert "not a TTY" in captured.err
def test_oneshot_exit_code_when_failed_without_response(monkeypatch):
from hermes_cli.oneshot import run_oneshot
monkeypatch.setattr(
"hermes_cli.oneshot._run_agent",
lambda *_a, **_k: ("", {"failed": True, "partial": False}),
)
assert run_oneshot("hi") == 2
def test_oneshot_exit_code_zero_when_failed_with_error_text(monkeypatch, capsys):
from hermes_cli.oneshot import run_oneshot
monkeypatch.setattr(
"hermes_cli.oneshot._run_agent",
lambda *_a, **_k: (
"API call failed after 3 retries: HTTP 404: model not found",
{"failed": True, "partial": False},
),
)
assert run_oneshot("hi") == 0
assert "HTTP 404" in capsys.readouterr().out
def test_oneshot_reraises_keyboard_interrupt(monkeypatch):
_stub_plugin_discovery(monkeypatch)
import hermes_cli.oneshot as oneshot_mod
@ -806,9 +830,9 @@ def test_oneshot_wires_session_db_for_recall(monkeypatch):
self.stream_delta_callback = object()
self.tool_gen_callback = object()
def chat(self, prompt):
def run_conversation(self, prompt, **_kwargs):
captured["prompt"] = prompt
return "ok"
return {"final_response": "ok", "failed": False, "partial": False}
class FakeSessionDB:
def __new__(cls):
@ -852,7 +876,9 @@ def test_oneshot_wires_session_db_for_recall(monkeypatch):
mod("hermes_cli.tools_config", _get_platform_tools=lambda *_args, **_kwargs: {"session_search"}),
)
assert _run_agent("recall this") == "ok"
text, result = _run_agent("recall this")
assert text == "ok"
assert not result.get("failed")
assert captured["session_db"] is sentinel_db
assert captured["enabled_toolsets"] == ["session_search"]
assert captured["prompt"] == "recall this"

View file

@ -0,0 +1,56 @@
"""Regression: empty-body HTTP 4xx errors must still surface a real provider message.
Reported on Windows (#36109): an LLM API call returned HTTP 400 with an *empty*
parsed SDK ``body`` ({}), so ``_summarize_api_error`` fell through to the bare
``str(error)`` path and the user saw only "HTTP 400" with no provider detail.
The SDK leaves ``body`` empty in this case, but the underlying httpx
``response`` still carries the real payload in ``.text``. These tests lock the
contract: when ``body`` is empty, fall back to ``response.text`` (parsing a JSON
``error.message`` / ``message`` when present) so logs and CLI show the real
provider error. This is a diagnostic improvement and is platform-agnostic.
"""
from types import SimpleNamespace
from run_agent import AIAgent
def _make_empty_body_error(response_text: str, status_code: int = 400) -> Exception:
"""Mimic an OpenAI-SDK error whose parsed body is empty but whose httpx
response still holds the payload text."""
err = Exception("") # str(error) is empty/uninformative on this path
err.status_code = status_code
err.body = {} # empty dict — the #36109 trigger
err.response = SimpleNamespace(text=response_text)
return err
def test_empty_body_falls_back_to_response_json_error_message():
"""A JSON payload with error.message is surfaced (not a bare HTTP 400)."""
err = _make_empty_body_error(
'{"error": {"message": "model `foo` does not exist", "type": "invalid_request_error"}}'
)
summary = AIAgent._summarize_api_error(err)
assert "HTTP 400" in summary
assert "model `foo` does not exist" in summary
def test_empty_body_falls_back_to_raw_response_text_when_not_json():
"""A non-JSON response body is surfaced verbatim (truncated), not dropped."""
err = _make_empty_body_error("upstream connect error or disconnect/reset before headers")
summary = AIAgent._summarize_api_error(err)
assert "HTTP 400" in summary
assert "upstream connect error" in summary
def test_empty_body_fallback_redacts_secrets(monkeypatch):
"""The surfaced provider/proxy error body must pass through the secret
redactor a proxy echoing an API key in the error must not leak it into
final_response/logs (the empty-body path previously hid it as bare HTTP 400)."""
monkeypatch.setenv("HERMES_REDACT_SECRETS", "true")
err = _make_empty_body_error(
'{"error": {"message": "bad key: sk-proj-ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdef"}}'
)
summary = AIAgent._summarize_api_error(err)
assert "sk-proj-ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdef" not in summary