fix(agent,cli): surface empty-body API errors and fail oneshot exit code

When an LLM API call returns HTTP 4xx with an empty parsed SDK `body` ({}), `_summarize_api_error` fell through to a bare `str(error)`, so users saw only "HTTP 400" with no provider detail (reported on Windows in #36109). The SDK leaves `body` empty in this case, but the httpx `response` still carries the payload in `.text`. - run_agent.py `_summarize_api_error`: when `body` is empty, fall back to `response.text` — parse a JSON `error.message`/`message` when present, else surface the raw (truncated) body. Platform-agnostic diagnostics. - hermes_cli/oneshot.py: `hermes -z` now runs via `run_conversation` and returns exit code 2 when the run is failed/partial with no usable final response, so scripts can detect LLM failures (still 0 when a response — incl. an error summary as output — is produced). Tests: new tests/run_agent/test_summarize_api_error.py (empty-body JSON + raw text, RED/GREEN verified) + oneshot exit-code/`run_conversation` wiring tests. NOTE: #36109's original root cause (Windows "all providers return empty 400") is not reproducible on current main (heavy provider-transport churn since v0.15.1). This change does not claim to fix that root cause — it makes any empty-body API error LEGIBLE so a future occurrence shows the real provider message instead of a bare HTTP 400. Relates to #36109 (does not close it).
2026-07-01 12:02:05 +00:00 · 2026-06-28 12:16:52 +05:30 · 2026-06-28 12:16:52 +05:30 · 093f567f0d
commit 093f567f0d
parent c0b4a3438a
4 changed files with 125 additions and 14 deletions
--- a/hermes_cli/oneshot.py
+++ b/hermes_cli/oneshot.py
@ -178,11 +178,12 @@ def run_oneshot(
    devnull = open(os.devnull, "w", encoding="utf-8")

    response: Optional[str] = None
+    result: dict = {}
    failure: BaseException | None = None
    try:
        with redirect_stdout(devnull), redirect_stderr(devnull):
            try:
-                response = _run_agent(
+                response, result = _run_agent(
                    prompt,
                    model=model,
                    provider=provider,
@ -213,16 +214,20 @@ def run_oneshot(
        real_stderr.flush()
        return 1

+    if response:
+        real_stdout.write(response)
+        if not response.endswith("\n"):
+            real_stdout.write("\n")
+        real_stdout.flush()
+
+    if (result.get("failed") or result.get("partial")) and not (response or "").strip():
+        return 2
+
    if not (response or "").strip():
        real_stderr.write("hermes -z: no final response was produced; treating the run as failed.\n")
        real_stderr.flush()
        return 1

-    assert response is not None  # narrowed by the empty-response guard above
-    real_stdout.write(response)
-    if not response.endswith("\n"):
-        real_stdout.write("\n")
-    real_stdout.flush()
    return 0


@ -248,9 +253,9 @@ def _run_agent(
    provider: Optional[str] = None,
    toolsets: object = None,
    use_config_toolsets: bool = True,
-) -> str:
+) -> tuple[str, dict]:
    """Build an AIAgent exactly like a normal CLI chat turn would, then
-    run a single conversation.  Returns the final response string."""
+    run a single conversation.  Returns ``(final_response, run_result)``."""
    # Imports are local so they don't run when hermes is invoked for
    # other commands (keeps top-level CLI startup cheap).
    from hermes_cli.config import load_config
@ -364,7 +369,8 @@ def _run_agent(
    agent.stream_delta_callback = None
    agent.tool_gen_callback = None

-    return agent.chat(prompt) or ""
+    result = agent.run_conversation(prompt)
+    return (result.get("final_response") or "", result)


 def _oneshot_clarify_callback(question: str, choices=None) -> str:
--- a/run_agent.py
+++ b/run_agent.py
@ -1953,6 +1953,29 @@ class AIAgent:
                msg = AIAgent._coerce_api_error_detail(msg)
                return AIAgent._decorate_xai_entitlement_error(f"{prefix}{msg[:300]}")

+        # SDK may leave body empty while httpx still has the payload (#36109).
+        # Redact before returning: the raw provider/proxy error body is
+        # attacker-influenced and may echo Authorization / x-api-key / request
+        # JSON, which would otherwise leak into final_response + logs (this path
+        # widens exposure vs the old empty-body "HTTP 400" string).
+        response = getattr(error, "response", None)
+        if response is not None:
+            snippet = (getattr(response, "text", None) or "").strip()
+            if snippet:
+                status_code = getattr(error, "status_code", None)
+                prefix = f"HTTP {status_code}: " if status_code else ""
+                try:
+                    payload = json.loads(snippet)
+                except (json.JSONDecodeError, TypeError):
+                    payload = None
+                if isinstance(payload, dict):
+                    err = payload.get("error")
+                    if isinstance(err, dict) and err.get("message"):
+                        return redact_sensitive_text(f"{prefix}{str(err['message'])[:300]}")
+                    if payload.get("message"):
+                        return redact_sensitive_text(f"{prefix}{str(payload['message'])[:300]}")
+                return redact_sensitive_text(f"{prefix}{snippet[:300]}")
+
        # Fallback: truncate the raw string but give more room than 200 chars
        status_code = getattr(error, "status_code", None)
        prefix = f"HTTP {status_code}: " if status_code else ""
--- a/tests/hermes_cli/test_tui_resume_flow.py
+++ b/tests/hermes_cli/test_tui_resume_flow.py
@ -642,7 +642,7 @@ def test_oneshot_fails_closed_on_empty_final_response(monkeypatch, capsys):
    _stub_plugin_discovery(monkeypatch)
    import hermes_cli.oneshot as oneshot_mod

-    monkeypatch.setattr(oneshot_mod, "_run_agent", lambda *_args, **_kwargs: "")
+    monkeypatch.setattr(oneshot_mod, "_run_agent", lambda *_args, **_kwargs: ("", {}))

    assert oneshot_mod.run_oneshot("hello") == 1
    captured = capsys.readouterr()
@ -654,7 +654,7 @@ def test_oneshot_prints_nonempty_final_response(monkeypatch, capsys):
    _stub_plugin_discovery(monkeypatch)
    import hermes_cli.oneshot as oneshot_mod

-    monkeypatch.setattr(oneshot_mod, "_run_agent", lambda *_args, **_kwargs: "done")
+    monkeypatch.setattr(oneshot_mod, "_run_agent", lambda *_args, **_kwargs: ("done", {}))

    assert oneshot_mod.run_oneshot("hello") == 0
    captured = capsys.readouterr()
@ -678,6 +678,30 @@ def test_oneshot_fails_closed_on_agent_exception(monkeypatch, capsys):
    assert "not a TTY" in captured.err


+def test_oneshot_exit_code_when_failed_without_response(monkeypatch):
+    from hermes_cli.oneshot import run_oneshot
+
+    monkeypatch.setattr(
+        "hermes_cli.oneshot._run_agent",
+        lambda *_a, **_k: ("", {"failed": True, "partial": False}),
+    )
+    assert run_oneshot("hi") == 2
+
+
+def test_oneshot_exit_code_zero_when_failed_with_error_text(monkeypatch, capsys):
+    from hermes_cli.oneshot import run_oneshot
+
+    monkeypatch.setattr(
+        "hermes_cli.oneshot._run_agent",
+        lambda *_a, **_k: (
+            "API call failed after 3 retries: HTTP 404: model not found",
+            {"failed": True, "partial": False},
+        ),
+    )
+    assert run_oneshot("hi") == 0
+    assert "HTTP 404" in capsys.readouterr().out
+
+
 def test_oneshot_reraises_keyboard_interrupt(monkeypatch):
    _stub_plugin_discovery(monkeypatch)
    import hermes_cli.oneshot as oneshot_mod
@ -806,9 +830,9 @@ def test_oneshot_wires_session_db_for_recall(monkeypatch):
            self.stream_delta_callback = object()
            self.tool_gen_callback = object()

-        def chat(self, prompt):
+        def run_conversation(self, prompt, **_kwargs):
            captured["prompt"] = prompt
-            return "ok"
+            return {"final_response": "ok", "failed": False, "partial": False}

    class FakeSessionDB:
        def __new__(cls):
@ -852,7 +876,9 @@ def test_oneshot_wires_session_db_for_recall(monkeypatch):
        mod("hermes_cli.tools_config", _get_platform_tools=lambda *_args, **_kwargs: {"session_search"}),
    )

-    assert _run_agent("recall this") == "ok"
+    text, result = _run_agent("recall this")
+    assert text == "ok"
+    assert not result.get("failed")
    assert captured["session_db"] is sentinel_db
    assert captured["enabled_toolsets"] == ["session_search"]
    assert captured["prompt"] == "recall this"
--- a/tests/run_agent/test_summarize_api_error.py
+++ b/tests/run_agent/test_summarize_api_error.py
@ -0,0 +1,56 @@
+"""Regression: empty-body HTTP 4xx errors must still surface a real provider message.
+
+Reported on Windows (#36109): an LLM API call returned HTTP 400 with an *empty*
+parsed SDK ``body`` ({}), so ``_summarize_api_error`` fell through to the bare
+``str(error)`` path and the user saw only "HTTP 400" with no provider detail.
+The SDK leaves ``body`` empty in this case, but the underlying httpx
+``response`` still carries the real payload in ``.text``. These tests lock the
+contract: when ``body`` is empty, fall back to ``response.text`` (parsing a JSON
+``error.message`` / ``message`` when present) so logs and CLI show the real
+provider error. This is a diagnostic improvement and is platform-agnostic.
+"""
+
+from types import SimpleNamespace
+
+from run_agent import AIAgent
+
+
+def _make_empty_body_error(response_text: str, status_code: int = 400) -> Exception:
+    """Mimic an OpenAI-SDK error whose parsed body is empty but whose httpx
+    response still holds the payload text."""
+    err = Exception("")  # str(error) is empty/uninformative on this path
+    err.status_code = status_code
+    err.body = {}  # empty dict — the #36109 trigger
+    err.response = SimpleNamespace(text=response_text)
+    return err
+
+
+def test_empty_body_falls_back_to_response_json_error_message():
+    """A JSON payload with error.message is surfaced (not a bare HTTP 400)."""
+    err = _make_empty_body_error(
+        '{"error": {"message": "model `foo` does not exist", "type": "invalid_request_error"}}'
+    )
+    summary = AIAgent._summarize_api_error(err)
+    assert "HTTP 400" in summary
+    assert "model `foo` does not exist" in summary
+
+
+def test_empty_body_falls_back_to_raw_response_text_when_not_json():
+    """A non-JSON response body is surfaced verbatim (truncated), not dropped."""
+    err = _make_empty_body_error("upstream connect error or disconnect/reset before headers")
+    summary = AIAgent._summarize_api_error(err)
+    assert "HTTP 400" in summary
+    assert "upstream connect error" in summary
+
+
+def test_empty_body_fallback_redacts_secrets(monkeypatch):
+    """The surfaced provider/proxy error body must pass through the secret
+    redactor — a proxy echoing an API key in the error must not leak it into
+    final_response/logs (the empty-body path previously hid it as bare HTTP 400)."""
+    monkeypatch.setenv("HERMES_REDACT_SECRETS", "true")
+    err = _make_empty_body_error(
+        '{"error": {"message": "bad key: sk-proj-ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdef"}}'
+    )
+    summary = AIAgent._summarize_api_error(err)
+    assert "sk-proj-ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdef" not in summary
+