diff --git a/hermes_cli/oneshot.py b/hermes_cli/oneshot.py index f66d71c62e6..38d8e5e84bc 100644 --- a/hermes_cli/oneshot.py +++ b/hermes_cli/oneshot.py @@ -178,11 +178,12 @@ def run_oneshot( devnull = open(os.devnull, "w", encoding="utf-8") response: Optional[str] = None + result: dict = {} failure: BaseException | None = None try: with redirect_stdout(devnull), redirect_stderr(devnull): try: - response = _run_agent( + response, result = _run_agent( prompt, model=model, provider=provider, @@ -213,16 +214,20 @@ def run_oneshot( real_stderr.flush() return 1 + if response: + real_stdout.write(response) + if not response.endswith("\n"): + real_stdout.write("\n") + real_stdout.flush() + + if (result.get("failed") or result.get("partial")) and not (response or "").strip(): + return 2 + if not (response or "").strip(): real_stderr.write("hermes -z: no final response was produced; treating the run as failed.\n") real_stderr.flush() return 1 - assert response is not None # narrowed by the empty-response guard above - real_stdout.write(response) - if not response.endswith("\n"): - real_stdout.write("\n") - real_stdout.flush() return 0 @@ -248,9 +253,9 @@ def _run_agent( provider: Optional[str] = None, toolsets: object = None, use_config_toolsets: bool = True, -) -> str: +) -> tuple[str, dict]: """Build an AIAgent exactly like a normal CLI chat turn would, then - run a single conversation. Returns the final response string.""" + run a single conversation. Returns ``(final_response, run_result)``.""" # Imports are local so they don't run when hermes is invoked for # other commands (keeps top-level CLI startup cheap). from hermes_cli.config import load_config @@ -364,7 +369,8 @@ def _run_agent( agent.stream_delta_callback = None agent.tool_gen_callback = None - return agent.chat(prompt) or "" + result = agent.run_conversation(prompt) + return (result.get("final_response") or "", result) def _oneshot_clarify_callback(question: str, choices=None) -> str: diff --git a/run_agent.py b/run_agent.py index 981fbd32398..6a5fe32d29b 100644 --- a/run_agent.py +++ b/run_agent.py @@ -1953,6 +1953,29 @@ class AIAgent: msg = AIAgent._coerce_api_error_detail(msg) return AIAgent._decorate_xai_entitlement_error(f"{prefix}{msg[:300]}") + # SDK may leave body empty while httpx still has the payload (#36109). + # Redact before returning: the raw provider/proxy error body is + # attacker-influenced and may echo Authorization / x-api-key / request + # JSON, which would otherwise leak into final_response + logs (this path + # widens exposure vs the old empty-body "HTTP 400" string). + response = getattr(error, "response", None) + if response is not None: + snippet = (getattr(response, "text", None) or "").strip() + if snippet: + status_code = getattr(error, "status_code", None) + prefix = f"HTTP {status_code}: " if status_code else "" + try: + payload = json.loads(snippet) + except (json.JSONDecodeError, TypeError): + payload = None + if isinstance(payload, dict): + err = payload.get("error") + if isinstance(err, dict) and err.get("message"): + return redact_sensitive_text(f"{prefix}{str(err['message'])[:300]}") + if payload.get("message"): + return redact_sensitive_text(f"{prefix}{str(payload['message'])[:300]}") + return redact_sensitive_text(f"{prefix}{snippet[:300]}") + # Fallback: truncate the raw string but give more room than 200 chars status_code = getattr(error, "status_code", None) prefix = f"HTTP {status_code}: " if status_code else "" diff --git a/tests/hermes_cli/test_tui_resume_flow.py b/tests/hermes_cli/test_tui_resume_flow.py index ad8ffbe79b8..2b6305ac8a3 100644 --- a/tests/hermes_cli/test_tui_resume_flow.py +++ b/tests/hermes_cli/test_tui_resume_flow.py @@ -642,7 +642,7 @@ def test_oneshot_fails_closed_on_empty_final_response(monkeypatch, capsys): _stub_plugin_discovery(monkeypatch) import hermes_cli.oneshot as oneshot_mod - monkeypatch.setattr(oneshot_mod, "_run_agent", lambda *_args, **_kwargs: "") + monkeypatch.setattr(oneshot_mod, "_run_agent", lambda *_args, **_kwargs: ("", {})) assert oneshot_mod.run_oneshot("hello") == 1 captured = capsys.readouterr() @@ -654,7 +654,7 @@ def test_oneshot_prints_nonempty_final_response(monkeypatch, capsys): _stub_plugin_discovery(monkeypatch) import hermes_cli.oneshot as oneshot_mod - monkeypatch.setattr(oneshot_mod, "_run_agent", lambda *_args, **_kwargs: "done") + monkeypatch.setattr(oneshot_mod, "_run_agent", lambda *_args, **_kwargs: ("done", {})) assert oneshot_mod.run_oneshot("hello") == 0 captured = capsys.readouterr() @@ -678,6 +678,30 @@ def test_oneshot_fails_closed_on_agent_exception(monkeypatch, capsys): assert "not a TTY" in captured.err +def test_oneshot_exit_code_when_failed_without_response(monkeypatch): + from hermes_cli.oneshot import run_oneshot + + monkeypatch.setattr( + "hermes_cli.oneshot._run_agent", + lambda *_a, **_k: ("", {"failed": True, "partial": False}), + ) + assert run_oneshot("hi") == 2 + + +def test_oneshot_exit_code_zero_when_failed_with_error_text(monkeypatch, capsys): + from hermes_cli.oneshot import run_oneshot + + monkeypatch.setattr( + "hermes_cli.oneshot._run_agent", + lambda *_a, **_k: ( + "API call failed after 3 retries: HTTP 404: model not found", + {"failed": True, "partial": False}, + ), + ) + assert run_oneshot("hi") == 0 + assert "HTTP 404" in capsys.readouterr().out + + def test_oneshot_reraises_keyboard_interrupt(monkeypatch): _stub_plugin_discovery(monkeypatch) import hermes_cli.oneshot as oneshot_mod @@ -806,9 +830,9 @@ def test_oneshot_wires_session_db_for_recall(monkeypatch): self.stream_delta_callback = object() self.tool_gen_callback = object() - def chat(self, prompt): + def run_conversation(self, prompt, **_kwargs): captured["prompt"] = prompt - return "ok" + return {"final_response": "ok", "failed": False, "partial": False} class FakeSessionDB: def __new__(cls): @@ -852,7 +876,9 @@ def test_oneshot_wires_session_db_for_recall(monkeypatch): mod("hermes_cli.tools_config", _get_platform_tools=lambda *_args, **_kwargs: {"session_search"}), ) - assert _run_agent("recall this") == "ok" + text, result = _run_agent("recall this") + assert text == "ok" + assert not result.get("failed") assert captured["session_db"] is sentinel_db assert captured["enabled_toolsets"] == ["session_search"] assert captured["prompt"] == "recall this" diff --git a/tests/run_agent/test_summarize_api_error.py b/tests/run_agent/test_summarize_api_error.py new file mode 100644 index 00000000000..6739d8e48e8 --- /dev/null +++ b/tests/run_agent/test_summarize_api_error.py @@ -0,0 +1,56 @@ +"""Regression: empty-body HTTP 4xx errors must still surface a real provider message. + +Reported on Windows (#36109): an LLM API call returned HTTP 400 with an *empty* +parsed SDK ``body`` ({}), so ``_summarize_api_error`` fell through to the bare +``str(error)`` path and the user saw only "HTTP 400" with no provider detail. +The SDK leaves ``body`` empty in this case, but the underlying httpx +``response`` still carries the real payload in ``.text``. These tests lock the +contract: when ``body`` is empty, fall back to ``response.text`` (parsing a JSON +``error.message`` / ``message`` when present) so logs and CLI show the real +provider error. This is a diagnostic improvement and is platform-agnostic. +""" + +from types import SimpleNamespace + +from run_agent import AIAgent + + +def _make_empty_body_error(response_text: str, status_code: int = 400) -> Exception: + """Mimic an OpenAI-SDK error whose parsed body is empty but whose httpx + response still holds the payload text.""" + err = Exception("") # str(error) is empty/uninformative on this path + err.status_code = status_code + err.body = {} # empty dict — the #36109 trigger + err.response = SimpleNamespace(text=response_text) + return err + + +def test_empty_body_falls_back_to_response_json_error_message(): + """A JSON payload with error.message is surfaced (not a bare HTTP 400).""" + err = _make_empty_body_error( + '{"error": {"message": "model `foo` does not exist", "type": "invalid_request_error"}}' + ) + summary = AIAgent._summarize_api_error(err) + assert "HTTP 400" in summary + assert "model `foo` does not exist" in summary + + +def test_empty_body_falls_back_to_raw_response_text_when_not_json(): + """A non-JSON response body is surfaced verbatim (truncated), not dropped.""" + err = _make_empty_body_error("upstream connect error or disconnect/reset before headers") + summary = AIAgent._summarize_api_error(err) + assert "HTTP 400" in summary + assert "upstream connect error" in summary + + +def test_empty_body_fallback_redacts_secrets(monkeypatch): + """The surfaced provider/proxy error body must pass through the secret + redactor — a proxy echoing an API key in the error must not leak it into + final_response/logs (the empty-body path previously hid it as bare HTTP 400).""" + monkeypatch.setenv("HERMES_REDACT_SECRETS", "true") + err = _make_empty_body_error( + '{"error": {"message": "bad key: sk-proj-ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdef"}}' + ) + summary = AIAgent._summarize_api_error(err) + assert "sk-proj-ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdef" not in summary +