From 5e774de76e401b3d528ae76409197f030b81e1e7 Mon Sep 17 00:00:00 2001 From: Coy Geek <65363919+coygeek@users.noreply.github.com> Date: Tue, 2 Jun 2026 17:00:24 -0700 Subject: [PATCH] fix(api-server): redact provider errors at HTTP boundary Force API-server error text through the existing secret redactor before returning OpenAI-compatible errors, response fallback text, response snapshots, and run failure events. This prevents credential-shaped provider failure text from crossing the API-server boundary while preserving debuggable sanitized messages. --- gateway/platforms/api_server.py | 40 ++++++++++++++--------- tests/gateway/test_api_server.py | 56 ++++++++++++++++++++++++++++++++ 2 files changed, 81 insertions(+), 15 deletions(-) diff --git a/gateway/platforms/api_server.py b/gateway/platforms/api_server.py index 0025277d457..b6efab767d5 100644 --- a/gateway/platforms/api_server.py +++ b/gateway/platforms/api_server.py @@ -58,6 +58,7 @@ from gateway.platforms.base import ( SendResult, is_network_accessible, ) +from agent.redact import redact_sensitive_text logger = logging.getLogger(__name__) @@ -571,11 +572,19 @@ else: cors_middleware = None # type: ignore[assignment] +def _redact_api_error_text(value: Any, *, limit: int | None = None) -> str: + """Redact API-bound error text before it crosses the HTTP boundary.""" + redacted = redact_sensitive_text(str(value), force=True) + if limit is not None: + return redacted[:limit] + return redacted + + def _openai_error(message: str, err_type: str = "invalid_request_error", param: str = None, code: str = None) -> Dict[str, Any]: """OpenAI-style error envelope.""" return { "error": { - "message": message, + "message": _redact_api_error_text(message), "type": err_type, "param": param, "code": code, @@ -2055,7 +2064,8 @@ class APIServerAdapter(BasePlatformAdapter): is_partial = bool(result.get("partial")) is_failed = bool(result.get("failed")) completed = bool(result.get("completed", True)) - err_msg = result.get("error") + raw_err_msg = result.get("error") + err_msg = _redact_api_error_text(raw_err_msg) if raw_err_msg else raw_err_msg # Decide finish_reason. OpenAI uses "length" for truncation, "stop" # for normal completion, and downstream SDKs accept "error" / custom @@ -2126,7 +2136,7 @@ class APIServerAdapter(BasePlatformAdapter): response_headers["X-Hermes-Completed"] = "false" response_headers["X-Hermes-Partial"] = "true" if is_partial else "false" if err_msg: - response_headers["X-Hermes-Error"] = err_msg[:200] + response_headers["X-Hermes-Error"] = _redact_api_error_text(err_msg, limit=200) return web.json_response(response_data, headers=response_headers) @@ -2744,10 +2754,10 @@ class APIServerAdapter(BasePlatformAdapter): if agent_final and not final_response_text: final_response_text = agent_final if isinstance(result, dict) and result.get("error") and not final_response_text: - agent_error = result["error"] + agent_error = _redact_api_error_text(result["error"]) except Exception as e: # noqa: BLE001 logger.error("Error running agent for streaming responses: %s", e, exc_info=True) - agent_error = str(e) + agent_error = _redact_api_error_text(e) # Close the message item if it was opened final_response_text = "".join(final_text_parts) or final_response_text @@ -2809,14 +2819,14 @@ class APIServerAdapter(BasePlatformAdapter): "type": "message", "role": "assistant", "content": [ - {"type": "output_text", "text": final_response_text or (agent_error or "")} + {"type": "output_text", "text": final_response_text or (_redact_api_error_text(agent_error) if agent_error else "")} ], }) if agent_error: failed_env = _envelope("failed") failed_env["output"] = final_items - failed_env["error"] = {"message": agent_error, "type": "server_error"} + failed_env["error"] = {"message": _redact_api_error_text(agent_error), "type": "server_error"} failed_env["usage"] = { "input_tokens": usage.get("input_tokens", 0), "output_tokens": usage.get("output_tokens", 0), @@ -2827,7 +2837,7 @@ class APIServerAdapter(BasePlatformAdapter): if final_response_text or agent_error: _failed_history.append({ "role": "assistant", - "content": final_response_text or agent_error, + "content": final_response_text or _redact_api_error_text(agent_error), }) _persist_response_snapshot( failed_env, @@ -2902,11 +2912,11 @@ class APIServerAdapter(BasePlatformAdapter): # get a TransferEncodingError from incomplete chunked encoding. import traceback as _tb _persist_incomplete_if_needed() - agent_error = _tb.format_exc() + agent_error = _redact_api_error_text(_tb.format_exc()) try: failed_env = _envelope("failed") failed_env["output"] = list(emitted_items) - failed_env["error"] = {"message": str(_exc)[:500], "type": "server_error"} + failed_env["error"] = {"message": _redact_api_error_text(_exc, limit=500), "type": "server_error"} failed_env["usage"] = { "input_tokens": usage.get("input_tokens", 0), "output_tokens": usage.get("output_tokens", 0), @@ -3151,7 +3161,7 @@ class APIServerAdapter(BasePlatformAdapter): final_response = result.get("final_response", "") if not final_response: - final_response = result.get("error", "(No response generated)") + final_response = _redact_api_error_text(result.get("error", "(No response generated)")) response_id = f"resp_{uuid.uuid4().hex[:28]}" created_at = int(time.time()) @@ -3666,7 +3676,7 @@ class APIServerAdapter(BasePlatformAdapter): # Final assistant message final = result.get("final_response", "") if not final: - final = result.get("error", "(No response generated)") + final = _redact_api_error_text(result.get("error", "(No response generated)")) items.append({ "type": "message", @@ -4084,7 +4094,7 @@ class APIServerAdapter(BasePlatformAdapter): # 401/400 return failed=True instead of raising, so the except # block below never fires — issue #15561). if isinstance(result, dict) and result.get("failed"): - error_msg = result.get("error") or "agent run failed" + error_msg = _redact_api_error_text(result.get("error") or "agent run failed") q.put_nowait({ "event": "run.failed", "run_id": run_id, @@ -4133,7 +4143,7 @@ class APIServerAdapter(BasePlatformAdapter): self._set_run_status( run_id, "failed", - error=str(exc), + error=_redact_api_error_text(exc), last_event="run.failed", ) try: @@ -4141,7 +4151,7 @@ class APIServerAdapter(BasePlatformAdapter): "event": "run.failed", "run_id": run_id, "timestamp": time.time(), - "error": str(exc), + "error": _redact_api_error_text(exc), }) except Exception: pass diff --git a/tests/gateway/test_api_server.py b/tests/gateway/test_api_server.py index a941d4afc93..752e3931bb9 100644 --- a/tests/gateway/test_api_server.py +++ b/tests/gateway/test_api_server.py @@ -2064,6 +2064,33 @@ class TestResponsesEndpoint: assert resp.status == 500 + @pytest.mark.asyncio + async def test_result_error_fallback_is_redacted(self, adapter): + raw_secret = "sk-responses-leak-1234567890" + app = _create_app(adapter) + async with TestClient(TestServer(app)) as cli: + with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run: + mock_run.return_value = ( + { + "final_response": "", + "error": f"provider auth failed OPENAI_API_KEY={raw_secret}", + "messages": [], + "api_calls": 1, + }, + {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}, + ) + resp = await cli.post( + "/v1/responses", + json={"model": "hermes-agent", "input": "Hello"}, + ) + + assert resp.status == 200 + data = await resp.json() + body = json.dumps(data) + assert raw_secret not in body + assert "OPENAI_API_KEY=" in body + assert data["output"][0]["content"][0]["text"] != f"provider auth failed OPENAI_API_KEY={raw_secret}" + @pytest.mark.asyncio async def test_invalid_input_type_returns_400(self, adapter): app = _create_app(adapter) @@ -2967,6 +2994,35 @@ class TestChatCompletionsAgentIncomplete: assert resp.headers.get("X-Hermes-Completed") == "false" assert resp.headers.get("X-Hermes-Partial") == "true" + @pytest.mark.asyncio + async def test_hard_failure_redacts_secret_like_error_text(self, adapter): + raw_secret = "sk-api-server-leak-1234567890" + mock_result = { + "final_response": "", + "completed": False, + "partial": False, + "failed": True, + "error": f"provider auth failed OPENAI_API_KEY={raw_secret}", + "messages": [], + "api_calls": 1, + } + app = _create_app(adapter) + async with TestClient(TestServer(app)) as cli: + with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run: + mock_run.return_value = (mock_result, {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}) + resp = await cli.post( + "/v1/chat/completions", + json={"model": "hermes-agent", "messages": [{"role": "user", "content": "hello"}]}, + ) + + assert resp.status == 502 + data = await resp.json() + body = json.dumps(data) + assert raw_secret not in body + assert raw_secret not in resp.headers.get("X-Hermes-Error", "") + assert "OPENAI_API_KEY=" in body + assert data["error"]["hermes"]["failed"] is True + @pytest.mark.asyncio async def test_failure_with_no_text_returns_502_error_envelope(self, adapter): """No usable assistant text + failure → 502 with OpenAI error envelope.