mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-07-01 12:02:05 +00:00
fix(api-server): redact provider errors at HTTP boundary
Force API-server error text through the existing secret redactor before returning OpenAI-compatible errors, response fallback text, response snapshots, and run failure events. This prevents credential-shaped provider failure text from crossing the API-server boundary while preserving debuggable sanitized messages.
This commit is contained in:
parent
d2fda5925d
commit
5e774de76e
2 changed files with 81 additions and 15 deletions
|
|
@ -58,6 +58,7 @@ from gateway.platforms.base import (
|
|||
SendResult,
|
||||
is_network_accessible,
|
||||
)
|
||||
from agent.redact import redact_sensitive_text
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
@ -571,11 +572,19 @@ else:
|
|||
cors_middleware = None # type: ignore[assignment]
|
||||
|
||||
|
||||
def _redact_api_error_text(value: Any, *, limit: int | None = None) -> str:
|
||||
"""Redact API-bound error text before it crosses the HTTP boundary."""
|
||||
redacted = redact_sensitive_text(str(value), force=True)
|
||||
if limit is not None:
|
||||
return redacted[:limit]
|
||||
return redacted
|
||||
|
||||
|
||||
def _openai_error(message: str, err_type: str = "invalid_request_error", param: str = None, code: str = None) -> Dict[str, Any]:
|
||||
"""OpenAI-style error envelope."""
|
||||
return {
|
||||
"error": {
|
||||
"message": message,
|
||||
"message": _redact_api_error_text(message),
|
||||
"type": err_type,
|
||||
"param": param,
|
||||
"code": code,
|
||||
|
|
@ -2055,7 +2064,8 @@ class APIServerAdapter(BasePlatformAdapter):
|
|||
is_partial = bool(result.get("partial"))
|
||||
is_failed = bool(result.get("failed"))
|
||||
completed = bool(result.get("completed", True))
|
||||
err_msg = result.get("error")
|
||||
raw_err_msg = result.get("error")
|
||||
err_msg = _redact_api_error_text(raw_err_msg) if raw_err_msg else raw_err_msg
|
||||
|
||||
# Decide finish_reason. OpenAI uses "length" for truncation, "stop"
|
||||
# for normal completion, and downstream SDKs accept "error" / custom
|
||||
|
|
@ -2126,7 +2136,7 @@ class APIServerAdapter(BasePlatformAdapter):
|
|||
response_headers["X-Hermes-Completed"] = "false"
|
||||
response_headers["X-Hermes-Partial"] = "true" if is_partial else "false"
|
||||
if err_msg:
|
||||
response_headers["X-Hermes-Error"] = err_msg[:200]
|
||||
response_headers["X-Hermes-Error"] = _redact_api_error_text(err_msg, limit=200)
|
||||
|
||||
return web.json_response(response_data, headers=response_headers)
|
||||
|
||||
|
|
@ -2744,10 +2754,10 @@ class APIServerAdapter(BasePlatformAdapter):
|
|||
if agent_final and not final_response_text:
|
||||
final_response_text = agent_final
|
||||
if isinstance(result, dict) and result.get("error") and not final_response_text:
|
||||
agent_error = result["error"]
|
||||
agent_error = _redact_api_error_text(result["error"])
|
||||
except Exception as e: # noqa: BLE001
|
||||
logger.error("Error running agent for streaming responses: %s", e, exc_info=True)
|
||||
agent_error = str(e)
|
||||
agent_error = _redact_api_error_text(e)
|
||||
|
||||
# Close the message item if it was opened
|
||||
final_response_text = "".join(final_text_parts) or final_response_text
|
||||
|
|
@ -2809,14 +2819,14 @@ class APIServerAdapter(BasePlatformAdapter):
|
|||
"type": "message",
|
||||
"role": "assistant",
|
||||
"content": [
|
||||
{"type": "output_text", "text": final_response_text or (agent_error or "")}
|
||||
{"type": "output_text", "text": final_response_text or (_redact_api_error_text(agent_error) if agent_error else "")}
|
||||
],
|
||||
})
|
||||
|
||||
if agent_error:
|
||||
failed_env = _envelope("failed")
|
||||
failed_env["output"] = final_items
|
||||
failed_env["error"] = {"message": agent_error, "type": "server_error"}
|
||||
failed_env["error"] = {"message": _redact_api_error_text(agent_error), "type": "server_error"}
|
||||
failed_env["usage"] = {
|
||||
"input_tokens": usage.get("input_tokens", 0),
|
||||
"output_tokens": usage.get("output_tokens", 0),
|
||||
|
|
@ -2827,7 +2837,7 @@ class APIServerAdapter(BasePlatformAdapter):
|
|||
if final_response_text or agent_error:
|
||||
_failed_history.append({
|
||||
"role": "assistant",
|
||||
"content": final_response_text or agent_error,
|
||||
"content": final_response_text or _redact_api_error_text(agent_error),
|
||||
})
|
||||
_persist_response_snapshot(
|
||||
failed_env,
|
||||
|
|
@ -2902,11 +2912,11 @@ class APIServerAdapter(BasePlatformAdapter):
|
|||
# get a TransferEncodingError from incomplete chunked encoding.
|
||||
import traceback as _tb
|
||||
_persist_incomplete_if_needed()
|
||||
agent_error = _tb.format_exc()
|
||||
agent_error = _redact_api_error_text(_tb.format_exc())
|
||||
try:
|
||||
failed_env = _envelope("failed")
|
||||
failed_env["output"] = list(emitted_items)
|
||||
failed_env["error"] = {"message": str(_exc)[:500], "type": "server_error"}
|
||||
failed_env["error"] = {"message": _redact_api_error_text(_exc, limit=500), "type": "server_error"}
|
||||
failed_env["usage"] = {
|
||||
"input_tokens": usage.get("input_tokens", 0),
|
||||
"output_tokens": usage.get("output_tokens", 0),
|
||||
|
|
@ -3151,7 +3161,7 @@ class APIServerAdapter(BasePlatformAdapter):
|
|||
|
||||
final_response = result.get("final_response", "")
|
||||
if not final_response:
|
||||
final_response = result.get("error", "(No response generated)")
|
||||
final_response = _redact_api_error_text(result.get("error", "(No response generated)"))
|
||||
|
||||
response_id = f"resp_{uuid.uuid4().hex[:28]}"
|
||||
created_at = int(time.time())
|
||||
|
|
@ -3666,7 +3676,7 @@ class APIServerAdapter(BasePlatformAdapter):
|
|||
# Final assistant message
|
||||
final = result.get("final_response", "")
|
||||
if not final:
|
||||
final = result.get("error", "(No response generated)")
|
||||
final = _redact_api_error_text(result.get("error", "(No response generated)"))
|
||||
|
||||
items.append({
|
||||
"type": "message",
|
||||
|
|
@ -4084,7 +4094,7 @@ class APIServerAdapter(BasePlatformAdapter):
|
|||
# 401/400 return failed=True instead of raising, so the except
|
||||
# block below never fires — issue #15561).
|
||||
if isinstance(result, dict) and result.get("failed"):
|
||||
error_msg = result.get("error") or "agent run failed"
|
||||
error_msg = _redact_api_error_text(result.get("error") or "agent run failed")
|
||||
q.put_nowait({
|
||||
"event": "run.failed",
|
||||
"run_id": run_id,
|
||||
|
|
@ -4133,7 +4143,7 @@ class APIServerAdapter(BasePlatformAdapter):
|
|||
self._set_run_status(
|
||||
run_id,
|
||||
"failed",
|
||||
error=str(exc),
|
||||
error=_redact_api_error_text(exc),
|
||||
last_event="run.failed",
|
||||
)
|
||||
try:
|
||||
|
|
@ -4141,7 +4151,7 @@ class APIServerAdapter(BasePlatformAdapter):
|
|||
"event": "run.failed",
|
||||
"run_id": run_id,
|
||||
"timestamp": time.time(),
|
||||
"error": str(exc),
|
||||
"error": _redact_api_error_text(exc),
|
||||
})
|
||||
except Exception:
|
||||
pass
|
||||
|
|
|
|||
|
|
@ -2064,6 +2064,33 @@ class TestResponsesEndpoint:
|
|||
|
||||
assert resp.status == 500
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_result_error_fallback_is_redacted(self, adapter):
|
||||
raw_secret = "sk-responses-leak-1234567890"
|
||||
app = _create_app(adapter)
|
||||
async with TestClient(TestServer(app)) as cli:
|
||||
with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run:
|
||||
mock_run.return_value = (
|
||||
{
|
||||
"final_response": "",
|
||||
"error": f"provider auth failed OPENAI_API_KEY={raw_secret}",
|
||||
"messages": [],
|
||||
"api_calls": 1,
|
||||
},
|
||||
{"input_tokens": 0, "output_tokens": 0, "total_tokens": 0},
|
||||
)
|
||||
resp = await cli.post(
|
||||
"/v1/responses",
|
||||
json={"model": "hermes-agent", "input": "Hello"},
|
||||
)
|
||||
|
||||
assert resp.status == 200
|
||||
data = await resp.json()
|
||||
body = json.dumps(data)
|
||||
assert raw_secret not in body
|
||||
assert "OPENAI_API_KEY=" in body
|
||||
assert data["output"][0]["content"][0]["text"] != f"provider auth failed OPENAI_API_KEY={raw_secret}"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_invalid_input_type_returns_400(self, adapter):
|
||||
app = _create_app(adapter)
|
||||
|
|
@ -2967,6 +2994,35 @@ class TestChatCompletionsAgentIncomplete:
|
|||
assert resp.headers.get("X-Hermes-Completed") == "false"
|
||||
assert resp.headers.get("X-Hermes-Partial") == "true"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_hard_failure_redacts_secret_like_error_text(self, adapter):
|
||||
raw_secret = "sk-api-server-leak-1234567890"
|
||||
mock_result = {
|
||||
"final_response": "",
|
||||
"completed": False,
|
||||
"partial": False,
|
||||
"failed": True,
|
||||
"error": f"provider auth failed OPENAI_API_KEY={raw_secret}",
|
||||
"messages": [],
|
||||
"api_calls": 1,
|
||||
}
|
||||
app = _create_app(adapter)
|
||||
async with TestClient(TestServer(app)) as cli:
|
||||
with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run:
|
||||
mock_run.return_value = (mock_result, {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0})
|
||||
resp = await cli.post(
|
||||
"/v1/chat/completions",
|
||||
json={"model": "hermes-agent", "messages": [{"role": "user", "content": "hello"}]},
|
||||
)
|
||||
|
||||
assert resp.status == 502
|
||||
data = await resp.json()
|
||||
body = json.dumps(data)
|
||||
assert raw_secret not in body
|
||||
assert raw_secret not in resp.headers.get("X-Hermes-Error", "")
|
||||
assert "OPENAI_API_KEY=" in body
|
||||
assert data["error"]["hermes"]["failed"] is True
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_failure_with_no_text_returns_502_error_envelope(self, adapter):
|
||||
"""No usable assistant text + failure → 502 with OpenAI error envelope.
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue