mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-30 06:41:51 +00:00
fix(security): redact credentials before persistence in session capture
Two-layer redaction at the persistence boundary so credentials never reach
state.db, session_*.json, or compression:
1. agent/chat_completion_helpers.py :: build_assistant_message
- Redact assistant content before the message dict is constructed
(catches PATs / API keys the model inlines into natural language)
- Redact tool_call.function.arguments at the same site (catches secrets
inlined into tool args, e.g. terminal command=curl -H 'Authorization: ...')
Tool execution uses the raw API response object, not this dict, so
redacting the persisted shape is safe.
2. run_agent.py :: _save_session_log
- Add _redact_message_content() static helper that handles both string
content and OpenAI/Anthropic multimodal list-of-parts (image parts
pass through untouched, only text/content fields are redacted)
- Apply to every message + the cached system prompt before writing
session_*.json
Both layers respect HERMES_REDACT_SECRETS via redact_sensitive_text —
no-op when disabled.
Tests (TestSaveSessionLogRedactsSecrets, 4 cases):
- api key in tool content
- api key in user message
- api key in system prompt
- multimodal list-of-parts (image part preserved, text redacted)
Tests use an autouse fixture to force _REDACT_ENABLED=True because the
hermetic conftest defaults the env var to false.
Salvaged from PR #24758 by @vgocoder (build_assistant_message + session_log)
+ PR #19855 by @liuhao1024 (multimodal list helper, system_prompt redaction).
Kept only the redaction concern from #19855; its unrelated whatsapp npm
timeout + PATCH_SCHEMA changes are out of scope and dropped.
Refs #19798 (PAT leak via assistant inline mention), #19845 (session capture
credential leak).
Co-authored-by: liuhao1024 <liuhao03@bilibili.com>
Co-authored-by: teknium1 <127238744+teknium1@users.noreply.github.com>
This commit is contained in:
parent
243ebc7a61
commit
dcc163ee28
3 changed files with 133 additions and 1 deletion
|
|
@ -581,6 +581,17 @@ def build_assistant_message(agent, assistant_message, finish_reason: str) -> dic
|
|||
if isinstance(_san_content, str) and _san_content:
|
||||
_san_content = agent._strip_think_blocks(_san_content).strip()
|
||||
|
||||
# Defence-in-depth: redact credentials (PATs, API keys, Bearer tokens)
|
||||
# from assistant content BEFORE the message enters conversation history.
|
||||
# If the model accidentally inlines a secret in its natural-language
|
||||
# response, catch it here at the persistence boundary so it never
|
||||
# reaches state.db, session_*.json, gateway delivery, or compression.
|
||||
# Respects HERMES_REDACT_SECRETS via redact_sensitive_text — no-op
|
||||
# when disabled. (#19798)
|
||||
if isinstance(_san_content, str) and _san_content:
|
||||
from agent.redact import redact_sensitive_text
|
||||
_san_content = redact_sensitive_text(_san_content)
|
||||
|
||||
msg = {
|
||||
"role": "assistant",
|
||||
"content": _san_content,
|
||||
|
|
@ -702,6 +713,18 @@ def build_assistant_message(agent, assistant_message, finish_reason: str) -> dic
|
|||
"arguments": tool_call.function.arguments
|
||||
},
|
||||
}
|
||||
# Defence-in-depth: redact credentials from tool call arguments
|
||||
# before they enter conversation history. Tool execution uses the
|
||||
# raw API response object, not this dict, so redacting the
|
||||
# persisted shape is safe and only affects storage. Catches the
|
||||
# case where a model accidentally inlines a secret into a tool
|
||||
# call (e.g. `terminal(command="curl -H 'Authorization: Bearer
|
||||
# sk-...'")`). (#19798)
|
||||
if isinstance(tc_dict["function"]["arguments"], str):
|
||||
from agent.redact import redact_sensitive_text
|
||||
tc_dict["function"]["arguments"] = redact_sensitive_text(
|
||||
tc_dict["function"]["arguments"]
|
||||
)
|
||||
# Preserve extra_content (e.g. Gemini thought_signature) so it
|
||||
# is sent back on subsequent API calls. Without this, Gemini 3
|
||||
# thinking models reject the request with a 400 error.
|
||||
|
|
|
|||
41
run_agent.py
41
run_agent.py
|
|
@ -124,6 +124,7 @@ from agent.memory_manager import StreamingContextScrubber, build_memory_context_
|
|||
from agent.think_scrubber import StreamingThinkScrubber
|
||||
from agent.retry_utils import jittered_backoff
|
||||
from agent.error_classifier import classify_api_error, FailoverReason
|
||||
from agent.redact import redact_sensitive_text
|
||||
from agent.prompt_builder import (
|
||||
DEFAULT_AGENT_IDENTITY, PLATFORM_HINTS,
|
||||
MEMORY_GUIDANCE, SESSION_SEARCH_GUIDANCE, SKILLS_GUIDANCE,
|
||||
|
|
@ -1546,6 +1547,36 @@ class AIAgent:
|
|||
content = re.sub(r'(</think>)\n+', r'\1\n', content)
|
||||
return content.strip()
|
||||
|
||||
@staticmethod
def _redact_message_content(content):
    """Apply secret redaction to message content (str or list-of-parts).

    Handles both plain-string content and the OpenAI/Anthropic multimodal
    shape where ``content`` is a list of ``{"type": "text", "text": ...}``
    / ``{"type": "image_url", ...}`` / ``{"type": "input_text", "content": ...}``
    parts. A part whose own ``content`` is itself a list of parts (for
    example an Anthropic ``tool_result`` block) is redacted recursively so
    nested text does not slip past the persistence boundary. Image / binary
    parts are left untouched; only ``text`` / ``content`` fields are passed
    through ``redact_sensitive_text``.

    Respects ``HERMES_REDACT_SECRETS`` via ``redact_sensitive_text`` —
    when disabled the helper is effectively a no-op.

    Args:
        content: Message content — ``None``, a string, a list of parts, or
            any other value.

    Returns:
        ``None`` and unsupported types are returned unchanged. A string is
        returned redacted. A list yields a NEW list in which every dict
        part is a shallow copy with its text fields redacted; the caller's
        original parts are never mutated. Non-dict list items are passed
        through as-is.
    """

    def _redact(value):
        # Plain string: redact directly.
        if isinstance(value, str):
            return redact_sensitive_text(value)
        # None, numbers, dicts, etc. are not message-content shapes we
        # know how to redact; hand them back untouched.
        if not isinstance(value, list):
            return value

        redacted = []
        for part in value:
            if isinstance(part, dict):
                # Shallow copy so the in-memory conversation history keeps
                # its original values; only the persisted shape changes.
                part = dict(part)
                if isinstance(part.get("text"), str):
                    part["text"] = redact_sensitive_text(part["text"])
                nested = part.get("content")
                # ``content`` may be a string (input_text-style parts) or a
                # nested list of parts (tool_result-style blocks).
                if isinstance(nested, (str, list)):
                    part["content"] = _redact(nested)
            redacted.append(part)
        return redacted

    return _redact(content)
|
||||
|
||||
def _save_session_log(self, messages: List[Dict[str, Any]] = None):
|
||||
"""Optional per-session JSON snapshot writer.
|
||||
|
||||
|
|
@ -1581,6 +1612,14 @@ class AIAgent:
|
|||
if msg.get("role") == "assistant" and msg.get("content"):
|
||||
msg = dict(msg)
|
||||
msg["content"] = self._clean_session_content(msg["content"])
|
||||
# Defence-in-depth: redact credentials from every message
|
||||
# content before persistence. Catches PATs / API keys / Bearer
|
||||
# tokens that may have leaked into assistant responses, tool
|
||||
# output, or user paste. Respects HERMES_REDACT_SECRETS via
|
||||
# redact_sensitive_text — no-op when disabled. (#19798, #19845)
|
||||
if "content" in msg:
|
||||
msg = dict(msg)
|
||||
msg["content"] = self._redact_message_content(msg.get("content"))
|
||||
cleaned.append(msg)
|
||||
|
||||
# Guard: never overwrite a larger session log with fewer messages.
|
||||
|
|
@ -1606,7 +1645,7 @@ class AIAgent:
|
|||
"platform": self.platform,
|
||||
"session_start": self.session_start.isoformat(),
|
||||
"last_updated": datetime.now().isoformat(),
|
||||
"system_prompt": self._cached_system_prompt or "",
|
||||
"system_prompt": redact_sensitive_text(self._cached_system_prompt or ""),
|
||||
"tools": self.tools or [],
|
||||
"message_count": len(cleaned),
|
||||
"messages": cleaned,
|
||||
|
|
|
|||
|
|
@ -600,6 +600,76 @@ class TestSessionJsonSnapshotOptIn:
|
|||
assert hasattr(agent, "logs_dir")
|
||||
|
||||
|
||||
class TestSaveSessionLogRedactsSecrets:
    """Regression guard for #19798 / #19845.

    The per-session ``session_*.json`` snapshot must never contain a
    plaintext credential, whichever message field carried it.
    """

    @pytest.fixture(autouse=True)
    def _ensure_redaction_enabled(self, monkeypatch):
        """Switch redaction on for every test in this class.

        The hermetic conftest blanks ``HERMES_REDACT_SECRETS`` and the
        module-level ``_REDACT_ENABLED`` constant is captured at import
        time, so the constant is patched directly for the test's duration.
        """
        monkeypatch.delenv("HERMES_REDACT_SECRETS", raising=False)
        monkeypatch.setattr("agent.redact._REDACT_ENABLED", True)

    @staticmethod
    def _write_and_read_snapshot(agent, tmp_path, messages):
        """Enable JSON snapshots, save *messages*, return the file text."""
        agent._session_json_enabled = True
        agent.logs_dir = tmp_path
        agent._save_session_log(messages)
        snapshot_path = tmp_path / f"session_{agent.session_id}.json"
        return snapshot_path.read_text(encoding="utf-8")

    def test_redacts_api_key_in_tool_content(self, agent, tmp_path):
        """A bearer token echoed in tool output is scrubbed from the snapshot."""
        tool_message = {
            "role": "tool",
            "content": "Response: Authorization: Bearer sk-proj-abc123def456ghi789jkl012mno",
        }
        snapshot = self._write_and_read_snapshot(
            agent,
            tmp_path,
            [{"role": "user", "content": "Hello"}, tool_message],
        )
        assert "sk-proj-abc123def456ghi789jkl012mno" not in snapshot

    def test_redacts_api_key_in_user_message(self, agent, tmp_path):
        """A key pasted by the user is scrubbed from the snapshot."""
        user_message = {
            "role": "user",
            "content": "My key is sk-ant-api03-abc123def456ghi789jkl012mno please use it",
        }
        snapshot = self._write_and_read_snapshot(agent, tmp_path, [user_message])
        assert "sk-ant-api03-abc123def456ghi789jkl012mno" not in snapshot

    def test_redacts_system_prompt_credentials(self, agent, tmp_path):
        """A key embedded in the cached system prompt is scrubbed too."""
        agent._cached_system_prompt = "Use key sk-proj-realkey1234567890123456 for API calls"
        snapshot = self._write_and_read_snapshot(
            agent,
            tmp_path,
            [{"role": "user", "content": "test"}],
        )
        assert "sk-proj-realkey1234567890123456" not in snapshot

    def test_redacts_list_type_multimodal_content(self, agent, tmp_path):
        """OpenAI/Anthropic multimodal shape: content = list of {type, text|image_url} parts."""
        text_part = {"type": "text", "text": "Key: gsk_abc123def456ghi789jkl012mno"}
        image_part = {"type": "image_url", "image_url": {"url": "data:image/png;base64,abc"}}
        snapshot_text = self._write_and_read_snapshot(
            agent,
            tmp_path,
            [{"role": "user", "content": [text_part, image_part]}],
        )

        saved_text, saved_image = json.loads(snapshot_text)["messages"][0]["content"][:2]
        assert "gsk_abc123def456ghi789jkl012mno" not in saved_text["text"]
        # Image part preserved untouched
        assert saved_image["image_url"]["url"].startswith("data:image")
|
||||
|
||||
|
||||
class TestGetMessagesUpToLastAssistant:
|
||||
def test_empty_list(self, agent):
|
||||
assert agent._get_messages_up_to_last_assistant([]) == []
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue