diff --git a/agent/chat_completion_helpers.py b/agent/chat_completion_helpers.py index b3261b60d0b..b52bd6a1fb1 100644 --- a/agent/chat_completion_helpers.py +++ b/agent/chat_completion_helpers.py @@ -581,6 +581,17 @@ def build_assistant_message(agent, assistant_message, finish_reason: str) -> dic if isinstance(_san_content, str) and _san_content: _san_content = agent._strip_think_blocks(_san_content).strip() + # Defence-in-depth: redact credentials (PATs, API keys, Bearer tokens) + # from assistant content BEFORE the message enters conversation history. + # If the model accidentally inlines a secret in its natural-language + # response, catch it here at the persistence boundary so it never + # reaches state.db, session_*.json, gateway delivery, or compression. + # Respects HERMES_REDACT_SECRETS via redact_sensitive_text — no-op + # when disabled. (#19798) + if isinstance(_san_content, str) and _san_content: + from agent.redact import redact_sensitive_text + _san_content = redact_sensitive_text(_san_content) + msg = { "role": "assistant", "content": _san_content, @@ -702,6 +713,18 @@ def build_assistant_message(agent, assistant_message, finish_reason: str) -> dic "arguments": tool_call.function.arguments }, } + # Defence-in-depth: redact credentials from tool call arguments + # before they enter conversation history. Tool execution uses the + # raw API response object, not this dict, so redacting the + # persisted shape is safe and only affects storage. Catches the + # case where a model accidentally inlines a secret into a tool + # call (e.g. `terminal(command="curl -H 'Authorization: Bearer + # sk-...'")`). (#19798) + if isinstance(tc_dict["function"]["arguments"], str): + from agent.redact import redact_sensitive_text + tc_dict["function"]["arguments"] = redact_sensitive_text( + tc_dict["function"]["arguments"] + ) # Preserve extra_content (e.g. Gemini thought_signature) so it # is sent back on subsequent API calls. 
@staticmethod
def _redact_message_content(content):
    """Apply secret redaction to message content (str or list-of-parts).

    Handles plain-string content as well as the multimodal shape where
    ``content`` is a list of parts such as ``{"type": "text", "text": ...}``,
    ``{"type": "image_url", ...}`` or
    ``{"type": "input_text", "content": ...}``.

    For every dict part, string values under the ``"text"`` and
    ``"content"`` keys are passed through ``redact_sensitive_text``.  A
    ``"content"`` value that is itself a list (for example a tool-result
    part wrapping further text parts) is processed recursively, and bare
    string items inside a list are redacted as well.  All other keys and
    values (image / binary payloads included) are left untouched.

    Respects ``HERMES_REDACT_SECRETS`` via ``redact_sensitive_text`` —
    when disabled the helper is effectively a no-op.

    Args:
        content: The message ``content`` value: ``None``, a string, a list
            of parts, or any other object.

    Returns:
        A redacted string, or a new list holding shallow copies of the
        dict parts with their text fields redacted.  The input list and
        its dicts are never mutated.  Any other value (``None`` included)
        is returned unchanged.
    """
    if isinstance(content, str):
        return redact_sensitive_text(content)
    if not isinstance(content, list):
        # None and unrecognised shapes pass through untouched.
        return content

    def _redact_part(part):
        # Bare strings inside a parts list are text too.
        if isinstance(part, str):
            return redact_sensitive_text(part)
        if not isinstance(part, dict):
            return part
        # Shallow copy so the caller's live message is not mutated.
        clean = dict(part)
        text = clean.get("text")
        if isinstance(text, str):
            clean["text"] = redact_sensitive_text(text)
        nested = clean.get("content")
        if isinstance(nested, str):
            clean["content"] = redact_sensitive_text(nested)
        elif isinstance(nested, list):
            # Nested parts list: without recursion any secret held in the
            # inner text parts would be persisted in plaintext.
            clean["content"] = [_redact_part(inner) for inner in nested]
        return clean

    return [_redact_part(part) for part in content]
class TestSaveSessionLogRedactsSecrets:
    """Regression guard: session_*.json snapshots must not hold plaintext credentials (#19798, #19845)."""

    @pytest.fixture(autouse=True)
    def _ensure_redaction_enabled(self, monkeypatch):
        """Switch redaction on for every test in this class.

        The environment variable is removed and the module-level
        ``_REDACT_ENABLED`` flag (captured at import time) is patched
        directly, so the host's HERMES_REDACT_SECRETS setting cannot
        influence the outcome.
        """
        monkeypatch.delenv("HERMES_REDACT_SECRETS", raising=False)
        monkeypatch.setattr("agent.redact._REDACT_ENABLED", True)

    @staticmethod
    def _write_snapshot(agent, tmp_path, messages):
        """Save *messages* through the agent and return the raw snapshot text."""
        agent._session_json_enabled = True
        agent.logs_dir = tmp_path
        agent._save_session_log(messages)
        snapshot_path = tmp_path / f"session_{agent.session_id}.json"
        return snapshot_path.read_text(encoding="utf-8")

    def test_redacts_api_key_in_tool_content(self, agent, tmp_path):
        tool_message = {
            "role": "tool",
            "content": "Response: Authorization: Bearer sk-proj-abc123def456ghi789jkl012mno",
        }
        snapshot = self._write_snapshot(
            agent,
            tmp_path,
            [{"role": "user", "content": "Hello"}, tool_message],
        )
        assert "sk-proj-abc123def456ghi789jkl012mno" not in snapshot

    def test_redacts_api_key_in_user_message(self, agent, tmp_path):
        user_message = {
            "role": "user",
            "content": "My key is sk-ant-api03-abc123def456ghi789jkl012mno please use it",
        }
        snapshot = self._write_snapshot(agent, tmp_path, [user_message])
        assert "sk-ant-api03-abc123def456ghi789jkl012mno" not in snapshot

    def test_redacts_system_prompt_credentials(self, agent, tmp_path):
        agent._cached_system_prompt = "Use key sk-proj-realkey1234567890123456 for API calls"
        snapshot = self._write_snapshot(
            agent, tmp_path, [{"role": "user", "content": "test"}]
        )
        assert "sk-proj-realkey1234567890123456" not in snapshot

    def test_redacts_list_type_multimodal_content(self, agent, tmp_path):
        """Multimodal shape: content is a list of {type, text|image_url} parts."""
        text_part = {"type": "text", "text": "Key: gsk_abc123def456ghi789jkl012mno"}
        image_part = {
            "type": "image_url",
            "image_url": {"url": "data:image/png;base64,abc"},
        }
        snapshot_text = self._write_snapshot(
            agent,
            tmp_path,
            [{"role": "user", "content": [text_part, image_part]}],
        )
        saved_parts = json.loads(snapshot_text)["messages"][0]["content"]
        assert "gsk_abc123def456ghi789jkl012mno" not in saved_parts[0]["text"]
        # The image part must come through unmodified.
        assert saved_parts[1]["image_url"]["url"].startswith("data:image")