diff --git a/agent/display.py b/agent/display.py index 062ede1050f..861d84bc410 100644 --- a/agent/display.py +++ b/agent/display.py @@ -351,13 +351,17 @@ def redact_browser_typed_text_for_display(value: Any, typed_text: Any) -> Any: Normal typed text (search queries, addresses, form fields) matches no secret pattern, so it passes through unchanged and stays readable. + + Redaction is forced here regardless of the global ``security.redact_secrets`` + preference: a typed credential leaking into chat history is a security + boundary, not mere log hygiene. """ if typed_text is None: return value needle = str(typed_text) if needle == "": return value - redacted = redact_sensitive_text(needle) + redacted = redact_sensitive_text(needle, force=True) if redacted == needle: # Nothing secret-looking in the typed text; leave payload untouched. return value @@ -387,7 +391,7 @@ def redact_tool_args_for_display(tool_name: str, args: dict | None) -> dict | No return args if tool_name == "browser_type" and isinstance(args.get("text"), str): safe_args = dict(args) - safe_args["text"] = redact_sensitive_text(args["text"]) + safe_args["text"] = redact_sensitive_text(args["text"], force=True) return safe_args return args diff --git a/tests/agent/test_display.py b/tests/agent/test_display.py index 30b26f1cfac..9ff3694d2bd 100644 --- a/tests/agent/test_display.py +++ b/tests/agent/test_display.py @@ -87,20 +87,34 @@ class TestBuildToolPreview: result = build_tool_preview("read_file", {"path": "./package.json", "offset": 1, "limit": 5}) assert result == "package.json L1-5" - def test_browser_type_preview_never_echoes_typed_text(self): - typed_text = "my_secret_password_123" - result = build_tool_preview("browser_type", {"ref": "@e3", "text": typed_text}) + def test_browser_type_preview_redacts_api_key(self): + secret = "sk-proj-ABCD1234567890EFGH" + result = build_tool_preview("browser_type", {"ref": "@e3", "text": secret}) assert result is not None - assert typed_text not in result - assert "redacted typed text" in result + assert secret not in result + assert "sk-pro" in result and "..." in result - def test_browser_type_display_args_never_echo_typed_text(self): - typed_text = "normal-looking-but-sensitive" + def test_browser_type_preview_keeps_normal_text(self): + text = "hello world search query" + result = build_tool_preview("browser_type", {"ref": "@e3", "text": text}) + assert result is not None + assert text in result + + def test_browser_type_display_args_redact_api_key(self): + secret = "ghp_ABCDEFGHIJ1234567890" safe_args = redact_tool_args_for_display( - "browser_type", {"ref": "@e3", "text": typed_text} + "browser_type", {"ref": "@e3", "text": secret} ) - assert safe_args == {"ref": "@e3", "text": "[redacted typed text]"} - assert typed_text not in str(safe_args) + assert secret not in str(safe_args) + assert safe_args["ref"] == "@e3" + assert safe_args["text"].startswith("ghp_AB") + + def test_browser_type_display_args_keep_normal_text(self): + text = "my_normal_password_123" + safe_args = redact_tool_args_for_display( + "browser_type", {"ref": "@e3", "text": text} + ) + assert safe_args == {"ref": "@e3", "text": text} def test_unknown_tool_with_fallback_key(self): """Unknown tool but with a recognized fallback key should still preview.""" @@ -258,17 +272,28 @@ class TestCuteToolMessagePreviewLength: ) assert "2x: Review PR A | Review PR B" in line - def test_browser_type_cute_message_never_echoes_typed_text(self): - typed_text = "my_secret_password_123" + def test_browser_type_cute_message_redacts_api_key(self): + secret = "sk-proj-ABCD1234567890EFGH" line = get_cute_tool_message( "browser_type", - {"ref": "@password", "text": typed_text}, + {"ref": "@password", "text": secret}, 0.1, - result='{"success": true, "typed": "[redacted typed text]"}', + result='{"success": true, "typed": "sk-pro...EFGH"}', ) - assert typed_text not in line - assert "redacted typed text" in line + assert secret not in line + assert "sk-pro" in line + + def test_browser_type_cute_message_keeps_normal_text(self): + text = "hello world" + line = get_cute_tool_message( + "browser_type", + {"ref": "@search", "text": text}, + 0.1, + result='{"success": true, "typed": "hello world"}', + ) + + assert text in line class TestEditDiffPreview: diff --git a/tests/run_agent/test_run_agent.py b/tests/run_agent/test_run_agent.py index 168d9505aac..10570c72616 100644 --- a/tests/run_agent/test_run_agent.py +++ b/tests/run_agent/test_run_agent.py @@ -2589,11 +2589,11 @@ class TestConcurrentToolExecution: assert starts == [("c1", "web_search", {"query": "hello"})] assert completes == [("c1", "web_search", {"query": "hello"}, '{"success": true}')] - def test_sequential_browser_type_callbacks_never_echo_typed_text(self, agent): - typed_text = "my_secret_password_123" + def test_sequential_browser_type_callbacks_redact_api_key(self, agent): + secret = "sk-proj-ABCD1234567890EFGH" tool_call = _mock_tool_call( name="browser_type", - arguments=json.dumps({"ref": "@password", "text": typed_text}), + arguments=json.dumps({"ref": "@apikey", "text": secret}), call_id="c-secret", ) mock_msg = _mock_assistant_msg(content="", tool_calls=[tool_call]) @@ -2605,13 +2605,13 @@ class TestConcurrentToolExecution: agent.tool_complete_callback = lambda tool_call_id, function_name, function_args, function_result: completes.append((tool_call_id, function_name, function_args, function_result)) agent.tool_progress_callback = lambda event, name, preview, args, **kw: progress.append((event, name, preview, args)) - with patch("run_agent.handle_function_call", return_value='{"success": true, "typed": "[redacted typed text]"}'): + with patch("run_agent.handle_function_call", return_value='{"success": true, "typed": "sk-pro...EFGH"}'): agent._execute_tool_calls_sequential(mock_msg, messages, "task-1") - assert starts == [("c-secret", "browser_type", {"ref": "@password", "text": "[redacted typed text]"})] - assert completes[0][2] == {"ref": "@password", "text": "[redacted typed text]"} - assert progress[0][2] == "[redacted typed text]" - assert typed_text not in repr(starts + completes + progress) + assert starts[0][2]["text"].startswith("sk-pro") + assert completes[0][2]["text"].startswith("sk-pro") + assert progress[0][2].startswith("sk-pro") + assert secret not in repr(starts + completes + progress) def test_concurrent_tool_callbacks_fire_for_each_tool(self, agent): tc1 = _mock_tool_call(name="web_search", arguments='{"query":"one"}', call_id="c1") @@ -2634,11 +2634,11 @@ class TestConcurrentToolExecution: assert {entry[0] for entry in completes} == {"c1", "c2"} assert {entry[3] for entry in completes} == {'{"id":1}', '{"id":2}'} - def test_concurrent_browser_type_callbacks_never_echo_typed_text(self, agent): - typed_text = "my_secret_password_123" + def test_concurrent_browser_type_callbacks_redact_api_key(self, agent): + secret = "sk-proj-ABCD1234567890EFGH" tc = _mock_tool_call( name="browser_type", - arguments=json.dumps({"ref": "@password", "text": typed_text}), + arguments=json.dumps({"ref": "@apikey", "text": secret}), call_id="c-secret", ) mock_msg = _mock_assistant_msg(content="", tool_calls=[tc]) @@ -2650,13 +2650,13 @@ class TestConcurrentToolExecution: agent.tool_complete_callback = lambda tool_call_id, function_name, function_args, function_result: completes.append((tool_call_id, function_name, function_args, function_result)) agent.tool_progress_callback = lambda event, name, preview, args, **kw: progress.append((event, name, preview, args)) - with patch("run_agent.handle_function_call", return_value='{"success": true, "typed": "[redacted typed text]"}'): + with patch("run_agent.handle_function_call", return_value='{"success": true, "typed": "sk-pro...EFGH"}'): agent._execute_tool_calls_concurrent(mock_msg, messages, "task-1") - assert starts == [("c-secret", "browser_type", {"ref": "@password", "text": "[redacted typed text]"})] - assert completes[0][2] == {"ref": "@password", "text": "[redacted typed text]"} - assert progress[0][2] == "[redacted typed text]" - assert typed_text not in repr(starts + completes + progress) + assert starts[0][2]["text"].startswith("sk-pro") + assert completes[0][2]["text"].startswith("sk-pro") + assert progress[0][2].startswith("sk-pro") + assert secret not in repr(starts + completes + progress) def test_invoke_tool_handles_agent_level_tools(self, agent): """_invoke_tool should handle todo tool directly.""" diff --git a/tests/tools/test_browser_camofox.py b/tests/tools/test_browser_camofox.py index ecdbde349ca..df5bef4aff6 100644 --- a/tests/tools/test_browser_camofox.py +++ b/tests/tools/test_browser_camofox.py @@ -235,35 +235,38 @@ class TestCamofoxInteractions: mock_post.return_value = _mock_response(json_data={"ok": True}) result = json.loads(camofox_type("@e3", "hello world", task_id="t5")) assert result["success"] is True - assert result["typed"] == "[redacted typed text]" + # Normal text is left readable. + assert result["typed"] == "hello world" @patch("tools.browser_camofox.requests.post") - def test_type_never_echoes_raw_secret(self, mock_post, monkeypatch): + def test_type_redacts_api_key(self, mock_post, monkeypatch): monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377") + monkeypatch.setenv("HERMES_REDACT_SECRETS", "true") mock_post.return_value = _mock_response(json_data={"tabId": "tab5b", "url": "https://x.com"}) camofox_navigate("https://x.com", task_id="t5b") - typed_text = "my_secret_password_123" + secret = "sk-proj-ABCD1234567890EFGH" mock_post.return_value = _mock_response(json_data={"ok": True}) - result = json.loads(camofox_type("@password", typed_text, task_id="t5b")) + result = json.loads(camofox_type("@apikey", secret, task_id="t5b")) assert result["success"] is True - assert typed_text not in json.dumps(result) - assert result["typed"] == "[redacted typed text]" + assert secret not in json.dumps(result) + assert result["typed"].startswith("sk-pro") @patch("tools.browser_camofox.requests.post") - def test_type_failure_never_echoes_raw_secret(self, mock_post, monkeypatch): + def test_type_failure_redacts_api_key(self, mock_post, monkeypatch): monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377") + monkeypatch.setenv("HERMES_REDACT_SECRETS", "true") mock_post.return_value = _mock_response(json_data={"tabId": "tab5c", "url": "https://x.com"}) camofox_navigate("https://x.com", task_id="t5c") - typed_text = "my_secret_password_123" - mock_post.side_effect = RuntimeError(f"camofox failed while typing {typed_text}") - raw_result = camofox_type("@password", typed_text, task_id="t5c") + secret = "sk-proj-ABCD1234567890EFGH" + mock_post.side_effect = RuntimeError(f"camofox failed while typing {secret}") + raw_result = camofox_type("@apikey", secret, task_id="t5c") result = json.loads(raw_result) assert result["success"] is False - assert typed_text not in raw_result - assert "[redacted typed text]" in raw_result + assert secret not in raw_result + assert "sk-pro" in raw_result @patch("tools.browser_camofox.requests.post") def test_scroll(self, mock_post, monkeypatch): diff --git a/tests/tools/test_browser_type_redaction.py b/tests/tools/test_browser_type_redaction.py index 9a98e71bfd7..10a210f7b58 100644 --- a/tests/tools/test_browser_type_redaction.py +++ b/tests/tools/test_browser_type_redaction.py @@ -1,4 +1,10 @@ -"""Regression tests for browser_type display redaction.""" +"""Regression tests for browser_type display redaction. + +Typed text is passed through the same secret-pattern redactor used for logs: +recognizable credentials (API keys, tokens) are masked in display-facing +output, while normal typed text is left intact. The raw value is always sent +to the browser backend regardless. +""" import json from unittest.mock import patch @@ -6,42 +12,63 @@ from unittest.mock import patch from tools.browser_tool import browser_type -def test_browser_type_never_echoes_raw_typed_text(monkeypatch): +def test_browser_type_redacts_api_key_in_output(monkeypatch): monkeypatch.delenv("CAMOFOX_URL", raising=False) monkeypatch.delenv("BROWSER_CDP_URL", raising=False) - typed_text = "my_secret_password_123" + monkeypatch.setenv("HERMES_REDACT_SECRETS", "true") + secret = "sk-proj-ABCD1234567890EFGH" with patch( "tools.browser_tool._run_browser_command", return_value={"success": True}, ) as mock_run: - result = json.loads(browser_type("@password", typed_text, task_id="redaction-test")) + result = json.loads(browser_type("@apikey", secret, task_id="redaction-test")) assert result["success"] is True - assert result["typed"] == "[redacted typed text]" - assert typed_text not in json.dumps(result) + assert secret not in json.dumps(result) + assert result["typed"].startswith("sk-pro") + # Raw secret still typed into the page. mock_run.assert_called_once() - assert mock_run.call_args.args[2] == ["@password", typed_text] + assert mock_run.call_args.args[2] == ["@apikey", secret] -def test_browser_type_failure_never_echoes_raw_typed_text(monkeypatch): +def test_browser_type_keeps_normal_text_in_output(monkeypatch): monkeypatch.delenv("CAMOFOX_URL", raising=False) monkeypatch.delenv("BROWSER_CDP_URL", raising=False) - typed_text = "my_secret_password_123" + monkeypatch.setenv("HERMES_REDACT_SECRETS", "true") + text = "hello world search query" + + with patch( + "tools.browser_tool._run_browser_command", + return_value={"success": True}, + ) as mock_run: + result = json.loads(browser_type("@search", text, task_id="redaction-test")) + + assert result["success"] is True + assert result["typed"] == text + mock_run.assert_called_once() + assert mock_run.call_args.args[2] == ["@search", text] + + +def test_browser_type_failure_redacts_api_key_in_error(monkeypatch): + monkeypatch.delenv("CAMOFOX_URL", raising=False) + monkeypatch.delenv("BROWSER_CDP_URL", raising=False) + monkeypatch.setenv("HERMES_REDACT_SECRETS", "true") + secret = "sk-proj-ABCD1234567890EFGH" with patch( "tools.browser_tool._run_browser_command", return_value={ "success": False, - "error": f"backend failed while typing {typed_text}", - "fallback_warning": f"chrome fallback also saw {typed_text}", + "error": f"backend failed while typing {secret}", + "fallback_warning": f"chrome fallback also saw {secret}", }, ) as mock_run: - raw_result = browser_type("@password", typed_text, task_id="redaction-test") + raw_result = browser_type("@apikey", secret, task_id="redaction-test") result = json.loads(raw_result) assert result["success"] is False - assert typed_text not in raw_result - assert "[redacted typed text]" in raw_result + assert secret not in raw_result + assert "sk-pro" in raw_result mock_run.assert_called_once() - assert mock_run.call_args.args[2] == ["@password", typed_text] + assert mock_run.call_args.args[2] == ["@apikey", secret] diff --git a/tools/browser_camofox.py b/tools/browser_camofox.py index a9f385d3908..b16d7105a99 100644 --- a/tools/browser_camofox.py +++ b/tools/browser_camofox.py @@ -571,7 +571,8 @@ def camofox_type(ref: str, text: str, task_id: Optional[str] = None) -> str: response = { "success": True, - # Match browser_tool.browser_type: do not echo raw credentials in + # Match browser_tool.browser_type: run typed text through the + # secret-pattern redactor so API keys / tokens don't leak into # tool progress or chat history. The raw text is still typed into # the page; only the returned display value is redacted. "typed": display_text, diff --git a/tools/browser_tool.py b/tools/browser_tool.py index 88ce60bbc00..d679997d3bf 100644 --- a/tools/browser_tool.py +++ b/tools/browser_tool.py @@ -2768,10 +2768,10 @@ def browser_type(ref: str, text: str, task_id: Optional[str] = None) -> str: if result.get("success"): response = { "success": True, - # Never echo raw typed text back to tool progress/log surfaces: it - # is commonly a password, API key, or other credential. Redact - # only the returned display value; the original text was already - # sent to the browser command above. + # Run typed text through the secret-pattern redactor so API keys / + # tokens don't leak into tool progress or chat history. Normal + # text passes through unchanged. The raw value was already sent + # to the browser command above. "typed": display_text, "element": ref }