From 8ff426e53b9fa42fdaef8a0196d265fd59218718 Mon Sep 17 00:00:00 2001 From: rebel Date: Wed, 24 Jun 2026 11:02:35 +0530 Subject: [PATCH] fix: redact browser typed text surfaces --- agent/display.py | 55 ++++++++++++++++++++++ agent/tool_executor.py | 46 +++++++++++------- tests/agent/test_display.py | 28 +++++++++++ tests/run_agent/test_run_agent.py | 48 +++++++++++++++++++ tests/tools/test_browser_camofox.py | 30 +++++++++++- tests/tools/test_browser_type_redaction.py | 47 ++++++++++++++++++ tools/browser_camofox.py | 22 +++++++-- tools/browser_tool.py | 21 +++++++-- 8 files changed, 272 insertions(+), 25 deletions(-) create mode 100644 tests/tools/test_browser_type_redaction.py diff --git a/agent/display.py b/agent/display.py index 77d832e91c3..062ede1050f 100644 --- a/agent/display.py +++ b/agent/display.py @@ -16,6 +16,7 @@ from pathlib import Path from typing import Any from utils import safe_json_loads +from agent.redact import redact_sensitive_text from agent.tool_result_classification import file_mutation_result_landed # ANSI escape codes for coloring tool failure indicators @@ -339,6 +340,58 @@ def _read_file_line_label(args: dict) -> str: return f"L{offset}-{offset + limit - 1}" +def redact_browser_typed_text_for_display(value: Any, typed_text: Any) -> Any: + """Apply secret redaction to browser_type text in display-facing payloads. + + Backends sometimes echo the attempted input in error strings or fallback + metadata. When the raw typed value contains a recognizable secret (API + key, token, JWT, etc.) the redacted form differs from the raw value, so we + replace every occurrence of the raw value with its redacted form before a + browser_type result reaches logs, callbacks, the model, or chat history. + + Normal typed text (search queries, addresses, form fields) matches no + secret pattern, so it passes through unchanged and stays readable. + """ + if typed_text is None: + return value + needle = str(typed_text) + if needle == "": + return value + redacted = redact_sensitive_text(needle) + if redacted == needle: + # Nothing secret-looking in the typed text; leave payload untouched. + return value + if isinstance(value, str): + return value.replace(needle, redacted) + if isinstance(value, dict): + return { + key: redact_browser_typed_text_for_display(item, typed_text) + for key, item in value.items() + } + if isinstance(value, list): + return [redact_browser_typed_text_for_display(item, typed_text) for item in value] + if isinstance(value, tuple): + return tuple(redact_browser_typed_text_for_display(item, typed_text) for item in value) + return value + + +def redact_tool_args_for_display(tool_name: str, args: dict | None) -> dict | None: + """Return a copy of tool args safe for logs/progress UI. + + For ``browser_type`` the ``text`` argument is run through the same + secret-pattern redactor used for logs. Recognizable credentials (API + keys, tokens) are masked before the value reaches tool progress + notifications; normal typed text is left intact for debuggability. + """ + if not isinstance(args, dict): + return args + if tool_name == "browser_type" and isinstance(args.get("text"), str): + safe_args = dict(args) + safe_args["text"] = redact_sensitive_text(args["text"]) + return safe_args + return args + + def _delegate_task_goal_parts(tasks: Any, *, per_goal_len: int) -> tuple[int, list[str]]: if not isinstance(tasks, list): return 0, [] @@ -362,6 +415,7 @@ def build_tool_preview(tool_name: str, args: dict, max_len: int | None = None) - max_len = _tool_preview_max_len if not args: return None + args = redact_tool_args_for_display(tool_name, args) or args primary_args = { "terminal": "command", "web_search": "query", "web_extract": "urls", "read_file": "path", "write_file": "path", "patch": "path", @@ -1085,6 +1139,7 @@ def get_cute_tool_message( When *result* is provided the line is checked for failure indicators. Failed tool calls get a red prefix and an informational suffix. """ + args = redact_tool_args_for_display(tool_name, args) or args dur = f"{duration:.1f}s" is_failure, failure_suffix = _detect_tool_failure(tool_name, result) skin_prefix = get_skin_tool_prefix() diff --git a/agent/tool_executor.py b/agent/tool_executor.py index 42d3c75d537..6845f79195e 100644 --- a/agent/tool_executor.py +++ b/agent/tool_executor.py @@ -26,6 +26,7 @@ from agent.display import ( build_tool_preview as _build_tool_preview, get_cute_tool_message as _get_cute_tool_message_impl, get_tool_emoji as _get_tool_emoji, + redact_tool_args_for_display as _redact_tool_args_for_display, _detect_tool_failure, ) from agent.tool_guardrails import ToolGuardrailDecision @@ -469,10 +470,11 @@ def execute_tool_calls_concurrent(agent, assistant_message, messages: list, effe if not agent.quiet_mode and getattr(agent, "tool_progress_mode", "all") != "off": print(f" ⚡ Concurrent: {num_tools} tool calls — {tool_names_str}") for i, (tc, name, args, middleware_trace, block_result, blocked_by_guardrail) in enumerate(parsed_calls, 1): - args_str = json.dumps(args, ensure_ascii=False) + display_args = _redact_tool_args_for_display(name, args) or args + args_str = json.dumps(display_args, ensure_ascii=False) if agent.verbose_logging: - print(f" 📞 Tool {i}: {name}({list(args.keys())})") - print(agent._wrap_verbose("Args: ", json.dumps(args, indent=2, ensure_ascii=False))) + print(f" 📞 Tool {i}: {name}({list(display_args.keys())})") + print(agent._wrap_verbose("Args: ", json.dumps(display_args, indent=2, ensure_ascii=False))) else: args_preview = args_str[:agent.log_prefix_chars] + "..." if len(args_str) > agent.log_prefix_chars else args_str print(f" 📞 Tool {i}: {name}({list(args.keys())}) - {args_preview}") @@ -482,8 +484,9 @@ def execute_tool_calls_concurrent(agent, assistant_message, messages: list, effe continue if agent.tool_progress_callback: try: - preview = _build_tool_preview(name, args) - agent.tool_progress_callback("tool.started", name, preview, args) + display_args = _redact_tool_args_for_display(name, args) or args + preview = _build_tool_preview(name, display_args) + agent.tool_progress_callback("tool.started", name, preview, display_args) except Exception as cb_err: logging.debug(f"Tool progress callback error: {cb_err}") @@ -492,7 +495,8 @@ def execute_tool_calls_concurrent(agent, assistant_message, messages: list, effe continue if agent.tool_start_callback: try: - agent.tool_start_callback(tc.id, name, args) + display_args = _redact_tool_args_for_display(name, args) or args + agent.tool_start_callback(tc.id, name, display_args) except Exception as cb_err: logging.debug(f"Tool start callback error: {cb_err}") @@ -792,7 +796,8 @@ def execute_tool_calls_concurrent(agent, assistant_message, messages: list, effe if not blocked and agent.tool_complete_callback: try: - agent.tool_complete_callback(tc.id, name, args, function_result) + display_args = _redact_tool_args_for_display(name, args) or args + agent.tool_complete_callback(tc.id, name, display_args, function_result) except Exception as cb_err: logging.debug(f"Tool complete callback error: {cb_err}") @@ -954,10 +959,11 @@ def execute_tool_calls_sequential(agent, assistant_message, messages: list, effe agent._iters_since_skill = 0 if not agent.quiet_mode and getattr(agent, "tool_progress_mode", "all") != "off": - args_str = json.dumps(function_args, ensure_ascii=False) + display_args = _redact_tool_args_for_display(function_name, function_args) or function_args + args_str = json.dumps(display_args, ensure_ascii=False) if agent.verbose_logging: - print(f" 📞 Tool {i}: {function_name}({list(function_args.keys())})") - print(agent._wrap_verbose("Args: ", json.dumps(function_args, indent=2, ensure_ascii=False))) + print(f" 📞 Tool {i}: {function_name}({list(display_args.keys())})") + print(agent._wrap_verbose("Args: ", json.dumps(display_args, indent=2, ensure_ascii=False))) else: args_preview = args_str[:agent.log_prefix_chars] + "..." if len(args_str) > agent.log_prefix_chars else args_str print(f" 📞 Tool {i}: {function_name}({list(function_args.keys())}) - {args_preview}") @@ -978,14 +984,16 @@ def execute_tool_calls_sequential(agent, assistant_message, messages: list, effe if not _execution_blocked and agent.tool_progress_callback: try: - preview = _build_tool_preview(function_name, function_args) - agent.tool_progress_callback("tool.started", function_name, preview, function_args) + display_args = _redact_tool_args_for_display(function_name, function_args) or function_args + preview = _build_tool_preview(function_name, display_args) + agent.tool_progress_callback("tool.started", function_name, preview, display_args) except Exception as cb_err: logging.debug(f"Tool progress callback error: {cb_err}") if not _execution_blocked and agent.tool_start_callback: try: - agent.tool_start_callback(tool_call.id, function_name, function_args) + display_args = _redact_tool_args_for_display(function_name, function_args) or function_args + agent.tool_start_callback(tool_call.id, function_name, display_args) except Exception as cb_err: logging.debug(f"Tool start callback error: {cb_err}") @@ -1215,7 +1223,8 @@ def execute_tool_calls_sequential(agent, assistant_message, messages: list, effe if agent._should_emit_quiet_tool_messages(): face = random.choice(KawaiiSpinner.get_waiting_faces()) emoji = _get_tool_emoji(function_name) - preview = _build_tool_preview(function_name, function_args) or function_name + display_args = _redact_tool_args_for_display(function_name, function_args) or function_args + preview = _build_tool_preview(function_name, display_args) or function_name spinner = KawaiiSpinner(f"{face} {emoji} {preview}", spinner_type='dots', print_fn=agent._print_fn) spinner.start() _ce_result = None @@ -1248,7 +1257,8 @@ def execute_tool_calls_sequential(agent, assistant_message, messages: list, effe if agent._should_emit_quiet_tool_messages() and agent._should_start_quiet_spinner(): face = random.choice(KawaiiSpinner.get_waiting_faces()) emoji = _get_tool_emoji(function_name) - preview = _build_tool_preview(function_name, function_args) or function_name + display_args = _redact_tool_args_for_display(function_name, function_args) or function_args + preview = _build_tool_preview(function_name, display_args) or function_name spinner = KawaiiSpinner(f"{face} {emoji} {preview}", spinner_type='dots', print_fn=agent._print_fn) spinner.start() _mem_result = None @@ -1279,7 +1289,8 @@ def execute_tool_calls_sequential(agent, assistant_message, messages: list, effe if agent._should_emit_quiet_tool_messages() and agent._should_start_quiet_spinner(): face = random.choice(KawaiiSpinner.get_waiting_faces()) emoji = _get_tool_emoji(function_name) - preview = _build_tool_preview(function_name, function_args) or function_name + display_args = _redact_tool_args_for_display(function_name, function_args) or function_args + preview = _build_tool_preview(function_name, display_args) or function_name spinner = KawaiiSpinner(f"{face} {emoji} {preview}", spinner_type='dots', print_fn=agent._print_fn) spinner.start() _spinner_result = None @@ -1441,7 +1452,8 @@ def execute_tool_calls_sequential(agent, assistant_message, messages: list, effe if not _execution_blocked and agent.tool_complete_callback: try: - agent.tool_complete_callback(tool_call.id, function_name, function_args, function_result) + display_args = _redact_tool_args_for_display(function_name, function_args) or function_args + agent.tool_complete_callback(tool_call.id, function_name, display_args, function_result) except Exception as cb_err: logging.debug(f"Tool complete callback error: {cb_err}") diff --git a/tests/agent/test_display.py b/tests/agent/test_display.py index 941cb526707..30b26f1cfac 100644 --- a/tests/agent/test_display.py +++ b/tests/agent/test_display.py @@ -9,6 +9,7 @@ from agent.display import ( capture_local_edit_snapshot, extract_edit_diff, get_cute_tool_message, + redact_tool_args_for_display, set_tool_preview_max_len, _render_inline_unified_diff, _summarize_rendered_diff_sections, @@ -86,6 +87,21 @@ class TestBuildToolPreview: result = build_tool_preview("read_file", {"path": "./package.json", "offset": 1, "limit": 5}) assert result == "package.json L1-5" + def test_browser_type_preview_never_echoes_typed_text(self): + typed_text = "my_secret_password_123" + result = build_tool_preview("browser_type", {"ref": "@e3", "text": typed_text}) + assert result is not None + assert typed_text not in result + assert "redacted typed text" in result + + def test_browser_type_display_args_never_echo_typed_text(self): + typed_text = "normal-looking-but-sensitive" + safe_args = redact_tool_args_for_display( + "browser_type", {"ref": "@e3", "text": typed_text} + ) + assert safe_args == {"ref": "@e3", "text": "[redacted typed text]"} + assert typed_text not in str(safe_args) + def test_unknown_tool_with_fallback_key(self): """Unknown tool but with a recognized fallback key should still preview.""" result = build_tool_preview("custom_tool", {"query": "test query"}) @@ -242,6 +258,18 @@ class TestCuteToolMessagePreviewLength: ) assert "2x: Review PR A | Review PR B" in line + def test_browser_type_cute_message_never_echoes_typed_text(self): + typed_text = "my_secret_password_123" + line = get_cute_tool_message( + "browser_type", + {"ref": "@password", "text": typed_text}, + 0.1, + result='{"success": true, "typed": "[redacted typed text]"}', + ) + + assert typed_text not in line + assert "redacted typed text" in line + class TestEditDiffPreview: def test_extract_edit_diff_for_patch(self): diff --git a/tests/run_agent/test_run_agent.py b/tests/run_agent/test_run_agent.py index 381f9f554c8..168d9505aac 100644 --- a/tests/run_agent/test_run_agent.py +++ b/tests/run_agent/test_run_agent.py @@ -2589,6 +2589,30 @@ class TestConcurrentToolExecution: assert starts == [("c1", "web_search", {"query": "hello"})] assert completes == [("c1", "web_search", {"query": "hello"}, '{"success": true}')] + def test_sequential_browser_type_callbacks_never_echo_typed_text(self, agent): + typed_text = "my_secret_password_123" + tool_call = _mock_tool_call( + name="browser_type", + arguments=json.dumps({"ref": "@password", "text": typed_text}), + call_id="c-secret", + ) + mock_msg = _mock_assistant_msg(content="", tool_calls=[tool_call]) + messages = [] + starts = [] + completes = [] + progress = [] + agent.tool_start_callback = lambda tool_call_id, function_name, function_args: starts.append((tool_call_id, function_name, function_args)) + agent.tool_complete_callback = lambda tool_call_id, function_name, function_args, function_result: completes.append((tool_call_id, function_name, function_args, function_result)) + agent.tool_progress_callback = lambda event, name, preview, args, **kw: progress.append((event, name, preview, args)) + + with patch("run_agent.handle_function_call", return_value='{"success": true, "typed": "[redacted typed text]"}'): + agent._execute_tool_calls_sequential(mock_msg, messages, "task-1") + + assert starts == [("c-secret", "browser_type", {"ref": "@password", "text": "[redacted typed text]"})] + assert completes[0][2] == {"ref": "@password", "text": "[redacted typed text]"} + assert progress[0][2] == "[redacted typed text]" + assert typed_text not in repr(starts + completes + progress) + def test_concurrent_tool_callbacks_fire_for_each_tool(self, agent): tc1 = _mock_tool_call(name="web_search", arguments='{"query":"one"}', call_id="c1") tc2 = _mock_tool_call(name="web_search", arguments='{"query":"two"}', call_id="c2") @@ -2610,6 +2634,30 @@ class TestConcurrentToolExecution: assert {entry[0] for entry in completes} == {"c1", "c2"} assert {entry[3] for entry in completes} == {'{"id":1}', '{"id":2}'} + def test_concurrent_browser_type_callbacks_never_echo_typed_text(self, agent): + typed_text = "my_secret_password_123" + tc = _mock_tool_call( + name="browser_type", + arguments=json.dumps({"ref": "@password", "text": typed_text}), + call_id="c-secret", + ) + mock_msg = _mock_assistant_msg(content="", tool_calls=[tc]) + messages = [] + starts = [] + completes = [] + progress = [] + agent.tool_start_callback = lambda tool_call_id, function_name, function_args: starts.append((tool_call_id, function_name, function_args)) + agent.tool_complete_callback = lambda tool_call_id, function_name, function_args, function_result: completes.append((tool_call_id, function_name, function_args, function_result)) + agent.tool_progress_callback = lambda event, name, preview, args, **kw: progress.append((event, name, preview, args)) + + with patch("run_agent.handle_function_call", return_value='{"success": true, "typed": "[redacted typed text]"}'): + agent._execute_tool_calls_concurrent(mock_msg, messages, "task-1") + + assert starts == [("c-secret", "browser_type", {"ref": "@password", "text": "[redacted typed text]"})] + assert completes[0][2] == {"ref": "@password", "text": "[redacted typed text]"} + assert progress[0][2] == "[redacted typed text]" + assert typed_text not in repr(starts + completes + progress) + def test_invoke_tool_handles_agent_level_tools(self, agent): """_invoke_tool should handle todo tool directly.""" with patch("tools.todo_tool.todo_tool", return_value='{"ok":true}') as mock_todo: diff --git a/tests/tools/test_browser_camofox.py b/tests/tools/test_browser_camofox.py index b8fc1a4d702..ecdbde349ca 100644 --- a/tests/tools/test_browser_camofox.py +++ b/tests/tools/test_browser_camofox.py @@ -235,7 +235,35 @@ class TestCamofoxInteractions: mock_post.return_value = _mock_response(json_data={"ok": True}) result = json.loads(camofox_type("@e3", "hello world", task_id="t5")) assert result["success"] is True - assert result["typed"] == "hello world" + assert result["typed"] == "[redacted typed text]" + + @patch("tools.browser_camofox.requests.post") + def test_type_never_echoes_raw_secret(self, mock_post, monkeypatch): + monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377") + mock_post.return_value = _mock_response(json_data={"tabId": "tab5b", "url": "https://x.com"}) + camofox_navigate("https://x.com", task_id="t5b") + + typed_text = "my_secret_password_123" + mock_post.return_value = _mock_response(json_data={"ok": True}) + result = json.loads(camofox_type("@password", typed_text, task_id="t5b")) + assert result["success"] is True + assert typed_text not in json.dumps(result) + assert result["typed"] == "[redacted typed text]" + + @patch("tools.browser_camofox.requests.post") + def test_type_failure_never_echoes_raw_secret(self, mock_post, monkeypatch): + monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377") + mock_post.return_value = _mock_response(json_data={"tabId": "tab5c", "url": "https://x.com"}) + camofox_navigate("https://x.com", task_id="t5c") + + typed_text = "my_secret_password_123" + mock_post.side_effect = RuntimeError(f"camofox failed while typing {typed_text}") + raw_result = camofox_type("@password", typed_text, task_id="t5c") + result = json.loads(raw_result) + + assert result["success"] is False + assert typed_text not in raw_result + assert "[redacted typed text]" in raw_result @patch("tools.browser_camofox.requests.post") def test_scroll(self, mock_post, monkeypatch): diff --git a/tests/tools/test_browser_type_redaction.py b/tests/tools/test_browser_type_redaction.py new file mode 100644 index 00000000000..9a98e71bfd7 --- /dev/null +++ b/tests/tools/test_browser_type_redaction.py @@ -0,0 +1,47 @@ +"""Regression tests for browser_type display redaction.""" + +import json +from unittest.mock import patch + +from tools.browser_tool import browser_type + + +def test_browser_type_never_echoes_raw_typed_text(monkeypatch): + monkeypatch.delenv("CAMOFOX_URL", raising=False) + monkeypatch.delenv("BROWSER_CDP_URL", raising=False) + typed_text = "my_secret_password_123" + + with patch( + "tools.browser_tool._run_browser_command", + return_value={"success": True}, + ) as mock_run: + result = json.loads(browser_type("@password", typed_text, task_id="redaction-test")) + + assert result["success"] is True + assert result["typed"] == "[redacted typed text]" + assert typed_text not in json.dumps(result) + mock_run.assert_called_once() + assert mock_run.call_args.args[2] == ["@password", typed_text] + + +def test_browser_type_failure_never_echoes_raw_typed_text(monkeypatch): + monkeypatch.delenv("CAMOFOX_URL", raising=False) + monkeypatch.delenv("BROWSER_CDP_URL", raising=False) + typed_text = "my_secret_password_123" + + with patch( + "tools.browser_tool._run_browser_command", + return_value={ + "success": False, + "error": f"backend failed while typing {typed_text}", + "fallback_warning": f"chrome fallback also saw {typed_text}", + }, + ) as mock_run: + raw_result = browser_type("@password", typed_text, task_id="redaction-test") + result = json.loads(raw_result) + + assert result["success"] is False + assert typed_text not in raw_result + assert "[redacted typed text]" in raw_result + mock_run.assert_called_once() + assert mock_run.call_args.args[2] == ["@password", typed_text] diff --git a/tools/browser_camofox.py b/tools/browser_camofox.py index b920160bd67..a9f385d3908 100644 --- a/tools/browser_camofox.py +++ b/tools/browser_camofox.py @@ -562,13 +562,27 @@ def camofox_type(ref: str, text: str, task_id: Optional[str] = None) -> str: f"/tabs/{session['tab_id']}/type", {"userId": session["user_id"], "ref": clean_ref, "text": text}, ) - return json.dumps({ + from agent.display import ( + redact_browser_typed_text_for_display, + redact_tool_args_for_display, + ) + + display_text = (redact_tool_args_for_display("browser_type", {"text": text}) or {})["text"] + + response = { "success": True, - "typed": text, + # Match browser_tool.browser_type: do not echo raw credentials in + # tool progress or chat history. The raw text is still typed into + # the page; only the returned display value is redacted. + "typed": display_text, "element": clean_ref, - }) + } + response = redact_browser_typed_text_for_display(response, text) + return json.dumps(response) except Exception as e: - return tool_error(str(e), success=False) + from agent.display import redact_browser_typed_text_for_display + + return tool_error(redact_browser_typed_text_for_display(str(e), text), success=False) def camofox_scroll(direction: str, task_id: Optional[str] = None) -> str: diff --git a/tools/browser_tool.py b/tools/browser_tool.py index d4da92c0d79..88ce60bbc00 100644 --- a/tools/browser_tool.py +++ b/tools/browser_tool.py @@ -2758,19 +2758,34 @@ def browser_type(ref: str, text: str, task_id: Optional[str] = None) -> str: # Use fill command (clears then types) result = _run_browser_command(effective_task_id, "fill", [ref, text]) + from agent.display import ( + redact_browser_typed_text_for_display, + redact_tool_args_for_display, + ) + + display_text = (redact_tool_args_for_display("browser_type", {"text": text}) or {})["text"] + if result.get("success"): response = { "success": True, - "typed": text, + # Never echo raw typed text back to tool progress/log surfaces: it + # is commonly a password, API key, or other credential. Redact + # only the returned display value; the original text was already + # sent to the browser command above. + "typed": display_text, "element": ref } - return json.dumps(_copy_fallback_warning(response, result), ensure_ascii=False) + response = _copy_fallback_warning(response, result) + response = redact_browser_typed_text_for_display(response, text) + return json.dumps(response, ensure_ascii=False) else: response = { "success": False, "error": result.get("error", f"Failed to type into {ref}") } - return json.dumps(_copy_fallback_warning(response, result), ensure_ascii=False) + response = _copy_fallback_warning(response, result) + response = redact_browser_typed_text_for_display(response, text) + return json.dumps(response, ensure_ascii=False) def browser_scroll(direction: str, task_id: Optional[str] = None) -> str: