fix: redact browser typed text surfaces

2026-06-27 11:22:03 +00:00 · 2026-06-24 11:02:35 +05:30 · 2026-06-24 11:02:35 +05:30 · 8ff426e53b
commit 8ff426e53b
parent 5add283ec8
8 changed files with 272 additions and 25 deletions
--- a/agent/display.py
+++ b/agent/display.py
@ -16,6 +16,7 @@ from pathlib import Path
 from typing import Any

 from utils import safe_json_loads
+from agent.redact import redact_sensitive_text
 from agent.tool_result_classification import file_mutation_result_landed

 # ANSI escape codes for coloring tool failure indicators
@ -339,6 +340,58 @@ def _read_file_line_label(args: dict) -> str:
    return f"L{offset}-{offset + limit - 1}"


+def redact_browser_typed_text_for_display(value: Any, typed_text: Any) -> Any:
+    """Mask secret-looking browser_type input wherever a payload echoes it.
+
+    *typed_text* is stringified and run through ``redact_sensitive_text``.  If
+    that changes it, each occurrence of the raw text in *value* is replaced
+    with the redacted form: strings directly; dict values, lists and tuples
+    recursively (dict keys and other types are left alone).  *value* is
+    returned unchanged when *typed_text* is ``None``/empty or the redactor
+    leaves it unmodified.  The redacted form is computed once, up front.
+    """
+    if typed_text is None:
+        return value
+    needle = str(typed_text)
+    if not needle:
+        return value
+    redacted = redact_sensitive_text(needle)
+    if redacted == needle:
+        # Nothing secret-looking in the typed text; leave payload untouched.
+        return value
+
+    def _scrub(item: Any) -> Any:
+        # Containers are rebuilt, never mutated in place.
+        if isinstance(item, str):
+            return item.replace(needle, redacted)
+        if isinstance(item, dict):
+            return {key: _scrub(child) for key, child in item.items()}
+        if isinstance(item, list):
+            return [_scrub(child) for child in item]
+        if isinstance(item, tuple):
+            return tuple(_scrub(child) for child in item)
+        return item
+
+    return _scrub(value)
+
+
+def redact_tool_args_for_display(tool_name: str, args: dict | None) -> dict | None:
+    """Produce the argument mapping that logs and progress UI may show.
+
+    Only a ``browser_type`` call with a string ``text`` is altered: a shallow
+    copy is returned whose ``text`` went through ``redact_sensitive_text``.
+    Any other input, non-dict *args* included, comes back as the same object.
+    NOTE(review): this commit's tests expect "[redacted typed text]" for any
+    typed value, not pattern-based masking -- confirm the intended contract.
+    """
+    if not isinstance(args, dict) or tool_name != "browser_type":
+        return args
+    typed = args.get("text")
+    if not isinstance(typed, str):
+        return args
+    return {**args, "text": redact_sensitive_text(typed)}
+
+
 def _delegate_task_goal_parts(tasks: Any, *, per_goal_len: int) -> tuple[int, list[str]]:
    if not isinstance(tasks, list):
        return 0, []
@ -362,6 +415,7 @@ def build_tool_preview(tool_name: str, args: dict, max_len: int | None = None) -
        max_len = _tool_preview_max_len
    if not args:
        return None
+    args = redact_tool_args_for_display(tool_name, args) or args
    primary_args = {
        "terminal": "command", "web_search": "query", "web_extract": "urls",
        "read_file": "path", "write_file": "path", "patch": "path",
@ -1085,6 +1139,7 @@ def get_cute_tool_message(
    When *result* is provided the line is checked for failure indicators.
    Failed tool calls get a red prefix and an informational suffix.
    """
+    args = redact_tool_args_for_display(tool_name, args) or args
    dur = f"{duration:.1f}s"
    is_failure, failure_suffix = _detect_tool_failure(tool_name, result)
    skin_prefix = get_skin_tool_prefix()
--- a/agent/tool_executor.py
+++ b/agent/tool_executor.py
@ -26,6 +26,7 @@ from agent.display import (
    build_tool_preview as _build_tool_preview,
    get_cute_tool_message as _get_cute_tool_message_impl,
    get_tool_emoji as _get_tool_emoji,
+    redact_tool_args_for_display as _redact_tool_args_for_display,
    _detect_tool_failure,
 )
 from agent.tool_guardrails import ToolGuardrailDecision
@ -469,10 +470,11 @@ def execute_tool_calls_concurrent(agent, assistant_message, messages: list, effe
    if not agent.quiet_mode and getattr(agent, "tool_progress_mode", "all") != "off":
        print(f"  ⚡ Concurrent: {num_tools} tool calls — {tool_names_str}")
        for i, (tc, name, args, middleware_trace, block_result, blocked_by_guardrail) in enumerate(parsed_calls, 1):
-            args_str = json.dumps(args, ensure_ascii=False)
+            display_args = _redact_tool_args_for_display(name, args) or args
+            args_str = json.dumps(display_args, ensure_ascii=False)
            if agent.verbose_logging:
-                print(f"  📞 Tool {i}: {name}({list(args.keys())})")
-                print(agent._wrap_verbose("Args: ", json.dumps(args, indent=2, ensure_ascii=False)))
+                print(f"  📞 Tool {i}: {name}({list(display_args.keys())})")
+                print(agent._wrap_verbose("Args: ", json.dumps(display_args, indent=2, ensure_ascii=False)))
            else:
                args_preview = args_str[:agent.log_prefix_chars] + "..." if len(args_str) > agent.log_prefix_chars else args_str
                print(f"  📞 Tool {i}: {name}({list(args.keys())}) - {args_preview}")
@ -482,8 +484,9 @@ def execute_tool_calls_concurrent(agent, assistant_message, messages: list, effe
            continue
        if agent.tool_progress_callback:
            try:
-                preview = _build_tool_preview(name, args)
-                agent.tool_progress_callback("tool.started", name, preview, args)
+                display_args = _redact_tool_args_for_display(name, args) or args
+                preview = _build_tool_preview(name, display_args)
+                agent.tool_progress_callback("tool.started", name, preview, display_args)
            except Exception as cb_err:
                logging.debug(f"Tool progress callback error: {cb_err}")

@ -492,7 +495,8 @@ def execute_tool_calls_concurrent(agent, assistant_message, messages: list, effe
            continue
        if agent.tool_start_callback:
            try:
-                agent.tool_start_callback(tc.id, name, args)
+                display_args = _redact_tool_args_for_display(name, args) or args
+                agent.tool_start_callback(tc.id, name, display_args)
            except Exception as cb_err:
                logging.debug(f"Tool start callback error: {cb_err}")

@ -792,7 +796,8 @@ def execute_tool_calls_concurrent(agent, assistant_message, messages: list, effe

        if not blocked and agent.tool_complete_callback:
            try:
-                agent.tool_complete_callback(tc.id, name, args, function_result)
+                display_args = _redact_tool_args_for_display(name, args) or args
+                agent.tool_complete_callback(tc.id, name, display_args, function_result)
            except Exception as cb_err:
                logging.debug(f"Tool complete callback error: {cb_err}")

@ -954,10 +959,11 @@ def execute_tool_calls_sequential(agent, assistant_message, messages: list, effe
            agent._iters_since_skill = 0

        if not agent.quiet_mode and getattr(agent, "tool_progress_mode", "all") != "off":
-            args_str = json.dumps(function_args, ensure_ascii=False)
+            display_args = _redact_tool_args_for_display(function_name, function_args) or function_args
+            args_str = json.dumps(display_args, ensure_ascii=False)
            if agent.verbose_logging:
-                print(f"  📞 Tool {i}: {function_name}({list(function_args.keys())})")
-                print(agent._wrap_verbose("Args: ", json.dumps(function_args, indent=2, ensure_ascii=False)))
+                print(f"  📞 Tool {i}: {function_name}({list(display_args.keys())})")
+                print(agent._wrap_verbose("Args: ", json.dumps(display_args, indent=2, ensure_ascii=False)))
            else:
                args_preview = args_str[:agent.log_prefix_chars] + "..." if len(args_str) > agent.log_prefix_chars else args_str
                print(f"  📞 Tool {i}: {function_name}({list(function_args.keys())}) - {args_preview}")
@ -978,14 +984,16 @@ def execute_tool_calls_sequential(agent, assistant_message, messages: list, effe

        if not _execution_blocked and agent.tool_progress_callback:
            try:
-                preview = _build_tool_preview(function_name, function_args)
-                agent.tool_progress_callback("tool.started", function_name, preview, function_args)
+                display_args = _redact_tool_args_for_display(function_name, function_args) or function_args
+                preview = _build_tool_preview(function_name, display_args)
+                agent.tool_progress_callback("tool.started", function_name, preview, display_args)
            except Exception as cb_err:
                logging.debug(f"Tool progress callback error: {cb_err}")

        if not _execution_blocked and agent.tool_start_callback:
            try:
-                agent.tool_start_callback(tool_call.id, function_name, function_args)
+                display_args = _redact_tool_args_for_display(function_name, function_args) or function_args
+                agent.tool_start_callback(tool_call.id, function_name, display_args)
            except Exception as cb_err:
                logging.debug(f"Tool start callback error: {cb_err}")

@ -1215,7 +1223,8 @@ def execute_tool_calls_sequential(agent, assistant_message, messages: list, effe
            if agent._should_emit_quiet_tool_messages():
                face = random.choice(KawaiiSpinner.get_waiting_faces())
                emoji = _get_tool_emoji(function_name)
-                preview = _build_tool_preview(function_name, function_args) or function_name
+                display_args = _redact_tool_args_for_display(function_name, function_args) or function_args
+                preview = _build_tool_preview(function_name, display_args) or function_name
                spinner = KawaiiSpinner(f"{face} {emoji} {preview}", spinner_type='dots', print_fn=agent._print_fn)
                spinner.start()
            _ce_result = None
@ -1248,7 +1257,8 @@ def execute_tool_calls_sequential(agent, assistant_message, messages: list, effe
            if agent._should_emit_quiet_tool_messages() and agent._should_start_quiet_spinner():
                face = random.choice(KawaiiSpinner.get_waiting_faces())
                emoji = _get_tool_emoji(function_name)
-                preview = _build_tool_preview(function_name, function_args) or function_name
+                display_args = _redact_tool_args_for_display(function_name, function_args) or function_args
+                preview = _build_tool_preview(function_name, display_args) or function_name
                spinner = KawaiiSpinner(f"{face} {emoji} {preview}", spinner_type='dots', print_fn=agent._print_fn)
                spinner.start()
            _mem_result = None
@ -1279,7 +1289,8 @@ def execute_tool_calls_sequential(agent, assistant_message, messages: list, effe
            if agent._should_emit_quiet_tool_messages() and agent._should_start_quiet_spinner():
                face = random.choice(KawaiiSpinner.get_waiting_faces())
                emoji = _get_tool_emoji(function_name)
-                preview = _build_tool_preview(function_name, function_args) or function_name
+                display_args = _redact_tool_args_for_display(function_name, function_args) or function_args
+                preview = _build_tool_preview(function_name, display_args) or function_name
                spinner = KawaiiSpinner(f"{face} {emoji} {preview}", spinner_type='dots', print_fn=agent._print_fn)
                spinner.start()
            _spinner_result = None
@ -1441,7 +1452,8 @@ def execute_tool_calls_sequential(agent, assistant_message, messages: list, effe

        if not _execution_blocked and agent.tool_complete_callback:
            try:
-                agent.tool_complete_callback(tool_call.id, function_name, function_args, function_result)
+                display_args = _redact_tool_args_for_display(function_name, function_args) or function_args
+                agent.tool_complete_callback(tool_call.id, function_name, display_args, function_result)
            except Exception as cb_err:
                logging.debug(f"Tool complete callback error: {cb_err}")

--- a/tests/agent/test_display.py
+++ b/tests/agent/test_display.py
@ -9,6 +9,7 @@ from agent.display import (
    capture_local_edit_snapshot,
    extract_edit_diff,
    get_cute_tool_message,
+    redact_tool_args_for_display,
    set_tool_preview_max_len,
    _render_inline_unified_diff,
    _summarize_rendered_diff_sections,
@ -86,6 +87,21 @@ class TestBuildToolPreview:
        result = build_tool_preview("read_file", {"path": "./package.json", "offset": 1, "limit": 5})
        assert result == "package.json L1-5"

+    def test_browser_type_preview_never_echoes_typed_text(self):
+        typed_text = "my_secret_password_123"
+        result = build_tool_preview("browser_type", {"ref": "@e3", "text": typed_text})
+        assert result is not None
+        assert typed_text not in result
+        assert "redacted typed text" in result
+
+    def test_browser_type_display_args_never_echo_typed_text(self):
+        typed_text = "normal-looking-but-sensitive"
+        safe_args = redact_tool_args_for_display(
+            "browser_type", {"ref": "@e3", "text": typed_text}
+        )
+        assert safe_args == {"ref": "@e3", "text": "[redacted typed text]"}
+        assert typed_text not in str(safe_args)
+
    def test_unknown_tool_with_fallback_key(self):
        """Unknown tool but with a recognized fallback key should still preview."""
        result = build_tool_preview("custom_tool", {"query": "test query"})
@ -242,6 +258,18 @@ class TestCuteToolMessagePreviewLength:
        )
        assert "2x: Review PR A | Review PR B" in line

+    def test_browser_type_cute_message_never_echoes_typed_text(self):
+        typed_text = "my_secret_password_123"
+        line = get_cute_tool_message(
+            "browser_type",
+            {"ref": "@password", "text": typed_text},
+            0.1,
+            result='{"success": true, "typed": "[redacted typed text]"}',
+        )
+
+        assert typed_text not in line
+        assert "redacted typed text" in line
+

 class TestEditDiffPreview:
    def test_extract_edit_diff_for_patch(self):
--- a/tests/run_agent/test_run_agent.py
+++ b/tests/run_agent/test_run_agent.py
@ -2589,6 +2589,30 @@ class TestConcurrentToolExecution:
        assert starts == [("c1", "web_search", {"query": "hello"})]
        assert completes == [("c1", "web_search", {"query": "hello"}, '{"success": true}')]

+    def test_sequential_browser_type_callbacks_never_echo_typed_text(self, agent):
+        typed_text = "my_secret_password_123"
+        tool_call = _mock_tool_call(
+            name="browser_type",
+            arguments=json.dumps({"ref": "@password", "text": typed_text}),
+            call_id="c-secret",
+        )
+        mock_msg = _mock_assistant_msg(content="", tool_calls=[tool_call])
+        messages = []
+        starts = []
+        completes = []
+        progress = []
+        agent.tool_start_callback = lambda tool_call_id, function_name, function_args: starts.append((tool_call_id, function_name, function_args))
+        agent.tool_complete_callback = lambda tool_call_id, function_name, function_args, function_result: completes.append((tool_call_id, function_name, function_args, function_result))
+        agent.tool_progress_callback = lambda event, name, preview, args, **kw: progress.append((event, name, preview, args))
+
+        with patch("run_agent.handle_function_call", return_value='{"success": true, "typed": "[redacted typed text]"}'):
+            agent._execute_tool_calls_sequential(mock_msg, messages, "task-1")
+
+        assert starts == [("c-secret", "browser_type", {"ref": "@password", "text": "[redacted typed text]"})]
+        assert completes[0][2] == {"ref": "@password", "text": "[redacted typed text]"}
+        assert progress[0][2] == "[redacted typed text]"
+        assert typed_text not in repr(starts + completes + progress)
+
    def test_concurrent_tool_callbacks_fire_for_each_tool(self, agent):
        tc1 = _mock_tool_call(name="web_search", arguments='{"query":"one"}', call_id="c1")
        tc2 = _mock_tool_call(name="web_search", arguments='{"query":"two"}', call_id="c2")
@ -2610,6 +2634,30 @@ class TestConcurrentToolExecution:
        assert {entry[0] for entry in completes} == {"c1", "c2"}
        assert {entry[3] for entry in completes} == {'{"id":1}', '{"id":2}'}

+    def test_concurrent_browser_type_callbacks_never_echo_typed_text(self, agent):
+        typed_text = "my_secret_password_123"
+        tc = _mock_tool_call(
+            name="browser_type",
+            arguments=json.dumps({"ref": "@password", "text": typed_text}),
+            call_id="c-secret",
+        )
+        mock_msg = _mock_assistant_msg(content="", tool_calls=[tc])
+        messages = []
+        starts = []
+        completes = []
+        progress = []
+        agent.tool_start_callback = lambda tool_call_id, function_name, function_args: starts.append((tool_call_id, function_name, function_args))
+        agent.tool_complete_callback = lambda tool_call_id, function_name, function_args, function_result: completes.append((tool_call_id, function_name, function_args, function_result))
+        agent.tool_progress_callback = lambda event, name, preview, args, **kw: progress.append((event, name, preview, args))
+
+        with patch("run_agent.handle_function_call", return_value='{"success": true, "typed": "[redacted typed text]"}'):
+            agent._execute_tool_calls_concurrent(mock_msg, messages, "task-1")
+
+        assert starts == [("c-secret", "browser_type", {"ref": "@password", "text": "[redacted typed text]"})]
+        assert completes[0][2] == {"ref": "@password", "text": "[redacted typed text]"}
+        assert progress[0][2] == "[redacted typed text]"
+        assert typed_text not in repr(starts + completes + progress)
+
    def test_invoke_tool_handles_agent_level_tools(self, agent):
        """_invoke_tool should handle todo tool directly."""
        with patch("tools.todo_tool.todo_tool", return_value='{"ok":true}') as mock_todo:
--- a/tests/tools/test_browser_camofox.py
+++ b/tests/tools/test_browser_camofox.py
@ -235,7 +235,35 @@ class TestCamofoxInteractions:
        mock_post.return_value = _mock_response(json_data={"ok": True})
        result = json.loads(camofox_type("@e3", "hello world", task_id="t5"))
        assert result["success"] is True
-        assert result["typed"] == "hello world"
+        assert result["typed"] == "[redacted typed text]"
+
+    @patch("tools.browser_camofox.requests.post")
+    def test_type_never_echoes_raw_secret(self, mock_post, monkeypatch):
+        monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377")
+        mock_post.return_value = _mock_response(json_data={"tabId": "tab5b", "url": "https://x.com"})
+        camofox_navigate("https://x.com", task_id="t5b")
+
+        typed_text = "my_secret_password_123"
+        mock_post.return_value = _mock_response(json_data={"ok": True})
+        result = json.loads(camofox_type("@password", typed_text, task_id="t5b"))
+        assert result["success"] is True
+        assert typed_text not in json.dumps(result)
+        assert result["typed"] == "[redacted typed text]"
+
+    @patch("tools.browser_camofox.requests.post")
+    def test_type_failure_never_echoes_raw_secret(self, mock_post, monkeypatch):
+        monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377")
+        mock_post.return_value = _mock_response(json_data={"tabId": "tab5c", "url": "https://x.com"})
+        camofox_navigate("https://x.com", task_id="t5c")
+
+        typed_text = "my_secret_password_123"
+        mock_post.side_effect = RuntimeError(f"camofox failed while typing {typed_text}")
+        raw_result = camofox_type("@password", typed_text, task_id="t5c")
+        result = json.loads(raw_result)
+
+        assert result["success"] is False
+        assert typed_text not in raw_result
+        assert "[redacted typed text]" in raw_result

    @patch("tools.browser_camofox.requests.post")
    def test_scroll(self, mock_post, monkeypatch):
--- a/tests/tools/test_browser_type_redaction.py
+++ b/tests/tools/test_browser_type_redaction.py
@ -0,0 +1,47 @@
+"""Regression tests for browser_type display redaction."""
+
+import json
+from unittest.mock import patch
+
+from tools.browser_tool import browser_type
+
+
+def test_browser_type_never_echoes_raw_typed_text(monkeypatch):
+    monkeypatch.delenv("CAMOFOX_URL", raising=False)
+    monkeypatch.delenv("BROWSER_CDP_URL", raising=False)
+    typed_text = "my_secret_password_123"
+
+    with patch(
+        "tools.browser_tool._run_browser_command",
+        return_value={"success": True},
+    ) as mock_run:
+        result = json.loads(browser_type("@password", typed_text, task_id="redaction-test"))
+
+    assert result["success"] is True
+    assert result["typed"] == "[redacted typed text]"
+    assert typed_text not in json.dumps(result)
+    mock_run.assert_called_once()
+    assert mock_run.call_args.args[2] == ["@password", typed_text]
+
+
+def test_browser_type_failure_never_echoes_raw_typed_text(monkeypatch):
+    monkeypatch.delenv("CAMOFOX_URL", raising=False)
+    monkeypatch.delenv("BROWSER_CDP_URL", raising=False)
+    typed_text = "my_secret_password_123"
+
+    with patch(
+        "tools.browser_tool._run_browser_command",
+        return_value={
+            "success": False,
+            "error": f"backend failed while typing {typed_text}",
+            "fallback_warning": f"chrome fallback also saw {typed_text}",
+        },
+    ) as mock_run:
+        raw_result = browser_type("@password", typed_text, task_id="redaction-test")
+        result = json.loads(raw_result)
+
+    assert result["success"] is False
+    assert typed_text not in raw_result
+    assert "[redacted typed text]" in raw_result
+    mock_run.assert_called_once()
+    assert mock_run.call_args.args[2] == ["@password", typed_text]
--- a/tools/browser_camofox.py
+++ b/tools/browser_camofox.py
@ -562,13 +562,27 @@ def camofox_type(ref: str, text: str, task_id: Optional[str] = None) -> str:
            f"/tabs/{session['tab_id']}/type",
            {"userId": session["user_id"], "ref": clean_ref, "text": text},
        )
-        return json.dumps({
+        from agent.display import (
+            redact_browser_typed_text_for_display,
+            redact_tool_args_for_display,
+        )
+
+        display_text = (redact_tool_args_for_display("browser_type", {"text": text}) or {})["text"]
+
+        response = {
            "success": True,
-            "typed": text,
+            # Match browser_tool.browser_type: do not echo raw credentials in
+            # tool progress or chat history.  The raw text is still typed into
+            # the page; only the returned display value is redacted.
+            "typed": display_text,
            "element": clean_ref,
-        })
+        }
+        response = redact_browser_typed_text_for_display(response, text)
+        return json.dumps(response)
    except Exception as e:
-        return tool_error(str(e), success=False)
+        from agent.display import redact_browser_typed_text_for_display
+
+        return tool_error(redact_browser_typed_text_for_display(str(e), text), success=False)


 def camofox_scroll(direction: str, task_id: Optional[str] = None) -> str:
--- a/tools/browser_tool.py
+++ b/tools/browser_tool.py
@ -2758,19 +2758,34 @@ def browser_type(ref: str, text: str, task_id: Optional[str] = None) -> str:
    # Use fill command (clears then types)
    result = _run_browser_command(effective_task_id, "fill", [ref, text])

+    from agent.display import (
+        redact_browser_typed_text_for_display,
+        redact_tool_args_for_display,
+    )
+
+    display_text = (redact_tool_args_for_display("browser_type", {"text": text}) or {})["text"]
+
    if result.get("success"):
        response = {
            "success": True,
-            "typed": text,
+            # Never echo raw typed text back to tool progress/log surfaces: it
+            # is commonly a password, API key, or other credential.  Redact
+            # only the returned display value; the original text was already
+            # sent to the browser command above.
+            "typed": display_text,
            "element": ref
        }
-        return json.dumps(_copy_fallback_warning(response, result), ensure_ascii=False)
+        response = _copy_fallback_warning(response, result)
+        response = redact_browser_typed_text_for_display(response, text)
+        return json.dumps(response, ensure_ascii=False)
    else:
        response = {
            "success": False,
            "error": result.get("error", f"Failed to type into {ref}")
        }
-        return json.dumps(_copy_fallback_warning(response, result), ensure_ascii=False)
+        response = _copy_fallback_warning(response, result)
+        response = redact_browser_typed_text_for_display(response, text)
+        return json.dumps(response, ensure_ascii=False)


 def browser_scroll(direction: str, task_id: Optional[str] = None) -> str: