fix(security): redact secrets in background process + foreground env-dump output (#43025) (#54149)

* fix(security): redact secrets in background process + foreground env-dump output Terminal-output redaction was incomplete (#43025): - Gap 1: process(action=poll/log/wait) returned background stdout verbatim — no redaction at all. A background printenv/server/test emitting a key leaked raw to the model, session.db, and CLI display. Same for the gateway background-process watcher's completion/progress notifications. - Gap 2: the foreground terminal path hardcoded code_file=True, which skips the ENV-assignment pass, so an opaque token (no vendor prefix) from env/printenv leaked even there. Adds agent.redact.redact_terminal_output(output, command) as the single policy for ALL terminal-output surfaces: env-dump commands (env/printenv/set/export/ declare) get the ENV-assignment pass (code_file=False) to mask opaque tokens; other commands stay on code_file=True to avoid false positives on source dumps. Wired into terminal_tool, process_registry (_handle_process boundary), and the gateway watcher. Respects security.redact_secrets (no force) — opt-out preserved. * docs: add infographic for #43025 terminal-output redaction fix
2026-07-01 12:02:05 +00:00 · 2026-06-28 02:44:21 -07:00 · 2026-06-28 02:44:21 -07:00 · c1c179a239
commit c1c179a239
parent d5ba374c03
7 changed files with 225 additions and 10 deletions
--- a/agent/redact.py
+++ b/agent/redact.py
@ -10,6 +10,7 @@ the first 6 and last 4 characters for debuggability.
 import logging
 import os
 import re
+import shlex

 logger = logging.getLogger(__name__)

@ -542,6 +543,66 @@ def redact_sensitive_text(text: str, *, force: bool = False, code_file: bool = F
    return text


+# Commands whose stdout is an environment-variable dump (KEY=value lines),
+# NOT source code. For these, terminal-output redaction must run the
+# ENV-assignment pass (code_file=False) so opaque tokens with no recognized
+# vendor prefix (e.g. ``MY_SERVICE_TOKEN=abc123randomstring``) are still
+# masked. For all other commands, code_file=True is used to avoid mangling
+# legitimate source/config dumps (``MAX_TOKENS=100``, ``"apiKey": "x"``
+# fixtures, ``postgresql://{user}`` f-string templates). See issue #43025.
+_ENV_DUMP_COMMANDS = frozenset({"env", "printenv", "set", "export", "declare"})
+
+
+def is_env_dump_command(command: str | None) -> bool:
+    """Return True if ``command`` dumps environment variables to stdout.
+
+    Detects ``env`` / ``printenv`` / ``set`` / ``export`` / ``declare`` as the
+    first token of any segment in a pipeline or sequence (``;`` / ``&&`` /
+    ``||`` / ``|``). Conservative: a parse failure or anything unrecognized
+    returns False (callers then fall back to the safer code_file=True path,
+    which still masks prefix-shaped keys).
+    """
+    if not command or not isinstance(command, str):
+        return False
+    # Split on shell separators, then inspect the first token of each segment.
+    segments = re.split(r"[|;&]+", command)
+    for seg in segments:
+        seg = seg.strip()
+        if not seg:
+            continue
+        try:
+            tokens = shlex.split(seg)
+        except ValueError:
+            tokens = seg.split()
+        if tokens and tokens[0] in _ENV_DUMP_COMMANDS:
+            return True
+    return False
+
+
+def redact_terminal_output(
+    output: str, command: str | None = None, *, force: bool = False
+) -> str:
+    """Redact secrets from terminal/process stdout.
+
+    Single redaction policy for ALL terminal-output surfaces — foreground
+    ``terminal`` results AND background ``process(action=poll/log/wait)``
+    output — so they can't diverge. Picks ``code_file`` based on whether
+    ``command`` is an environment dump:
+
+    - env-dump command (``env``/``printenv``/``set``/``export``/``declare``)
+      → ``code_file=False`` so the ENV-assignment pass masks opaque tokens.
+    - anything else (or unknown command) → ``code_file=True`` to avoid
+      false positives on source/config dumps.
+
+    ``force=True`` bypasses the global ``security.redact_secrets`` preference
+    for safety boundaries that must never emit raw credentials.
+    """
+    if not output:
+        return output
+    code_file = not is_env_dump_command(command or "")
+    return redact_sensitive_text(output, force=force, code_file=code_file)
+
+
 # Substrings used to gate ``_PREFIX_RE`` execution. If none of these appear in
 # the input string, the prefix regex cannot match anything, so we skip it.
 # False positives are fine (they just run the regex, which then matches
--- a/gateway/run.py
+++ b/gateway/run.py
@ -14149,6 +14149,11 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
                )
                if should_notify:
                    new_output = session.output_buffer[-1000:] if session.output_buffer else ""
+                    if new_output:
+                        from agent.redact import redact_terminal_output
+                        new_output = redact_terminal_output(
+                            new_output, getattr(session, "command", "") or ""
+                        )
                    message_text = (
                        f"[Background process {session_id} finished with exit code {session.exit_code}~ "
                        f"Here's the final output:\n{new_output}]"
@ -14174,6 +14179,11 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
                # New output available -- deliver status update (only in "all" mode)
                # Skip periodic updates for agent_notify watchers (they only care about completion)
                new_output = session.output_buffer[-500:] if session.output_buffer else ""
+                if new_output:
+                    from agent.redact import redact_terminal_output
+                    new_output = redact_terminal_output(
+                        new_output, getattr(session, "command", "") or ""
+                    )
                message_text = (
                    f"[Background process {session_id} is still running~ "
                    f"New output:\n{new_output}]"
--- a/infographic/redact-terminal-43025/infographic.png
+++ b/infographic/redact-terminal-43025/infographic.png
--- a/tests/agent/test_redact.py
+++ b/tests/agent/test_redact.py
@ -698,3 +698,58 @@ class TestDbConnstrCodeOutput:
        text = "connected via postgres://user:s3cr3tpw@host:5432/db ok"
        result = redact_sensitive_text(text, force=True)
        assert "s3cr3tpw" not in result
+
+
+class TestTerminalOutputRedaction:
+    """is_env_dump_command + redact_terminal_output — issue #43025.
+
+    Terminal/process stdout must be redacted on every surface (foreground
+    `terminal` AND background `process(poll/log/wait)`). Env-dump commands get
+    the ENV-assignment pass so opaque tokens (no vendor prefix) are masked;
+    other commands stay on the code_file path to avoid false positives.
+    """
+
+    def test_is_env_dump_command_detection(self):
+        from agent.redact import is_env_dump_command
+        assert is_env_dump_command("printenv")
+        assert is_env_dump_command("env")
+        assert is_env_dump_command("env | grep API")
+        assert is_env_dump_command("set")
+        assert is_env_dump_command("export")
+        assert is_env_dump_command("declare -x")
+        assert is_env_dump_command("cat /tmp/x && printenv")
+        assert not is_env_dump_command("python app.py")
+        assert not is_env_dump_command("cat config.py")
+        assert not is_env_dump_command("printf 'TOKEN=x'")
+        assert not is_env_dump_command("")
+        assert not is_env_dump_command(None)
+
+    def test_env_dump_masks_opaque_token(self):
+        from agent.redact import redact_terminal_output
+        out = "MY_SERVICE_TOKEN=abc123randomopaquetokenvalue999\nHOME=/home/u"
+        red = redact_terminal_output(out, "printenv")
+        assert "abc123randomopaquetokenvalue999" not in red
+        assert "HOME=/home/u" in red
+
+    def test_non_env_command_preserves_source_false_positives(self):
+        from agent.redact import redact_terminal_output
+        # code_file path: MAX_TOKENS=100 is source, must survive; real sk- masked.
+        out = "MAX_TOKENS=100\nOPENAI_API_KEY=sk-proj-abc123def456ghi789jkl012"
+        red = redact_terminal_output(out, "cat config.py")
+        assert "MAX_TOKENS=100" in red
+        assert "abc123def456" not in red
+
+    def test_unknown_command_uses_safe_code_file_path(self):
+        from agent.redact import redact_terminal_output
+        # No command → code_file=True; opaque non-prefix token NOT masked
+        # (safe default avoids mangling arbitrary output), prefix still masked.
+        out = "OPAQUE=plainvalue123\nKEY=sk-proj-abc123def456ghi789jkl012"
+        red = redact_terminal_output(out, None)
+        assert "abc123def456" not in red
+
+    def test_disabled_passes_through(self, monkeypatch):
+        from agent.redact import redact_terminal_output
+        monkeypatch.setattr("agent.redact._REDACT_ENABLED", False)
+        out = "CUSTOM_TOKEN=zzzopaque1234567890abcdef"
+        red = redact_terminal_output(out, "printenv")
+        assert "zzzopaque1234567890abcdef" in red
--- a/tests/tools/test_process_registry.py
+++ b/tests/tools/test_process_registry.py
@ -1652,3 +1652,59 @@ class TestSigkillEscalation:
                except (ProcessLookupError, PermissionError, OSError):
                    pass
            parent.wait()
+
+
+class TestHandleProcessRedaction:
+    """`_handle_process` redacts background-process output before it reaches the
+    model / session.db / CLI display — issue #43025.
+
+    Mirrors the foreground `terminal` redaction so the two surfaces can't
+    diverge. Env-dump commands (`printenv`/`env`) get the ENV-assignment pass
+    so opaque tokens are masked; other commands stay on the code_file path.
+    """
+
+    def _setup(self, monkeypatch, command, output):
+        import agent.redact as _r
+        monkeypatch.setattr(_r, "_REDACT_ENABLED", True)
+        from tools import process_registry as pr
+        reg = ProcessRegistry()
+        sess = _make_session(sid="proc_redact1", command=command)
+        sess.output_buffer = output
+        sess.exited = True
+        sess.exit_code = 0
+        reg._running.clear()
+        reg._finished[sess.id] = sess
+        reg._running[sess.id] = sess
+        monkeypatch.setattr(pr, "process_registry", reg)
+        return pr, sess
+
+    def test_log_redacts_env_dump_opaque_token(self, monkeypatch):
+        pr, sess = self._setup(
+            monkeypatch, "printenv",
+            "MY_SERVICE_TOKEN=abc123randomopaquetokenvalue999\nHOME=/home/u",
+        )
+        out = json.loads(pr._handle_process({"action": "log", "session_id": sess.id}))
+        assert "abc123randomopaquetokenvalue999" not in out["output"]
+        assert "HOME=/home/u" in out["output"]
+
+    def test_poll_redacts_prefix_key(self, monkeypatch):
+        pr, sess = self._setup(
+            monkeypatch, "python app.py",
+            "leaked OPENAI_API_KEY sk-proj-abc123def456ghi789jkl012 here",
+        )
+        out = json.loads(pr._handle_process({"action": "poll", "session_id": sess.id}))
+        assert "abc123def456" not in out["output_preview"]
+
+    def test_disabled_passes_through(self, monkeypatch):
+        import agent.redact as _r
+        monkeypatch.setattr(_r, "_REDACT_ENABLED", False)
+        from tools import process_registry as pr
+        reg = ProcessRegistry()
+        sess = _make_session(sid="proc_redact2", command="printenv")
+        sess.output_buffer = "CUSTOM_TOKEN=zzzopaque1234567890abcdef"
+        sess.exited = True
+        sess.exit_code = 0
+        reg._running[sess.id] = sess
+        monkeypatch.setattr(pr, "process_registry", reg)
+        out = json.loads(pr._handle_process({"action": "log", "session_id": sess.id}))
+        assert "zzzopaque1234567890abcdef" in out["output"]
--- a/tools/process_registry.py
+++ b/tools/process_registry.py
@ -1271,6 +1271,7 @@ class ProcessRegistry:

        result = {
            "session_id": session.id,
+            "command": session.command,
            "status": "exited" if session.exited else "running",
            "output": "\n".join(selected),
            "total_lines": total_lines,
@ -1330,6 +1331,7 @@ class ProcessRegistry:
                self._completion_consumed.add(session_id)
                result = {
                    "status": "exited",
+                    "command": session.command,
                    "exit_code": session.exit_code,
                    "completion_reason": session.completion_reason,
                    "termination_source": session.termination_source,
@ -1342,6 +1344,7 @@ class ProcessRegistry:
            if _is_interrupted():
                result = {
                    "status": "interrupted",
+                    "command": session.command,
                    "output": strip_ansi(session.output_buffer[-1000:]),
                    "note": "User sent a new message -- wait interrupted",
                }
@ -1356,6 +1359,7 @@ class ProcessRegistry:

        result = {
            "status": "timeout",
+            "command": session.command,
            "output": strip_ansi(session.output_buffer[-1000:]),
        }
        if timeout_note:
@ -2081,6 +2085,31 @@ PROCESS_SCHEMA = {
 }


+def _redact_process_result(result: dict) -> dict:
+    """Redact secrets from background-process output before it reaches the
+    model, session.db, and CLI display.
+
+    Mirrors the foreground ``terminal`` redaction (terminal_tool.py) so the
+    two surfaces can't diverge — issue #43025 (background output was returned
+    verbatim). Respects ``security.redact_secrets`` (no force): output fields
+    pass through ``redact_terminal_output`` which picks ``code_file`` based on
+    the recorded command (env dumps get the ENV-assignment pass). The command
+    string itself is also redacted in case it carried an inline credential.
+    """
+    if not isinstance(result, dict):
+        return result
+    from agent.redact import redact_sensitive_text, redact_terminal_output
+
+    command = result.get("command") or ""
+    for field in ("output", "output_preview"):
+        value = result.get(field)
+        if isinstance(value, str) and value:
+            result[field] = redact_terminal_output(value, command)
+    if isinstance(result.get("command"), str) and result["command"]:
+        result["command"] = redact_sensitive_text(result["command"], code_file=True)
+    return result
+
+
 def _handle_process(args, **kw):
    task_id = kw.get("task_id")
    action = args.get("action", "")
@ -2104,12 +2133,12 @@ def _handle_process(args, **kw):
        if not session_id:
            return tool_error(f"session_id is required for {action}")
        if action == "poll":
-            return json.dumps(process_registry.poll(session_id), ensure_ascii=False)
+            return json.dumps(_redact_process_result(process_registry.poll(session_id)), ensure_ascii=False)
        elif action == "log":
-            return json.dumps(process_registry.read_log(
-                session_id, offset=args.get("offset", 0), limit=args.get("limit", 200)), ensure_ascii=False)
+            return json.dumps(_redact_process_result(process_registry.read_log(
+                session_id, offset=args.get("offset", 0), limit=args.get("limit", 200))), ensure_ascii=False)
        elif action == "wait":
-            return json.dumps(process_registry.wait(session_id, timeout=args.get("timeout")), ensure_ascii=False)
+            return json.dumps(_redact_process_result(process_registry.wait(session_id, timeout=args.get("timeout"))), ensure_ascii=False)
        elif action == "kill":
            return json.dumps(process_registry.kill_process(session_id), ensure_ascii=False)
        elif action == "write":
--- a/tools/terminal_tool.py
+++ b/tools/terminal_tool.py
@ -2656,13 +2656,17 @@ def terminal_tool(
            from tools.ansi_strip import strip_ansi
            output = strip_ansi(output)

-            # Redact secrets from command output (catches env/printenv leaking
-            # keys). code_file=True: terminal output often echoes source/config
+            # Redact secrets from command output. For source/config dumps
            # (MAX_TOKENS=100, "apiKey": "x" fixtures, postgresql:// f-string
-            # templates) — skip false-positive ENV/JSON/template redaction while
-            # still masking real prefixes, auth headers, JWTs, and private keys.
-            from agent.redact import redact_sensitive_text
-            output = redact_sensitive_text(output.strip(), code_file=True) if output else ""
+            # templates) the ENV/JSON/template passes are skipped to avoid
+            # false positives (code_file=True). But for env-dump commands
+            # (env/printenv/set/export/declare) the output IS a KEY=value
+            # credential dump, so redact_terminal_output runs the ENV pass
+            # (code_file=False) to mask opaque tokens with no vendor prefix.
+            # Real prefixes, auth headers, JWTs, private keys are masked in
+            # both modes. See issue #43025.
+            from agent.redact import redact_terminal_output
+            output = redact_terminal_output(output.strip(), command) if output else ""

            # Interpret non-zero exit codes that aren't real errors
            # (e.g. grep=1 means "no matches", diff=1 means "files differ")