diff --git a/agent/redact.py b/agent/redact.py index 82e4b8a386d..957f4c3e7cd 100644 --- a/agent/redact.py +++ b/agent/redact.py @@ -10,6 +10,7 @@ the first 6 and last 4 characters for debuggability. import logging import os import re +import shlex logger = logging.getLogger(__name__) @@ -542,6 +543,66 @@ def redact_sensitive_text(text: str, *, force: bool = False, code_file: bool = F return text +# Commands whose stdout is an environment-variable dump (KEY=value lines), +# NOT source code. For these, terminal-output redaction must run the +# ENV-assignment pass (code_file=False) so opaque tokens with no recognized +# vendor prefix (e.g. ``MY_SERVICE_TOKEN=abc123randomstring``) are still +# masked. For all other commands, code_file=True is used to avoid mangling +# legitimate source/config dumps (``MAX_TOKENS=100``, ``"apiKey": "x"`` +# fixtures, ``postgresql://{user}`` f-string templates). See issue #43025. +_ENV_DUMP_COMMANDS = frozenset({"env", "printenv", "set", "export", "declare"}) + + +def is_env_dump_command(command: str | None) -> bool: + """Return True if ``command`` dumps environment variables to stdout. + + Detects ``env`` / ``printenv`` / ``set`` / ``export`` / ``declare`` as the + first token of any segment in a pipeline or sequence (``;`` / ``&&`` / + ``||`` / ``|``). Conservative: a parse failure or anything unrecognized + returns False (callers then fall back to the safer code_file=True path, + which still masks prefix-shaped keys). + """ + if not command or not isinstance(command, str): + return False + # Split on shell separators, then inspect the first token of each segment. + segments = re.split(r"[|;&]+", command) + for seg in segments: + seg = seg.strip() + if not seg: + continue + try: + tokens = shlex.split(seg) + except ValueError: + tokens = seg.split() + if tokens and tokens[0] in _ENV_DUMP_COMMANDS: + return True + return False + + +def redact_terminal_output( + output: str, command: str | None = None, *, force: bool = False +) -> str: + """Redact secrets from terminal/process stdout. + + Single redaction policy for ALL terminal-output surfaces — foreground + ``terminal`` results AND background ``process(action=poll/log/wait)`` + output — so they can't diverge. Picks ``code_file`` based on whether + ``command`` is an environment dump: + + - env-dump command (``env``/``printenv``/``set``/``export``/``declare``) + → ``code_file=False`` so the ENV-assignment pass masks opaque tokens. + - anything else (or unknown command) → ``code_file=True`` to avoid + false positives on source/config dumps. + + ``force=True`` bypasses the global ``security.redact_secrets`` preference + for safety boundaries that must never emit raw credentials. + """ + if not output: + return output + code_file = not is_env_dump_command(command or "") + return redact_sensitive_text(output, force=force, code_file=code_file) + + # Substrings used to gate ``_PREFIX_RE`` execution. If none of these appear in # the input string, the prefix regex cannot match anything, so we skip it. # False positives are fine (they just run the regex, which then matches diff --git a/gateway/run.py b/gateway/run.py index 1d44921ba59..9e55fe40466 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -14149,6 +14149,11 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew ) if should_notify: new_output = session.output_buffer[-1000:] if session.output_buffer else "" + if new_output: + from agent.redact import redact_terminal_output + new_output = redact_terminal_output( + new_output, getattr(session, "command", "") or "" + ) message_text = ( f"[Background process {session_id} finished with exit code {session.exit_code}~ " f"Here's the final output:\n{new_output}]" @@ -14174,6 +14179,11 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew # New output available -- deliver status update (only in "all" mode) # Skip periodic updates for agent_notify watchers (they only care about completion) new_output = session.output_buffer[-500:] if session.output_buffer else "" + if new_output: + from agent.redact import redact_terminal_output + new_output = redact_terminal_output( + new_output, getattr(session, "command", "") or "" + ) message_text = ( f"[Background process {session_id} is still running~ " f"New output:\n{new_output}]" diff --git a/infographic/redact-terminal-43025/infographic.png b/infographic/redact-terminal-43025/infographic.png new file mode 100644 index 00000000000..d8f3f4f3027 Binary files /dev/null and b/infographic/redact-terminal-43025/infographic.png differ diff --git a/tests/agent/test_redact.py b/tests/agent/test_redact.py index 4822f61acd9..977c1d9cf21 100644 --- a/tests/agent/test_redact.py +++ b/tests/agent/test_redact.py @@ -698,3 +698,58 @@ class TestDbConnstrCodeOutput: text = "connected via postgres://user:s3cr3tpw@host:5432/db ok" result = redact_sensitive_text(text, force=True) assert "s3cr3tpw" not in result + + +class TestTerminalOutputRedaction: + """is_env_dump_command + redact_terminal_output — issue #43025. + + Terminal/process stdout must be redacted on every surface (foreground + `terminal` AND background `process(poll/log/wait)`). Env-dump commands get + the ENV-assignment pass so opaque tokens (no vendor prefix) are masked; + other commands stay on the code_file path to avoid false positives. + """ + + def test_is_env_dump_command_detection(self): + from agent.redact import is_env_dump_command + assert is_env_dump_command("printenv") + assert is_env_dump_command("env") + assert is_env_dump_command("env | grep API") + assert is_env_dump_command("set") + assert is_env_dump_command("export") + assert is_env_dump_command("declare -x") + assert is_env_dump_command("cat /tmp/x && printenv") + assert not is_env_dump_command("python app.py") + assert not is_env_dump_command("cat config.py") + assert not is_env_dump_command("printf 'TOKEN=x'") + assert not is_env_dump_command("") + assert not is_env_dump_command(None) + + def test_env_dump_masks_opaque_token(self): + from agent.redact import redact_terminal_output + out = "MY_SERVICE_TOKEN=abc123randomopaquetokenvalue999\nHOME=/home/u" + red = redact_terminal_output(out, "printenv") + assert "abc123randomopaquetokenvalue999" not in red + assert "HOME=/home/u" in red + + def test_non_env_command_preserves_source_false_positives(self): + from agent.redact import redact_terminal_output + # code_file path: MAX_TOKENS=100 is source, must survive; real sk- masked. + out = "MAX_TOKENS=100\nOPENAI_API_KEY=sk-proj-abc123def456ghi789jkl012" + red = redact_terminal_output(out, "cat config.py") + assert "MAX_TOKENS=100" in red + assert "abc123def456" not in red + + def test_unknown_command_uses_safe_code_file_path(self): + from agent.redact import redact_terminal_output + # No command → code_file=True; opaque non-prefix token NOT masked + # (safe default avoids mangling arbitrary output), prefix still masked. + out = "OPAQUE=plainvalue123\nKEY=sk-proj-abc123def456ghi789jkl012" + red = redact_terminal_output(out, None) + assert "abc123def456" not in red + + def test_disabled_passes_through(self, monkeypatch): + from agent.redact import redact_terminal_output + monkeypatch.setattr("agent.redact._REDACT_ENABLED", False) + out = "CUSTOM_TOKEN=zzzopaque1234567890abcdef" + red = redact_terminal_output(out, "printenv") + assert "zzzopaque1234567890abcdef" in red diff --git a/tests/tools/test_process_registry.py b/tests/tools/test_process_registry.py index 3dd40aec421..7a89b29aaf2 100644 --- a/tests/tools/test_process_registry.py +++ b/tests/tools/test_process_registry.py @@ -1652,3 +1652,59 @@ class TestSigkillEscalation: except (ProcessLookupError, PermissionError, OSError): pass parent.wait() + + +class TestHandleProcessRedaction: + """`_handle_process` redacts background-process output before it reaches the + model / session.db / CLI display — issue #43025. + + Mirrors the foreground `terminal` redaction so the two surfaces can't + diverge. Env-dump commands (`printenv`/`env`) get the ENV-assignment pass + so opaque tokens are masked; other commands stay on the code_file path. + """ + + def _setup(self, monkeypatch, command, output): + import agent.redact as _r + monkeypatch.setattr(_r, "_REDACT_ENABLED", True) + from tools import process_registry as pr + reg = ProcessRegistry() + sess = _make_session(sid="proc_redact1", command=command) + sess.output_buffer = output + sess.exited = True + sess.exit_code = 0 + reg._running.clear() + reg._finished[sess.id] = sess + reg._running[sess.id] = sess + monkeypatch.setattr(pr, "process_registry", reg) + return pr, sess + + def test_log_redacts_env_dump_opaque_token(self, monkeypatch): + pr, sess = self._setup( + monkeypatch, "printenv", + "MY_SERVICE_TOKEN=abc123randomopaquetokenvalue999\nHOME=/home/u", + ) + out = json.loads(pr._handle_process({"action": "log", "session_id": sess.id})) + assert "abc123randomopaquetokenvalue999" not in out["output"] + assert "HOME=/home/u" in out["output"] + + def test_poll_redacts_prefix_key(self, monkeypatch): + pr, sess = self._setup( + monkeypatch, "python app.py", + "leaked OPENAI_API_KEY sk-proj-abc123def456ghi789jkl012 here", + ) + out = json.loads(pr._handle_process({"action": "poll", "session_id": sess.id})) + assert "abc123def456" not in out["output_preview"] + + def test_disabled_passes_through(self, monkeypatch): + import agent.redact as _r + monkeypatch.setattr(_r, "_REDACT_ENABLED", False) + from tools import process_registry as pr + reg = ProcessRegistry() + sess = _make_session(sid="proc_redact2", command="printenv") + sess.output_buffer = "CUSTOM_TOKEN=zzzopaque1234567890abcdef" + sess.exited = True + sess.exit_code = 0 + reg._running[sess.id] = sess + monkeypatch.setattr(pr, "process_registry", reg) + out = json.loads(pr._handle_process({"action": "log", "session_id": sess.id})) + assert "zzzopaque1234567890abcdef" in out["output"] diff --git a/tools/process_registry.py b/tools/process_registry.py index 6d966c14e34..f364c00ebe6 100644 --- a/tools/process_registry.py +++ b/tools/process_registry.py @@ -1271,6 +1271,7 @@ class ProcessRegistry: result = { "session_id": session.id, + "command": session.command, "status": "exited" if session.exited else "running", "output": "\n".join(selected), "total_lines": total_lines, @@ -1330,6 +1331,7 @@ class ProcessRegistry: self._completion_consumed.add(session_id) result = { "status": "exited", + "command": session.command, "exit_code": session.exit_code, "completion_reason": session.completion_reason, "termination_source": session.termination_source, @@ -1342,6 +1344,7 @@ class ProcessRegistry: if _is_interrupted(): result = { "status": "interrupted", + "command": session.command, "output": strip_ansi(session.output_buffer[-1000:]), "note": "User sent a new message -- wait interrupted", } @@ -1356,6 +1359,7 @@ class ProcessRegistry: result = { "status": "timeout", + "command": session.command, "output": strip_ansi(session.output_buffer[-1000:]), } if timeout_note: @@ -2081,6 +2085,31 @@ PROCESS_SCHEMA = { } +def _redact_process_result(result: dict) -> dict: + """Redact secrets from background-process output before it reaches the + model, session.db, and CLI display. + + Mirrors the foreground ``terminal`` redaction (terminal_tool.py) so the + two surfaces can't diverge — issue #43025 (background output was returned + verbatim). Respects ``security.redact_secrets`` (no force): output fields + pass through ``redact_terminal_output`` which picks ``code_file`` based on + the recorded command (env dumps get the ENV-assignment pass). The command + string itself is also redacted in case it carried an inline credential. + """ + if not isinstance(result, dict): + return result + from agent.redact import redact_sensitive_text, redact_terminal_output + + command = result.get("command") or "" + for field in ("output", "output_preview"): + value = result.get(field) + if isinstance(value, str) and value: + result[field] = redact_terminal_output(value, command) + if isinstance(result.get("command"), str) and result["command"]: + result["command"] = redact_sensitive_text(result["command"], code_file=True) + return result + + def _handle_process(args, **kw): task_id = kw.get("task_id") action = args.get("action", "") @@ -2104,12 +2133,12 @@ def _handle_process(args, **kw): if not session_id: return tool_error(f"session_id is required for {action}") if action == "poll": - return json.dumps(process_registry.poll(session_id), ensure_ascii=False) + return json.dumps(_redact_process_result(process_registry.poll(session_id)), ensure_ascii=False) elif action == "log": - return json.dumps(process_registry.read_log( - session_id, offset=args.get("offset", 0), limit=args.get("limit", 200)), ensure_ascii=False) + return json.dumps(_redact_process_result(process_registry.read_log( + session_id, offset=args.get("offset", 0), limit=args.get("limit", 200))), ensure_ascii=False) elif action == "wait": - return json.dumps(process_registry.wait(session_id, timeout=args.get("timeout")), ensure_ascii=False) + return json.dumps(_redact_process_result(process_registry.wait(session_id, timeout=args.get("timeout"))), ensure_ascii=False) elif action == "kill": return json.dumps(process_registry.kill_process(session_id), ensure_ascii=False) elif action == "write": diff --git a/tools/terminal_tool.py b/tools/terminal_tool.py index add5caeeb13..e9ad5d9f970 100644 --- a/tools/terminal_tool.py +++ b/tools/terminal_tool.py @@ -2656,13 +2656,17 @@ def terminal_tool( from tools.ansi_strip import strip_ansi output = strip_ansi(output) - # Redact secrets from command output (catches env/printenv leaking - # keys). code_file=True: terminal output often echoes source/config + # Redact secrets from command output. For source/config dumps # (MAX_TOKENS=100, "apiKey": "x" fixtures, postgresql:// f-string - # templates) — skip false-positive ENV/JSON/template redaction while - # still masking real prefixes, auth headers, JWTs, and private keys. - from agent.redact import redact_sensitive_text - output = redact_sensitive_text(output.strip(), code_file=True) if output else "" + # templates) the ENV/JSON/template passes are skipped to avoid + # false positives (code_file=True). But for env-dump commands + # (env/printenv/set/export/declare) the output IS a KEY=value + # credential dump, so redact_terminal_output runs the ENV pass + # (code_file=False) to mask opaque tokens with no vendor prefix. + # Real prefixes, auth headers, JWTs, private keys are masked in + # both modes. See issue #43025. + from agent.redact import redact_terminal_output + output = redact_terminal_output(output.strip(), command) if output else "" # Interpret non-zero exit codes that aren't real errors # (e.g. grep=1 means "no matches", diff=1 means "files differ")