fix(security): redact secrets in background process + foreground env-dump output (#43025) (#54149)

* fix(security): redact secrets in background process + foreground env-dump output

Terminal-output redaction was incomplete (#43025):

- Gap 1: process(action=poll/log/wait) returned background stdout verbatim —
  no redaction at all. A background printenv/server/test emitting a key leaked
  raw to the model, session.db, and CLI display. Same for the gateway
  background-process watcher's completion/progress notifications.
- Gap 2: the foreground terminal path hardcoded code_file=True, which skips the
  ENV-assignment pass, so an opaque token (no vendor prefix) from env/printenv
  leaked even there.

Adds agent.redact.redact_terminal_output(output, command) as the single policy
for ALL terminal-output surfaces: env-dump commands (env/printenv/set/export/
declare) get the ENV-assignment pass (code_file=False) to mask opaque tokens;
other commands stay on code_file=True to avoid false positives on source dumps.
Wired into terminal_tool, process_registry (_handle_process boundary), and the
gateway watcher. Respects security.redact_secrets (no force) — opt-out preserved.

* docs: add infographic for #43025 terminal-output redaction fix
This commit is contained in:
Teknium 2026-06-28 02:44:21 -07:00 committed by GitHub
parent d5ba374c03
commit c1c179a239
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 225 additions and 10 deletions

View file

@ -10,6 +10,7 @@ the first 6 and last 4 characters for debuggability.
import logging
import os
import re
import shlex
logger = logging.getLogger(__name__)
@ -542,6 +543,66 @@ def redact_sensitive_text(text: str, *, force: bool = False, code_file: bool = F
return text
# Commands whose stdout is an environment-variable dump (KEY=value lines),
# NOT source code. For these, terminal-output redaction must run the
# ENV-assignment pass (code_file=False) so opaque tokens with no recognized
# vendor prefix (e.g. ``MY_SERVICE_TOKEN=abc123randomstring``) are still
# masked. For all other commands, code_file=True is used to avoid mangling
# legitimate source/config dumps (``MAX_TOKENS=100``, ``"apiKey": "x"``
# fixtures, ``postgresql://{user}`` f-string templates). See issue #43025.
_ENV_DUMP_COMMANDS = frozenset({"env", "printenv", "set", "export", "declare"})
def is_env_dump_command(command: str | None) -> bool:
"""Return True if ``command`` dumps environment variables to stdout.
Detects ``env`` / ``printenv`` / ``set`` / ``export`` / ``declare`` as the
first token of any segment in a pipeline or sequence (``;`` / ``&&`` /
``||`` / ``|``). Conservative: a parse failure or anything unrecognized
returns False (callers then fall back to the safer code_file=True path,
which still masks prefix-shaped keys).
"""
if not command or not isinstance(command, str):
return False
# Split on shell separators, then inspect the first token of each segment.
segments = re.split(r"[|;&]+", command)
for seg in segments:
seg = seg.strip()
if not seg:
continue
try:
tokens = shlex.split(seg)
except ValueError:
tokens = seg.split()
if tokens and tokens[0] in _ENV_DUMP_COMMANDS:
return True
return False
def redact_terminal_output(
output: str, command: str | None = None, *, force: bool = False
) -> str:
"""Redact secrets from terminal/process stdout.
Single redaction policy for ALL terminal-output surfaces foreground
``terminal`` results AND background ``process(action=poll/log/wait)``
output so they can't diverge. Picks ``code_file`` based on whether
``command`` is an environment dump:
- env-dump command (``env``/``printenv``/``set``/``export``/``declare``)
``code_file=False`` so the ENV-assignment pass masks opaque tokens.
- anything else (or unknown command) ``code_file=True`` to avoid
false positives on source/config dumps.
``force=True`` bypasses the global ``security.redact_secrets`` preference
for safety boundaries that must never emit raw credentials.
"""
if not output:
return output
code_file = not is_env_dump_command(command or "")
return redact_sensitive_text(output, force=force, code_file=code_file)
# Substrings used to gate ``_PREFIX_RE`` execution. If none of these appear in
# the input string, the prefix regex cannot match anything, so we skip it.
# False positives are fine (they just run the regex, which then matches

View file

@ -14149,6 +14149,11 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
)
if should_notify:
new_output = session.output_buffer[-1000:] if session.output_buffer else ""
if new_output:
from agent.redact import redact_terminal_output
new_output = redact_terminal_output(
new_output, getattr(session, "command", "") or ""
)
message_text = (
f"[Background process {session_id} finished with exit code {session.exit_code}~ "
f"Here's the final output:\n{new_output}]"
@ -14174,6 +14179,11 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
# New output available -- deliver status update (only in "all" mode)
# Skip periodic updates for agent_notify watchers (they only care about completion)
new_output = session.output_buffer[-500:] if session.output_buffer else ""
if new_output:
from agent.redact import redact_terminal_output
new_output = redact_terminal_output(
new_output, getattr(session, "command", "") or ""
)
message_text = (
f"[Background process {session_id} is still running~ "
f"New output:\n{new_output}]"

Binary file not shown.

After

Width:  |  Height:  |  Size: 2 MiB

View file

@ -698,3 +698,58 @@ class TestDbConnstrCodeOutput:
text = "connected via postgres://user:s3cr3tpw@host:5432/db ok"
result = redact_sensitive_text(text, force=True)
assert "s3cr3tpw" not in result
class TestTerminalOutputRedaction:
"""is_env_dump_command + redact_terminal_output — issue #43025.
Terminal/process stdout must be redacted on every surface (foreground
`terminal` AND background `process(poll/log/wait)`). Env-dump commands get
the ENV-assignment pass so opaque tokens (no vendor prefix) are masked;
other commands stay on the code_file path to avoid false positives.
"""
def test_is_env_dump_command_detection(self):
from agent.redact import is_env_dump_command
assert is_env_dump_command("printenv")
assert is_env_dump_command("env")
assert is_env_dump_command("env | grep API")
assert is_env_dump_command("set")
assert is_env_dump_command("export")
assert is_env_dump_command("declare -x")
assert is_env_dump_command("cat /tmp/x && printenv")
assert not is_env_dump_command("python app.py")
assert not is_env_dump_command("cat config.py")
assert not is_env_dump_command("printf 'TOKEN=x'")
assert not is_env_dump_command("")
assert not is_env_dump_command(None)
def test_env_dump_masks_opaque_token(self):
from agent.redact import redact_terminal_output
out = "MY_SERVICE_TOKEN=abc123randomopaquetokenvalue999\nHOME=/home/u"
red = redact_terminal_output(out, "printenv")
assert "abc123randomopaquetokenvalue999" not in red
assert "HOME=/home/u" in red
def test_non_env_command_preserves_source_false_positives(self):
from agent.redact import redact_terminal_output
# code_file path: MAX_TOKENS=100 is source, must survive; real sk- masked.
out = "MAX_TOKENS=100\nOPENAI_API_KEY=sk-proj-abc123def456ghi789jkl012"
red = redact_terminal_output(out, "cat config.py")
assert "MAX_TOKENS=100" in red
assert "abc123def456" not in red
def test_unknown_command_uses_safe_code_file_path(self):
from agent.redact import redact_terminal_output
# No command → code_file=True; opaque non-prefix token NOT masked
# (safe default avoids mangling arbitrary output), prefix still masked.
out = "OPAQUE=plainvalue123\nKEY=sk-proj-abc123def456ghi789jkl012"
red = redact_terminal_output(out, None)
assert "abc123def456" not in red
def test_disabled_passes_through(self, monkeypatch):
from agent.redact import redact_terminal_output
monkeypatch.setattr("agent.redact._REDACT_ENABLED", False)
out = "CUSTOM_TOKEN=zzzopaque1234567890abcdef"
red = redact_terminal_output(out, "printenv")
assert "zzzopaque1234567890abcdef" in red

View file

@ -1652,3 +1652,59 @@ class TestSigkillEscalation:
except (ProcessLookupError, PermissionError, OSError):
pass
parent.wait()
class TestHandleProcessRedaction:
"""`_handle_process` redacts background-process output before it reaches the
model / session.db / CLI display issue #43025.
Mirrors the foreground `terminal` redaction so the two surfaces can't
diverge. Env-dump commands (`printenv`/`env`) get the ENV-assignment pass
so opaque tokens are masked; other commands stay on the code_file path.
"""
def _setup(self, monkeypatch, command, output):
import agent.redact as _r
monkeypatch.setattr(_r, "_REDACT_ENABLED", True)
from tools import process_registry as pr
reg = ProcessRegistry()
sess = _make_session(sid="proc_redact1", command=command)
sess.output_buffer = output
sess.exited = True
sess.exit_code = 0
reg._running.clear()
reg._finished[sess.id] = sess
reg._running[sess.id] = sess
monkeypatch.setattr(pr, "process_registry", reg)
return pr, sess
def test_log_redacts_env_dump_opaque_token(self, monkeypatch):
pr, sess = self._setup(
monkeypatch, "printenv",
"MY_SERVICE_TOKEN=abc123randomopaquetokenvalue999\nHOME=/home/u",
)
out = json.loads(pr._handle_process({"action": "log", "session_id": sess.id}))
assert "abc123randomopaquetokenvalue999" not in out["output"]
assert "HOME=/home/u" in out["output"]
def test_poll_redacts_prefix_key(self, monkeypatch):
pr, sess = self._setup(
monkeypatch, "python app.py",
"leaked OPENAI_API_KEY sk-proj-abc123def456ghi789jkl012 here",
)
out = json.loads(pr._handle_process({"action": "poll", "session_id": sess.id}))
assert "abc123def456" not in out["output_preview"]
def test_disabled_passes_through(self, monkeypatch):
import agent.redact as _r
monkeypatch.setattr(_r, "_REDACT_ENABLED", False)
from tools import process_registry as pr
reg = ProcessRegistry()
sess = _make_session(sid="proc_redact2", command="printenv")
sess.output_buffer = "CUSTOM_TOKEN=zzzopaque1234567890abcdef"
sess.exited = True
sess.exit_code = 0
reg._running[sess.id] = sess
monkeypatch.setattr(pr, "process_registry", reg)
out = json.loads(pr._handle_process({"action": "log", "session_id": sess.id}))
assert "zzzopaque1234567890abcdef" in out["output"]

View file

@ -1271,6 +1271,7 @@ class ProcessRegistry:
result = {
"session_id": session.id,
"command": session.command,
"status": "exited" if session.exited else "running",
"output": "\n".join(selected),
"total_lines": total_lines,
@ -1330,6 +1331,7 @@ class ProcessRegistry:
self._completion_consumed.add(session_id)
result = {
"status": "exited",
"command": session.command,
"exit_code": session.exit_code,
"completion_reason": session.completion_reason,
"termination_source": session.termination_source,
@ -1342,6 +1344,7 @@ class ProcessRegistry:
if _is_interrupted():
result = {
"status": "interrupted",
"command": session.command,
"output": strip_ansi(session.output_buffer[-1000:]),
"note": "User sent a new message -- wait interrupted",
}
@ -1356,6 +1359,7 @@ class ProcessRegistry:
result = {
"status": "timeout",
"command": session.command,
"output": strip_ansi(session.output_buffer[-1000:]),
}
if timeout_note:
@ -2081,6 +2085,31 @@ PROCESS_SCHEMA = {
}
def _redact_process_result(result: dict) -> dict:
"""Redact secrets from background-process output before it reaches the
model, session.db, and CLI display.
Mirrors the foreground ``terminal`` redaction (terminal_tool.py) so the
two surfaces can't diverge — issue #43025 (background output was returned
verbatim). Respects ``security.redact_secrets`` (no force): output fields
pass through ``redact_terminal_output`` which picks ``code_file`` based on
the recorded command (env dumps get the ENV-assignment pass). The command
string itself is also redacted in case it carried an inline credential.
"""
if not isinstance(result, dict):
return result
from agent.redact import redact_sensitive_text, redact_terminal_output
command = result.get("command") or ""
for field in ("output", "output_preview"):
value = result.get(field)
if isinstance(value, str) and value:
result[field] = redact_terminal_output(value, command)
if isinstance(result.get("command"), str) and result["command"]:
result["command"] = redact_sensitive_text(result["command"], code_file=True)
return result
def _handle_process(args, **kw):
task_id = kw.get("task_id")
action = args.get("action", "")
@ -2104,12 +2133,12 @@ def _handle_process(args, **kw):
if not session_id:
return tool_error(f"session_id is required for {action}")
if action == "poll":
return json.dumps(process_registry.poll(session_id), ensure_ascii=False)
return json.dumps(_redact_process_result(process_registry.poll(session_id)), ensure_ascii=False)
elif action == "log":
return json.dumps(process_registry.read_log(
session_id, offset=args.get("offset", 0), limit=args.get("limit", 200)), ensure_ascii=False)
return json.dumps(_redact_process_result(process_registry.read_log(
session_id, offset=args.get("offset", 0), limit=args.get("limit", 200))), ensure_ascii=False)
elif action == "wait":
return json.dumps(process_registry.wait(session_id, timeout=args.get("timeout")), ensure_ascii=False)
return json.dumps(_redact_process_result(process_registry.wait(session_id, timeout=args.get("timeout"))), ensure_ascii=False)
elif action == "kill":
return json.dumps(process_registry.kill_process(session_id), ensure_ascii=False)
elif action == "write":

View file

@ -2656,13 +2656,17 @@ def terminal_tool(
from tools.ansi_strip import strip_ansi
output = strip_ansi(output)
# Redact secrets from command output (catches env/printenv leaking
# keys). code_file=True: terminal output often echoes source/config
# Redact secrets from command output. For source/config dumps
# (MAX_TOKENS=100, "apiKey": "x" fixtures, postgresql:// f-string
# templates) — skip false-positive ENV/JSON/template redaction while
# still masking real prefixes, auth headers, JWTs, and private keys.
from agent.redact import redact_sensitive_text
output = redact_sensitive_text(output.strip(), code_file=True) if output else ""
# templates) the ENV/JSON/template passes are skipped to avoid
# false positives (code_file=True). But for env-dump commands
# (env/printenv/set/export/declare) the output IS a KEY=value
# credential dump, so redact_terminal_output runs the ENV pass
# (code_file=False) to mask opaque tokens with no vendor prefix.
# Real prefixes, auth headers, JWTs, private keys are masked in
# both modes. See issue #43025.
from agent.redact import redact_terminal_output
output = redact_terminal_output(output.strip(), command) if output else ""
# Interpret non-zero exit codes that aren't real errors
# (e.g. grep=1 means "no matches", diff=1 means "files differ")