mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-07-01 12:02:05 +00:00
Secret redaction is display/output-scoped on main — write_file writes content verbatim, terminal/execute_code redact only output not the command/source. The real bug is in displayed tool OUTPUT (read_file, terminal, execute_code): _DB_CONNSTR_RE's password group [^@]+ was greedy across newlines, so on a multi-line block it scanned past the DSN line to the next stray '@' (a Python @decorator), replacing every intervening character — including line breaks — with ***. That dropped lines and concatenated the next line onto the f-string line, making read_file output look corrupted (the file on disk was always correct). Reported in #33801. Fix: - Forbid whitespace in the userinfo/password groups ([^:\s]+ / [^@\s]+) so the match can never span a line break. A real DSN password never contains whitespace. This alone kills the catastrophic line-dropping. - Under code_file=True, preserve a password group that is a pure {...} brace expression — f"postgresql://{user}:{pass}@{host}" is an f-string template, not a live credential. Literal passwords are still masked. - Pass code_file=True at the terminal and execute_code output redaction call sites (file_tools already did) so code-execution output isn't corrupted by ENV/JSON/template false positives. Real prefixes, auth headers, JWTs, and private keys are still redacted. Verified E2E against the reporter's exact pydantic-settings module: file written verbatim, read_file shows the DSN f-string + @model_validator intact with zero *** corruption, while a literal postgresql://admin:pw@host DSN and a real sk- key are still masked. Reported-by: koishi70 Reported-by: pfrenssen
This commit is contained in:
parent
de6e9ac760
commit
674e16e7c6
5 changed files with 119 additions and 11 deletions
|
|
@ -202,9 +202,15 @@ _PRIVATE_KEY_RE = re.compile(
|
|||
)
|
||||
|
||||
# Database connection strings: protocol://user:PASSWORD@host
|
||||
# Catches postgres, mysql, mongodb, redis, amqp URLs and redacts the password
|
||||
# Catches postgres, mysql, mongodb, redis, amqp URLs and redacts the password.
|
||||
# The userinfo and password groups forbid whitespace ([^:\s]+ / [^@\s]+) so the
|
||||
# match can never span a line break. A real DSN password never contains
|
||||
# whitespace; without this bound the greedy [^@]+ would scan past the end of a
|
||||
# code line to the next stray "@" (e.g. a Python decorator), swallowing
|
||||
# intervening lines and corrupting tool OUTPUT for any source containing a
|
||||
# postgresql:// f-string template. See issue #33801.
|
||||
_DB_CONNSTR_RE = re.compile(
|
||||
r"((?:postgres(?:ql)?|mysql|mongodb(?:\+srv)?|redis|amqp)://[^:]+:)([^@]+)(@)",
|
||||
r"((?:postgres(?:ql)?|mysql|mongodb(?:\+srv)?|redis|amqp)://[^:\s]+:)([^@\s]+)(@)",
|
||||
re.IGNORECASE,
|
||||
)
|
||||
|
||||
|
|
@ -483,9 +489,22 @@ def redact_sensitive_text(text: str, *, force: bool = False, code_file: bool = F
|
|||
if "BEGIN" in text and "-----" in text:
|
||||
text = _PRIVATE_KEY_RE.sub("[REDACTED PRIVATE KEY]", text)
|
||||
|
||||
# Database connection string passwords
|
||||
# Database connection string passwords. With code_file=True, a password
|
||||
# group that is a pure ``{...}`` brace expression is an f-string template
|
||||
# reference (e.g. f"postgresql://{user}:{pass}@{host}"), not a literal
|
||||
# credential — preserve it. Literal passwords are still redacted. The regex
|
||||
# forbids whitespace in the password group, so a single-line template's
|
||||
# group(2) is exactly the brace expression. See issue #33801.
|
||||
if "://" in text:
|
||||
text = _DB_CONNSTR_RE.sub(lambda m: f"{m.group(1)}***{m.group(3)}", text)
|
||||
if code_file:
|
||||
def _redact_db(m):
|
||||
pw = m.group(2)
|
||||
if pw.startswith("{") and pw.endswith("}"):
|
||||
return m.group(0)
|
||||
return f"{m.group(1)}***{m.group(3)}"
|
||||
text = _DB_CONNSTR_RE.sub(_redact_db, text)
|
||||
else:
|
||||
text = _DB_CONNSTR_RE.sub(lambda m: f"{m.group(1)}***{m.group(3)}", text)
|
||||
|
||||
# JWT tokens (eyJ... — base64-encoded JSON headers)
|
||||
if "eyJ" in text:
|
||||
|
|
|
|||
|
|
@ -603,3 +603,79 @@ class TestXaiToken:
|
|||
def test_prefix_visible_in_masked_output(self):
|
||||
result = redact_sensitive_text(self.KEY, force=True)
|
||||
assert result.startswith("xai-AB")
|
||||
|
||||
|
||||
class TestDbConnstrCodeOutput:
|
||||
"""Regression tests for issue #33801 — _DB_CONNSTR_RE corrupting code output.
|
||||
|
||||
Two distinct flaws, both confined to displayed tool OUTPUT (read_file /
|
||||
terminal / execute_code), never the on-disk content:
|
||||
|
||||
1. The password group ``[^@]+`` was greedy across newlines, so on a
|
||||
multi-line block it scanned past the DSN line to the next stray ``@``
|
||||
(e.g. a Python ``@decorator``), replacing everything in between with
|
||||
``***`` — dropping lines and concatenating the next one.
|
||||
2. An f-string DSN template (``f"postgresql://{user}:{pass}@{host}"``) is
|
||||
not a live credential, but was redacted anyway. Under ``code_file=True``
|
||||
a pure ``{...}`` brace password is now preserved.
|
||||
"""
|
||||
|
||||
MULTILINE = (
|
||||
' return f"postgresql://{auth}@{self.pg_host}:'
|
||||
'{self.pg_port}/{self.pg_database}"\n'
|
||||
"\n"
|
||||
' @model_validator(mode="after")\n'
|
||||
' def _validate_critical_settings(self) -> "Settings":'
|
||||
)
|
||||
|
||||
def test_multiline_block_not_corrupted(self):
|
||||
"""The newline bound stops the greedy match from swallowing the
|
||||
decorator line. Original exact repro from the issue thread."""
|
||||
result = redact_sensitive_text(self.MULTILINE, code_file=True, force=True)
|
||||
assert result == self.MULTILINE
|
||||
# No line dropped, no concatenation onto the f-string line.
|
||||
assert "@model_validator" in result
|
||||
assert "_validate_critical_settings" in result
|
||||
assert result.count("\n") == self.MULTILINE.count("\n")
|
||||
|
||||
def test_multiline_block_no_corruption_without_code_file(self):
|
||||
"""Even without code_file, the newline bound alone prevents the
|
||||
catastrophic line-dropping. The single-line template's {pass} group
|
||||
is still masked here (code_file=False), but lines stay intact."""
|
||||
result = redact_sensitive_text(self.MULTILINE, force=True)
|
||||
assert "@model_validator" in result
|
||||
assert "_validate_critical_settings" in result
|
||||
assert result.count("\n") == self.MULTILINE.count("\n")
|
||||
|
||||
def test_fstring_template_preserved_with_code_file(self):
|
||||
"""A single-line DSN f-string template is preserved under code_file."""
|
||||
text = 'return f"postgresql://{user}:{password}@{host}:{port}/{db}"'
|
||||
assert redact_sensitive_text(text, code_file=True, force=True) == text
|
||||
|
||||
def test_fstring_template_self_attr_preserved(self):
|
||||
text = 'dsn = f"postgresql://{u}:{self.db_pass}@{h}:{p}/{d}"'
|
||||
assert redact_sensitive_text(text, code_file=True, force=True) == text
|
||||
|
||||
def test_literal_connstr_still_redacted_with_code_file(self):
|
||||
"""A real password in a literal DSN is still masked under code_file."""
|
||||
text = "postgresql://admin:realpassword@db.internal:5432/app"
|
||||
result = redact_sensitive_text(text, code_file=True, force=True)
|
||||
assert "realpassword" not in result
|
||||
assert "***" in result
|
||||
|
||||
def test_literal_connstr_redacted_all_schemes(self):
|
||||
for scheme, secret in [
|
||||
("postgres", "pgsecret1234"),
|
||||
("mysql", "mysqlsecret99"),
|
||||
("redis", "redissecret77"),
|
||||
("mongodb+srv", "mongosecret55"),
|
||||
("amqp", "amqpsecret33"),
|
||||
]:
|
||||
text = f"{scheme}://user:{secret}@host:1234/db"
|
||||
result = redact_sensitive_text(text, code_file=True, force=True)
|
||||
assert secret not in result, scheme
|
||||
|
||||
def test_literal_connstr_in_log_line_redacted(self):
|
||||
text = "connected via postgres://user:s3cr3tpw@host:5432/db ok"
|
||||
result = redact_sensitive_text(text, force=True)
|
||||
assert "s3cr3tpw" not in result
|
||||
|
|
|
|||
|
|
@ -128,9 +128,14 @@ def test_terminal_output_transform_still_runs_strip_and_redact(monkeypatch, tmp_
|
|||
)
|
||||
|
||||
assert "\x1b" not in result["output"]
|
||||
# Terminal output now passes code_file=True: ENV-assignment redaction is
|
||||
# skipped (so code constants like MAX_TOKENS=100 aren't corrupted), but a
|
||||
# real sk-/ghp_/JWT-shaped value is STILL masked by _PREFIX_RE. The full
|
||||
# secret never survives; only the leading prefix marker remains. (#33801)
|
||||
assert secret not in result["output"]
|
||||
assert "OPENAI_API_KEY=" in result["output"]
|
||||
assert "***" in result["output"]
|
||||
assert "sk-pro" in result["output"] # prefix marker from _mask_token
|
||||
assert "abc123def456" not in result["output"] # secret body is gone
|
||||
|
||||
|
||||
def test_terminal_output_transform_hook_exception_falls_back(monkeypatch, tmp_path):
|
||||
|
|
|
|||
|
|
@ -1031,9 +1031,11 @@ def _execute_remote(
|
|||
from tools.ansi_strip import strip_ansi
|
||||
stdout_text = strip_ansi(stdout_text)
|
||||
|
||||
# Redact secrets
|
||||
# Redact secrets. code_file=True: execute_code output is code-execution
|
||||
# output that often echoes source/config — skip false-positive ENV/JSON/
|
||||
# f-string-template redaction while still masking real credentials.
|
||||
from agent.redact import redact_sensitive_text
|
||||
stdout_text = redact_sensitive_text(stdout_text)
|
||||
stdout_text = redact_sensitive_text(stdout_text, code_file=True)
|
||||
|
||||
# Build response
|
||||
result: Dict[str, Any] = {
|
||||
|
|
@ -1441,9 +1443,11 @@ def execute_code(
|
|||
# The sandbox env-var filter (lines 434-454) blocks os.environ access,
|
||||
# but scripts can still read secrets from disk (e.g. open('~/.hermes/.env')).
|
||||
# This ensures leaked secrets never enter the model context.
|
||||
# code_file=True: this is code-execution output — skip false-positive
|
||||
# ENV/JSON/f-string-template redaction; real credentials still masked.
|
||||
from agent.redact import redact_sensitive_text
|
||||
stdout_text = redact_sensitive_text(stdout_text)
|
||||
stderr_text = redact_sensitive_text(stderr_text)
|
||||
stdout_text = redact_sensitive_text(stdout_text, code_file=True)
|
||||
stderr_text = redact_sensitive_text(stderr_text, code_file=True)
|
||||
|
||||
# Build response
|
||||
result: Dict[str, Any] = {
|
||||
|
|
|
|||
|
|
@ -2656,9 +2656,13 @@ def terminal_tool(
|
|||
from tools.ansi_strip import strip_ansi
|
||||
output = strip_ansi(output)
|
||||
|
||||
# Redact secrets from command output (catches env/printenv leaking keys)
|
||||
# Redact secrets from command output (catches env/printenv leaking
|
||||
# keys). code_file=True: terminal output often echoes source/config
|
||||
# (MAX_TOKENS=100, "apiKey": "x" fixtures, postgresql:// f-string
|
||||
# templates) — skip false-positive ENV/JSON/template redaction while
|
||||
# still masking real prefixes, auth headers, JWTs, and private keys.
|
||||
from agent.redact import redact_sensitive_text
|
||||
output = redact_sensitive_text(output.strip()) if output else ""
|
||||
output = redact_sensitive_text(output.strip(), code_file=True) if output else ""
|
||||
|
||||
# Interpret non-zero exit codes that aren't real errors
|
||||
# (e.g. grep=1 means "no matches", diff=1 means "files differ")
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue