mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-29 06:31:32 +00:00
fix(redact): pass web URLs through unchanged (#34029)
* fix(redact): pass web URLs through unchanged Magic-link checkout URLs, OAuth callbacks the agent is meant to follow, and pre-signed share URLs were getting `?token=***` / `?code=***` / `?signature=***` blanket-redacted by parameter NAME, which breaks any skill that has to round-trip a URL through history (the model's tool call arguments get sanitized before persistence — the live call fires with the real URL, but the next turn sees `***`). Joe Rinaldi Johnson hit this with a checkout-acceleration skill that uses magic links in URLs. Drops three call sites from `redact_sensitive_text`: - `_redact_url_query_params` (was redacting `access_token`, `token`, `api_key`, `code`, `signature`, `key`, `auth`, etc.) - `_redact_url_userinfo` (was redacting `https://user:pass@host`) - `_redact_http_request_target_query_params` (was redacting access-log request targets like `"POST /hook?password=... HTTP/1.1"`) The helpers themselves are kept in the module — still importable by anything that wants to opt in explicitly. Still redacted (unchanged): - Vendor-prefix credential shapes (sk-, ghp_, AKIA, gAAAA, etc.) anywhere they appear, including inside URLs — see the `test_known_prefix_inside_url_still_redacted` case. - JWTs (`eyJ...`) - DB connection-string passwords (`postgres://admin:pw@host`) — these are connection strings, not web URLs the agent navigates to. - Authorization headers, ENV assignments, JSON `apiKey`/`token` fields, Telegram bot tokens, private key blocks, Discord mentions, E.164 phone numbers, and form-urlencoded bodies (request bodies, not URLs). Tests: replaces `TestUrlQueryParamRedaction` + `TestUrlUserinfoRedaction` with `TestWebUrlsNotRedacted`, asserting representative URLs (OAuth callback, magic link, S3 pre-signed, websocket, userinfo, access log) pass through unchanged. Adds positive cases proving the prefix and DB connstr nets still fire. 74 redact tests + 10 browser-exfil + 16 PII redaction tests all pass. * test(codex_app_server): drop URL-query assertion from stderr-tail redaction test The test bundled (a) sk-live-* credential-prefix redaction with (b) URL query-param redaction. (a) is still in effect via _PREFIX_RE; (b) was the contract we just removed in the parent commit so the 'querysecret12345' assertion stopped holding. Keep the credential-shape assertion, drop the URL-query one. Send-message tool's local _URL_SECRET_QUERY_RE in tools/send_message_tool.py is independent of agent/redact.py and unchanged — its tests (test_top_level_send_failure_redacts_query_token, test_http_error_redacts_access_token_in_exception_text) still pass.
This commit is contained in:
parent
7a8589e782
commit
5f66c36470
3 changed files with 48 additions and 123 deletions
|
|
@ -406,19 +406,14 @@ def redact_sensitive_text(text: str, *, force: bool = False, code_file: bool = F
|
|||
if "eyJ" in text:
|
||||
text = _JWT_RE.sub(lambda m: _mask_token(m.group(0)), text)
|
||||
|
||||
# URL userinfo (http(s)://user:pass@host) — redact for non-DB schemes.
|
||||
# DB schemes are handled above by _DB_CONNSTR_RE.
|
||||
if "://" in text:
|
||||
text = _redact_url_userinfo(text)
|
||||
|
||||
# URL query params containing opaque tokens (?access_token=…&code=…)
|
||||
if "?" in text:
|
||||
text = _redact_url_query_params(text)
|
||||
|
||||
# HTTP access logs can contain relative request targets with query params
|
||||
# and no URL scheme, e.g. `"POST /hook?password=... HTTP/1.1"`.
|
||||
if "?" in text and "=" in text and _has_http_method_substring(text):
|
||||
text = _redact_http_request_target_query_params(text)
|
||||
# NOTE: Web-URL redaction (query params + userinfo + HTTP access-log
|
||||
# request targets) is intentionally OFF. Many legitimate workflows pass
|
||||
# opaque tokens through query strings — magic-link checkouts, OAuth
|
||||
# callbacks the agent is meant to follow, pre-signed share URLs — and
|
||||
# blanket-redacting param values by name breaks those skills mid-flow.
|
||||
# Known credential shapes (sk-, ghp_, JWTs, etc.) inside URLs are still
|
||||
# caught by _PREFIX_RE and _JWT_RE above. DB connection-string passwords
|
||||
# are still caught by _DB_CONNSTR_RE.
|
||||
|
||||
# Form-urlencoded bodies (only triggers on clean k=v&k=v inputs).
|
||||
if "&" in text and "=" in text:
|
||||
|
|
|
|||
|
|
@ -378,127 +378,57 @@ class TestDiscordMentions:
|
|||
assert result.endswith(" said hello")
|
||||
|
||||
|
||||
class TestUrlQueryParamRedaction:
|
||||
"""URL query-string redaction (ported from nearai/ironclaw#2529).
|
||||
|
||||
Catches opaque tokens that don't match vendor prefix regexes by
|
||||
matching on parameter NAME rather than value shape.
|
||||
class TestWebUrlsNotRedacted:
|
||||
"""Web URLs (http/https/wss) pass through unchanged — magic-link
|
||||
checkouts, OAuth callbacks the agent is meant to follow, and pre-signed
|
||||
share URLs must reach the tool intact. Known credential shapes inside
|
||||
URLs (sk-, ghp_, JWTs) are still caught by the prefix and JWT regexes.
|
||||
DB connection-string passwords are still caught by _DB_CONNSTR_RE.
|
||||
"""
|
||||
|
||||
def test_oauth_callback_code(self):
|
||||
def test_oauth_callback_code_passes_through(self):
|
||||
text = "GET https://api.example.com/oauth/cb?code=abc123xyz789&state=csrf_ok"
|
||||
result = redact_sensitive_text(text)
|
||||
assert "abc123xyz789" not in result
|
||||
assert "code=***" in result
|
||||
assert "state=csrf_ok" in result # state is not sensitive
|
||||
|
||||
def test_access_token_query(self):
|
||||
text = "Fetching https://example.com/api?access_token=opaque_value_here_1234&format=json"
|
||||
result = redact_sensitive_text(text)
|
||||
assert "opaque_value_here_1234" not in result
|
||||
assert "access_token=***" in result
|
||||
assert "format=json" in result
|
||||
|
||||
def test_refresh_token_query(self):
|
||||
text = "https://auth.example.com/token?refresh_token=somerefresh&grant_type=refresh"
|
||||
result = redact_sensitive_text(text)
|
||||
assert "somerefresh" not in result
|
||||
assert "grant_type=refresh" in result
|
||||
|
||||
def test_api_key_query(self):
|
||||
text = "https://api.example.com/v1/data?api_key=kABCDEF12345&limit=10"
|
||||
result = redact_sensitive_text(text)
|
||||
assert "kABCDEF12345" not in result
|
||||
assert "limit=10" in result
|
||||
|
||||
def test_presigned_signature(self):
|
||||
text = "https://s3.amazonaws.com/bucket/k?signature=LONG_PRESIGNED_SIG&id=public"
|
||||
result = redact_sensitive_text(text)
|
||||
assert "LONG_PRESIGNED_SIG" not in result
|
||||
assert "id=public" in result
|
||||
|
||||
def test_case_insensitive_param_names(self):
|
||||
"""Lowercase/mixed-case sensitive param names are redacted."""
|
||||
# NOTE: All-caps names like TOKEN= are swallowed by _ENV_ASSIGN_RE
|
||||
# (which matches KEY=value patterns greedily) before URL regex runs.
|
||||
# This test uses lowercase names to isolate URL-query redaction.
|
||||
text = "https://example.com?api_key=abcdef&secret=ghijkl"
|
||||
result = redact_sensitive_text(text)
|
||||
assert "abcdef" not in result
|
||||
assert "ghijkl" not in result
|
||||
assert "api_key=***" in result
|
||||
assert "secret=***" in result
|
||||
|
||||
def test_substring_match_does_not_trigger(self):
|
||||
"""`token_count` and `session_id` must NOT match `token` / `session`."""
|
||||
text = "https://example.com/cb?token_count=42&session_id=xyz&foo=bar"
|
||||
result = redact_sensitive_text(text)
|
||||
assert "token_count=42" in result
|
||||
assert "session_id=xyz" in result
|
||||
|
||||
def test_url_without_query_unchanged(self):
|
||||
text = "https://example.com/path/to/resource"
|
||||
assert redact_sensitive_text(text) == text
|
||||
|
||||
def test_url_with_fragment(self):
|
||||
text = "https://example.com/page?token=xyz#section"
|
||||
result = redact_sensitive_text(text)
|
||||
assert "token=xyz" not in result
|
||||
assert "#section" in result
|
||||
def test_access_token_query_passes_through(self):
|
||||
text = "Fetching https://example.com/api?access_token=opaque_value_here_1234&format=json"
|
||||
assert redact_sensitive_text(text) == text
|
||||
|
||||
def test_websocket_url_query(self):
|
||||
def test_magic_link_checkout_passes_through(self):
|
||||
text = "Open https://checkout.example.com/resume?magic=ABCDEF123456&customer=42"
|
||||
assert redact_sensitive_text(text) == text
|
||||
|
||||
def test_presigned_signature_passes_through(self):
|
||||
text = "https://s3.amazonaws.com/bucket/k?signature=LONG_PRESIGNED_SIG&id=public"
|
||||
assert redact_sensitive_text(text) == text
|
||||
|
||||
def test_https_userinfo_passes_through(self):
|
||||
text = "URL: https://user:supersecretpw@host.example.com/path"
|
||||
assert redact_sensitive_text(text) == text
|
||||
|
||||
def test_websocket_url_query_passes_through(self):
|
||||
text = "wss://api.example.com/ws?token=opaqueWsToken123"
|
||||
result = redact_sensitive_text(text)
|
||||
assert "opaqueWsToken123" not in result
|
||||
assert redact_sensitive_text(text) == text
|
||||
|
||||
def test_http_access_log_relative_request_target_query(self):
|
||||
def test_http_access_log_request_target_passes_through(self):
|
||||
text = (
|
||||
'INFO aiohttp.access: 127.0.0.1 "POST '
|
||||
'/bluebubbles-webhook?password=webhookSecret123&event=new-message '
|
||||
'HTTP/1.1" 200 173 "-" "test-client"'
|
||||
)
|
||||
result = redact_sensitive_text(text)
|
||||
assert "webhookSecret123" not in result
|
||||
assert "password=***" in result
|
||||
assert "event=new-message" in result
|
||||
|
||||
def test_http_access_log_absolute_request_target_query(self):
|
||||
text = (
|
||||
'INFO aiohttp.access: 127.0.0.1 "GET '
|
||||
'https://example.com/callback?code=oauthCode123&state=csrf-ok '
|
||||
'HTTP/1.1" 200 173 "-" "test-client"'
|
||||
)
|
||||
result = redact_sensitive_text(text)
|
||||
assert "oauthCode123" not in result
|
||||
assert "code=***" in result
|
||||
assert "state=csrf-ok" in result
|
||||
|
||||
|
||||
class TestUrlUserinfoRedaction:
|
||||
"""URL userinfo (`scheme://user:pass@host`) for non-DB schemes."""
|
||||
|
||||
def test_https_userinfo(self):
|
||||
text = "URL: https://user:supersecretpw@host.example.com/path"
|
||||
result = redact_sensitive_text(text)
|
||||
assert "supersecretpw" not in result
|
||||
assert "https://user:***@host.example.com" in result
|
||||
|
||||
def test_http_userinfo(self):
|
||||
text = "http://admin:plaintextpass@internal.example.com/api"
|
||||
result = redact_sensitive_text(text)
|
||||
assert "plaintextpass" not in result
|
||||
|
||||
def test_ftp_userinfo(self):
|
||||
text = "ftp://user:ftppass@ftp.example.com/file.txt"
|
||||
result = redact_sensitive_text(text)
|
||||
assert "ftppass" not in result
|
||||
|
||||
def test_url_without_userinfo_unchanged(self):
|
||||
text = "https://example.com/path"
|
||||
assert redact_sensitive_text(text) == text
|
||||
|
||||
def test_db_connstr_still_handled(self):
|
||||
"""DB schemes are handled by _DB_CONNSTR_RE, not _URL_USERINFO_RE."""
|
||||
def test_known_prefix_inside_url_still_redacted(self):
|
||||
"""sk-/ghp_/JWT-shaped values inside a URL are still caught by
|
||||
_PREFIX_RE / _JWT_RE — the carve-out is for opaque tokens only."""
|
||||
text = "https://evil.com/steal?key=sk-" + "a" * 30
|
||||
result = redact_sensitive_text(text)
|
||||
assert "sk-" + "a" * 30 not in result
|
||||
|
||||
def test_db_connstr_password_still_redacted(self):
|
||||
"""DB schemes (postgres/mysql/mongodb/redis/amqp) keep their
|
||||
userinfo redaction via _DB_CONNSTR_RE — connection strings are
|
||||
not web URLs the agent navigates to."""
|
||||
text = "postgres://admin:dbpass@db.internal:5432/app"
|
||||
result = redact_sensitive_text(text)
|
||||
assert "dbpass" not in result
|
||||
|
|
|
|||
|
|
@ -275,8 +275,9 @@ class TestRunTurn:
|
|||
def test_turn_start_failure_attaches_redacted_stderr_tail(self):
|
||||
"""When codex stderr has content (non-OAuth), the tail gets attached
|
||||
to the user-facing error so config/provider problems are debuggable
|
||||
instead of just 'Internal error'. Secrets in stderr are redacted
|
||||
via agent.redact(force=True)."""
|
||||
instead of just 'Internal error'. Credential-shaped values in stderr
|
||||
are redacted via agent.redact(force=True); web-URL query params pass
|
||||
through (see fix(redact): pass web URLs through unchanged)."""
|
||||
client = FakeClient()
|
||||
client.set_stderr_tail([
|
||||
"ERROR: provider auth failed",
|
||||
|
|
@ -299,9 +300,8 @@ class TestRunTurn:
|
|||
# Stderr tail attached
|
||||
assert "codex stderr" in r.error
|
||||
assert "provider auth failed" in r.error
|
||||
# Secrets redacted
|
||||
# Credential-shaped values still redacted (sk- prefix + Bearer header)
|
||||
assert "sk-live-deadbeefdeadbeef" not in r.error
|
||||
assert "querysecret12345" not in r.error
|
||||
# Non-OAuth → should NOT retire (subprocess JSON-RPC is still healthy).
|
||||
assert r.should_retire is False
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue