diff --git a/agent/redact.py b/agent/redact.py index 7ed241c5efd..26645432293 100644 --- a/agent/redact.py +++ b/agent/redact.py @@ -406,19 +406,14 @@ def redact_sensitive_text(text: str, *, force: bool = False, code_file: bool = F if "eyJ" in text: text = _JWT_RE.sub(lambda m: _mask_token(m.group(0)), text) - # URL userinfo (http(s)://user:pass@host) — redact for non-DB schemes. - # DB schemes are handled above by _DB_CONNSTR_RE. - if "://" in text: - text = _redact_url_userinfo(text) - - # URL query params containing opaque tokens (?access_token=…&code=…) - if "?" in text: - text = _redact_url_query_params(text) - - # HTTP access logs can contain relative request targets with query params - # and no URL scheme, e.g. `"POST /hook?password=... HTTP/1.1"`. - if "?" in text and "=" in text and _has_http_method_substring(text): - text = _redact_http_request_target_query_params(text) + # NOTE: Web-URL redaction (query params + userinfo + HTTP access-log + # request targets) is intentionally OFF. Many legitimate workflows pass + # opaque tokens through query strings — magic-link checkouts, OAuth + # callbacks the agent is meant to follow, pre-signed share URLs — and + # blanket-redacting param values by name breaks those skills mid-flow. + # Known credential shapes (sk-, ghp_, JWTs, etc.) inside URLs are still + # caught by _PREFIX_RE and _JWT_RE above. DB connection-string passwords + # are still caught by _DB_CONNSTR_RE. # Form-urlencoded bodies (only triggers on clean k=v&k=v inputs). if "&" in text and "=" in text: diff --git a/tests/agent/test_redact.py b/tests/agent/test_redact.py index ea79ea9ce39..92fa13649a9 100644 --- a/tests/agent/test_redact.py +++ b/tests/agent/test_redact.py @@ -378,127 +378,57 @@ class TestDiscordMentions: assert result.endswith(" said hello") -class TestUrlQueryParamRedaction: - """URL query-string redaction (ported from nearai/ironclaw#2529). - - Catches opaque tokens that don't match vendor prefix regexes by - matching on parameter NAME rather than value shape. +class TestWebUrlsNotRedacted: + """Web URLs (http/https/wss) pass through unchanged — magic-link + checkouts, OAuth callbacks the agent is meant to follow, and pre-signed + share URLs must reach the tool intact. Known credential shapes inside + URLs (sk-, ghp_, JWTs) are still caught by the prefix and JWT regexes. + DB connection-string passwords are still caught by _DB_CONNSTR_RE. """ - def test_oauth_callback_code(self): + def test_oauth_callback_code_passes_through(self): text = "GET https://api.example.com/oauth/cb?code=abc123xyz789&state=csrf_ok" - result = redact_sensitive_text(text) - assert "abc123xyz789" not in result - assert "code=***" in result - assert "state=csrf_ok" in result # state is not sensitive - - def test_access_token_query(self): - text = "Fetching https://example.com/api?access_token=opaque_value_here_1234&format=json" - result = redact_sensitive_text(text) - assert "opaque_value_here_1234" not in result - assert "access_token=***" in result - assert "format=json" in result - - def test_refresh_token_query(self): - text = "https://auth.example.com/token?refresh_token=somerefresh&grant_type=refresh" - result = redact_sensitive_text(text) - assert "somerefresh" not in result - assert "grant_type=refresh" in result - - def test_api_key_query(self): - text = "https://api.example.com/v1/data?api_key=kABCDEF12345&limit=10" - result = redact_sensitive_text(text) - assert "kABCDEF12345" not in result - assert "limit=10" in result - - def test_presigned_signature(self): - text = "https://s3.amazonaws.com/bucket/k?signature=LONG_PRESIGNED_SIG&id=public" - result = redact_sensitive_text(text) - assert "LONG_PRESIGNED_SIG" not in result - assert "id=public" in result - - def test_case_insensitive_param_names(self): - """Lowercase/mixed-case sensitive param names are redacted.""" - # NOTE: All-caps names like TOKEN= are swallowed by _ENV_ASSIGN_RE - # (which matches KEY=value patterns greedily) before URL regex runs. - # This test uses lowercase names to isolate URL-query redaction. - text = "https://example.com?api_key=abcdef&secret=ghijkl" - result = redact_sensitive_text(text) - assert "abcdef" not in result - assert "ghijkl" not in result - assert "api_key=***" in result - assert "secret=***" in result - - def test_substring_match_does_not_trigger(self): - """`token_count` and `session_id` must NOT match `token` / `session`.""" - text = "https://example.com/cb?token_count=42&session_id=xyz&foo=bar" - result = redact_sensitive_text(text) - assert "token_count=42" in result - assert "session_id=xyz" in result - - def test_url_without_query_unchanged(self): - text = "https://example.com/path/to/resource" assert redact_sensitive_text(text) == text - def test_url_with_fragment(self): - text = "https://example.com/page?token=xyz#section" - result = redact_sensitive_text(text) - assert "token=xyz" not in result - assert "#section" in result + def test_access_token_query_passes_through(self): + text = "Fetching https://example.com/api?access_token=opaque_value_here_1234&format=json" + assert redact_sensitive_text(text) == text - def test_websocket_url_query(self): + def test_magic_link_checkout_passes_through(self): + text = "Open https://checkout.example.com/resume?magic=ABCDEF123456&customer=42" + assert redact_sensitive_text(text) == text + + def test_presigned_signature_passes_through(self): + text = "https://s3.amazonaws.com/bucket/k?signature=LONG_PRESIGNED_SIG&id=public" + assert redact_sensitive_text(text) == text + + def test_https_userinfo_passes_through(self): + text = "URL: https://user:supersecretpw@host.example.com/path" + assert redact_sensitive_text(text) == text + + def test_websocket_url_query_passes_through(self): text = "wss://api.example.com/ws?token=opaqueWsToken123" - result = redact_sensitive_text(text) - assert "opaqueWsToken123" not in result + assert redact_sensitive_text(text) == text - def test_http_access_log_relative_request_target_query(self): + def test_http_access_log_request_target_passes_through(self): text = ( 'INFO aiohttp.access: 127.0.0.1 "POST ' '/bluebubbles-webhook?password=webhookSecret123&event=new-message ' 'HTTP/1.1" 200 173 "-" "test-client"' ) - result = redact_sensitive_text(text) - assert "webhookSecret123" not in result - assert "password=***" in result - assert "event=new-message" in result - - def test_http_access_log_absolute_request_target_query(self): - text = ( - 'INFO aiohttp.access: 127.0.0.1 "GET ' - 'https://example.com/callback?code=oauthCode123&state=csrf-ok ' - 'HTTP/1.1" 200 173 "-" "test-client"' - ) - result = redact_sensitive_text(text) - assert "oauthCode123" not in result - assert "code=***" in result - assert "state=csrf-ok" in result - - -class TestUrlUserinfoRedaction: - """URL userinfo (`scheme://user:pass@host`) for non-DB schemes.""" - - def test_https_userinfo(self): - text = "URL: https://user:supersecretpw@host.example.com/path" - result = redact_sensitive_text(text) - assert "supersecretpw" not in result - assert "https://user:***@host.example.com" in result - - def test_http_userinfo(self): - text = "http://admin:plaintextpass@internal.example.com/api" - result = redact_sensitive_text(text) - assert "plaintextpass" not in result - - def test_ftp_userinfo(self): - text = "ftp://user:ftppass@ftp.example.com/file.txt" - result = redact_sensitive_text(text) - assert "ftppass" not in result - - def test_url_without_userinfo_unchanged(self): - text = "https://example.com/path" assert redact_sensitive_text(text) == text - def test_db_connstr_still_handled(self): - """DB schemes are handled by _DB_CONNSTR_RE, not _URL_USERINFO_RE.""" + def test_known_prefix_inside_url_still_redacted(self): + """sk-/ghp_/JWT-shaped values inside a URL are still caught by + _PREFIX_RE / _JWT_RE — the carve-out is for opaque tokens only.""" + text = "https://evil.com/steal?key=sk-" + "a" * 30 + result = redact_sensitive_text(text) + assert "sk-" + "a" * 30 not in result + + def test_db_connstr_password_still_redacted(self): + """DB schemes (postgres/mysql/mongodb/redis/amqp) keep their + userinfo redaction via _DB_CONNSTR_RE — connection strings are + not web URLs the agent navigates to.""" text = "postgres://admin:dbpass@db.internal:5432/app" result = redact_sensitive_text(text) assert "dbpass" not in result diff --git a/tests/agent/transports/test_codex_app_server_session.py b/tests/agent/transports/test_codex_app_server_session.py index d43a92a1eb9..edddf6b433e 100644 --- a/tests/agent/transports/test_codex_app_server_session.py +++ b/tests/agent/transports/test_codex_app_server_session.py @@ -275,8 +275,9 @@ class TestRunTurn: def test_turn_start_failure_attaches_redacted_stderr_tail(self): """When codex stderr has content (non-OAuth), the tail gets attached to the user-facing error so config/provider problems are debuggable - instead of just 'Internal error'. Secrets in stderr are redacted - via agent.redact(force=True).""" + instead of just 'Internal error'. Credential-shaped values in stderr + are redacted via agent.redact(force=True); web-URL query params pass + through (see fix(redact): pass web URLs through unchanged).""" client = FakeClient() client.set_stderr_tail([ "ERROR: provider auth failed", @@ -299,9 +300,8 @@ class TestRunTurn: # Stderr tail attached assert "codex stderr" in r.error assert "provider auth failed" in r.error - # Secrets redacted + # Credential-shaped values still redacted (sk- prefix + Bearer header) assert "sk-live-deadbeefdeadbeef" not in r.error - assert "querysecret12345" not in r.error # Non-OAuth → should NOT retire (subprocess JSON-RPC is still healthy). assert r.should_retire is False