fix(security): redact bare-token credentials in URL userinfo (#6396) (#54475)

git remote set-url with an embedded password (https://PASSWORD@github.com)
leaked the credential into agent output — the redaction engine only masked
user:pass@ DB connection strings, never the colon-less bare-token userinfo
form a git remote uses.

Add _URL_BARE_TOKEN_RE: scheme://TOKEN@host for web/transport schemes
(http/https/ws/wss/git/ssh/ftp/ftps/sftp), 8+ char floor to skip short
usernames, token class forbidding /:@ so an @ in a path/query is never
treated as userinfo.

Deliberately scoped to the bare-token form only. The user:pass@ colon form
and query-string tokens stay passing through (#34029, 'pass web URLs through
unchanged') so magic-link / OAuth round-trip skills keep working — a bare
credential in userinfo is never a workflow token (those live in the query
string), so masking it can't break a skill.
This commit is contained in:
Teknium 2026-06-28 18:52:42 -07:00 committed by GitHub
parent 9860d93f2a
commit 3483424aaa
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 111 additions and 1 deletions

View file

@ -222,6 +222,28 @@ _DB_CONNSTR_RE = re.compile(
re.IGNORECASE,
)
# Bare-token credential in a web/transport URL: ``scheme://TOKEN@host``.
# This is the ``git remote set-url origin https://PASSWORD@github.com/...``
# shape from issue #6396 — a single opaque credential in the userinfo position
# with NO ``user:pass`` colon. It is unambiguously a secret: legitimate
# round-trip URLs (OAuth callbacks, magic links, pre-signed shares — see the
# "Web-URL redaction is intentionally OFF" note in redact_sensitive_text) carry
# their tokens in the QUERY STRING, never in bare userinfo. The colon form
# ``user:pass@`` is deliberately left to pass through (commit "pass web URLs
# through unchanged", #34029) and is NOT matched here — the token class forbids
# ``:``. DB schemes are handled by _DB_CONNSTR_RE above and excluded here.
#
# Guards against false positives:
#   - 8+ char floor skips short usernames (git, admin, root, deploy, ubuntu).
#   - The token class ``[^\s:@/?#]`` cannot cross ``/``, ``?`` or ``#`` — the
#     three delimiters that terminate a URL authority (RFC 3986 §3.2). An
#     ``@`` sitting in a path, query or fragment is therefore never treated as
#     userinfo, including when the query/fragment follows the host directly
#     with no slash (e.g. ``https://example.com?q=user@example.com``).
#     Userinfo in a well-formed URL cannot contain those characters
#     unencoded, so no real bare-token credential is lost by excluding them.
#
# Groups: 1 = ``scheme://``, 2 = the bare token, 3 = ``@host...`` up to the
# next whitespace.
_URL_BARE_TOKEN_RE = re.compile(
    r"((?:https?|wss?|git|ssh|ftp|ftps|sftp)://)"  # scheme
    r"([^\s:@/?#]{8,})"  # bare token (no colon/slash/@/?/#), 8+ chars
    r"(@[^\s]+)",  # @host...
    re.IGNORECASE,
)
# JWT tokens: header.payload[.signature] — always start with "eyJ" (base64 for "{")
# Matches 1-part (header only), 2-part (header.payload), and full 3-part JWTs.
_JWT_RE = re.compile(
@ -564,6 +586,16 @@ def redact_sensitive_text(
else:
text = _DB_CONNSTR_RE.sub(lambda m: f"{m.group(1)}***{m.group(3)}", text)
# Bare-token userinfo in web/transport URLs: ``scheme://TOKEN@host``.
# The git-remote-with-embedded-password shape from #6396. Only the
# colon-less bare-token form is redacted — ``user:pass@`` and
# query-string tokens are left to pass through (see the web-URL note
# below). See _URL_BARE_TOKEN_RE for the false-positive guards.
text = _URL_BARE_TOKEN_RE.sub(
lambda m: f"{m.group(1)}{_mask_token(m.group(2))}{m.group(3)}",
text,
)
# JWT tokens (eyJ... — base64-encoded JSON headers)
if "eyJ" in text:
text = _JWT_RE.sub(lambda m: _mask_token(m.group(0)), text)
@ -575,7 +607,12 @@ def redact_sensitive_text(
# blanket-redacting param values by name breaks those skills mid-flow.
# Known credential shapes (sk-, ghp_, JWTs, etc.) inside URLs are still
# caught by _PREFIX_RE and _JWT_RE above. DB connection-string passwords
# are still caught by _DB_CONNSTR_RE.
# are still caught by _DB_CONNSTR_RE. The ONE userinfo case still redacted
# is the colon-less bare-token form ``scheme://TOKEN@host`` (#6396, handled
# by _URL_BARE_TOKEN_RE in the ``://`` block above): a bare credential in
# userinfo is never a round-trip workflow token (those live in the query
# string), so masking it can't break a skill. The ``user:pass@`` form is
# left to pass through per #34029.
# Form-urlencoded bodies (only triggers on clean k=v&k=v inputs).
if "&" in text and "=" in text:

View file

@ -492,6 +492,79 @@ class TestWebUrlsNotRedacted:
assert "dbpass" not in result
class TestBareTokenUserinfoRedaction:
    """Regression coverage for #6396: colon-less credentials in URL userinfo.

    A bare credential in the userinfo slot (``scheme://TOKEN@host``, with no
    ``user:pass`` colon) must be redacted — this is the shape produced by a
    git remote with an embedded password. The colon form ``user:pass@`` and
    query-string tokens are intentionally left untouched (#34029) so that
    magic-link / OAuth round-trip skills keep working — see
    TestWebUrlsNotRedacted for those invariants.
    """

    def test_git_remote_bare_password_redacted(self):
        """The reported scenario: a password embedded in a git remote URL."""
        command = (
            "git remote set-url origin "
            "https://MYPASSWORDWASDISLAYEDHERE@github.com/unclehowell/FCUK.git"
        )
        redacted = redact_sensitive_text(command)
        assert "MYPASSWORDWASDISLAYEDHERE" not in redacted
        # Host and repository path must survive the masking.
        assert "@github.com" in redacted
        assert "unclehowell/FCUK.git" in redacted

    def test_ssh_bare_token_redacted(self):
        """An ssh:// URL with a bare token loses the token, keeps the host."""
        redacted = redact_sensitive_text(
            "ssh://longtoken1234567@gitlab.com/project.git"
        )
        assert "longtoken1234567" not in redacted
        assert "@gitlab.com" in redacted

    def test_ftp_bare_token_redacted(self):
        """An ftp:// URL with a bare token has the token masked."""
        redacted = redact_sensitive_text("ftp://ftptoken123456@ftp.example.com/files")
        assert "ftptoken123456" not in redacted

    def test_bare_token_with_query_redacts_token_only(self):
        """Only the userinfo token is masked; the query string is preserved."""
        redacted = redact_sensitive_text("https://abcdef1234567@host.com/path?foo=bar")
        assert "abcdef1234567" not in redacted
        assert "?foo=bar" in redacted

    def test_user_pass_form_still_passes_through(self):
        """The ``user:pass@`` colon form must NOT be redacted (#34029)."""
        line = "URL: https://user:supersecretpw@host.example.com/path"
        assert redact_sensitive_text(line) == line

    def test_short_username_not_redacted(self):
        """Userinfo shorter than the 8-char floor (git, admin, deploy) passes."""
        short_user_urls = (
            "https://git@github.com/user/repo.git",
            "https://admin@example.com/x",
            "https://deploy@host.com/y",
        )
        for url in short_user_urls:
            assert redact_sensitive_text(url) == url

    def test_email_in_path_not_redacted(self):
        """An ``@`` after the first slash is not userinfo.

        The token class stops at ``/``, so e-mail addresses appearing in a
        path or query are never mistaken for a credential.
        """
        urls_with_email = (
            "https://example.com/search?q=user@example.com",
            "https://example.com/users/john@doe.com/profile",
        )
        for url in urls_with_email:
            assert redact_sensitive_text(url) == url

    def test_plain_url_unchanged(self):
        """A URL with no userinfo at all is returned verbatim."""
        url = "https://github.com/user/repo.git"
        assert redact_sensitive_text(url) == url

    def test_long_bare_token_preserves_head_tail(self):
        """A long token is masked in the middle but keeps its head and tail."""
        head, tail = "abcdef", "wxyz"
        secret = head + "x" * 20 + tail
        redacted = redact_sensitive_text(f"https://{secret}@github.com/u/r.git")
        assert secret not in redacted
        assert head in redacted  # head preserved
        assert tail in redacted  # tail preserved
class TestFormBodyRedaction:
"""Form-urlencoded body redaction (k=v&k=v with no other text)."""