mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-30 11:52:04 +00:00
git remote set-url with an embedded password (https://PASSWORD@github.com) leaked the credential into agent output — the redaction engine only masked user:pass@ DB connection strings, never the colon-less bare-token userinfo form a git remote uses. Add _URL_BARE_TOKEN_RE: scheme://TOKEN@host for web/transport schemes (http/https/wss/git/ssh/ftp), 8+ char floor to skip short usernames, token class forbidding /:@ so an @ in a path/query is never treated as userinfo. Deliberately scoped to the bare-token form only. The user:pass@ colon form and query-string tokens stay passing through (#34029, 'pass web URLs through unchanged') so magic-link / OAuth round-trip skills keep working — a bare credential in userinfo is never a workflow token (those live in the query string), so masking it can't break a skill.
This commit is contained in:
parent
9860d93f2a
commit
3483424aaa
2 changed files with 111 additions and 1 deletions
|
|
@ -222,6 +222,28 @@ _DB_CONNSTR_RE = re.compile(
|
|||
re.IGNORECASE,
|
||||
)
|
||||
|
||||
# Bare-token credential in a web/transport URL: ``scheme://TOKEN@host``.
# This is the ``git remote set-url origin https://PASSWORD@github.com/...``
# shape from issue #6396 — a single opaque credential in the userinfo position
# with NO ``user:pass`` colon. It is unambiguously a secret: legitimate
# round-trip URLs (OAuth callbacks, magic links, pre-signed shares — see the
# "Web-URL redaction is intentionally OFF" note in redact_sensitive_text) carry
# their tokens in the QUERY STRING, never in bare userinfo. The colon form
# ``user:pass@`` is deliberately left to pass through (commit "pass web URLs
# through unchanged", #34029) and is NOT matched here — the token class forbids
# ``:``. DB schemes are handled by _DB_CONNSTR_RE above and excluded here.
#
# Capture groups: (1) ``scheme://``, (2) the bare token, (3) ``@host...`` up to
# the next whitespace.
#
# Guards against false positives:
#   - 8+ char floor skips short usernames (git, admin, root, deploy, ubuntu).
#   - The token class ``[^\s:@/?#]`` cannot cross ``/``, ``?`` or ``#`` — the
#     three characters that terminate a URL authority (RFC 3986 section 3.2) —
#     so an ``@`` sitting in a path, query or fragment is never treated as
#     userinfo. Excluding ``?`` and ``#`` matters when the query or fragment
#     follows the host directly with no ``/`` in between, e.g.
#     ``https://example.com?q=user@example.com``: without them the span
#     ``example.com?q=user`` would be read as a bare token and the host masked.
_URL_BARE_TOKEN_RE = re.compile(
    r"((?:https?|wss?|git|ssh|ftp|ftps|sftp)://)"  # scheme
    r"([^\s:@/?#]{8,})"  # bare token (no colon/slash/@/?/#), 8+ chars
    r"(@[^\s]+)",  # @host...
    re.IGNORECASE,
)
|
||||
|
||||
# JWT tokens: header.payload[.signature] — always start with "eyJ" (base64 for "{")
|
||||
# Matches 1-part (header only), 2-part (header.payload), and full 3-part JWTs.
|
||||
_JWT_RE = re.compile(
|
||||
|
|
@ -564,6 +586,16 @@ def redact_sensitive_text(
|
|||
else:
|
||||
text = _DB_CONNSTR_RE.sub(lambda m: f"{m.group(1)}***{m.group(3)}", text)
|
||||
|
||||
# Bare-token userinfo in web/transport URLs: ``scheme://TOKEN@host``.
|
||||
# The git-remote-with-embedded-password shape from #6396. Only the
|
||||
# colon-less bare-token form is redacted — ``user:pass@`` and
|
||||
# query-string tokens are left to pass through (see the web-URL note
|
||||
# below). See _URL_BARE_TOKEN_RE for the false-positive guards.
|
||||
text = _URL_BARE_TOKEN_RE.sub(
|
||||
lambda m: f"{m.group(1)}{_mask_token(m.group(2))}{m.group(3)}",
|
||||
text,
|
||||
)
|
||||
|
||||
# JWT tokens (eyJ... — base64-encoded JSON headers)
|
||||
if "eyJ" in text:
|
||||
text = _JWT_RE.sub(lambda m: _mask_token(m.group(0)), text)
|
||||
|
|
@ -575,7 +607,12 @@ def redact_sensitive_text(
|
|||
# blanket-redacting param values by name breaks those skills mid-flow.
|
||||
# Known credential shapes (sk-, ghp_, JWTs, etc.) inside URLs are still
|
||||
# caught by _PREFIX_RE and _JWT_RE above. DB connection-string passwords
|
||||
# are still caught by _DB_CONNSTR_RE.
|
||||
# are still caught by _DB_CONNSTR_RE. The ONE userinfo case still redacted
|
||||
# is the colon-less bare-token form ``scheme://TOKEN@host`` (#6396, handled
|
||||
# by _URL_BARE_TOKEN_RE in the ``://`` block above): a bare credential in
|
||||
# userinfo is never a round-trip workflow token (those live in the query
|
||||
# string), so masking it can't break a skill. The ``user:pass@`` form is
|
||||
# left to pass through per #34029.
|
||||
|
||||
# Form-urlencoded bodies (only triggers on clean k=v&k=v inputs).
|
||||
if "&" in text and "=" in text:
|
||||
|
|
|
|||
|
|
@ -492,6 +492,79 @@ class TestWebUrlsNotRedacted:
|
|||
assert "dbpass" not in result
|
||||
|
||||
|
||||
class TestBareTokenUserinfoRedaction:
    """Regression coverage for #6396: bare credentials in URL userinfo.

    A lone secret in the userinfo slot (``scheme://TOKEN@host``, with no
    ``user:pass`` colon) must be masked — this is the shape produced by a git
    remote with an embedded password. The colon form ``user:pass@`` and
    query-string tokens are intentionally passed through (#34029) so
    magic-link / OAuth round-trip skills keep working; TestWebUrlsNotRedacted
    holds those invariants.
    """

    def test_git_remote_bare_password_redacted(self):
        """The reported scenario: a password embedded in a git remote URL."""
        command = (
            "git remote set-url origin "
            "https://MYPASSWORDWASDISLAYEDHERE@github.com/unclehowell/FCUK.git"
        )
        redacted = redact_sensitive_text(command)
        # The secret is gone, but host and repository path survive intact.
        assert "MYPASSWORDWASDISLAYEDHERE" not in redacted
        assert "@github.com" in redacted
        assert "unclehowell/FCUK.git" in redacted

    def test_ssh_bare_token_redacted(self):
        """An ``ssh://`` URL gets the same treatment, host preserved."""
        redacted = redact_sensitive_text(
            "ssh://longtoken1234567@gitlab.com/project.git"
        )
        assert "longtoken1234567" not in redacted
        assert "@gitlab.com" in redacted

    def test_ftp_bare_token_redacted(self):
        """An ``ftp://`` URL with a bare token is masked."""
        redacted = redact_sensitive_text("ftp://ftptoken123456@ftp.example.com/files")
        assert "ftptoken123456" not in redacted

    def test_bare_token_with_query_redacts_token_only(self):
        """Only the userinfo token is masked; the query string is untouched."""
        redacted = redact_sensitive_text("https://abcdef1234567@host.com/path?foo=bar")
        assert "abcdef1234567" not in redacted
        assert "?foo=bar" in redacted

    def test_user_pass_form_still_passes_through(self):
        """The ``user:pass@`` colon form must NOT be redacted (#34029)."""
        original = "URL: https://user:supersecretpw@host.example.com/path"
        assert redact_sensitive_text(original) == original

    def test_short_username_not_redacted(self):
        """Short userinfo (git, admin, deploy) below the 8-char floor passes."""
        short_user_urls = (
            "https://git@github.com/user/repo.git",
            "https://admin@example.com/x",
            "https://deploy@host.com/y",
        )
        for url in short_user_urls:
            assert redact_sensitive_text(url) == url

    def test_email_in_path_not_redacted(self):
        """An ``@`` in a path/query is not userinfo — the token class stops at
        ``/``, so emails after the first slash are never treated as a credential."""
        urls_with_email = (
            "https://example.com/search?q=user@example.com",
            "https://example.com/users/john@doe.com/profile",
        )
        for url in urls_with_email:
            assert redact_sensitive_text(url) == url

    def test_plain_url_unchanged(self):
        """A URL with no userinfo at all is returned verbatim."""
        original = "https://github.com/user/repo.git"
        assert redact_sensitive_text(original) == original

    def test_long_bare_token_preserves_head_tail(self):
        """A long token is masked as a whole but keeps its head and tail."""
        head, tail = "abcdef", "wxyz"
        token = head + "x" * 20 + tail
        redacted = redact_sensitive_text(f"https://{token}@github.com/u/r.git")
        assert token not in redacted
        assert "abcdef" in redacted  # head preserved
        assert "wxyz" in redacted  # tail preserved
|
||||
|
||||
|
||||
class TestFormBodyRedaction:
|
||||
"""Form-urlencoded body redaction (k=v&k=v with no other text)."""
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue