fix(redact): mask all Authorization schemes and x-api-key style headers

Secret redaction only matched `Authorization: Bearer <token>`. Other auth
headers passed through verbatim into logs, tool output, and transcripts:

- `Authorization: Basic <base64>` — leaks base64(user:password)
- `Authorization: token <pat>` / any non-Bearer scheme
- `Proxy-Authorization: ...`
- `x-api-key: <key>` (Anthropic and many providers) and `api-key`,
  `x-goog-api-key`, `x-auth-token`, `x-access-token`, ... — opaque values with
  no known vendor prefix, so no existing rule caught them

A logged request or an echoed `curl -H "x-api-key: ..."` command therefore
leaked live credentials.

Generalize the Authorization rule to mask the credential for any scheme (and
Proxy-Authorization) while preserving the header name and scheme word for
debuggability, and add an api-key header rule for the single-opaque-value
headers. Bearer behavior is unchanged; plain prose containing the word
"authorization" (no colon-delimited value) is left untouched.

Adds regression tests for Basic/token/Proxy auth and the x-api-key/api-key
headers, including inside a curl command.
This commit is contained in:
devorun 2026-06-21 23:20:01 +03:00 committed by Teknium
parent 87ab373381
commit 6f0ecf37da
2 changed files with 71 additions and 5 deletions

View file

@ -120,9 +120,25 @@ _JSON_FIELD_RE = re.compile(
re.IGNORECASE,
)
# Authorization headers — any scheme (Bearer, Basic, Token, …) plus the
# bare-credential form, and Proxy-Authorization. The credential token is masked
# while the header name and scheme word are preserved for debuggability. The
# previous rule only matched ``Bearer``, so ``Basic <base64 user:pass>`` and
# ``token <pat>`` leaked verbatim into logs/transcripts.
#
# Capture groups:
#   1. header name, colon and any following whitespace (kept verbatim)
#   2. optional scheme word plus its trailing whitespace (kept verbatim)
#   3. the first whitespace-delimited credential token (the part to mask)
#
# NOTE(review): only the first token after the scheme is captured, so a
# multi-parameter credential (e.g. Digest ``username=..., response=...``) is
# masked only up to its first space — confirm whether the rest needs a rule.
_AUTH_HEADER_RE = re.compile(
    r"((?:Proxy-)?Authorization:\s*)([A-Za-z][\w.+-]*\s+)?(\S+)",
    re.IGNORECASE,
)
# Headers whose entire value is one opaque secret (no scheme word in front).
# ``x-api-key`` is used by Anthropic and many other providers; keys lacking a
# known vendor prefix (custom/local backends) would otherwise slip through
# whenever a request or curl command is logged or echoed into tool output or
# transcripts.
_SECRET_HEADER_NAMES = "(?:" + "|".join(
    (
        "x-api-key",
        "x-goog-api-key",
        "api-key",
        "apikey",
        "x-api-token",
        "x-auth-token",
        "x-access-token",
    )
) + ")"
# Group 1 is the header name through the colon and surrounding whitespace;
# group 2 is the first whitespace-delimited value token.
_SECRET_HEADER_RE = re.compile(
    "(" + _SECRET_HEADER_NAMES + r"\s*:\s*)(\S+)",
    flags=re.IGNORECASE,
)
@ -374,11 +390,19 @@ def redact_sensitive_text(text: str, *, force: bool = False, code_file: bool = F
return f'{key}: "{_mask_token(value)}"'
text = _JSON_FIELD_RE.sub(_redact_json, text)
# Authorization headers — _AUTH_HEADER_RE matches any scheme after
# "[Proxy-]Authorization:" case-insensitively. The substring gate below is a
# cheap pre-check that avoids a casefold(): "uthorization" covers
# "Authorization" / "authorization" and "UTHORIZATION" covers the all-caps form.
# NOTE(review): mixed-case spellings such as "AUTHorization" match the regex
# but not the gate, so they skip redaction — confirm whether that is acceptable.
if "uthorization" in text or "UTHORIZATION" in text:
text = _AUTH_HEADER_RE.sub(
lambda m: m.group(1) + (m.group(2) or "") + _mask_token(m.group(3)),
text,
)
# API-key style headers (x-api-key, api-key, …). Header values are
# colon-separated, so gate on ":" — the regex itself is the precise filter.
if ":" in text:
text = _SECRET_HEADER_RE.sub(
lambda m: m.group(1) + _mask_token(m.group(2)),
text,
)

View file

@ -147,6 +147,48 @@ class TestAuthHeaders:
result = redact_sensitive_text(text)
assert "mytoken12345" not in result
def test_basic_auth_credentials_masked(self):
    """A Basic credential is masked; header name and scheme stay readable."""
    # The payload is base64 of "user:longpassword1234"; echoing it verbatim
    # would expose the user:password pair.
    encoded = "dXNlcjpsb25ncGFzc3dvcmQxMjM0"
    redacted = redact_sensitive_text("Authorization: Basic " + encoded)
    assert "Authorization: Basic" in redacted
    assert encoded not in redacted
def test_token_scheme_masked(self):
    """A non-Bearer ``token`` scheme keeps its label but loses the secret."""
    header_line = "Authorization: token opaque-credential-1234567890"
    redacted = redact_sensitive_text(header_line)
    assert "Authorization: token" in redacted
    assert "opaque-credential" not in redacted
def test_proxy_authorization_masked(self):
    """Proxy-Authorization credentials are masked; name and scheme survive."""
    credential = "dXNlcjpzdXBlcnNlY3JldDEyMzQ="
    result = redact_sensitive_text(f"Proxy-Authorization: Basic {credential}")
    assert credential not in result
    # Mirror the sibling Authorization tests: masking must not swallow the
    # header name or the scheme word, which are kept for debuggability.
    assert "Proxy-Authorization: Basic" in result
def test_authorization_prose_unchanged(self):
    """Prose that merely mentions "authorization" passes through untouched."""
    # No colon-delimited value follows the word, so this is not a header.
    prose = "the authorization model is fully open"
    redacted = redact_sensitive_text(prose)
    assert redacted == prose
class TestApiKeyHeaders:
    """Redaction of single-value API-key headers such as ``x-api-key``."""

    def test_x_api_key_header_masked(self):
        """The key is masked while the ``x-api-key:`` name stays visible."""
        header_line = "x-api-key: opaque-provider-key-1234567890"
        redacted = redact_sensitive_text(header_line)
        assert "x-api-key:" in redacted
        assert "opaque-provider-key" not in redacted

    def test_x_api_key_in_curl_command_masked(self):
        """A key inside an echoed curl command is masked; the URL is kept."""
        command = 'curl -H "x-api-key: sk-local-VERYsecret-999888" https://api.example.com'
        redacted = redact_sensitive_text(command)
        assert "VERYsecret" not in redacted
        assert "https://api.example.com" in redacted

    def test_api_key_header_masked(self):
        """The unprefixed ``api-key`` header value is masked as well."""
        header_line = "api-key: anotherOpaqueSecret1234567"
        redacted = redact_sensitive_text(header_line)
        assert "anotherOpaqueSecret" not in redacted
class TestTelegramTokens:
def test_bot_token(self):