From 6f0ecf37dad0bcb989ea6139def524e6f0304d55 Mon Sep 17 00:00:00 2001 From: devorun Date: Sun, 21 Jun 2026 23:20:01 +0300 Subject: [PATCH] fix(redact): mask all Authorization schemes and x-api-key style headers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Secret redaction only matched `Authorization: Bearer <token>`. Other auth headers passed through verbatim into logs, tool output, and transcripts: - `Authorization: Basic <credentials>` — leaks base64(user:password) - `Authorization: token <value>` / any non-Bearer scheme - `Proxy-Authorization: ...` - `x-api-key: <key>` (Anthropic and many providers) and `api-key`, `x-goog-api-key`, `x-auth-token`, `x-access-token`, ... — opaque values with no known vendor prefix were caught by nothing A logged request or an echoed `curl -H "x-api-key: ..."` command therefore leaked live credentials. Generalize the Authorization rule to mask the credential for any scheme (and Proxy-Authorization) while preserving the header name and scheme word for debuggability, and add an api-key header rule for the single-opaque-value headers. Bearer behavior is unchanged; plain prose containing the word "authorization" (no colon-delimited value) is left untouched. Adds regression tests for Basic/token/Proxy auth and the x-api-key/api-key headers, including inside a curl command. --- agent/redact.py | 34 +++++++++++++++++++++++++----- tests/agent/test_redact.py | 42 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+), 5 deletions(-) diff --git a/agent/redact.py b/agent/redact.py index de247ec0ad2..06a7300a307 100644 --- a/agent/redact.py +++ b/agent/redact.py @@ -120,9 +120,25 @@ _JSON_FIELD_RE = re.compile( re.IGNORECASE, ) -# Authorization headers +# Authorization headers — any scheme (Bearer, Basic, Token, Digest, …) plus the +# bare-credential form, and Proxy-Authorization. The credential token is masked +# while the header name and scheme word are preserved for debuggability. 
The +# previous rule only matched ``Bearer``, so ``Basic <credentials>`` and +# ``token <value>`` leaked verbatim into logs/transcripts. _AUTH_HEADER_RE = re.compile( - r"(Authorization:\s*Bearer\s+)(\S+)", + r"((?:Proxy-)?Authorization:\s*)([A-Za-z][\w.+-]*\s+)?(\S+)", + re.IGNORECASE, +) + +# API-key style auth headers carrying a single opaque value (no scheme word). +# Anthropic and many providers authenticate with ``x-api-key``; values without +# a known vendor prefix (custom/local backends) would otherwise leak when a +# request or curl command is logged or echoed into tool output / transcripts. +_SECRET_HEADER_NAMES = ( + r"(?:x-api-key|x-goog-api-key|api-key|apikey|x-api-token|x-auth-token|x-access-token)" +) +_SECRET_HEADER_RE = re.compile( + rf"({_SECRET_HEADER_NAMES}\s*:\s*)(\S+)", re.IGNORECASE, ) @@ -374,11 +390,19 @@ def redact_sensitive_text(text: str, *, force: bool = False, code_file: bool = F return f'{key}: "{_mask_token(value)}"' text = _JSON_FIELD_RE.sub(_redact_json, text) - # Authorization headers — _AUTH_HEADER_RE is "Authorization: Bearer ..." - # case-insensitive, so "uthorization" is the cheapest substring gate that - # covers both "Authorization" and "authorization" without a casefold(). + # Authorization headers — _AUTH_HEADER_RE matches any scheme after + # "[Proxy-]Authorization:" case-insensitively, so "uthorization" is the + # cheapest substring gate that covers every casing without a casefold(). if "uthorization" in text or "UTHORIZATION" in text: text = _AUTH_HEADER_RE.sub( + lambda m: m.group(1) + (m.group(2) or "") + _mask_token(m.group(3)), + text, + ) + + # API-key style headers (x-api-key, api-key, …). Header values are + # colon-separated, so gate on ":" — the regex itself is the precise filter. 
+ if ":" in text: + text = _SECRET_HEADER_RE.sub( lambda m: m.group(1) + _mask_token(m.group(2)), text, ) diff --git a/tests/agent/test_redact.py b/tests/agent/test_redact.py index 472b97fb395..88cc424a758 100644 --- a/tests/agent/test_redact.py +++ b/tests/agent/test_redact.py @@ -147,6 +147,48 @@ class TestAuthHeaders: result = redact_sensitive_text(text) assert "mytoken12345" not in result + def test_basic_auth_credentials_masked(self): + # base64 of "user:longpassword1234" — leaks user:pass if not redacted. + text = "Authorization: Basic dXNlcjpsb25ncGFzc3dvcmQxMjM0" + result = redact_sensitive_text(text) + assert "Authorization: Basic" in result + assert "dXNlcjpsb25ncGFzc3dvcmQxMjM0" not in result + + def test_token_scheme_masked(self): + text = "Authorization: token opaque-credential-1234567890" + result = redact_sensitive_text(text) + assert "Authorization: token" in result + assert "opaque-credential" not in result + + def test_proxy_authorization_masked(self): + text = "Proxy-Authorization: Basic dXNlcjpzdXBlcnNlY3JldDEyMzQ=" + result = redact_sensitive_text(text) + assert "dXNlcjpzdXBlcnNlY3JldDEyMzQ=" not in result + + def test_authorization_prose_unchanged(self): + # "authorization" without a colon-delimited value is plain prose. 
+ text = "the authorization model is fully open" + assert redact_sensitive_text(text) == text + + +class TestApiKeyHeaders: + def test_x_api_key_header_masked(self): + text = "x-api-key: opaque-provider-key-1234567890" + result = redact_sensitive_text(text) + assert "x-api-key:" in result + assert "opaque-provider-key" not in result + + def test_x_api_key_in_curl_command_masked(self): + text = 'curl -H "x-api-key: sk-local-VERYsecret-999888" https://api.example.com' + result = redact_sensitive_text(text) + assert "VERYsecret" not in result + assert "https://api.example.com" in result + + def test_api_key_header_masked(self): + text = "api-key: anotherOpaqueSecret1234567" + result = redact_sensitive_text(text) + assert "anotherOpaqueSecret" not in result + class TestTelegramTokens: def test_bot_token(self):