diff --git a/agent/redact.py b/agent/redact.py
index de247ec0ad2..06a7300a307 100644
--- a/agent/redact.py
+++ b/agent/redact.py
@@ -120,9 +120,31 @@ _JSON_FIELD_RE = re.compile(
     re.IGNORECASE,
 )
 
-# Authorization headers
+# A single whitespace-delimited credential token. One trailing quote is left
+# outside the match so a quoted shell argument (``curl -H "name: value"``)
+# keeps its closing quote after the value is masked.
+_HEADER_VALUE = r"(\S+?)(?=[\"']?(?:\s|\Z))"
+
+# Authorization headers — any scheme (Bearer, Basic, Token, …) plus the
+# bare-credential form, and Proxy-Authorization. The credential token is masked
+# while the header name and scheme word are preserved for debuggability. The
+# previous rule only matched ``Bearer``, so ``Basic`` and ``token`` credentials
+# leaked verbatim into logs/transcripts. Only the first token after the scheme
+# is masked, so multi-parameter schemes such as Digest are covered partially.
 _AUTH_HEADER_RE = re.compile(
-    r"(Authorization:\s*Bearer\s+)(\S+)",
+    r"((?:Proxy-)?Authorization:\s*)([A-Za-z][\w.+-]*\s+)?" + _HEADER_VALUE,
+    re.IGNORECASE,
+)
+
+# API-key style auth headers carrying a single opaque value (no scheme word).
+# Anthropic and many providers authenticate with ``x-api-key``; values without
+# a known vendor prefix (custom/local backends) would otherwise leak when a
+# request or curl command is logged or echoed into tool output / transcripts.
+_SECRET_HEADER_NAMES = (
+    r"(?:x-api-key|x-goog-api-key|api-key|apikey|x-api-token|x-auth-token|x-access-token)"
+)
+_SECRET_HEADER_RE = re.compile(
+    rf"({_SECRET_HEADER_NAMES}\s*:\s*){_HEADER_VALUE}",
     re.IGNORECASE,
 )
 
@@ -374,11 +396,19 @@ def redact_sensitive_text(text: str, *, force: bool = False, code_file: bool = F
             return f'{key}: "{_mask_token(value)}"'
         text = _JSON_FIELD_RE.sub(_redact_json, text)
 
-    # Authorization headers — _AUTH_HEADER_RE is "Authorization: Bearer ..."
-    # case-insensitive, so "uthorization" is the cheapest substring gate that
-    # covers both "Authorization" and "authorization" without a casefold().
+    # Authorization headers — _AUTH_HEADER_RE matches any scheme after
+    # "[Proxy-]Authorization:" case-insensitively; the two substrings below are a
+    # cheap gate for the usual casings; odd mixed casing bypasses the regex.
     if "uthorization" in text or "UTHORIZATION" in text:
         text = _AUTH_HEADER_RE.sub(
+            lambda m: m.group(1) + (m.group(2) or "") + _mask_token(m.group(3)),
+            text,
+        )
+
+    # API-key style headers (x-api-key, api-key, …). Every header line has a
+    # ":" after the name, so gate on it — the regex itself is the precise filter.
+    if ":" in text:
+        text = _SECRET_HEADER_RE.sub(
             lambda m: m.group(1) + _mask_token(m.group(2)),
             text,
         )
diff --git a/tests/agent/test_redact.py b/tests/agent/test_redact.py
index 472b97fb395..88cc424a758 100644
--- a/tests/agent/test_redact.py
+++ b/tests/agent/test_redact.py
@@ -147,6 +147,50 @@ class TestAuthHeaders:
         result = redact_sensitive_text(text)
         assert "mytoken12345" not in result
 
+    def test_basic_auth_credentials_masked(self):
+        # base64 of "user:longpassword1234" — leaks user:pass if not redacted.
+        text = "Authorization: Basic dXNlcjpsb25ncGFzc3dvcmQxMjM0"
+        result = redact_sensitive_text(text)
+        assert "Authorization: Basic" in result
+        assert "dXNlcjpsb25ncGFzc3dvcmQxMjM0" not in result
+
+    def test_token_scheme_masked(self):
+        text = "Authorization: token opaque-credential-1234567890"
+        result = redact_sensitive_text(text)
+        assert "Authorization: token" in result
+        assert "opaque-credential" not in result
+
+    def test_proxy_authorization_masked(self):
+        text = "Proxy-Authorization: Basic dXNlcjpzdXBlcnNlY3JldDEyMzQ="
+        result = redact_sensitive_text(text)
+        assert "dXNlcjpzdXBlcnNlY3JldDEyMzQ=" not in result
+
+    def test_authorization_prose_unchanged(self):
+        # "authorization" without a colon-delimited value is plain prose.
+        text = "the authorization model is fully open"
+        assert redact_sensitive_text(text) == text
+
+
+class TestApiKeyHeaders:
+    def test_x_api_key_header_masked(self):
+        text = "x-api-key: opaque-provider-key-1234567890"
+        result = redact_sensitive_text(text)
+        assert "x-api-key:" in result
+        assert "opaque-provider-key" not in result
+
+    def test_x_api_key_in_curl_command_masked(self):
+        text = 'curl -H "x-api-key: sk-local-VERYsecret-999888" https://api.example.com'
+        result = redact_sensitive_text(text)
+        assert "VERYsecret" not in result
+        assert "https://api.example.com" in result
+        # The closing quote of the -H argument must survive masking.
+        assert '" https://api.example.com' in result
+
+    def test_api_key_header_masked(self):
+        text = "api-key: anotherOpaqueSecret1234567"
+        result = redact_sensitive_text(text)
+        assert "anotherOpaqueSecret" not in result
+
 
 class TestTelegramTokens:
     def test_bot_token(self):