fix(redact): mask all Authorization schemes and x-api-key style headers

Secret redaction only matched `Authorization: Bearer <token>`. Other auth headers passed through verbatim into logs, tool output, and transcripts:

- `Authorization: Basic <base64>` — leaks base64(user:password)
- `Authorization: token <pat>` / any non-Bearer scheme
- `Proxy-Authorization: ...`
- `x-api-key: <key>` (Anthropic and many providers) and `api-key`, `x-goog-api-key`, `x-auth-token`, `x-access-token`, ... — opaque values with no known vendor prefix were caught by nothing

A logged request or an echoed `curl -H "x-api-key: ..."` command therefore leaked live credentials.

Generalize the Authorization rule to mask the credential for any scheme (and Proxy-Authorization) while preserving the header name and scheme word for debuggability, and add an api-key header rule for the single-opaque-value headers. Bearer behavior is unchanged; plain prose containing the word "authorization" (no colon-delimited value) is left untouched.

Adds regression tests for Basic/token/Proxy auth and the x-api-key/api-key headers, including inside a curl command.
2026-06-23 10:42:00 +00:00 · 2026-06-21 23:20:01 +03:00 · 2026-06-21 23:20:01 +03:00 · 6f0ecf37da
commit 6f0ecf37da
parent 87ab373381
2 changed files with 71 additions and 5 deletions
--- a/agent/redact.py
+++ b/agent/redact.py
@ -120,9 +120,25 @@ _JSON_FIELD_RE = re.compile(
    re.IGNORECASE,
 )

-# Authorization headers
+# Authorization headers — any scheme (Bearer, Basic, Token, …) plus the bare-
+# credential form, and Proxy-Authorization. The first credential token is masked
+# (later parameters of multi-part schemes such as Digest are not) while the
+# header name and scheme word are kept for debuggability. The old rule matched
+# only ``Bearer``, so ``Basic <base64 user:pass>`` and ``token <pat>`` leaked.
 _AUTH_HEADER_RE = re.compile(
-    r"(Authorization:\s*Bearer\s+)(\S+)",
+    r"((?:Proxy-)?Authorization:\s*)([A-Za-z][\w.+-]*\s+)?(\S+)",
+    re.IGNORECASE,
+)
+
+# API-key style auth headers carrying a single opaque value (no scheme word).
+# Anthropic and many providers authenticate with ``x-api-key``; values without
+# a known vendor prefix (custom/local backends) would otherwise leak when a
+# request or curl command is logged or echoed into tool output / transcripts.
+_SECRET_HEADER_NAMES = (
+    r"(?:x-api-key|x-goog-api-key|api-key|apikey|x-api-token|x-auth-token|x-access-token)"
+)
+_SECRET_HEADER_RE = re.compile(
+    rf"({_SECRET_HEADER_NAMES}\s*:\s*)(\S+)",
    re.IGNORECASE,
 )

@ -374,11 +390,19 @@ def redact_sensitive_text(text: str, *, force: bool = False, code_file: bool = F
                return f'{key}: "{_mask_token(value)}"'
            text = _JSON_FIELD_RE.sub(_redact_json, text)

-    # Authorization headers — _AUTH_HEADER_RE is "Authorization: Bearer ..."
-    # case-insensitive, so "uthorization" is the cheapest substring gate that
-    # covers both "Authorization" and "authorization" without a casefold().
+    # Authorization headers — _AUTH_HEADER_RE matches any scheme after
+    # "[Proxy-]Authorization:" case-insensitively, so "uthorization" is the
+    # cheapest substring gate that covers every casing without a casefold().
    if "uthorization" in text or "UTHORIZATION" in text:
        text = _AUTH_HEADER_RE.sub(
+            lambda m: m.group(1) + (m.group(2) or "") + _mask_token(m.group(3)),
+            text,
+        )
+
+    # API-key style headers (x-api-key, api-key, …). Header values are
+    # colon-separated, so gate on ":" — the regex itself is the precise filter.
+    if ":" in text:
+        text = _SECRET_HEADER_RE.sub(
            lambda m: m.group(1) + _mask_token(m.group(2)),
            text,
        )
--- a/tests/agent/test_redact.py
+++ b/tests/agent/test_redact.py
@ -147,6 +147,48 @@ class TestAuthHeaders:
        result = redact_sensitive_text(text)
        assert "mytoken12345" not in result

+    def test_basic_auth_credentials_masked(self):
+        # base64 of "user:longpassword1234" — leaks user:pass if not redacted.
+        text = "Authorization: Basic dXNlcjpsb25ncGFzc3dvcmQxMjM0"
+        result = redact_sensitive_text(text)
+        assert "Authorization: Basic" in result
+        assert "dXNlcjpsb25ncGFzc3dvcmQxMjM0" not in result
+
+    def test_token_scheme_masked(self):
+        text = "Authorization: token opaque-credential-1234567890"
+        result = redact_sensitive_text(text)
+        assert "Authorization: token" in result
+        assert "opaque-credential" not in result
+
+    def test_proxy_authorization_masked(self):
+        text = "Proxy-Authorization: Basic dXNlcjpzdXBlcnNlY3JldDEyMzQ="
+        result = redact_sensitive_text(text)
+        assert "dXNlcjpzdXBlcnNlY3JldDEyMzQ=" not in result
+
+    def test_authorization_prose_unchanged(self):
+        # "authorization" without a colon-delimited value is plain prose.
+        text = "the authorization model is fully open"
+        assert redact_sensitive_text(text) == text
+
+
+class TestApiKeyHeaders:
+    def test_x_api_key_header_masked(self):
+        text = "x-api-key: opaque-provider-key-1234567890"
+        result = redact_sensitive_text(text)
+        assert "x-api-key:" in result
+        assert "opaque-provider-key" not in result
+
+    def test_x_api_key_in_curl_command_masked(self):
+        text = 'curl -H "x-api-key: sk-local-VERYsecret-999888" https://api.example.com'
+        result = redact_sensitive_text(text)
+        assert "VERYsecret" not in result
+        assert "https://api.example.com" in result
+
+    def test_api_key_header_masked(self):
+        text = "api-key: anotherOpaqueSecret1234567"
+        result = redact_sensitive_text(text)
+        assert "anotherOpaqueSecret" not in result
+

 class TestTelegramTokens:
    def test_bot_token(self):