mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-23 10:42:00 +00:00
fix(redact): mask all Authorization schemes and x-api-key style headers
Secret redaction only matched `Authorization: Bearer <token>`. Other auth headers passed through verbatim into logs, tool output, and transcripts:

- `Authorization: Basic <base64>` — leaks base64(user:password)
- `Authorization: token <pat>` / any non-Bearer scheme
- `Proxy-Authorization: ...`
- `x-api-key: <key>` (Anthropic and many providers) and `api-key`, `x-goog-api-key`, `x-auth-token`, `x-access-token`, ... — opaque values with no known vendor prefix were caught by nothing

A logged request or an echoed `curl -H "x-api-key: ..."` command therefore leaked live credentials.

Generalize the Authorization rule to mask the credential for any scheme (and Proxy-Authorization) while preserving the header name and scheme word for debuggability, and add an api-key header rule for the single-opaque-value headers. Bearer behavior is unchanged; plain prose containing the word "authorization" (no colon-delimited value) is left untouched.

Adds regression tests for Basic/token/Proxy auth and the x-api-key/api-key headers, including inside a curl command.
This commit is contained in:
parent
87ab373381
commit
6f0ecf37da
2 changed files with 71 additions and 5 deletions
|
|
@ -120,9 +120,25 @@ _JSON_FIELD_RE = re.compile(
|
|||
re.IGNORECASE,
|
||||
)
|
||||
|
||||
# Authorization headers
|
||||
# Authorization headers — any scheme (Bearer, Basic, Token, Digest, …) plus the
|
||||
# bare-credential form, and Proxy-Authorization. The credential token is masked
|
||||
# while the header name and scheme word are preserved for debuggability. The
|
||||
# previous rule only matched ``Bearer``, so ``Basic <base64 user:pass>`` and
|
||||
# ``token <pat>`` leaked verbatim into logs/transcripts.
|
||||
_AUTH_HEADER_RE = re.compile(
|
||||
r"(Authorization:\s*Bearer\s+)(\S+)",
|
||||
r"((?:Proxy-)?Authorization:\s*)([A-Za-z][\w.+-]*\s+)?(\S+)",
|
||||
re.IGNORECASE,
|
||||
)
|
||||
|
||||
# API-key style auth headers carrying a single opaque value (no scheme word).
|
||||
# Anthropic and many providers authenticate with ``x-api-key``; values without
|
||||
# a known vendor prefix (custom/local backends) would otherwise leak when a
|
||||
# request or curl command is logged or echoed into tool output / transcripts.
|
||||
_SECRET_HEADER_NAMES = (
|
||||
r"(?:x-api-key|x-goog-api-key|api-key|apikey|x-api-token|x-auth-token|x-access-token)"
|
||||
)
|
||||
_SECRET_HEADER_RE = re.compile(
|
||||
rf"({_SECRET_HEADER_NAMES}\s*:\s*)(\S+)",
|
||||
re.IGNORECASE,
|
||||
)
|
||||
|
||||
|
|
@ -374,11 +390,19 @@ def redact_sensitive_text(text: str, *, force: bool = False, code_file: bool = F
|
|||
return f'{key}: "{_mask_token(value)}"'
|
||||
text = _JSON_FIELD_RE.sub(_redact_json, text)
|
||||
|
||||
# Authorization headers — _AUTH_HEADER_RE is "Authorization: Bearer ..."
|
||||
# case-insensitive, so "uthorization" is the cheapest substring gate that
|
||||
# covers both "Authorization" and "authorization" without a casefold().
|
||||
# Authorization headers — _AUTH_HEADER_RE matches any scheme after
|
||||
# "[Proxy-]Authorization:" case-insensitively, so "uthorization" is the
|
||||
# cheapest substring gate that covers every casing without a casefold().
|
||||
if "uthorization" in text or "UTHORIZATION" in text:
|
||||
text = _AUTH_HEADER_RE.sub(
|
||||
lambda m: m.group(1) + (m.group(2) or "") + _mask_token(m.group(3)),
|
||||
text,
|
||||
)
|
||||
|
||||
# API-key style headers (x-api-key, api-key, …). Header values are
|
||||
# colon-separated, so gate on ":" — the regex itself is the precise filter.
|
||||
if ":" in text:
|
||||
text = _SECRET_HEADER_RE.sub(
|
||||
lambda m: m.group(1) + _mask_token(m.group(2)),
|
||||
text,
|
||||
)
|
||||
|
|
|
|||
|
|
@ -147,6 +147,48 @@ class TestAuthHeaders:
|
|||
result = redact_sensitive_text(text)
|
||||
assert "mytoken12345" not in result
|
||||
|
||||
def test_basic_auth_credentials_masked(self):
    """A Basic credential is masked; header name and scheme word survive."""
    # The payload is the base64 form of "user:longpassword1234", so leaving
    # it in the output would expose the username and password.
    header = "Authorization: Basic dXNlcjpsb25ncGFzc3dvcmQxMjM0"
    redacted = redact_sensitive_text(header)
    assert "dXNlcjpsb25ncGFzc3dvcmQxMjM0" not in redacted
    assert "Authorization: Basic" in redacted
|
||||
|
||||
def test_token_scheme_masked(self):
    """A non-Bearer ``token`` scheme keeps its prefix but loses the secret."""
    header = "Authorization: token opaque-credential-1234567890"
    redacted = redact_sensitive_text(header)
    assert "opaque-credential" not in redacted
    assert "Authorization: token" in redacted
|
||||
|
||||
def test_proxy_authorization_masked(self):
    """The credential of a ``Proxy-Authorization`` header is not echoed back."""
    header = "Proxy-Authorization: Basic dXNlcjpzdXBlcnNlY3JldDEyMzQ="
    redacted = redact_sensitive_text(header)
    assert "dXNlcjpzdXBlcnNlY3JldDEyMzQ=" not in redacted
|
||||
|
||||
def test_authorization_prose_unchanged(self):
    """Prose that merely mentions "authorization" passes through untouched."""
    # There is no colon-delimited value here, so nothing should be masked.
    prose = "the authorization model is fully open"
    redacted = redact_sensitive_text(prose)
    assert redacted == prose
|
||||
|
||||
|
||||
class TestApiKeyHeaders:
    """Regression tests for headers whose whole value is an opaque API key."""

    def test_x_api_key_header_masked(self):
        """``x-api-key`` keeps its header name while the value is masked."""
        header = "x-api-key: opaque-provider-key-1234567890"
        redacted = redact_sensitive_text(header)
        assert "opaque-provider-key" not in redacted
        assert "x-api-key:" in redacted

    def test_x_api_key_in_curl_command_masked(self):
        """A key inside an echoed curl command is masked; the URL is kept."""
        command = 'curl -H "x-api-key: sk-local-VERYsecret-999888" https://api.example.com'
        redacted = redact_sensitive_text(command)
        assert "VERYsecret" not in redacted
        assert "https://api.example.com" in redacted

    def test_api_key_header_masked(self):
        """The un-prefixed ``api-key`` header value is masked as well."""
        header = "api-key: anotherOpaqueSecret1234567"
        redacted = redact_sensitive_text(header)
        assert "anotherOpaqueSecret" not in redacted
|
||||
|
||||
|
||||
class TestTelegramTokens:
|
||||
def test_bot_token(self):
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue