diff --git a/agent/redact.py b/agent/redact.py
index 04d35e3c9..af3b7bb93 100644
--- a/agent/redact.py
+++ b/agent/redact.py
@@ -93,6 +93,17 @@ _DB_CONNSTR_RE = re.compile(
     re.IGNORECASE,
 )
 
+# JWT tokens: header.payload[.signature] — always start with "eyJ" (base64 for "{")
+# Matches 1-part (header only), 2-part (header.payload), and full 3-part JWTs.
+_JWT_RE = re.compile(
+    r"eyJ[A-Za-z0-9_-]{10,}"  # Header (always starts with eyJ)
+    r"(?:\.[A-Za-z0-9_=-]{4,}){0,2}"  # Optional payload and/or signature
+)
+
+# Discord user/role mentions: <@ID>, <@!ID> (nickname form), or <@&ID> (role).
+# Snowflake IDs are 17-20 digits; group 1 keeps the "!"/"&" marker so it survives redaction.
+_DISCORD_MENTION_RE = re.compile(r"<@([!&]?)\d{17,20}>")
+
 # E.164 phone numbers: +, 7-15 digits
 # Negative lookahead prevents matching hex strings or identifiers
 _SIGNAL_PHONE_RE = re.compile(r"(\+[1-9]\d{6,14})(?![A-Za-z0-9])")
@@ -159,6 +170,12 @@ def redact_sensitive_text(text: str) -> str:
     # Database connection string passwords
     text = _DB_CONNSTR_RE.sub(lambda m: f"{m.group(1)}***{m.group(3)}", text)
 
+    # JWT tokens (eyJ... — base64-encoded JSON headers)
+    text = _JWT_RE.sub(lambda m: _mask_token(m.group(0)), text)
+
+    # Discord user/role mentions: mask the snowflake, keep the "!"/"&" marker (group 1)
+    text = _DISCORD_MENTION_RE.sub(r"<@\1***>", text)
+
     # E.164 phone numbers (Signal, WhatsApp)
     def _redact_phone(m):
         phone = m.group(1)
diff --git a/tests/agent/test_redact.py b/tests/agent/test_redact.py
index 83b1b4d1a..b40e6ef7f 100644
--- a/tests/agent/test_redact.py
+++ b/tests/agent/test_redact.py
@@ -284,3 +284,100 @@ class TestElevenLabsTavilyExaKeys:
         assert "XYZ789abcdef" not in result
         assert "HOME=/home/user" in result
         assert "SHELL=/bin/bash" in result
+
+
+class TestJWTTokens:
+    """JWT tokens start with eyJ (base64 for '{') and have dot-separated parts."""
+
+    def test_full_3part_jwt(self):
+        text = (
+            "Token: eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9"
+            ".eyJpc3MiOiI0MjNiZDJkYjg4MjI0MDAwIn0"
+            ".Gxgv0rru-_kS-I_60EJ7CENTnBh9UeuL3QhkMoQ-VnM"
+        )
+        result = redact_sensitive_text(text)
+        assert "Token:" in result
+        # Payload and signature must not survive
+        assert "eyJpc3Mi" not in result
+        assert "Gxgv0rru" not in result
+
+    def test_2part_jwt(self):
+        text = "eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0"
+        result = redact_sensitive_text(text)
+        assert "eyJzdWIi" not in result
+
+    def test_standalone_jwt_header(self):
+        text = "leaked header: eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9 here"
+        result = redact_sensitive_text(text)
+        assert "IkpXVCJ9" not in result
+        assert "leaked header:" in result
+
+    def test_jwt_with_base64_padding(self):
+        text = "eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0=.abc123def456ghij"
+        result = redact_sensitive_text(text)
+        assert "abc123def456" not in result
+
+    def test_short_eyj_not_matched(self):
+        """eyJ followed by fewer than 10 base64 chars should not match."""
+        text = "eyJust a normal word"
+        assert redact_sensitive_text(text) == text
+
+    def test_jwt_preserves_surrounding_text(self):
+        text = "before eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0 after"
+        result = redact_sensitive_text(text)
+        assert result.startswith("before ")
+        assert result.endswith(" after")
+
+    def test_home_assistant_jwt_in_memory(self):
+        """Real-world pattern: HA token stored in agent memory block."""
+        text = (
+            "Home Assistant API Token: "
+            "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9"
+            ".eyJpc3MiOiJhYmNkZWYiLCJleHAiOjE3NzQ5NTcxMDN9"
+            ".Gxgv0rru-_kS-I_60EJ7CENTnBh9UeuL3QhkMoQ-VnM"
+        )
+        result = redact_sensitive_text(text)
+        assert "Home Assistant API Token:" in result
+        assert "Gxgv0rru" not in result
+        assert "..." in result
+
+
+class TestDiscordMentions:
+    """Discord snowflake IDs in <@ID>, <@!ID>, or <@&ID> format."""
+
+    def test_normal_mention(self):
+        result = redact_sensitive_text("Hello <@222589316709220353>")
+        assert "222589316709220353" not in result
+        assert "<@***>" in result
+
+    def test_nickname_mention(self):
+        result = redact_sensitive_text("Ping <@!1331549159177846844>")
+        assert "1331549159177846844" not in result
+        assert "<@!***>" in result
+
+    def test_role_mention(self):
+        result = redact_sensitive_text("Paging <@&165511591545143296>")
+        assert "165511591545143296" not in result
+        assert "<@&***>" in result
+
+    def test_multiple_mentions(self):
+        text = "<@111111111111111111> and <@222222222222222222>"
+        result = redact_sensitive_text(text)
+        assert "111111111111111111" not in result
+        assert "222222222222222222" not in result
+
+    def test_short_id_not_matched(self):
+        """IDs shorter than 17 digits are not Discord snowflakes."""
+        text = "<@12345>"
+        assert redact_sensitive_text(text) == text
+
+    def test_slack_mention_not_matched(self):
+        """Slack mentions use letters, not pure digits."""
+        text = "<@U024BE7LH>"
+        assert redact_sensitive_text(text) == text
+
+    def test_preserves_surrounding_text(self):
+        text = "User <@222589316709220353> said hello"
+        result = redact_sensitive_text(text)
+        assert result.startswith("User ")
+        assert result.endswith(" said hello")