hermes-agent/tests/agent/test_redact.py
Teknium d587d62eba
feat: replace kimi-k2.5 with kimi-k2.6 on OpenRouter and Nous Portal (#13148)
* feat(security): URL query param + userinfo + form body redaction

Port from nearai/ironclaw#2529.

Hermes already has broad value-shape coverage in agent/redact.py
(30+ vendor prefixes, JWTs, DB connstrs, etc.) but missed three
key-name-based patterns that catch opaque tokens without recognizable
prefixes:

1. URL query params - OAuth callback codes (?code=...),
   access_token, refresh_token, signature, etc. These are opaque and
   won't match any prefix regex. Now redacted by parameter NAME.

2. URL userinfo (https://user:pass@host) - for non-DB schemes. DB
   schemes were already handled by _DB_CONNSTR_RE.

3. Form-urlencoded body (k=v pairs joined by ampersands) -
   conservative, only triggers on clean pure-form inputs with no
   other text.

Sensitive key allowlist matches ironclaw's (exact case-insensitive,
NOT substring - so token_count and session_id pass through).

Tests: +20 new test cases across 3 test classes. All 75 redact tests
pass; gateway/test_pii_redaction and tools/test_browser_secret_exfil
also green.

Known pre-existing limitation: _ENV_ASSIGN_RE greedy match swallows
whole all-caps ENV-style names + trailing text when followed by
another assignment. Left untouched here (out of scope); URL query
redaction handles the lowercase case.

* feat: replace kimi-k2.5 with kimi-k2.6 on OpenRouter and Nous Portal

Update model catalogs for OpenRouter (fallback snapshot), Nous Portal,
and NVIDIA NIM to reference moonshotai/kimi-k2.6.  Add kimi-k2.6 to
the fixed-temperature frozenset in auxiliary_client.py so the 0.6
contract is enforced on aggregator routings.

Native Moonshot provider lists (kimi-coding, kimi-coding-cn, moonshot,
opencode-zen, opencode-go) are unchanged — those use Moonshot's own
model IDs which are unaffected.
2026-04-20 11:49:54 -07:00

513 lines
19 KiB
Python

"""Tests for agent.redact -- secret masking in logs and output."""
import logging
import os
import pytest
from agent.redact import redact_sensitive_text, RedactingFormatter
@pytest.fixture(autouse=True)
def _ensure_redaction_enabled(monkeypatch):
"""Ensure HERMES_REDACT_SECRETS is not disabled by prior test imports."""
monkeypatch.delenv("HERMES_REDACT_SECRETS", raising=False)
# Also patch the module-level snapshot so it reflects the cleared env var
monkeypatch.setattr("agent.redact._REDACT_ENABLED", True)
class TestKnownPrefixes:
def test_openai_sk_key(self):
text = "Using key sk-proj-abc123def456ghi789jkl012"
result = redact_sensitive_text(text)
assert "sk-pro" in result
assert "abc123def456" not in result
assert "..." in result
def test_openrouter_sk_key(self):
text = "OPENROUTER_API_KEY=sk-or-v1-abcdefghijklmnopqrstuvwxyz1234567890"
result = redact_sensitive_text(text)
assert "abcdefghijklmnop" not in result
def test_github_pat_classic(self):
result = redact_sensitive_text("token: ghp_abc123def456ghi789jkl")
assert "abc123def456" not in result
def test_github_pat_fine_grained(self):
result = redact_sensitive_text("github_pat_abc123def456ghi789jklmno")
assert "abc123def456" not in result
def test_slack_token(self):
token = "xoxb-" + "0" * 12 + "-" + "a" * 14
result = redact_sensitive_text(token)
assert "a" * 14 not in result
def test_google_api_key(self):
result = redact_sensitive_text("AIzaSyB-abc123def456ghi789jklmno012345")
assert "abc123def456" not in result
def test_perplexity_key(self):
result = redact_sensitive_text("pplx-abcdef123456789012345")
assert "abcdef12345" not in result
def test_fal_key(self):
result = redact_sensitive_text("fal_abc123def456ghi789jkl")
assert "abc123def456" not in result
def test_short_token_fully_masked(self):
result = redact_sensitive_text("key=sk-short1234567")
assert "***" in result
class TestEnvAssignments:
def test_export_api_key(self):
text = "export OPENAI_API_KEY=sk-proj-abc123def456ghi789jkl012"
result = redact_sensitive_text(text)
assert "OPENAI_API_KEY=" in result
assert "abc123def456" not in result
def test_quoted_value(self):
text = 'MY_SECRET_TOKEN="supersecretvalue123456789"'
result = redact_sensitive_text(text)
assert "MY_SECRET_TOKEN=" in result
assert "supersecretvalue" not in result
def test_non_secret_env_unchanged(self):
text = "HOME=/home/user"
result = redact_sensitive_text(text)
assert result == text
def test_path_unchanged(self):
text = "PATH=/usr/local/bin:/usr/bin"
result = redact_sensitive_text(text)
assert result == text
def test_lowercase_python_variable_token_unchanged(self):
# Regression: #4367 — lowercase 'token' assignment must not be redacted
text = "before_tokens = response.usage.prompt_tokens"
result = redact_sensitive_text(text)
assert result == text
def test_lowercase_python_variable_api_key_unchanged(self):
# Regression: #4367 — lowercase 'api_key' must not be redacted
text = "api_key = config.get('api_key')"
result = redact_sensitive_text(text)
assert result == text
def test_typescript_await_token_unchanged(self):
# Regression: #4367 — 'await' keyword must not be redacted as a secret value
text = "const token = await getToken();"
result = redact_sensitive_text(text)
assert result == text
def test_typescript_await_secret_unchanged(self):
# Regression: #4367 — similar pattern with 'secret' variable
text = "const secret = await fetchSecret();"
result = redact_sensitive_text(text)
assert result == text
def test_export_whitespace_preserved(self):
# Regression: #4367 — whitespace before uppercase env var must be preserved
text = "export SECRET_TOKEN=mypassword"
result = redact_sensitive_text(text)
assert result.startswith("export ")
assert "SECRET_TOKEN=" in result
assert "mypassword" not in result
class TestJsonFields:
def test_json_api_key(self):
text = '{"apiKey": "sk-proj-abc123def456ghi789jkl012"}'
result = redact_sensitive_text(text)
assert "abc123def456" not in result
def test_json_token(self):
text = '{"access_token": "eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.longtoken.here"}'
result = redact_sensitive_text(text)
assert "eyJhbGciOiJSUzI1NiIs" not in result
def test_json_non_secret_unchanged(self):
text = '{"name": "John", "model": "gpt-4"}'
result = redact_sensitive_text(text)
assert result == text
class TestAuthHeaders:
def test_bearer_token(self):
text = "Authorization: Bearer sk-proj-abc123def456ghi789jkl012"
result = redact_sensitive_text(text)
assert "Authorization: Bearer" in result
assert "abc123def456" not in result
def test_case_insensitive(self):
text = "authorization: bearer mytoken123456789012345678"
result = redact_sensitive_text(text)
assert "mytoken12345" not in result
class TestTelegramTokens:
def test_bot_token(self):
text = "bot123456789:ABCDEfghij-KLMNopqrst_UVWXyz12345"
result = redact_sensitive_text(text)
assert "ABCDEfghij" not in result
assert "123456789:***" in result
def test_raw_token(self):
text = "12345678901:ABCDEfghijKLMNopqrstUVWXyz1234567890"
result = redact_sensitive_text(text)
assert "ABCDEfghij" not in result
class TestPassthrough:
def test_empty_string(self):
assert redact_sensitive_text("") == ""
def test_none_returns_none(self):
assert redact_sensitive_text(None) is None
def test_non_string_input_int_coerced(self):
assert redact_sensitive_text(12345) == "12345"
def test_non_string_input_dict_coerced_and_redacted(self):
result = redact_sensitive_text({"token": "sk-proj-abc123def456ghi789jkl012"})
assert "abc123def456" not in result
def test_normal_text_unchanged(self):
text = "Hello world, this is a normal log message with no secrets."
assert redact_sensitive_text(text) == text
def test_code_unchanged(self):
text = "def main():\n print('hello')\n return 42"
assert redact_sensitive_text(text) == text
def test_url_without_key_unchanged(self):
text = "Connecting to https://api.openai.com/v1/chat/completions"
assert redact_sensitive_text(text) == text
class TestRedactingFormatter:
def test_formats_and_redacts(self):
formatter = RedactingFormatter("%(message)s")
record = logging.LogRecord(
name="test",
level=logging.INFO,
pathname="",
lineno=0,
msg="Key is sk-proj-abc123def456ghi789jkl012",
args=(),
exc_info=None,
)
result = formatter.format(record)
assert "abc123def456" not in result
assert "sk-pro" in result
class TestPrintenvSimulation:
"""Simulate what happens when the agent runs `env` or `printenv`."""
def test_full_env_dump(self):
env_dump = """HOME=/home/user
PATH=/usr/local/bin:/usr/bin
OPENAI_API_KEY=sk-proj-abc123def456ghi789jkl012mno345
OPENROUTER_API_KEY=sk-or-v1-reallyLongSecretKeyValue12345678
FIRECRAWL_API_KEY=fc-shortkey123456789012
TELEGRAM_BOT_TOKEN=bot987654321:ABCDEfghij-KLMNopqrst_UVWXyz12345
SHELL=/bin/bash
USER=teknium"""
result = redact_sensitive_text(env_dump)
# Secrets should be masked
assert "abc123def456" not in result
assert "reallyLongSecretKey" not in result
assert "ABCDEfghij" not in result
# Non-secrets should survive
assert "HOME=/home/user" in result
assert "SHELL=/bin/bash" in result
assert "USER=teknium" in result
class TestSecretCapturePayloadRedaction:
def test_secret_value_field_redacted(self):
text = '{"success": true, "secret_value": "sk-test-secret-1234567890"}'
result = redact_sensitive_text(text)
assert "sk-test-secret-1234567890" not in result
def test_raw_secret_field_redacted(self):
text = '{"raw_secret": "ghp_abc123def456ghi789jkl"}'
result = redact_sensitive_text(text)
assert "abc123def456" not in result
class TestElevenLabsTavilyExaKeys:
"""Regression tests for ElevenLabs (sk_), Tavily (tvly-), and Exa (exa_) keys."""
def test_elevenlabs_key_redacted(self):
text = "ELEVENLABS_API_KEY=sk_abc123def456ghi789jklmnopqrstu"
result = redact_sensitive_text(text)
assert "abc123def456ghi" not in result
def test_elevenlabs_key_in_log_line(self):
text = "Connecting to ElevenLabs with key sk_abc123def456ghi789jklmnopqrstu"
result = redact_sensitive_text(text)
assert "abc123def456ghi" not in result
def test_tavily_key_redacted(self):
text = "TAVILY_API_KEY=tvly-ABCdef123456789GHIJKL0000"
result = redact_sensitive_text(text)
assert "ABCdef123456789" not in result
def test_tavily_key_in_log_line(self):
text = "Initialising Tavily client with tvly-ABCdef123456789GHIJKL0000"
result = redact_sensitive_text(text)
assert "ABCdef123456789" not in result
def test_exa_key_redacted(self):
text = "EXA_API_KEY=exa_XYZ789abcdef000000000000000"
result = redact_sensitive_text(text)
assert "XYZ789abcdef" not in result
def test_exa_key_in_log_line(self):
text = "Using Exa client with key exa_XYZ789abcdef000000000000000"
result = redact_sensitive_text(text)
assert "XYZ789abcdef" not in result
def test_all_three_in_env_dump(self):
env_dump = (
"HOME=/home/user\n"
"ELEVENLABS_API_KEY=sk_abc123def456ghi789jklmnopqrstu\n"
"TAVILY_API_KEY=tvly-ABCdef123456789GHIJKL0000\n"
"EXA_API_KEY=exa_XYZ789abcdef000000000000000\n"
"SHELL=/bin/bash\n"
)
result = redact_sensitive_text(env_dump)
assert "abc123def456ghi" not in result
assert "ABCdef123456789" not in result
assert "XYZ789abcdef" not in result
assert "HOME=/home/user" in result
assert "SHELL=/bin/bash" in result
class TestJWTTokens:
"""JWT tokens start with eyJ (base64 for '{') and have dot-separated parts."""
def test_full_3part_jwt(self):
text = (
"Token: eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9"
".eyJpc3MiOiI0MjNiZDJkYjg4MjI0MDAwIn0"
".Gxgv0rru-_kS-I_60EJ7CENTnBh9UeuL3QhkMoQ-VnM"
)
result = redact_sensitive_text(text)
assert "Token:" in result
# Payload and signature must not survive
assert "eyJpc3Mi" not in result
assert "Gxgv0rru" not in result
def test_2part_jwt(self):
text = "eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0"
result = redact_sensitive_text(text)
assert "eyJzdWIi" not in result
def test_standalone_jwt_header(self):
text = "leaked header: eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9 here"
result = redact_sensitive_text(text)
assert "IkpXVCJ9" not in result
assert "leaked header:" in result
def test_jwt_with_base64_padding(self):
text = "eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0=.abc123def456ghij"
result = redact_sensitive_text(text)
assert "abc123def456" not in result
def test_short_eyj_not_matched(self):
"""eyJ followed by fewer than 10 base64 chars should not match."""
text = "eyJust a normal word"
assert redact_sensitive_text(text) == text
def test_jwt_preserves_surrounding_text(self):
text = "before eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0 after"
result = redact_sensitive_text(text)
assert result.startswith("before ")
assert result.endswith(" after")
def test_home_assistant_jwt_in_memory(self):
"""Real-world pattern: HA token stored in agent memory block."""
text = (
"Home Assistant API Token: "
"eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9"
".eyJpc3MiOiJhYmNkZWYiLCJleHAiOjE3NzQ5NTcxMDN9"
".Gxgv0rru-_kS-I_60EJ7CENTnBh9UeuL3QhkMoQ-VnM"
)
result = redact_sensitive_text(text)
assert "Home Assistant API Token:" in result
assert "Gxgv0rru" not in result
assert "..." in result
class TestDiscordMentions:
"""Discord snowflake IDs in <@ID> or <@!ID> format."""
def test_normal_mention(self):
result = redact_sensitive_text("Hello <@222589316709220353>")
assert "222589316709220353" not in result
assert "<@***>" in result
def test_nickname_mention(self):
result = redact_sensitive_text("Ping <@!1331549159177846844>")
assert "1331549159177846844" not in result
assert "<@!***>" in result
def test_multiple_mentions(self):
text = "<@111111111111111111> and <@222222222222222222>"
result = redact_sensitive_text(text)
assert "111111111111111111" not in result
assert "222222222222222222" not in result
def test_short_id_not_matched(self):
"""IDs shorter than 17 digits are not Discord snowflakes."""
text = "<@12345>"
assert redact_sensitive_text(text) == text
def test_slack_mention_not_matched(self):
"""Slack mentions use letters, not pure digits."""
text = "<@U024BE7LH>"
assert redact_sensitive_text(text) == text
def test_preserves_surrounding_text(self):
text = "User <@222589316709220353> said hello"
result = redact_sensitive_text(text)
assert result.startswith("User ")
assert result.endswith(" said hello")
class TestUrlQueryParamRedaction:
"""URL query-string redaction (ported from nearai/ironclaw#2529).
Catches opaque tokens that don't match vendor prefix regexes by
matching on parameter NAME rather than value shape.
"""
def test_oauth_callback_code(self):
text = "GET https://api.example.com/oauth/cb?code=abc123xyz789&state=csrf_ok"
result = redact_sensitive_text(text)
assert "abc123xyz789" not in result
assert "code=***" in result
assert "state=csrf_ok" in result # state is not sensitive
def test_access_token_query(self):
text = "Fetching https://example.com/api?access_token=opaque_value_here_1234&format=json"
result = redact_sensitive_text(text)
assert "opaque_value_here_1234" not in result
assert "access_token=***" in result
assert "format=json" in result
def test_refresh_token_query(self):
text = "https://auth.example.com/token?refresh_token=somerefresh&grant_type=refresh"
result = redact_sensitive_text(text)
assert "somerefresh" not in result
assert "grant_type=refresh" in result
def test_api_key_query(self):
text = "https://api.example.com/v1/data?api_key=kABCDEF12345&limit=10"
result = redact_sensitive_text(text)
assert "kABCDEF12345" not in result
assert "limit=10" in result
def test_presigned_signature(self):
text = "https://s3.amazonaws.com/bucket/k?signature=LONG_PRESIGNED_SIG&id=public"
result = redact_sensitive_text(text)
assert "LONG_PRESIGNED_SIG" not in result
assert "id=public" in result
def test_case_insensitive_param_names(self):
"""Lowercase/mixed-case sensitive param names are redacted."""
# NOTE: All-caps names like TOKEN= are swallowed by _ENV_ASSIGN_RE
# (which matches KEY=value patterns greedily) before URL regex runs.
# This test uses lowercase names to isolate URL-query redaction.
text = "https://example.com?api_key=abcdef&secret=ghijkl"
result = redact_sensitive_text(text)
assert "abcdef" not in result
assert "ghijkl" not in result
assert "api_key=***" in result
assert "secret=***" in result
def test_substring_match_does_not_trigger(self):
"""`token_count` and `session_id` must NOT match `token` / `session`."""
text = "https://example.com/cb?token_count=42&session_id=xyz&foo=bar"
result = redact_sensitive_text(text)
assert "token_count=42" in result
assert "session_id=xyz" in result
def test_url_without_query_unchanged(self):
text = "https://example.com/path/to/resource"
assert redact_sensitive_text(text) == text
def test_url_with_fragment(self):
text = "https://example.com/page?token=xyz#section"
result = redact_sensitive_text(text)
assert "token=xyz" not in result
assert "#section" in result
def test_websocket_url_query(self):
text = "wss://api.example.com/ws?token=opaqueWsToken123"
result = redact_sensitive_text(text)
assert "opaqueWsToken123" not in result
class TestUrlUserinfoRedaction:
"""URL userinfo (`scheme://user:pass@host`) for non-DB schemes."""
def test_https_userinfo(self):
text = "URL: https://user:supersecretpw@host.example.com/path"
result = redact_sensitive_text(text)
assert "supersecretpw" not in result
assert "https://user:***@host.example.com" in result
def test_http_userinfo(self):
text = "http://admin:plaintextpass@internal.example.com/api"
result = redact_sensitive_text(text)
assert "plaintextpass" not in result
def test_ftp_userinfo(self):
text = "ftp://user:ftppass@ftp.example.com/file.txt"
result = redact_sensitive_text(text)
assert "ftppass" not in result
def test_url_without_userinfo_unchanged(self):
text = "https://example.com/path"
assert redact_sensitive_text(text) == text
def test_db_connstr_still_handled(self):
"""DB schemes are handled by _DB_CONNSTR_RE, not _URL_USERINFO_RE."""
text = "postgres://admin:dbpass@db.internal:5432/app"
result = redact_sensitive_text(text)
assert "dbpass" not in result
class TestFormBodyRedaction:
"""Form-urlencoded body redaction (k=v&k=v with no other text)."""
def test_pure_form_body(self):
text = "password=mysecret&username=bob&token=opaqueValue"
result = redact_sensitive_text(text)
assert "mysecret" not in result
assert "opaqueValue" not in result
assert "username=bob" in result
def test_oauth_token_request(self):
text = "grant_type=password&client_id=app&client_secret=topsecret&username=alice&password=alicepw"
result = redact_sensitive_text(text)
assert "topsecret" not in result
assert "alicepw" not in result
assert "client_id=app" in result
def test_non_form_text_unchanged(self):
"""Sentences with `&` should NOT trigger form redaction."""
text = "I have password=foo and other things" # contains spaces
result = redact_sensitive_text(text)
# The space breaks the form regex; passthrough expected.
assert "I have" in result
def test_multiline_text_not_form(self):
"""Multi-line text is never treated as form body."""
text = "first=1\nsecond=2"
# Should pass through (still subject to other redactors)
assert "first=1" in redact_sensitive_text(text)