hermes-agent/tests/hermes_cli/test_non_ascii_credential.py
Teknium da528a8207 fix: detect and strip non-ASCII characters from API keys (#6843)
API keys containing Unicode lookalike characters (e.g. ʋ U+028B instead
of v) cause UnicodeEncodeError when httpx encodes the Authorization
header as ASCII.  This commonly happens when users copy-paste keys from
PDFs, rich-text editors, or web pages with decorative fonts.

Three layers of defense:

1. **Save-time validation** (hermes_cli/config.py):
   _check_non_ascii_credential() strips non-ASCII from credential values
   when saving to .env, with a clear warning explaining the issue.

2. **Load-time sanitization** (hermes_cli/env_loader.py):
   _sanitize_loaded_credentials() strips non-ASCII from credential env
   vars (those ending in _API_KEY, _TOKEN, _SECRET, _KEY) after dotenv
   loads them, so the rest of the codebase never sees non-ASCII keys.

3. **Runtime recovery** (run_agent.py):
   The UnicodeEncodeError recovery block now also sanitizes self.api_key
   and self._client_kwargs['api_key'], fixing the gap where message/tool
   sanitization succeeded but the API key still caused httpx to fail on
   the Authorization header.

Also: hermes_logging.py RotatingFileHandler now explicitly sets
encoding='utf-8' instead of relying on locale default (defensive
hardening for ASCII-locale systems).
2026-04-14 20:20:31 -07:00

83 lines
3 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""Tests for non-ASCII credential detection and sanitization.
Covers the fix for issue #6843 — API keys containing Unicode lookalike
characters (e.g. ʋ U+028B instead of v) cause UnicodeEncodeError when
httpx tries to encode the Authorization header as ASCII.
"""
import os
import sys
import tempfile
import pytest
from hermes_cli.config import _check_non_ascii_credential
class TestCheckNonAsciiCredential:
"""Tests for _check_non_ascii_credential()."""
def test_ascii_key_unchanged(self):
key = "sk-proj-" + "a" * 100
result = _check_non_ascii_credential("TEST_API_KEY", key)
assert result == key
def test_strips_unicode_v_lookalike(self, capsys):
"""The exact scenario from issue #6843: ʋ instead of v."""
key = "sk-proj-abc" + "ʋ" + "def" # \u028b
result = _check_non_ascii_credential("OPENROUTER_API_KEY", key)
assert result == "sk-proj-abcdef"
assert "ʋ" not in result
# Should print a warning
captured = capsys.readouterr()
assert "non-ASCII" in captured.err
def test_strips_multiple_non_ascii(self, capsys):
key = "sk-proj-aʋbécd"
result = _check_non_ascii_credential("OPENAI_API_KEY", key)
assert result == "sk-proj-abcd"
captured = capsys.readouterr()
assert "U+028B" in captured.err # reports the char
def test_empty_key(self):
result = _check_non_ascii_credential("TEST_KEY", "")
assert result == ""
def test_all_ascii_no_warning(self, capsys):
result = _check_non_ascii_credential("KEY", "all-ascii-value-123")
assert result == "all-ascii-value-123"
captured = capsys.readouterr()
assert captured.err == ""
class TestEnvLoaderSanitization:
"""Tests for _sanitize_loaded_credentials in env_loader."""
def test_strips_non_ascii_from_api_key(self, monkeypatch):
from hermes_cli.env_loader import _sanitize_loaded_credentials
monkeypatch.setenv("OPENROUTER_API_KEY", "sk-proj-abcʋdef")
_sanitize_loaded_credentials()
assert os.environ["OPENROUTER_API_KEY"] == "sk-proj-abcdef"
def test_strips_non_ascii_from_token(self, monkeypatch):
from hermes_cli.env_loader import _sanitize_loaded_credentials
monkeypatch.setenv("DISCORD_BOT_TOKEN", "tokénvalue")
_sanitize_loaded_credentials()
assert os.environ["DISCORD_BOT_TOKEN"] == "toknvalue"
def test_ignores_non_credential_vars(self, monkeypatch):
from hermes_cli.env_loader import _sanitize_loaded_credentials
monkeypatch.setenv("MY_UNICODE_VAR", "héllo wörld")
_sanitize_loaded_credentials()
# Not a credential suffix — should be left alone
assert os.environ["MY_UNICODE_VAR"] == "héllo wörld"
def test_ascii_credentials_untouched(self, monkeypatch):
from hermes_cli.env_loader import _sanitize_loaded_credentials
monkeypatch.setenv("OPENAI_API_KEY", "sk-proj-allascii123")
_sanitize_loaded_credentials()
assert os.environ["OPENAI_API_KEY"] == "sk-proj-allascii123"