mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
fix: detect and strip non-ASCII characters from API keys (#6843)
API keys containing Unicode lookalike characters (e.g. ʋ U+028B instead of v) cause a UnicodeEncodeError when httpx encodes the Authorization header as ASCII. This commonly happens when users copy-paste keys from PDFs, rich-text editors, or web pages with decorative fonts.

Three layers of defense:

1. **Save-time validation** (hermes_cli/config.py): _check_non_ascii_credential() strips non-ASCII from credential values when saving to .env, with a clear warning explaining the issue.
2. **Load-time sanitization** (hermes_cli/env_loader.py): _sanitize_loaded_credentials() strips non-ASCII from credential env vars (those ending in _API_KEY, _TOKEN, _SECRET, _KEY) after dotenv loads them, so the rest of the codebase never sees non-ASCII keys.
3. **Runtime recovery** (run_agent.py): The UnicodeEncodeError recovery block now also sanitizes self.api_key and self._client_kwargs['api_key'], fixing the gap where message/tool sanitization succeeded but the API key still caused httpx to fail on the Authorization header.

Also: hermes_logging.py RotatingFileHandler now explicitly sets encoding='utf-8' instead of relying on locale default (defensive hardening for ASCII-locale systems).
This commit is contained in:
parent
677f1227c3
commit
da528a8207
6 changed files with 206 additions and 0 deletions
|
|
@@ -142,6 +142,33 @@ class TestSurrogateVsAsciiSanitization:
|
|||
assert _sanitize_messages_surrogates(messages) is False
|
||||
|
||||
|
||||
class TestApiKeyNonAsciiSanitization:
    """Check API key cleanup used by the UnicodeEncodeError recovery path.

    Issue #6843 was traced to a non-ASCII character (ʋ U+028B) sitting
    inside the API key: httpx encodes the Authorization header as ASCII
    and fails on that character. These tests pin that stripping the
    non-ASCII characters from the key makes the header encodable again.
    """

    def test_strip_non_ascii_from_api_key(self):
        """A ʋ embedded in an API key string is dropped by _strip_non_ascii."""
        dirty_key = "sk-proj-abc" + "ʋ" + "def"
        cleaned_key = _strip_non_ascii(dirty_key)
        assert cleaned_key == "sk-proj-abcdef"

    def test_api_key_at_position_153(self):
        """Recreate the reported failure: ʋ at index 153 of 'Bearer <key>'."""
        # "Bearer " (7) + "sk-proj-" (8) + 138 filler chars puts ʋ at index 153.
        key = "sk-proj-" + "a" * 138 + "ʋ" + "bcd"
        header_value = f"Bearer {key}"

        # Encoding the raw header as ASCII (what httpx does) must fail
        # exactly at the offending character.
        with pytest.raises(UnicodeEncodeError) as caught:
            header_value.encode("ascii")
        assert caught.value.start == 153

        # Once the key is sanitized the same header encodes without raising.
        sanitized_key = _strip_non_ascii(key)
        clean_header = f"Bearer {sanitized_key}"
        clean_header.encode("ascii")  # should not raise
|
||||
|
||||
|
||||
class TestSanitizeToolsNonAscii:
|
||||
"""Tests for _sanitize_tools_non_ascii."""
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue