mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
fix: detect and strip non-ASCII characters from API keys (#6843)
API keys containing Unicode lookalike characters (e.g. ʋ U+028B instead of v) cause UnicodeEncodeError when httpx encodes the Authorization header as ASCII. This commonly happens when users copy-paste keys from PDFs, rich-text editors, or web pages with decorative fonts. Three layers of defense: 1. **Save-time validation** (hermes_cli/config.py): _check_non_ascii_credential() strips non-ASCII from credential values when saving to .env, with a clear warning explaining the issue. 2. **Load-time sanitization** (hermes_cli/env_loader.py): _sanitize_loaded_credentials() strips non-ASCII from credential env vars (those ending in _API_KEY, _TOKEN, _SECRET, _KEY) after dotenv loads them, so the rest of the codebase never sees non-ASCII keys. 3. **Runtime recovery** (run_agent.py): The UnicodeEncodeError recovery block now also sanitizes self.api_key and self._client_kwargs['api_key'], fixing the gap where message/tool sanitization succeeded but the API key still caused httpx to fail on the Authorization header. Also: hermes_logging.py RotatingFileHandler now explicitly sets encoding='utf-8' instead of relying on locale default (defensive hardening for ASCII-locale systems).
This commit is contained in:
parent
677f1227c3
commit
da528a8207
6 changed files with 206 additions and 0 deletions
|
|
@ -8,11 +8,40 @@ from pathlib import Path
|
|||
from dotenv import load_dotenv
|
||||
|
||||
|
||||
# Env var name suffixes that indicate credential values. These are the
|
||||
# only env vars whose values we sanitize on load — we must not silently
|
||||
# alter arbitrary user env vars, but credentials are known to require
|
||||
# pure ASCII (they become HTTP header values).
|
||||
_CREDENTIAL_SUFFIXES = ("_API_KEY", "_TOKEN", "_SECRET", "_KEY")
|
||||
|
||||
|
||||
def _sanitize_loaded_credentials() -> None:
|
||||
"""Strip non-ASCII characters from credential env vars in os.environ.
|
||||
|
||||
Called after dotenv loads so the rest of the codebase never sees
|
||||
non-ASCII API keys. Only touches env vars whose names end with
|
||||
known credential suffixes (``_API_KEY``, ``_TOKEN``, etc.).
|
||||
"""
|
||||
for key, value in list(os.environ.items()):
|
||||
if not any(key.endswith(suffix) for suffix in _CREDENTIAL_SUFFIXES):
|
||||
continue
|
||||
try:
|
||||
value.encode("ascii")
|
||||
except UnicodeEncodeError:
|
||||
os.environ[key] = value.encode("ascii", errors="ignore").decode("ascii")
|
||||
|
||||
|
||||
def _load_dotenv_with_fallback(path: Path, *, override: bool) -> None:
|
||||
try:
|
||||
load_dotenv(dotenv_path=path, override=override, encoding="utf-8")
|
||||
except UnicodeDecodeError:
|
||||
load_dotenv(dotenv_path=path, override=override, encoding="latin-1")
|
||||
# Strip non-ASCII characters from credential env vars that were just
|
||||
# loaded. API keys must be pure ASCII since they're sent as HTTP
|
||||
# header values (httpx encodes headers as ASCII). Non-ASCII chars
|
||||
# typically come from copy-pasting keys from PDFs or rich-text editors
|
||||
# that substitute Unicode lookalike glyphs (e.g. ʋ U+028B for v).
|
||||
_sanitize_loaded_credentials()
|
||||
|
||||
|
||||
def _sanitize_env_file_if_needed(path: Path) -> None:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue