mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
fix: detect and strip non-ASCII characters from API keys (#6843)
API keys containing Unicode lookalike characters (e.g. ʋ U+028B instead of v) cause UnicodeEncodeError when httpx encodes the Authorization header as ASCII. This commonly happens when users copy-paste keys from PDFs, rich-text editors, or web pages with decorative fonts. Three layers of defense: 1. **Save-time validation** (hermes_cli/config.py): _check_non_ascii_credential() strips non-ASCII from credential values when saving to .env, with a clear warning explaining the issue. 2. **Load-time sanitization** (hermes_cli/env_loader.py): _sanitize_loaded_credentials() strips non-ASCII from credential env vars (those ending in _API_KEY, _TOKEN, _SECRET, _KEY) after dotenv loads them, so the rest of the codebase never sees non-ASCII keys. 3. **Runtime recovery** (run_agent.py): The UnicodeEncodeError recovery block now also sanitizes self.api_key and self._client_kwargs['api_key'], fixing the gap where message/tool sanitization succeeded but the API key still caused httpx to fail on the Authorization header. Also: hermes_logging.py RotatingFileHandler now explicitly sets encoding='utf-8' instead of relying on locale default (defensive hardening for ASCII-locale systems).
This commit is contained in:
parent
677f1227c3
commit
da528a8207
6 changed files with 206 additions and 0 deletions
|
|
@ -2766,6 +2766,47 @@ def sanitize_env_file() -> int:
|
|||
return fixes
|
||||
|
||||
|
||||
def _check_non_ascii_credential(key: str, value: str) -> str:
|
||||
"""Warn and strip non-ASCII characters from credential values.
|
||||
|
||||
API keys and tokens must be pure ASCII — they are sent as HTTP header
|
||||
values which httpx/httpcore encode as ASCII. Non-ASCII characters
|
||||
(commonly introduced by copy-pasting from rich-text editors or PDFs
|
||||
that substitute lookalike Unicode glyphs for ASCII letters) cause
|
||||
``UnicodeEncodeError: 'ascii' codec can't encode character`` at
|
||||
request time.
|
||||
|
||||
Returns the sanitized (ASCII-only) value. Prints a warning if any
|
||||
non-ASCII characters were found and removed.
|
||||
"""
|
||||
try:
|
||||
value.encode("ascii")
|
||||
return value # all ASCII — nothing to do
|
||||
except UnicodeEncodeError:
|
||||
pass
|
||||
|
||||
# Build a readable list of the offending characters
|
||||
bad_chars: list[str] = []
|
||||
for i, ch in enumerate(value):
|
||||
if ord(ch) > 127:
|
||||
bad_chars.append(f" position {i}: {ch!r} (U+{ord(ch):04X})")
|
||||
sanitized = value.encode("ascii", errors="ignore").decode("ascii")
|
||||
|
||||
import sys
|
||||
print(
|
||||
f"\n Warning: {key} contains non-ASCII characters that will break API requests.\n"
|
||||
f" This usually happens when copy-pasting from a PDF, rich-text editor,\n"
|
||||
f" or web page that substitutes lookalike Unicode glyphs for ASCII letters.\n"
|
||||
f"\n"
|
||||
+ "\n".join(f" {line}" for line in bad_chars[:5])
|
||||
+ ("\n ... and more" if len(bad_chars) > 5 else "")
|
||||
+ f"\n\n The non-ASCII characters have been stripped automatically.\n"
|
||||
f" If authentication fails, re-copy the key from the provider's dashboard.\n",
|
||||
file=sys.stderr,
|
||||
)
|
||||
return sanitized
|
||||
|
||||
|
||||
def save_env_value(key: str, value: str):
|
||||
"""Save or update a value in ~/.hermes/.env."""
|
||||
if is_managed():
|
||||
|
|
@ -2774,6 +2815,8 @@ def save_env_value(key: str, value: str):
|
|||
if not _ENV_VAR_NAME_RE.match(key):
|
||||
raise ValueError(f"Invalid environment variable name: {key!r}")
|
||||
value = value.replace("\n", "").replace("\r", "")
|
||||
# API keys / tokens must be ASCII — strip non-ASCII with a warning.
|
||||
value = _check_non_ascii_credential(key, value)
|
||||
ensure_hermes_home()
|
||||
env_path = get_env_path()
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue