mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
API keys containing Unicode lookalike characters (e.g. ʋ U+028B instead of v) cause UnicodeEncodeError when httpx encodes the Authorization header as ASCII. This commonly happens when users copy-paste keys from PDFs, rich-text editors, or web pages with decorative fonts. Three layers of defense: 1. **Save-time validation** (hermes_cli/config.py): _check_non_ascii_credential() strips non-ASCII from credential values when saving to .env, with a clear warning explaining the issue. 2. **Load-time sanitization** (hermes_cli/env_loader.py): _sanitize_loaded_credentials() strips non-ASCII from credential env vars (those ending in _API_KEY, _TOKEN, _SECRET, _KEY) after dotenv loads them, so the rest of the codebase never sees non-ASCII keys. 3. **Runtime recovery** (run_agent.py): The UnicodeEncodeError recovery block now also sanitizes self.api_key and self._client_kwargs['api_key'], fixing the gap where message/tool sanitization succeeded but the API key still caused httpx to fail on the Authorization header. Also: hermes_logging.py RotatingFileHandler now explicitly sets encoding='utf-8' instead of relying on locale default (defensive hardening for ASCII-locale systems).
123 lines
4.5 KiB
Python
123 lines
4.5 KiB
Python
"""Helpers for loading Hermes .env files consistently across entrypoints."""
|
||
|
||
from __future__ import annotations
|
||
|
||
import os
|
||
from pathlib import Path
|
||
|
||
from dotenv import load_dotenv
|
||
|
||
|
||
# Suffixes that mark an environment variable name as holding a credential.
# Sanitization on load is restricted to these names: arbitrary user env vars
# must never be altered silently, whereas credentials have to be pure ASCII
# because they end up as HTTP header values.
_CREDENTIAL_SUFFIXES = ("_API_KEY", "_TOKEN", "_SECRET", "_KEY")


def _sanitize_loaded_credentials() -> None:
    """Remove non-ASCII characters from credential values in ``os.environ``.

    Intended to run after dotenv has populated the environment so that later
    code only reads ASCII-clean credentials.  A variable counts as a
    credential when its name ends with one of ``_CREDENTIAL_SUFFIXES``
    (``_API_KEY``, ``_TOKEN``, etc.).  All other variables, and credential
    values that are already pure ASCII, are left untouched.
    """
    # Walk a snapshot, because os.environ is reassigned inside the loop.
    for name, raw in tuple(os.environ.items()):
        if not name.endswith(_CREDENTIAL_SUFFIXES):
            continue
        if raw.isascii():
            continue
        # Keep only code points in the ASCII range; drop everything else.
        os.environ[name] = "".join(ch for ch in raw if ord(ch) < 128)
|
||
|
||
|
||
def _load_dotenv_with_fallback(path: Path, *, override: bool) -> None:
    """Load *path* with python-dotenv, then scrub credential env vars.

    The file is parsed as UTF-8 first; if that raises ``UnicodeDecodeError``
    the load is retried as latin-1.

    After loading, non-ASCII characters are stripped from credential env
    vars.  API keys must be pure ASCII since they're sent as HTTP header
    values (httpx encodes headers as ASCII).  Non-ASCII chars typically come
    from copy-pasting keys from PDFs or rich-text editors that substitute
    Unicode lookalike glyphs (e.g. ʋ U+028B for v).

    Args:
        path: The dotenv file to load.
        override: Forwarded to ``load_dotenv``; whether values from the file
            replace variables already present in the environment.
    """
    dotenv_args = {"dotenv_path": path, "override": override}
    try:
        load_dotenv(encoding="utf-8", **dotenv_args)
    except UnicodeDecodeError:
        load_dotenv(encoding="latin-1", **dotenv_args)

    _sanitize_loaded_credentials()
|
||
|
||
|
||
def _sanitize_env_file_if_needed(path: Path) -> None:
    """Pre-sanitize a .env file before python-dotenv reads it.

    python-dotenv does not handle corrupted lines where multiple
    KEY=VALUE pairs are concatenated on a single line (missing newline).
    This produces mangled values — e.g. a bot token duplicated 8×
    (see #8908).

    We delegate to ``hermes_cli.config._sanitize_env_lines`` which
    already knows all valid Hermes env-var names and can split
    concatenated lines correctly.

    The file is decoded strictly as UTF-8 with a latin-1 fallback (the same
    pair ``_load_dotenv_with_fallback`` uses) and, when a rewrite is needed,
    written back in the encoding it was read with.  Decoding leniently with
    replacement characters would permanently turn every non-UTF-8 byte of a
    legacy-encoded file into U+FFFD on rewrite.

    The rewrite goes through a temporary file in the same directory followed
    by ``os.replace``.  The whole operation is best-effort: any failure is
    swallowed so startup is never blocked.

    Args:
        path: Location of the .env file; a missing file is a no-op.
    """
    if not path.exists():
        return
    try:
        from hermes_cli.config import _sanitize_env_lines
    except ImportError:
        return  # early bootstrap — config module not available yet

    try:
        encoding = "utf-8"
        try:
            with open(path, encoding=encoding) as f:
                original = f.readlines()
        except UnicodeDecodeError:
            # latin-1 maps every byte to a code point, so the read cannot
            # fail and writing back with it reproduces untouched bytes.
            encoding = "latin-1"
            with open(path, encoding=encoding) as f:
                original = f.readlines()

        sanitized = _sanitize_env_lines(original)
        if sanitized != original:
            import tempfile

            # Same directory as the target so os.replace stays on one
            # filesystem.
            fd, tmp = tempfile.mkstemp(
                dir=str(path.parent), suffix=".tmp", prefix=".env_"
            )
            try:
                with os.fdopen(fd, "w", encoding=encoding) as f:
                    f.writelines(sanitized)
                    f.flush()
                    os.fsync(f.fileno())
                os.replace(tmp, path)
            except BaseException:
                # Never leave a stray temp file next to the user's .env.
                try:
                    os.unlink(tmp)
                except OSError:
                    pass
                raise
    except Exception:
        pass  # best-effort — don't block gateway startup
|
||
|
||
|
||
def load_hermes_dotenv(
    *,
    hermes_home: str | os.PathLike | None = None,
    project_env: str | os.PathLike | None = None,
) -> list[Path]:
    """Load Hermes environment files with user config taking precedence.

    Behavior:
    - `~/.hermes/.env` overrides stale shell-exported values when present.
    - project `.env` acts as a dev fallback and only fills missing values when
      the user env exists.
    - if no user env exists, the project `.env` also overrides stale shell vars.

    Args:
        hermes_home: Directory containing the user ``.env``.  When falsy, the
            ``HERMES_HOME`` environment variable is used, and only if that is
            unset does it default to ``~/.hermes``.
        project_env: Optional path to a project-level ``.env`` file.

    Returns:
        The env files that were loaded, in load order (user env first).
    """
    loaded: list[Path] = []

    if hermes_home:
        home_path = Path(hermes_home)
    else:
        env_home = os.environ.get("HERMES_HOME")
        # Resolve the default lazily: Path.home() raises RuntimeError when no
        # home directory can be determined, which must not break callers that
        # set HERMES_HOME explicitly.
        home_path = Path(env_home) if env_home is not None else Path.home() / ".hermes"
    user_env = home_path / ".env"
    project_env_path = Path(project_env) if project_env else None

    if user_env.exists():
        # Fix corrupted .env files before python-dotenv parses them (#8908).
        _sanitize_env_file_if_needed(user_env)
        _load_dotenv_with_fallback(user_env, override=True)
        loaded.append(user_env)

    if project_env_path and project_env_path.exists():
        # Override shell values only when no user env was loaded.
        _load_dotenv_with_fallback(project_env_path, override=not loaded)
        loaded.append(project_env_path)

    return loaded
|