"""Helpers for loading Hermes .env files consistently across entrypoints.""" from __future__ import annotations import os from pathlib import Path from dotenv import load_dotenv # Env var name suffixes that indicate credential values. These are the # only env vars whose values we sanitize on load — we must not silently # alter arbitrary user env vars, but credentials are known to require # pure ASCII (they become HTTP header values). _CREDENTIAL_SUFFIXES = ("_API_KEY", "_TOKEN", "_SECRET", "_KEY") def _sanitize_loaded_credentials() -> None: """Strip non-ASCII characters from credential env vars in os.environ. Called after dotenv loads so the rest of the codebase never sees non-ASCII API keys. Only touches env vars whose names end with known credential suffixes (``_API_KEY``, ``_TOKEN``, etc.). """ for key, value in list(os.environ.items()): if not any(key.endswith(suffix) for suffix in _CREDENTIAL_SUFFIXES): continue try: value.encode("ascii") except UnicodeEncodeError: os.environ[key] = value.encode("ascii", errors="ignore").decode("ascii") def _load_dotenv_with_fallback(path: Path, *, override: bool) -> None: try: load_dotenv(dotenv_path=path, override=override, encoding="utf-8") except UnicodeDecodeError: load_dotenv(dotenv_path=path, override=override, encoding="latin-1") # Strip non-ASCII characters from credential env vars that were just # loaded. API keys must be pure ASCII since they're sent as HTTP # header values (httpx encodes headers as ASCII). Non-ASCII chars # typically come from copy-pasting keys from PDFs or rich-text editors # that substitute Unicode lookalike glyphs (e.g. ʋ U+028B for v). _sanitize_loaded_credentials() def _sanitize_env_file_if_needed(path: Path) -> None: """Pre-sanitize a .env file before python-dotenv reads it. python-dotenv does not handle corrupted lines where multiple KEY=VALUE pairs are concatenated on a single line (missing newline). This produces mangled values — e.g. a bot token duplicated 8× (see #8908). We delegate to ``hermes_cli.config._sanitize_env_lines`` which already knows all valid Hermes env-var names and can split concatenated lines correctly. """ if not path.exists(): return try: from hermes_cli.config import _sanitize_env_lines except ImportError: return # early bootstrap — config module not available yet read_kw = {"encoding": "utf-8", "errors": "replace"} try: with open(path, **read_kw) as f: original = f.readlines() sanitized = _sanitize_env_lines(original) if sanitized != original: import tempfile fd, tmp = tempfile.mkstemp( dir=str(path.parent), suffix=".tmp", prefix=".env_" ) try: with os.fdopen(fd, "w", encoding="utf-8") as f: f.writelines(sanitized) f.flush() os.fsync(f.fileno()) os.replace(tmp, path) except BaseException: try: os.unlink(tmp) except OSError: pass raise except Exception: pass # best-effort — don't block gateway startup def load_hermes_dotenv( *, hermes_home: str | os.PathLike | None = None, project_env: str | os.PathLike | None = None, ) -> list[Path]: """Load Hermes environment files with user config taking precedence. Behavior: - `~/.hermes/.env` overrides stale shell-exported values when present. - project `.env` acts as a dev fallback and only fills missing values when the user env exists. - if no user env exists, the project `.env` also overrides stale shell vars. """ loaded: list[Path] = [] home_path = Path(hermes_home or os.getenv("HERMES_HOME", Path.home() / ".hermes")) user_env = home_path / ".env" project_env_path = Path(project_env) if project_env else None # Fix corrupted .env files before python-dotenv parses them (#8908). if user_env.exists(): _sanitize_env_file_if_needed(user_env) if user_env.exists(): _load_dotenv_with_fallback(user_env, override=True) loaded.append(user_env) if project_env_path and project_env_path.exists(): _load_dotenv_with_fallback(project_env_path, override=not loaded) loaded.append(project_env_path) return loaded