diff --git a/hermes_cli/config.py b/hermes_cli/config.py
index fc5bc929d..487708388 100644
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@@ -2384,7 +2384,13 @@ def save_config(config: Dict[str, Any]):
 
 
 def load_env() -> Dict[str, str]:
-    """Load environment variables from ~/.hermes/.env."""
+    """Load environment variables from ~/.hermes/.env.
+
+    Sanitizes lines before parsing so that corrupted files (e.g.
+    concatenated KEY=VALUE pairs on a single line) are handled
+    gracefully instead of producing mangled values such as duplicated
+    bot tokens. See #8908.
+    """
     env_path = get_env_path()
     env_vars = {}
 
@@ -2393,11 +2399,15 @@ def load_env() -> Dict[str, str]:
         # fail on UTF-8 .env files. Use explicit UTF-8 only on Windows.
         open_kw = {"encoding": "utf-8", "errors": "replace"} if _IS_WINDOWS else {}
         with open(env_path, **open_kw) as f:
-            for line in f:
-                line = line.strip()
-                if line and not line.startswith('#') and '=' in line:
-                    key, _, value = line.partition('=')
-                    env_vars[key.strip()] = value.strip().strip('"\'')
+            raw_lines = f.readlines()
+        # Sanitize before parsing: split concatenated lines & drop stale
+        # placeholders so corrupted .env files don't produce invalid tokens.
+        lines = _sanitize_env_lines(raw_lines)
+        for line in lines:
+            line = line.strip()
+            if line and not line.startswith('#') and '=' in line:
+                key, _, value = line.partition('=')
+                env_vars[key.strip()] = value.strip().strip('"\'')
 
     return env_vars
 
diff --git a/hermes_cli/env_loader.py b/hermes_cli/env_loader.py
index 0066d25b0..8d6a1449d 100644
--- a/hermes_cli/env_loader.py
+++ b/hermes_cli/env_loader.py
@@ -15,6 +15,78 @@ def _load_dotenv_with_fallback(path: Path, *, override: bool) -> None:
         load_dotenv(dotenv_path=path, override=override, encoding="latin-1")
 
 
+def _sanitize_env_file_if_needed(path: Path) -> None:
+    """Pre-sanitize a .env file before python-dotenv reads it.
+
+    python-dotenv does not handle corrupted lines where multiple
+    KEY=VALUE pairs are concatenated on a single line (missing newline).
+    This produces mangled values — e.g. a bot token duplicated 8×
+    (see #8908).
+
+    We delegate to ``hermes_cli.config._sanitize_env_lines`` which
+    already knows all valid Hermes env-var names and can split
+    concatenated lines correctly.
+
+    The file is rewritten (atomically, via a temp file in the same
+    directory) only when sanitization changes it.  It is decoded as
+    UTF-8 with a latin-1 fallback and written back in whichever
+    encoding decoded it, so bytes that are not valid UTF-8 survive the
+    rewrite.  Failures are logged and swallowed: this step is
+    best-effort and must never block startup.
+    """
+    if not path.exists():
+        return
+    try:
+        from hermes_cli.config import _sanitize_env_lines
+    except ImportError:
+        return  # early bootstrap — config module not available yet
+
+    try:
+        # Decode strictly: errors="replace" would turn undecodable bytes
+        # into U+FFFD and the rewrite below would make that permanent.
+        # latin-1 maps every byte, so the fallback round-trips losslessly
+        # (same fallback encoding as _load_dotenv_with_fallback).
+        encoding = "utf-8"
+        try:
+            with open(path, encoding=encoding) as f:
+                original = f.readlines()
+        except UnicodeDecodeError:
+            encoding = "latin-1"
+            with open(path, encoding=encoding) as f:
+                original = f.readlines()
+        sanitized = _sanitize_env_lines(original)
+        if sanitized == original:
+            return
+
+        import tempfile
+        fd, tmp = tempfile.mkstemp(
+            dir=str(path.parent), suffix=".tmp", prefix=".env_"
+        )
+        try:
+            with os.fdopen(fd, "w", encoding=encoding) as f:
+                f.writelines(sanitized)
+                f.flush()
+                os.fsync(f.fileno())
+            os.replace(tmp, path)
+        except BaseException:
+            # Don't leave the temp file behind on any failure (including
+            # KeyboardInterrupt), then let the error propagate.
+            try:
+                os.unlink(tmp)
+            except OSError:
+                pass
+            raise
+    except Exception as exc:
+        # Best-effort — don't block gateway startup.  Log only the
+        # exception type; its message could echo values from the file.
+        import logging
+        logging.getLogger(__name__).warning(
+            "Skipping .env sanitization for %s (%s)",
+            path,
+            type(exc).__name__,
+        )
+
+
 def load_hermes_dotenv(
     *,
     hermes_home: str | os.PathLike | None = None,
@@ -34,6 +106,8 @@ def load_hermes_dotenv(
     user_env = home_path / ".env"
     project_env_path = Path(project_env) if project_env else None
 
     if user_env.exists():
+        # Fix corrupted .env files before python-dotenv parses them (#8908).
+        _sanitize_env_file_if_needed(user_env)
         _load_dotenv_with_fallback(user_env, override=True)
         loaded.append(user_env)
diff --git a/tests/test_env_sanitize_on_load.py b/tests/test_env_sanitize_on_load.py
new file mode 100644
index 000000000..c29f5ecc5
--- /dev/null
+++ b/tests/test_env_sanitize_on_load.py
@@ -0,0 +1,94 @@
+"""Tests for .env sanitization during load to prevent token duplication (#8908)."""
+
+from unittest.mock import patch
+
+TOKEN = "8356550917:AAGGEkzg06Hrc3Hjb3Sa1jkGVDOdU_lYy2Q"
+
+
+def test_load_env_sanitizes_concatenated_lines(tmp_path):
+    """Verify load_env() splits concatenated KEY=VALUE pairs.
+
+    Reproduces the scenario from #8908 where a corrupted .env file
+    contained multiple tokens on a single line, causing the bot token
+    to be duplicated 8 times.
+    """
+    from hermes_cli.config import load_env
+
+    env_path = tmp_path / ".env"
+    # Simulate concatenated line: TOKEN=xxx followed immediately by another key
+    env_path.write_text(
+        f"TELEGRAM_BOT_TOKEN={TOKEN}ANTHROPIC_API_KEY=sk-ant-test123\n",
+        encoding="utf-8",
+    )
+
+    with patch("hermes_cli.config.get_env_path", return_value=env_path):
+        result = load_env()
+
+    assert result.get("TELEGRAM_BOT_TOKEN") == TOKEN, (
+        f"Token should be exactly '{TOKEN}', got '{result.get('TELEGRAM_BOT_TOKEN')}'"
+    )
+    assert result.get("ANTHROPIC_API_KEY") == "sk-ant-test123"
+
+
+def test_load_env_normal_file_unchanged(tmp_path):
+    """A well-formed .env file should be parsed identically."""
+    from hermes_cli.config import load_env
+
+    env_path = tmp_path / ".env"
+    env_path.write_text(
+        "TELEGRAM_BOT_TOKEN=mytoken123\n"
+        "ANTHROPIC_API_KEY=sk-ant-key\n"
+        "# comment\n"
+        "\n"
+        "OPENAI_API_KEY=sk-openai\n",
+        encoding="utf-8",
+    )
+
+    with patch("hermes_cli.config.get_env_path", return_value=env_path):
+        result = load_env()
+
+    assert result["TELEGRAM_BOT_TOKEN"] == "mytoken123"
+    assert result["ANTHROPIC_API_KEY"] == "sk-ant-key"
+    assert result["OPENAI_API_KEY"] == "sk-openai"
+
+
+def test_env_loader_sanitizes_before_dotenv(tmp_path):
+    """Verify env_loader._sanitize_env_file_if_needed fixes corrupted files."""
+    from hermes_cli.env_loader import _sanitize_env_file_if_needed
+
+    env_path = tmp_path / ".env"
+    env_path.write_text(
+        f"TELEGRAM_BOT_TOKEN={TOKEN}ANTHROPIC_API_KEY=sk-ant-test\n",
+        encoding="utf-8",
+    )
+
+    _sanitize_env_file_if_needed(env_path)
+
+    with open(env_path, encoding="utf-8") as f:
+        lines = f.readlines()
+    # Should be split into two separate lines
+    assert len(lines) == 2, f"Expected 2 lines, got {len(lines)}: {lines}"
+    assert lines[0].startswith("TELEGRAM_BOT_TOKEN=")
+    assert lines[1].startswith("ANTHROPIC_API_KEY=")
+    # Token should not contain the second key
+    parsed_token = lines[0].strip().split("=", 1)[1]
+    assert parsed_token == TOKEN
+
+
+def test_env_loader_sanitize_preserves_non_utf8_bytes(tmp_path):
+    """Rewriting a non-UTF-8 file must not replace its bytes with U+FFFD."""
+    from hermes_cli.env_loader import _sanitize_env_file_if_needed
+
+    env_path = tmp_path / ".env"
+    concatenated = f"TELEGRAM_BOT_TOKEN={TOKEN}ANTHROPIC_API_KEY".encode("ascii")
+    # 0xE9 is "é" in latin-1 but is not valid UTF-8 on its own.
+    env_path.write_bytes(
+        b"OPENAI_API_KEY=sk-caf\xe9\n" + concatenated + b"=sk-ant-test\n"
+    )
+
+    _sanitize_env_file_if_needed(env_path)
+
+    data = env_path.read_bytes()
+    assert concatenated not in data, "file should have been rewritten"
+    assert b"OPENAI_API_KEY=sk-caf\xe9" in data
+    assert b"\xef\xbf\xbd" not in data  # UTF-8 encoding of U+FFFD