fix: sanitize .env before loading to prevent token duplication (#8908)

When .env files become corrupted (e.g. concatenated KEY=VALUE pairs on
a single line due to concurrent writes or encoding issues), both
python-dotenv and load_env() would parse the entire concatenated string
as a single value. This caused bot tokens to appear duplicated up to 8×,
triggering InvalidToken errors from the Telegram API.

Root cause: _sanitize_env_lines() — which correctly splits concatenated
lines — was only called during save_env_value() writes, not during reads.

Fix:
- load_env() now calls _sanitize_env_lines() before parsing
- env_loader.load_hermes_dotenv() sanitizes the .env file on disk
  before python-dotenv reads it, so os.getenv() also returns clean values
- Added tests reproducing the exact corruption pattern from #8908

Closes #8908
This commit is contained in:
Mil Wang (from Dev Box) 2026-04-13 18:41:12 +08:00 committed by Teknium
parent e77f135ed8
commit e469f3f3db
3 changed files with 159 additions and 6 deletions

View file

@@ -2384,7 +2384,13 @@ def save_config(config: Dict[str, Any]):
def load_env() -> Dict[str, str]:
"""Load environment variables from ~/.hermes/.env."""
"""Load environment variables from ~/.hermes/.env.
Sanitizes lines before parsing so that corrupted files (e.g.
concatenated KEY=VALUE pairs on a single line) are handled
gracefully instead of producing mangled values such as duplicated
bot tokens. See #8908.
"""
env_path = get_env_path()
env_vars = {}
@@ -2393,11 +2399,15 @@ def load_env() -> Dict[str, str]:
# fail on UTF-8 .env files. Use explicit UTF-8 only on Windows.
open_kw = {"encoding": "utf-8", "errors": "replace"} if _IS_WINDOWS else {}
with open(env_path, **open_kw) as f:
for line in f:
line = line.strip()
if line and not line.startswith('#') and '=' in line:
key, _, value = line.partition('=')
env_vars[key.strip()] = value.strip().strip('"\'')
raw_lines = f.readlines()
# Sanitize before parsing: split concatenated lines & drop stale
# placeholders so corrupted .env files don't produce invalid tokens.
lines = _sanitize_env_lines(raw_lines)
for line in lines:
line = line.strip()
if line and not line.startswith('#') and '=' in line:
key, _, value = line.partition('=')
env_vars[key.strip()] = value.strip().strip('"\'')
return env_vars