mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
fix(env_loader): warn when non-ASCII stripped from credential env vars (#13300)
Load-time sanitizer silently removed non-ASCII codepoints from any env var ending in _API_KEY / _TOKEN / _SECRET / _KEY, turning copy-paste artifacts (Unicode lookalikes, ZWSP, NBSP) into opaque provider-side API_KEY_INVALID errors. Warn once per key to stderr with the offending codepoints (U+XXXX) and guidance to re-copy from the provider dashboard.
This commit is contained in:
parent
5125a78283
commit
fdd0ecaf13
2 changed files with 102 additions and 3 deletions
|
|
@ -3,6 +3,7 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
|
@ -14,6 +15,26 @@ from dotenv import load_dotenv
|
|||
# pure ASCII (they become HTTP header values).
|
||||
_CREDENTIAL_SUFFIXES = ("_API_KEY", "_TOKEN", "_SECRET", "_KEY")
|
||||
|
||||
# Names we've already warned about during this process, so repeated
|
||||
# load_hermes_dotenv() calls (user env + project env, gateway hot-reload,
|
||||
# tests) don't spam the same warning multiple times.
|
||||
_WARNED_KEYS: set[str] = set()
|
||||
|
||||
|
||||
def _format_offending_chars(value: str, limit: int = 3) -> str:
|
||||
"""Return a compact 'U+XXXX ('c'), ...' summary of non-ASCII codepoints."""
|
||||
seen: list[str] = []
|
||||
for ch in value:
|
||||
if ord(ch) > 127:
|
||||
label = f"U+{ord(ch):04X}"
|
||||
if ch.isprintable():
|
||||
label += f" ({ch!r})"
|
||||
if label not in seen:
|
||||
seen.append(label)
|
||||
if len(seen) >= limit:
|
||||
break
|
||||
return ", ".join(seen)
|
||||
|
||||
|
||||
def _sanitize_loaded_credentials() -> None:
|
||||
"""Strip non-ASCII characters from credential env vars in os.environ.
|
||||
|
|
@ -21,14 +42,42 @@ def _sanitize_loaded_credentials() -> None:
|
|||
Called after dotenv loads so the rest of the codebase never sees
|
||||
non-ASCII API keys. Only touches env vars whose names end with
|
||||
known credential suffixes (``_API_KEY``, ``_TOKEN``, etc.).
|
||||
|
||||
Emits a one-line warning to stderr when characters are stripped.
|
||||
Silent stripping would mask copy-paste corruption (Unicode lookalike
|
||||
glyphs from PDFs / rich-text editors, ZWSP from web pages) as opaque
|
||||
provider-side "invalid API key" errors (see #6843).
|
||||
"""
|
||||
for key, value in list(os.environ.items()):
|
||||
if not any(key.endswith(suffix) for suffix in _CREDENTIAL_SUFFIXES):
|
||||
continue
|
||||
try:
|
||||
value.encode("ascii")
|
||||
continue
|
||||
except UnicodeEncodeError:
|
||||
os.environ[key] = value.encode("ascii", errors="ignore").decode("ascii")
|
||||
pass
|
||||
cleaned = value.encode("ascii", errors="ignore").decode("ascii")
|
||||
os.environ[key] = cleaned
|
||||
if key in _WARNED_KEYS:
|
||||
continue
|
||||
_WARNED_KEYS.add(key)
|
||||
stripped = len(value) - len(cleaned)
|
||||
detail = _format_offending_chars(value) or "non-printable"
|
||||
print(
|
||||
f" Warning: {key} contained {stripped} non-ASCII character"
|
||||
f"{'s' if stripped != 1 else ''} ({detail}) — stripped so the "
|
||||
f"key can be sent as an HTTP header.",
|
||||
file=sys.stderr,
|
||||
)
|
||||
print(
|
||||
" This usually means the key was copy-pasted from a PDF, "
|
||||
"rich-text editor, or web page that substituted lookalike\n"
|
||||
" Unicode glyphs for ASCII letters. If authentication fails "
|
||||
"(e.g. \"API key not valid\"), re-copy the key from the\n"
|
||||
" provider's dashboard and run `hermes setup` (or edit the "
|
||||
".env file in a plain-text editor).",
|
||||
file=sys.stderr,
|
||||
)
|
||||
|
||||
|
||||
def _load_dotenv_with_fallback(path: Path, *, override: bool) -> None:
|
||||
|
|
|
|||
|
|
@ -54,15 +54,17 @@ class TestEnvLoaderSanitization:
|
|||
"""Tests for _sanitize_loaded_credentials in env_loader."""
|
||||
|
||||
def test_strips_non_ascii_from_api_key(self, monkeypatch):
|
||||
from hermes_cli.env_loader import _sanitize_loaded_credentials
|
||||
from hermes_cli.env_loader import _sanitize_loaded_credentials, _WARNED_KEYS
|
||||
|
||||
_WARNED_KEYS.discard("OPENROUTER_API_KEY")
|
||||
monkeypatch.setenv("OPENROUTER_API_KEY", "sk-proj-abcʋdef")
|
||||
_sanitize_loaded_credentials()
|
||||
assert os.environ["OPENROUTER_API_KEY"] == "sk-proj-abcdef"
|
||||
|
||||
def test_strips_non_ascii_from_token(self, monkeypatch):
|
||||
from hermes_cli.env_loader import _sanitize_loaded_credentials
|
||||
from hermes_cli.env_loader import _sanitize_loaded_credentials, _WARNED_KEYS
|
||||
|
||||
_WARNED_KEYS.discard("DISCORD_BOT_TOKEN")
|
||||
monkeypatch.setenv("DISCORD_BOT_TOKEN", "tokénvalue")
|
||||
_sanitize_loaded_credentials()
|
||||
assert os.environ["DISCORD_BOT_TOKEN"] == "toknvalue"
|
||||
|
|
@ -81,3 +83,51 @@ class TestEnvLoaderSanitization:
|
|||
monkeypatch.setenv("OPENAI_API_KEY", "sk-proj-allascii123")
|
||||
_sanitize_loaded_credentials()
|
||||
assert os.environ["OPENAI_API_KEY"] == "sk-proj-allascii123"
|
||||
|
||||
def test_warns_to_stderr_when_stripping(self, monkeypatch, capsys):
|
||||
"""Silent stripping masks bad keys as opaque provider 400s (see #6843 fallout).
|
||||
|
||||
Users must be told when a copy-paste artifact was removed so they
|
||||
can re-copy the key if authentication fails.
|
||||
"""
|
||||
from hermes_cli.env_loader import _sanitize_loaded_credentials, _WARNED_KEYS
|
||||
|
||||
_WARNED_KEYS.discard("GOOGLE_API_KEY")
|
||||
monkeypatch.setenv("GOOGLE_API_KEY", "AIzaSy\u200babcdef") # ZWSP mid-key
|
||||
_sanitize_loaded_credentials()
|
||||
assert os.environ["GOOGLE_API_KEY"] == "AIzaSyabcdef"
|
||||
|
||||
captured = capsys.readouterr()
|
||||
assert "GOOGLE_API_KEY" in captured.err
|
||||
assert "U+200B" in captured.err
|
||||
assert "re-copy" in captured.err.lower()
|
||||
|
||||
def test_warning_fires_only_once_per_key(self, monkeypatch, capsys):
|
||||
"""Repeated loads (user env + project env) must not double-warn."""
|
||||
from hermes_cli.env_loader import _sanitize_loaded_credentials, _WARNED_KEYS
|
||||
|
||||
_WARNED_KEYS.discard("GEMINI_API_KEY")
|
||||
monkeypatch.setenv("GEMINI_API_KEY", "AIza\u028bbad")
|
||||
_sanitize_loaded_credentials()
|
||||
first = capsys.readouterr().err
|
||||
|
||||
monkeypatch.setenv("GEMINI_API_KEY", "AIza\u028bbad2")
|
||||
_sanitize_loaded_credentials()
|
||||
second = capsys.readouterr().err
|
||||
|
||||
assert "GEMINI_API_KEY" in first
|
||||
assert second == "" # no repeat warning
|
||||
|
||||
def test_ascii_control_chars_not_stripped(self, monkeypatch, capsys):
|
||||
"""ASCII control bytes (e.g. ESC 0x1B from terminal paste) are NOT non-ASCII.
|
||||
|
||||
This is intentional — they're valid ASCII for HTTP headers even if the
|
||||
provider rejects them. Documents the scope of the sanitizer.
|
||||
"""
|
||||
from hermes_cli.env_loader import _sanitize_loaded_credentials, _WARNED_KEYS
|
||||
|
||||
_WARNED_KEYS.clear()
|
||||
monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-ant\x1bapi-key")
|
||||
_sanitize_loaded_credentials()
|
||||
assert os.environ["ANTHROPIC_API_KEY"] == "sk-ant\x1bapi-key"
|
||||
assert capsys.readouterr().err == ""
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue