mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
Merge branch 'main' of github.com:NousResearch/hermes-agent into feat/ink-refactor
This commit is contained in:
commit
496bfb3c59
7 changed files with 207 additions and 2 deletions
|
|
@ -848,8 +848,7 @@ class SlashCommandCompleter(Completer):
|
|||
return None
|
||||
return word
|
||||
|
||||
@staticmethod
|
||||
def _context_completions(word: str, limit: int = 30):
|
||||
def _context_completions(self, word: str, limit: int = 30):
|
||||
"""Yield Claude Code-style @ context completions.
|
||||
|
||||
Bare ``@`` or ``@partial`` shows static references and matching
|
||||
|
|
|
|||
|
|
@ -2766,6 +2766,47 @@ def sanitize_env_file() -> int:
|
|||
return fixes
|
||||
|
||||
|
||||
def _check_non_ascii_credential(key: str, value: str) -> str:
|
||||
"""Warn and strip non-ASCII characters from credential values.
|
||||
|
||||
API keys and tokens must be pure ASCII — they are sent as HTTP header
|
||||
values which httpx/httpcore encode as ASCII. Non-ASCII characters
|
||||
(commonly introduced by copy-pasting from rich-text editors or PDFs
|
||||
that substitute lookalike Unicode glyphs for ASCII letters) cause
|
||||
``UnicodeEncodeError: 'ascii' codec can't encode character`` at
|
||||
request time.
|
||||
|
||||
Returns the sanitized (ASCII-only) value. Prints a warning if any
|
||||
non-ASCII characters were found and removed.
|
||||
"""
|
||||
try:
|
||||
value.encode("ascii")
|
||||
return value # all ASCII — nothing to do
|
||||
except UnicodeEncodeError:
|
||||
pass
|
||||
|
||||
# Build a readable list of the offending characters
|
||||
bad_chars: list[str] = []
|
||||
for i, ch in enumerate(value):
|
||||
if ord(ch) > 127:
|
||||
bad_chars.append(f" position {i}: {ch!r} (U+{ord(ch):04X})")
|
||||
sanitized = value.encode("ascii", errors="ignore").decode("ascii")
|
||||
|
||||
import sys
|
||||
print(
|
||||
f"\n Warning: {key} contains non-ASCII characters that will break API requests.\n"
|
||||
f" This usually happens when copy-pasting from a PDF, rich-text editor,\n"
|
||||
f" or web page that substitutes lookalike Unicode glyphs for ASCII letters.\n"
|
||||
f"\n"
|
||||
+ "\n".join(f" {line}" for line in bad_chars[:5])
|
||||
+ ("\n ... and more" if len(bad_chars) > 5 else "")
|
||||
+ f"\n\n The non-ASCII characters have been stripped automatically.\n"
|
||||
f" If authentication fails, re-copy the key from the provider's dashboard.\n",
|
||||
file=sys.stderr,
|
||||
)
|
||||
return sanitized
|
||||
|
||||
|
||||
def save_env_value(key: str, value: str):
|
||||
"""Save or update a value in ~/.hermes/.env."""
|
||||
if is_managed():
|
||||
|
|
@ -2774,6 +2815,8 @@ def save_env_value(key: str, value: str):
|
|||
if not _ENV_VAR_NAME_RE.match(key):
|
||||
raise ValueError(f"Invalid environment variable name: {key!r}")
|
||||
value = value.replace("\n", "").replace("\r", "")
|
||||
# API keys / tokens must be ASCII — strip non-ASCII with a warning.
|
||||
value = _check_non_ascii_credential(key, value)
|
||||
ensure_hermes_home()
|
||||
env_path = get_env_path()
|
||||
|
||||
|
|
|
|||
|
|
@ -8,11 +8,40 @@ from pathlib import Path
|
|||
from dotenv import load_dotenv
|
||||
|
||||
|
||||
# Env var name suffixes that indicate credential values. These are the
|
||||
# only env vars whose values we sanitize on load — we must not silently
|
||||
# alter arbitrary user env vars, but credentials are known to require
|
||||
# pure ASCII (they become HTTP header values).
|
||||
_CREDENTIAL_SUFFIXES = ("_API_KEY", "_TOKEN", "_SECRET", "_KEY")
|
||||
|
||||
|
||||
def _sanitize_loaded_credentials() -> None:
|
||||
"""Strip non-ASCII characters from credential env vars in os.environ.
|
||||
|
||||
Called after dotenv loads so the rest of the codebase never sees
|
||||
non-ASCII API keys. Only touches env vars whose names end with
|
||||
known credential suffixes (``_API_KEY``, ``_TOKEN``, etc.).
|
||||
"""
|
||||
for key, value in list(os.environ.items()):
|
||||
if not any(key.endswith(suffix) for suffix in _CREDENTIAL_SUFFIXES):
|
||||
continue
|
||||
try:
|
||||
value.encode("ascii")
|
||||
except UnicodeEncodeError:
|
||||
os.environ[key] = value.encode("ascii", errors="ignore").decode("ascii")
|
||||
|
||||
|
||||
def _load_dotenv_with_fallback(path: Path, *, override: bool) -> None:
    """Load *path* via dotenv, tolerating non-UTF-8 files, then scrub keys.

    Tries UTF-8 first; a ``UnicodeDecodeError`` triggers a latin-1 retry
    (latin-1 assigns a character to every byte, so the retry decodes).
    """
    def _attempt(encoding: str) -> None:
        load_dotenv(dotenv_path=path, override=override, encoding=encoding)

    try:
        _attempt("utf-8")
    except UnicodeDecodeError:
        _attempt("latin-1")
    # Credential env vars loaded just now may carry non-ASCII characters
    # — typically Unicode lookalike glyphs pasted from PDFs or rich-text
    # editors (e.g. ʋ U+028B for v). API keys become HTTP header values,
    # which httpx encodes as ASCII, so scrub them immediately.
    _sanitize_loaded_credentials()
|
||||
|
||||
|
||||
def _sanitize_env_file_if_needed(path: Path) -> None:
|
||||
|
|
|
|||
|
|
@ -358,6 +358,7 @@ def _add_rotating_handler(
|
|||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
handler = _ManagedRotatingFileHandler(
|
||||
str(path), maxBytes=max_bytes, backupCount=backup_count,
|
||||
encoding="utf-8",
|
||||
)
|
||||
handler.setLevel(level)
|
||||
handler.setFormatter(formatter)
|
||||
|
|
|
|||
23
run_agent.py
23
run_agent.py
|
|
@ -8988,12 +8988,35 @@ class AIAgent:
|
|||
if isinstance(_default_headers, dict):
|
||||
_headers_sanitized = _sanitize_structure_non_ascii(_default_headers)
|
||||
|
||||
# Sanitize the API key — non-ASCII characters in
|
||||
# credentials (e.g. ʋ instead of v from a bad
|
||||
# copy-paste) cause httpx to fail when encoding
|
||||
# the Authorization header as ASCII. This is the
|
||||
# most common cause of persistent UnicodeEncodeError
|
||||
# that survives message/tool sanitization (#6843).
|
||||
_credential_sanitized = False
|
||||
_raw_key = getattr(self, "api_key", None) or ""
|
||||
if _raw_key:
|
||||
_clean_key = _strip_non_ascii(_raw_key)
|
||||
if _clean_key != _raw_key:
|
||||
self.api_key = _clean_key
|
||||
if isinstance(getattr(self, "_client_kwargs", None), dict):
|
||||
self._client_kwargs["api_key"] = _clean_key
|
||||
_credential_sanitized = True
|
||||
self._vprint(
|
||||
f"{self.log_prefix}⚠️ API key contained non-ASCII characters "
|
||||
f"(bad copy-paste?) — stripped them. If auth fails, "
|
||||
f"re-copy the key from your provider's dashboard.",
|
||||
force=True,
|
||||
)
|
||||
|
||||
if (
|
||||
_messages_sanitized
|
||||
or _prefill_sanitized
|
||||
or _tools_sanitized
|
||||
or _system_sanitized
|
||||
or _headers_sanitized
|
||||
or _credential_sanitized
|
||||
):
|
||||
self._unicode_sanitization_passes += 1
|
||||
self._vprint(
|
||||
|
|
|
|||
83
tests/hermes_cli/test_non_ascii_credential.py
Normal file
83
tests/hermes_cli/test_non_ascii_credential.py
Normal file
|
|
@ -0,0 +1,83 @@
|
|||
"""Tests for non-ASCII credential detection and sanitization.
|
||||
|
||||
Covers the fix for issue #6843 — API keys containing Unicode lookalike
|
||||
characters (e.g. ʋ U+028B instead of v) cause UnicodeEncodeError when
|
||||
httpx tries to encode the Authorization header as ASCII.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import tempfile
|
||||
|
||||
import pytest
|
||||
|
||||
from hermes_cli.config import _check_non_ascii_credential
|
||||
|
||||
|
||||
class TestCheckNonAsciiCredential:
    """Behavioral tests for _check_non_ascii_credential()."""

    def test_ascii_key_unchanged(self):
        value = "sk-proj-{}".format("a" * 100)
        assert _check_non_ascii_credential("TEST_API_KEY", value) == value

    def test_strips_unicode_v_lookalike(self, capsys):
        """The exact scenario from issue #6843: ʋ instead of v."""
        value = "sk-proj-abc\u028bdef"
        sanitized = _check_non_ascii_credential("OPENROUTER_API_KEY", value)
        assert sanitized == "sk-proj-abcdef"
        assert "\u028b" not in sanitized
        # A warning must land on stderr.
        assert "non-ASCII" in capsys.readouterr().err

    def test_strips_multiple_non_ascii(self, capsys):
        sanitized = _check_non_ascii_credential("OPENAI_API_KEY", "sk-proj-a\u028bb\u00e9cd")
        assert sanitized == "sk-proj-abcd"
        # The warning names the offending code point.
        assert "U+028B" in capsys.readouterr().err

    def test_empty_key(self):
        assert _check_non_ascii_credential("TEST_KEY", "") == ""

    def test_all_ascii_no_warning(self, capsys):
        sanitized = _check_non_ascii_credential("KEY", "all-ascii-value-123")
        assert sanitized == "all-ascii-value-123"
        assert capsys.readouterr().err == ""
|
||||
|
||||
|
||||
class TestEnvLoaderSanitization:
    """Behavioral tests for env_loader._sanitize_loaded_credentials()."""

    def test_strips_non_ascii_from_api_key(self, monkeypatch):
        from hermes_cli.env_loader import _sanitize_loaded_credentials

        monkeypatch.setenv("OPENROUTER_API_KEY", "sk-proj-abc\u028bdef")
        _sanitize_loaded_credentials()
        assert os.getenv("OPENROUTER_API_KEY") == "sk-proj-abcdef"

    def test_strips_non_ascii_from_token(self, monkeypatch):
        from hermes_cli.env_loader import _sanitize_loaded_credentials

        monkeypatch.setenv("DISCORD_BOT_TOKEN", "tok\u00e9nvalue")
        _sanitize_loaded_credentials()
        assert os.getenv("DISCORD_BOT_TOKEN") == "toknvalue"

    def test_ignores_non_credential_vars(self, monkeypatch):
        from hermes_cli.env_loader import _sanitize_loaded_credentials

        monkeypatch.setenv("MY_UNICODE_VAR", "h\u00e9llo w\u00f6rld")
        _sanitize_loaded_credentials()
        # Name lacks a credential suffix — value must be left untouched.
        assert os.getenv("MY_UNICODE_VAR") == "h\u00e9llo w\u00f6rld"

    def test_ascii_credentials_untouched(self, monkeypatch):
        from hermes_cli.env_loader import _sanitize_loaded_credentials

        monkeypatch.setenv("OPENAI_API_KEY", "sk-proj-allascii123")
        _sanitize_loaded_credentials()
        assert os.getenv("OPENAI_API_KEY") == "sk-proj-allascii123"
|
||||
|
|
@ -142,6 +142,33 @@ class TestSurrogateVsAsciiSanitization:
|
|||
assert _sanitize_messages_surrogates(messages) is False
|
||||
|
||||
|
||||
class TestApiKeyNonAsciiSanitization:
    """Tests for API key sanitization in the UnicodeEncodeError recovery.

    Root cause of issue #6843: a non-ASCII character (ʋ U+028B) in the
    API key makes httpx fail while encoding the Authorization header as
    ASCII. The recovery block must strip non-ASCII from the key.
    """

    def test_strip_non_ascii_from_api_key(self):
        """_strip_non_ascii removes ʋ from an API key string."""
        assert _strip_non_ascii("sk-proj-abc\u028bdef") == "sk-proj-abcdef"

    def test_api_key_at_position_153(self):
        """Reproduce the exact error: ʋ at position 153 in 'Bearer <key>'."""
        bad_key = "sk-proj-" + "a" * 138 + "\u028b" + "bcd"
        header_value = "Bearer " + bad_key
        # httpx encodes headers as ASCII — this is exactly where it dies:
        with pytest.raises(UnicodeEncodeError) as exc_info:
            header_value.encode("ascii")
        assert exc_info.value.start == 153
        # Once the key is sanitized, encoding succeeds.
        clean_header = "Bearer " + _strip_non_ascii(bad_key)
        clean_header.encode("ascii")  # should not raise
|
||||
|
||||
|
||||
class TestSanitizeToolsNonAscii:
|
||||
"""Tests for _sanitize_tools_non_ascii."""
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue