Merge branch 'main' of github.com:NousResearch/hermes-agent into feat/ink-refactor

This commit is contained in:
Brooklyn Nicholson 2026-04-14 22:30:22 -05:00
commit 496bfb3c59
7 changed files with 207 additions and 2 deletions

View file

@ -848,8 +848,7 @@ class SlashCommandCompleter(Completer):
return None
return word
@staticmethod
def _context_completions(word: str, limit: int = 30):
def _context_completions(self, word: str, limit: int = 30):
"""Yield Claude Code-style @ context completions.
Bare ``@`` or ``@partial`` shows static references and matching

View file

@ -2766,6 +2766,47 @@ def sanitize_env_file() -> int:
return fixes
def _check_non_ascii_credential(key: str, value: str) -> str:
"""Warn and strip non-ASCII characters from credential values.
API keys and tokens must be pure ASCII they are sent as HTTP header
values which httpx/httpcore encode as ASCII. Non-ASCII characters
(commonly introduced by copy-pasting from rich-text editors or PDFs
that substitute lookalike Unicode glyphs for ASCII letters) cause
``UnicodeEncodeError: 'ascii' codec can't encode character`` at
request time.
Returns the sanitized (ASCII-only) value. Prints a warning if any
non-ASCII characters were found and removed.
"""
try:
value.encode("ascii")
return value # all ASCII — nothing to do
except UnicodeEncodeError:
pass
# Build a readable list of the offending characters
bad_chars: list[str] = []
for i, ch in enumerate(value):
if ord(ch) > 127:
bad_chars.append(f" position {i}: {ch!r} (U+{ord(ch):04X})")
sanitized = value.encode("ascii", errors="ignore").decode("ascii")
import sys
print(
f"\n Warning: {key} contains non-ASCII characters that will break API requests.\n"
f" This usually happens when copy-pasting from a PDF, rich-text editor,\n"
f" or web page that substitutes lookalike Unicode glyphs for ASCII letters.\n"
f"\n"
+ "\n".join(f" {line}" for line in bad_chars[:5])
+ ("\n ... and more" if len(bad_chars) > 5 else "")
+ f"\n\n The non-ASCII characters have been stripped automatically.\n"
f" If authentication fails, re-copy the key from the provider's dashboard.\n",
file=sys.stderr,
)
return sanitized
def save_env_value(key: str, value: str):
"""Save or update a value in ~/.hermes/.env."""
if is_managed():
@ -2774,6 +2815,8 @@ def save_env_value(key: str, value: str):
if not _ENV_VAR_NAME_RE.match(key):
raise ValueError(f"Invalid environment variable name: {key!r}")
value = value.replace("\n", "").replace("\r", "")
# API keys / tokens must be ASCII — strip non-ASCII with a warning.
value = _check_non_ascii_credential(key, value)
ensure_hermes_home()
env_path = get_env_path()

View file

@ -8,11 +8,40 @@ from pathlib import Path
from dotenv import load_dotenv
# Env var name suffixes that indicate credential values. These are the
# only env vars whose values we sanitize on load — we must not silently
# alter arbitrary user env vars, but credentials are known to require
# pure ASCII (they become HTTP header values).
_CREDENTIAL_SUFFIXES = ("_API_KEY", "_TOKEN", "_SECRET", "_KEY")
def _sanitize_loaded_credentials() -> None:
"""Strip non-ASCII characters from credential env vars in os.environ.
Called after dotenv loads so the rest of the codebase never sees
non-ASCII API keys. Only touches env vars whose names end with
known credential suffixes (``_API_KEY``, ``_TOKEN``, etc.).
"""
for key, value in list(os.environ.items()):
if not any(key.endswith(suffix) for suffix in _CREDENTIAL_SUFFIXES):
continue
try:
value.encode("ascii")
except UnicodeEncodeError:
os.environ[key] = value.encode("ascii", errors="ignore").decode("ascii")
def _load_dotenv_with_fallback(path: Path, *, override: bool) -> None:
    """Load a dotenv file as UTF-8, retrying as latin-1 on decode failure.

    latin-1 maps every possible byte to a character, so the fallback call
    cannot itself raise ``UnicodeDecodeError`` (though the decoded values
    may contain mojibake for genuinely non-latin-1 files).

    Args:
        path: Path of the dotenv file to load.
        override: Passed through to ``load_dotenv``; whether loaded values
            replace variables already present in the environment.
    """
    try:
        load_dotenv(dotenv_path=path, override=override, encoding="utf-8")
    except UnicodeDecodeError:
        load_dotenv(dotenv_path=path, override=override, encoding="latin-1")
    # Strip non-ASCII characters from credential env vars that were just
    # loaded. API keys must be pure ASCII since they're sent as HTTP
    # header values (httpx encodes headers as ASCII). Non-ASCII chars
    # typically come from copy-pasting keys from PDFs or rich-text editors
    # that substitute Unicode lookalike glyphs (e.g. ʋ U+028B for v).
    _sanitize_loaded_credentials()
def _sanitize_env_file_if_needed(path: Path) -> None:

View file

@ -358,6 +358,7 @@ def _add_rotating_handler(
path.parent.mkdir(parents=True, exist_ok=True)
handler = _ManagedRotatingFileHandler(
str(path), maxBytes=max_bytes, backupCount=backup_count,
encoding="utf-8",
)
handler.setLevel(level)
handler.setFormatter(formatter)

View file

@ -8988,12 +8988,35 @@ class AIAgent:
if isinstance(_default_headers, dict):
_headers_sanitized = _sanitize_structure_non_ascii(_default_headers)
# Sanitize the API key — non-ASCII characters in
# credentials (e.g. ʋ instead of v from a bad
# copy-paste) cause httpx to fail when encoding
# the Authorization header as ASCII. This is the
# most common cause of persistent UnicodeEncodeError
# that survives message/tool sanitization (#6843).
_credential_sanitized = False
_raw_key = getattr(self, "api_key", None) or ""
if _raw_key:
_clean_key = _strip_non_ascii(_raw_key)
if _clean_key != _raw_key:
self.api_key = _clean_key
if isinstance(getattr(self, "_client_kwargs", None), dict):
self._client_kwargs["api_key"] = _clean_key
_credential_sanitized = True
self._vprint(
f"{self.log_prefix}⚠️ API key contained non-ASCII characters "
f"(bad copy-paste?) — stripped them. If auth fails, "
f"re-copy the key from your provider's dashboard.",
force=True,
)
if (
_messages_sanitized
or _prefill_sanitized
or _tools_sanitized
or _system_sanitized
or _headers_sanitized
or _credential_sanitized
):
self._unicode_sanitization_passes += 1
self._vprint(

View file

@ -0,0 +1,83 @@
"""Tests for non-ASCII credential detection and sanitization.
Covers the fix for issue #6843 — API keys containing Unicode lookalike
characters (e.g. ʋ U+028B instead of v) cause UnicodeEncodeError when
httpx tries to encode the Authorization header as ASCII.
"""
import os
import sys
import tempfile
import pytest
from hermes_cli.config import _check_non_ascii_credential
class TestCheckNonAsciiCredential:
    """Tests for _check_non_ascii_credential()."""

    def test_ascii_key_unchanged(self):
        original = "sk-proj-" + "a" * 100
        assert _check_non_ascii_credential("TEST_API_KEY", original) == original

    def test_strips_unicode_v_lookalike(self, capsys):
        """The exact scenario from issue #6843: ʋ instead of v."""
        tainted = "sk-proj-abc" + "ʋ" + "def"  # \u028b
        cleaned = _check_non_ascii_credential("OPENROUTER_API_KEY", tainted)
        assert cleaned == "sk-proj-abcdef"
        assert "ʋ" not in cleaned
        # A warning must have gone to stderr.
        assert "non-ASCII" in capsys.readouterr().err

    def test_strips_multiple_non_ascii(self, capsys):
        cleaned = _check_non_ascii_credential("OPENAI_API_KEY", "sk-proj-aʋbécd")
        assert cleaned == "sk-proj-abcd"
        # The warning reports each offending character's codepoint.
        assert "U+028B" in capsys.readouterr().err

    def test_empty_key(self):
        assert _check_non_ascii_credential("TEST_KEY", "") == ""

    def test_all_ascii_no_warning(self, capsys):
        cleaned = _check_non_ascii_credential("KEY", "all-ascii-value-123")
        assert cleaned == "all-ascii-value-123"
        # No offenders means no stderr output at all.
        assert capsys.readouterr().err == ""
class TestEnvLoaderSanitization:
    """Tests for _sanitize_loaded_credentials in env_loader."""

    def test_strips_non_ascii_from_api_key(self, monkeypatch):
        from hermes_cli.env_loader import _sanitize_loaded_credentials

        monkeypatch.setenv("OPENROUTER_API_KEY", "sk-proj-abcʋdef")
        _sanitize_loaded_credentials()
        assert os.environ.get("OPENROUTER_API_KEY") == "sk-proj-abcdef"

    def test_strips_non_ascii_from_token(self, monkeypatch):
        from hermes_cli.env_loader import _sanitize_loaded_credentials

        monkeypatch.setenv("DISCORD_BOT_TOKEN", "tokénvalue")
        _sanitize_loaded_credentials()
        assert os.environ.get("DISCORD_BOT_TOKEN") == "toknvalue"

    def test_ignores_non_credential_vars(self, monkeypatch):
        from hermes_cli.env_loader import _sanitize_loaded_credentials

        monkeypatch.setenv("MY_UNICODE_VAR", "héllo wörld")
        _sanitize_loaded_credentials()
        # No credential suffix — the value must survive untouched.
        assert os.environ.get("MY_UNICODE_VAR") == "héllo wörld"

    def test_ascii_credentials_untouched(self, monkeypatch):
        from hermes_cli.env_loader import _sanitize_loaded_credentials

        monkeypatch.setenv("OPENAI_API_KEY", "sk-proj-allascii123")
        _sanitize_loaded_credentials()
        assert os.environ.get("OPENAI_API_KEY") == "sk-proj-allascii123"

View file

@ -142,6 +142,33 @@ class TestSurrogateVsAsciiSanitization:
assert _sanitize_messages_surrogates(messages) is False
class TestApiKeyNonAsciiSanitization:
    """Tests for API key sanitization in the UnicodeEncodeError recovery.

    Covers the root cause of issue #6843: a non-ASCII character (ʋ U+028B)
    in the API key causes httpx to fail when encoding the Authorization
    header as ASCII. The recovery block must strip non-ASCII from the key.
    """

    def test_strip_non_ascii_from_api_key(self):
        """_strip_non_ascii removes ʋ from an API key string."""
        tainted = "sk-proj-abc" + "ʋ" + "def"
        assert _strip_non_ascii(tainted) == "sk-proj-abcdef"

    def test_api_key_at_position_153(self):
        """Reproduce the exact error: ʋ at position 153 in 'Bearer <key>'."""
        tainted = "sk-proj-" + "a" * 138 + "ʋ" + "bcd"
        header_value = f"Bearer {tainted}"
        # This is what httpx does — and it fails:
        with pytest.raises(UnicodeEncodeError) as exc_info:
            header_value.encode("ascii")
        assert exc_info.value.start == 153
        # After sanitization, the same encode must succeed:
        clean_header = f"Bearer {_strip_non_ascii(tainted)}"
        clean_header.encode("ascii")  # should not raise
class TestSanitizeToolsNonAscii:
"""Tests for _sanitize_tools_non_ascii."""