Merge branch 'main' of github.com:NousResearch/hermes-agent into feat/ink-refactor

This commit is contained in:
Brooklyn Nicholson 2026-04-14 22:30:22 -05:00
commit 496bfb3c59
7 changed files with 207 additions and 2 deletions

View file

@ -848,8 +848,7 @@ class SlashCommandCompleter(Completer):
return None
return word
@staticmethod
def _context_completions(word: str, limit: int = 30):
def _context_completions(self, word: str, limit: int = 30):
"""Yield Claude Code-style @ context completions.
Bare ``@`` or ``@partial`` shows static references and matching

View file

@ -2766,6 +2766,47 @@ def sanitize_env_file() -> int:
return fixes
def _check_non_ascii_credential(key: str, value: str) -> str:
"""Warn and strip non-ASCII characters from credential values.
API keys and tokens must be pure ASCII they are sent as HTTP header
values which httpx/httpcore encode as ASCII. Non-ASCII characters
(commonly introduced by copy-pasting from rich-text editors or PDFs
that substitute lookalike Unicode glyphs for ASCII letters) cause
``UnicodeEncodeError: 'ascii' codec can't encode character`` at
request time.
Returns the sanitized (ASCII-only) value. Prints a warning if any
non-ASCII characters were found and removed.
"""
try:
value.encode("ascii")
return value # all ASCII — nothing to do
except UnicodeEncodeError:
pass
# Build a readable list of the offending characters
bad_chars: list[str] = []
for i, ch in enumerate(value):
if ord(ch) > 127:
bad_chars.append(f" position {i}: {ch!r} (U+{ord(ch):04X})")
sanitized = value.encode("ascii", errors="ignore").decode("ascii")
import sys
print(
f"\n Warning: {key} contains non-ASCII characters that will break API requests.\n"
f" This usually happens when copy-pasting from a PDF, rich-text editor,\n"
f" or web page that substitutes lookalike Unicode glyphs for ASCII letters.\n"
f"\n"
+ "\n".join(f" {line}" for line in bad_chars[:5])
+ ("\n ... and more" if len(bad_chars) > 5 else "")
+ f"\n\n The non-ASCII characters have been stripped automatically.\n"
f" If authentication fails, re-copy the key from the provider's dashboard.\n",
file=sys.stderr,
)
return sanitized
def save_env_value(key: str, value: str):
"""Save or update a value in ~/.hermes/.env."""
if is_managed():
@ -2774,6 +2815,8 @@ def save_env_value(key: str, value: str):
if not _ENV_VAR_NAME_RE.match(key):
raise ValueError(f"Invalid environment variable name: {key!r}")
value = value.replace("\n", "").replace("\r", "")
# API keys / tokens must be ASCII — strip non-ASCII with a warning.
value = _check_non_ascii_credential(key, value)
ensure_hermes_home()
env_path = get_env_path()

View file

@ -8,11 +8,40 @@ from pathlib import Path
from dotenv import load_dotenv
# Env var name suffixes that indicate credential values. These are the
# only env vars whose values we sanitize on load — we must not silently
# alter arbitrary user env vars, but credentials are known to require
# pure ASCII (they become HTTP header values).
_CREDENTIAL_SUFFIXES = ("_API_KEY", "_TOKEN", "_SECRET", "_KEY")
def _sanitize_loaded_credentials() -> None:
"""Strip non-ASCII characters from credential env vars in os.environ.
Called after dotenv loads so the rest of the codebase never sees
non-ASCII API keys. Only touches env vars whose names end with
known credential suffixes (``_API_KEY``, ``_TOKEN``, etc.).
"""
for key, value in list(os.environ.items()):
if not any(key.endswith(suffix) for suffix in _CREDENTIAL_SUFFIXES):
continue
try:
value.encode("ascii")
except UnicodeEncodeError:
os.environ[key] = value.encode("ascii", errors="ignore").decode("ascii")
def _load_dotenv_with_fallback(path: Path, *, override: bool) -> None:
    """Load a dotenv file as UTF-8, retrying as latin-1 on decode failure.

    latin-1 maps every possible byte to a character, so the fallback call
    cannot itself raise ``UnicodeDecodeError`` (though the decoded values
    may contain mojibake for genuinely non-latin-1 files).

    Args:
        path: Path of the dotenv file to load.
        override: Passed through to ``load_dotenv``; whether loaded values
            replace variables already present in the environment.
    """
    try:
        load_dotenv(dotenv_path=path, override=override, encoding="utf-8")
    except UnicodeDecodeError:
        load_dotenv(dotenv_path=path, override=override, encoding="latin-1")
    # Strip non-ASCII characters from credential env vars that were just
    # loaded. API keys must be pure ASCII since they're sent as HTTP
    # header values (httpx encodes headers as ASCII). Non-ASCII chars
    # typically come from copy-pasting keys from PDFs or rich-text editors
    # that substitute Unicode lookalike glyphs (e.g. ʋ U+028B for v).
    _sanitize_loaded_credentials()
def _sanitize_env_file_if_needed(path: Path) -> None:

View file

@ -358,6 +358,7 @@ def _add_rotating_handler(
path.parent.mkdir(parents=True, exist_ok=True)
handler = _ManagedRotatingFileHandler(
str(path), maxBytes=max_bytes, backupCount=backup_count,
encoding="utf-8",
)
handler.setLevel(level)
handler.setFormatter(formatter)

View file

@ -8988,12 +8988,35 @@ class AIAgent:
if isinstance(_default_headers, dict):
_headers_sanitized = _sanitize_structure_non_ascii(_default_headers)
# Sanitize the API key — non-ASCII characters in
# credentials (e.g. ʋ instead of v from a bad
# copy-paste) cause httpx to fail when encoding
# the Authorization header as ASCII. This is the
# most common cause of persistent UnicodeEncodeError
# that survives message/tool sanitization (#6843).
_credential_sanitized = False
_raw_key = getattr(self, "api_key", None) or ""
if _raw_key:
_clean_key = _strip_non_ascii(_raw_key)
if _clean_key != _raw_key:
self.api_key = _clean_key
if isinstance(getattr(self, "_client_kwargs", None), dict):
self._client_kwargs["api_key"] = _clean_key
_credential_sanitized = True
self._vprint(
f"{self.log_prefix}⚠️ API key contained non-ASCII characters "
f"(bad copy-paste?) — stripped them. If auth fails, "
f"re-copy the key from your provider's dashboard.",
force=True,
)
if (
_messages_sanitized
or _prefill_sanitized
or _tools_sanitized
or _system_sanitized
or _headers_sanitized
or _credential_sanitized
):
self._unicode_sanitization_passes += 1
self._vprint(

View file

@ -0,0 +1,83 @@
"""Tests for non-ASCII credential detection and sanitization.
Covers the fix for issue #6843 — API keys containing Unicode lookalike
characters (e.g. ʋ U+028B instead of v) cause UnicodeEncodeError when
httpx tries to encode the Authorization header as ASCII.
"""
import os
import sys
import tempfile
import pytest
from hermes_cli.config import _check_non_ascii_credential
class TestCheckNonAsciiCredential:
    """Tests for _check_non_ascii_credential()."""

    def test_ascii_key_unchanged(self):
        original = "sk-proj-" + "a" * 100
        assert _check_non_ascii_credential("TEST_API_KEY", original) == original

    def test_strips_unicode_v_lookalike(self, capsys):
        """The exact scenario from issue #6843: ʋ instead of v."""
        tainted = "sk-proj-abc" + "ʋ" + "def"  # \u028b
        cleaned = _check_non_ascii_credential("OPENROUTER_API_KEY", tainted)
        assert cleaned == "sk-proj-abcdef"
        assert "ʋ" not in cleaned
        # A warning must have gone to stderr.
        assert "non-ASCII" in capsys.readouterr().err

    def test_strips_multiple_non_ascii(self, capsys):
        cleaned = _check_non_ascii_credential("OPENAI_API_KEY", "sk-proj-aʋbécd")
        assert cleaned == "sk-proj-abcd"
        # The warning reports each offending character's codepoint.
        assert "U+028B" in capsys.readouterr().err

    def test_empty_key(self):
        assert _check_non_ascii_credential("TEST_KEY", "") == ""

    def test_all_ascii_no_warning(self, capsys):
        cleaned = _check_non_ascii_credential("KEY", "all-ascii-value-123")
        assert cleaned == "all-ascii-value-123"
        # No offenders means no stderr output at all.
        assert capsys.readouterr().err == ""
class TestEnvLoaderSanitization:
    """Tests for _sanitize_loaded_credentials in env_loader."""

    def test_strips_non_ascii_from_api_key(self, monkeypatch):
        from hermes_cli.env_loader import _sanitize_loaded_credentials

        monkeypatch.setenv("OPENROUTER_API_KEY", "sk-proj-abcʋdef")
        _sanitize_loaded_credentials()
        assert os.environ.get("OPENROUTER_API_KEY") == "sk-proj-abcdef"

    def test_strips_non_ascii_from_token(self, monkeypatch):
        from hermes_cli.env_loader import _sanitize_loaded_credentials

        monkeypatch.setenv("DISCORD_BOT_TOKEN", "tokénvalue")
        _sanitize_loaded_credentials()
        assert os.environ.get("DISCORD_BOT_TOKEN") == "toknvalue"

    def test_ignores_non_credential_vars(self, monkeypatch):
        from hermes_cli.env_loader import _sanitize_loaded_credentials

        monkeypatch.setenv("MY_UNICODE_VAR", "héllo wörld")
        _sanitize_loaded_credentials()
        # No credential suffix — the value must survive untouched.
        assert os.environ.get("MY_UNICODE_VAR") == "héllo wörld"

    def test_ascii_credentials_untouched(self, monkeypatch):
        from hermes_cli.env_loader import _sanitize_loaded_credentials

        monkeypatch.setenv("OPENAI_API_KEY", "sk-proj-allascii123")
        _sanitize_loaded_credentials()
        assert os.environ.get("OPENAI_API_KEY") == "sk-proj-allascii123"

View file

@ -142,6 +142,33 @@ class TestSurrogateVsAsciiSanitization:
assert _sanitize_messages_surrogates(messages) is False
class TestApiKeyNonAsciiSanitization:
    """Tests for API key sanitization in the UnicodeEncodeError recovery.

    Covers the root cause of issue #6843: a non-ASCII character (ʋ U+028B)
    in the API key causes httpx to fail when encoding the Authorization
    header as ASCII. The recovery block must strip non-ASCII from the key.
    """

    def test_strip_non_ascii_from_api_key(self):
        """_strip_non_ascii removes ʋ from an API key string."""
        tainted = "sk-proj-abc" + "ʋ" + "def"
        assert _strip_non_ascii(tainted) == "sk-proj-abcdef"

    def test_api_key_at_position_153(self):
        """Reproduce the exact error: ʋ at position 153 in 'Bearer <key>'."""
        tainted = "sk-proj-" + "a" * 138 + "ʋ" + "bcd"
        header_value = f"Bearer {tainted}"
        # This is what httpx does — and it fails:
        with pytest.raises(UnicodeEncodeError) as exc_info:
            header_value.encode("ascii")
        assert exc_info.value.start == 153
        # After sanitization, the same encode must succeed:
        clean_header = f"Bearer {_strip_non_ascii(tainted)}"
        clean_header.encode("ascii")  # should not raise
class TestSanitizeToolsNonAscii:
"""Tests for _sanitize_tools_non_ascii."""