def _check_non_ascii_credential(key: str, value: str) -> str:
    """Return ``value`` with every non-ASCII character removed, warning if any.

    API keys and tokens are sent as HTTP header values, which httpx/httpcore
    encode as ASCII.  A non-ASCII character (commonly introduced by
    copy-pasting from rich-text editors or PDFs that substitute lookalike
    Unicode glyphs for ASCII letters) therefore fails at request time with
    ``UnicodeEncodeError: 'ascii' codec can't encode character``.

    Args:
        key: Name of the setting being checked; only used in the warning text.
        value: The credential value to check.

    Returns:
        ``value`` unchanged when it is already pure ASCII (including the
        empty string); otherwise a copy with all code points above 127
        dropped.  In the latter case a warning listing up to five offending
        characters (position, repr and ``U+XXXX`` code point) is printed to
        ``sys.stderr``.
    """
    # Fast path: nothing to strip, nothing to report.
    if value.isascii():
        return value

    import sys

    max_reported = 5  # keep the warning short for badly mangled values

    # One human-readable entry per offending character.
    offenders = [
        f" position {i}: {ch!r} (U+{ord(ch):04X})"
        for i, ch in enumerate(value)
        if ord(ch) > 127
    ]
    sanitized = value.encode("ascii", errors="ignore").decode("ascii")

    header = (
        f"\n Warning: {key} contains non-ASCII characters that will break API requests.\n"
        f" This usually happens when copy-pasting from a PDF, rich-text editor,\n"
        f" or web page that substitutes lookalike Unicode glyphs for ASCII letters.\n"
        f"\n"
    )
    listing = "\n".join(f" {line}" for line in offenders[:max_reported])
    overflow = "\n ... and more" if len(offenders) > max_reported else ""
    footer = (
        f"\n\n The non-ASCII characters have been stripped automatically.\n"
        f" If authentication fails, re-copy the key from the provider's dashboard.\n"
    )
    print(header + listing + overflow + footer, file=sys.stderr)
    return sanitized
# Env var name suffixes that mark a value as a credential.  Only variables
# whose names end with one of these are sanitized on load; other user env
# vars are left exactly as they are.
_CREDENTIAL_SUFFIXES = ("_API_KEY", "_TOKEN", "_SECRET", "_KEY")


def _sanitize_loaded_credentials() -> None:
    """Strip non-ASCII characters from credential env vars in ``os.environ``.

    Every variable currently in ``os.environ`` whose name ends with one of
    ``_CREDENTIAL_SUFFIXES`` is inspected; if its value contains any
    non-ASCII character, the value is replaced in place by an ASCII-only
    copy.  Variables with other names, and credential variables that are
    already pure ASCII, are not touched.  No warning is emitted.
    """
    # Snapshot the items: the loop assigns into os.environ while iterating.
    for name, value in list(os.environ.items()):
        # str.endswith accepts a tuple of candidate suffixes.
        if not name.endswith(_CREDENTIAL_SUFFIXES):
            continue
        if value.isascii():
            continue
        os.environ[name] = value.encode("ascii", errors="ignore").decode("ascii")


def _load_dotenv_with_fallback(path: Path, *, override: bool) -> None:
    """Load ``path`` via ``load_dotenv`` and sanitize credential variables.

    The file is read as UTF-8 first; if that raises ``UnicodeDecodeError``
    it is read again as latin-1.  After loading, credential env vars are
    stripped of non-ASCII characters, because API keys are sent as HTTP
    header values and must be pure ASCII (such characters typically come
    from copy-pasting keys out of PDFs or rich-text editors).

    Args:
        path: Location of the dotenv file.
        override: Forwarded unchanged to ``load_dotenv``.
    """
    try:
        load_dotenv(dotenv_path=path, override=override, encoding="utf-8")
    except UnicodeDecodeError:
        load_dotenv(dotenv_path=path, override=override, encoding="latin-1")
    _sanitize_loaded_credentials()
# --- tests/hermes_cli/test_non_ascii_credential.py ---------------------------


class TestCheckNonAsciiCredential:
    """Exercise _check_non_ascii_credential() on clean and contaminated values."""

    def test_ascii_key_unchanged(self):
        ascii_key = "sk-proj-" + "a" * 100
        assert _check_non_ascii_credential("TEST_API_KEY", ascii_key) == ascii_key

    def test_strips_unicode_v_lookalike(self, capsys):
        """Issue #6843 scenario: a ʋ (U+028B) sits where a plain v belongs."""
        contaminated = "sk-proj-abc" + "ʋ" + "def"  # \u028b
        cleaned = _check_non_ascii_credential("OPENROUTER_API_KEY", contaminated)
        assert cleaned == "sk-proj-abcdef"
        assert "ʋ" not in cleaned
        # The warning goes to stderr.
        assert "non-ASCII" in capsys.readouterr().err

    def test_strips_multiple_non_ascii(self, capsys):
        cleaned = _check_non_ascii_credential("OPENAI_API_KEY", "sk-proj-aʋbécd")
        assert cleaned == "sk-proj-abcd"
        # The offending code point is named in the warning.
        assert "U+028B" in capsys.readouterr().err

    def test_empty_key(self):
        assert _check_non_ascii_credential("TEST_KEY", "") == ""

    def test_all_ascii_no_warning(self, capsys):
        cleaned = _check_non_ascii_credential("KEY", "all-ascii-value-123")
        assert cleaned == "all-ascii-value-123"
        assert capsys.readouterr().err == ""


class TestEnvLoaderSanitization:
    """Exercise env_loader._sanitize_loaded_credentials against os.environ."""

    @staticmethod
    def _run_sanitizer():
        # Imported at call time, as each test did originally.
        from hermes_cli.env_loader import _sanitize_loaded_credentials

        _sanitize_loaded_credentials()

    def test_strips_non_ascii_from_api_key(self, monkeypatch):
        monkeypatch.setenv("OPENROUTER_API_KEY", "sk-proj-abcʋdef")
        self._run_sanitizer()
        assert os.environ["OPENROUTER_API_KEY"] == "sk-proj-abcdef"

    def test_strips_non_ascii_from_token(self, monkeypatch):
        monkeypatch.setenv("DISCORD_BOT_TOKEN", "tokénvalue")
        self._run_sanitizer()
        assert os.environ["DISCORD_BOT_TOKEN"] == "toknvalue"

    def test_ignores_non_credential_vars(self, monkeypatch):
        monkeypatch.setenv("MY_UNICODE_VAR", "héllo wörld")
        self._run_sanitizer()
        # Name carries no credential suffix, so the value must survive intact.
        assert os.environ["MY_UNICODE_VAR"] == "héllo wörld"

    def test_ascii_credentials_untouched(self, monkeypatch):
        monkeypatch.setenv("OPENAI_API_KEY", "sk-proj-allascii123")
        self._run_sanitizer()
        assert os.environ["OPENAI_API_KEY"] == "sk-proj-allascii123"


# --- tests/run_agent/test_unicode_ascii_codec.py -----------------------------


class TestApiKeyNonAsciiSanitization:
    """API key sanitization used by the UnicodeEncodeError recovery path.

    Root cause of issue #6843: a non-ASCII character (ʋ U+028B) in the API
    key makes the Authorization header impossible to encode as ASCII, so the
    recovery block has to strip non-ASCII characters from the key.
    """

    def test_strip_non_ascii_from_api_key(self):
        """_strip_non_ascii drops the ʋ from an API key string."""
        contaminated = "sk-proj-abc" + "ʋ" + "def"
        assert _strip_non_ascii(contaminated) == "sk-proj-abcdef"

    def test_api_key_at_position_153(self):
        """Reproduce the reported error: ʋ at index 153 of the 'Bearer <key>' value."""
        contaminated = "sk-proj-" + "a" * 138 + "ʋ" + "bcd"
        header_value = f"Bearer {contaminated}"
        # Encoding the raw header value as ASCII must fail at the ʋ.
        with pytest.raises(UnicodeEncodeError) as exc_info:
            header_value.encode("ascii")
        assert exc_info.value.start == 153
        # Once the key is sanitized the same encoding succeeds.
        cleaned_header = f"Bearer {_strip_non_ascii(contaminated)}"
        cleaned_header.encode("ascii")  # should not raise