mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-14 04:02:26 +00:00
hermes_bootstrap: Windows-only UTF-8 stdio shim for all entry points
Codebase-wide fix for Python-on-Windows UTF-8 footguns, complementing
the earlier execute_code sandbox fixes (which remain load-bearing for
when the sandbox explicitly scrubs child env).
Problem: Python on Windows has two long-standing text-encoding pitfalls:
1. sys.stdout/stderr are bound to the console code page (cp1252 on
US-locale installs) — print('café') crashes with UnicodeEncodeError.
2. Subprocess children don't know to use UTF-8 unless PYTHONUTF8 and/or
PYTHONIOENCODING are set in their env — so any Python we spawn
(linters, sandbox children, delegation workers) hits the same bug.
Solution: A tiny bootstrap module (hermes_bootstrap.py) imported as the
first statement of every Hermes entry point:
- hermes_cli/main.py (hermes / hermes-agent console_script)
- run_agent.py (hermes-agent direct)
- acp_adapter/entry.py (hermes-acp)
- gateway/run.py (messaging gateway)
- batch_runner.py (parallel batch mode)
- cli.py (legacy direct-launch CLI)
On Windows, the bootstrap:
- os.environ.setdefault('PYTHONUTF8', '1') (PEP 540 UTF-8 mode)
- os.environ.setdefault('PYTHONIOENCODING', 'utf-8')
- sys.stdout/stderr/stdin.reconfigure(encoding='utf-8', errors='replace')
Children inherit the env vars → they run in UTF-8 mode.
Current process's stdio is reconfigured → print('café') works now.
On POSIX (Linux/macOS), the bootstrap is a complete no-op. We don't
touch LANG, LC_*, or anything else — users who have intentionally
configured a non-UTF-8 locale aren't affected. POSIX systems are
already UTF-8 by default in 99% of modern setups, so there's nothing
to fix.
setdefault() (not overwrite) means users who explicitly set PYTHONUTF8=0
or PYTHONIOENCODING=cp1252 in their environment are respected.
What this does NOT fix: bare open(path, 'w') calls in the *parent*
process still default to locale encoding because PYTHONUTF8 is only
read at interpreter init. A ruff PLW1514 sweep (separate follow-up)
will add explicit encoding='utf-8' at those ~219 call sites for
belt-and-suspenders.
Tests (17): 16 passed, 1 skipped on Windows.
- Windows: env vars set, stdio reconfigured, child inherits UTF-8 mode
- POSIX: complete no-op (verified on fake POSIX + skipped on real
POSIX since we don't have a Linux box in this session)
- Idempotence: multiple calls safe
- Graceful degradation: non-reconfigurable streams don't crash
- User opt-out: explicit PYTHONUTF8=0 is respected
- Load order: every entry point's FIRST top-level import is
hermes_bootstrap, enforced by an AST-level parametrized test
pyproject.toml: added hermes_bootstrap to py-modules so it ships with
pip installs.
This commit is contained in:
parent
bf43f6cfdd
commit
6098272454
9 changed files with 452 additions and 2 deletions
|
|
@ -13,6 +13,10 @@ Usage::
|
||||||
hermes-acp
|
hermes-acp
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
# IMPORTANT: hermes_bootstrap must be the very first import — UTF-8 stdio
|
||||||
|
# on Windows. No-op on POSIX. See hermes_bootstrap.py for full rationale.
|
||||||
|
import hermes_bootstrap # noqa: F401
|
||||||
|
|
||||||
import asyncio
|
import asyncio
|
||||||
import logging
|
import logging
|
||||||
import sys
|
import sys
|
||||||
|
|
|
||||||
|
|
@ -20,6 +20,10 @@ Usage:
|
||||||
python batch_runner.py --dataset_file=data.jsonl --batch_size=10 --run_name=my_run --distribution=image_gen
|
python batch_runner.py --dataset_file=data.jsonl --batch_size=10 --run_name=my_run --distribution=image_gen
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
# IMPORTANT: hermes_bootstrap must be the very first import — UTF-8 stdio
|
||||||
|
# on Windows. No-op on POSIX. See hermes_bootstrap.py for full rationale.
|
||||||
|
import hermes_bootstrap # noqa: F401
|
||||||
|
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
|
|
|
||||||
5
cli.py
5
cli.py
|
|
@ -9,10 +9,13 @@ Usage:
|
||||||
python cli.py # Start interactive mode with all tools
|
python cli.py # Start interactive mode with all tools
|
||||||
python cli.py --toolsets web,terminal # Start with specific toolsets
|
python cli.py --toolsets web,terminal # Start with specific toolsets
|
||||||
python cli.py --skills hermes-agent-dev,github-auth
|
python cli.py --skills hermes-agent-dev,github-auth
|
||||||
python cli.py -q "your question" # Single query mode
|
|
||||||
python cli.py --list-tools # List available tools and exit
|
python cli.py --list-tools # List available tools and exit
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
# IMPORTANT: hermes_bootstrap must be the very first import — UTF-8 stdio
|
||||||
|
# on Windows. No-op on POSIX. See hermes_bootstrap.py for full rationale.
|
||||||
|
import hermes_bootstrap # noqa: F401
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
import shutil
|
import shutil
|
||||||
|
|
|
||||||
|
|
@ -13,6 +13,10 @@ Usage:
|
||||||
python cli.py --gateway
|
python cli.py --gateway
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
# IMPORTANT: hermes_bootstrap must be the very first import — UTF-8 stdio
|
||||||
|
# on Windows. No-op on POSIX. See hermes_bootstrap.py for full rationale.
|
||||||
|
import hermes_bootstrap # noqa: F401
|
||||||
|
|
||||||
import asyncio
|
import asyncio
|
||||||
import dataclasses
|
import dataclasses
|
||||||
import inspect
|
import inspect
|
||||||
|
|
|
||||||
129
hermes_bootstrap.py
Normal file
129
hermes_bootstrap.py
Normal file
|
|
@ -0,0 +1,129 @@
|
||||||
|
"""Windows UTF-8 bootstrap for Hermes entry points.
|
||||||
|
|
||||||
|
Python on Windows has two long-standing text-encoding footguns:
|
||||||
|
|
||||||
|
1. ``sys.stdout`` / ``sys.stderr`` are bound to the console code page
|
||||||
|
(``cp1252`` on US-locale installs), so ``print("café")`` crashes with
|
||||||
|
``UnicodeEncodeError: 'charmap' codec can't encode character``.
|
||||||
|
|
||||||
|
2. Child processes spawned via ``subprocess`` don't know to use UTF-8
|
||||||
|
unless ``PYTHONUTF8`` and/or ``PYTHONIOENCODING`` are set in their
|
||||||
|
environment — so any Python subprocess (the execute_code sandbox,
|
||||||
|
delegation children, linter subprocesses, etc.) inherits the same
|
||||||
|
cp1252 defaults and hits the same UnicodeEncodeError.
|
||||||
|
|
||||||
|
This module fixes both on Windows *only* — POSIX is untouched. It
|
||||||
|
should be imported at the very top of every Hermes entry point
|
||||||
|
(``hermes``, ``hermes-agent``, ``hermes-acp``, ``python -m gateway.run``,
|
||||||
|
``batch_runner.py``, ``cron/scheduler.py``) before any other imports
|
||||||
|
that might do file I/O or print to stdout.
|
||||||
|
|
||||||
|
What this module does on Windows:
|
||||||
|
|
||||||
|
- Sets ``os.environ["PYTHONUTF8"] = "1"`` if unset (PEP 540 UTF-8 mode) so
|
||||||
|
every child process we spawn uses UTF-8 for ``open()`` and stdio.
|
||||||
|
- Sets ``os.environ["PYTHONIOENCODING"] = "utf-8"`` if unset, for belt-and-
|
||||||
|
suspenders — some tools read this instead of / in addition to
|
||||||
|
``PYTHONUTF8``.
|
||||||
|
- Reconfigures ``sys.stdout`` / ``sys.stderr`` / ``sys.stdin`` to UTF-8 in the current
|
||||||
|
process, using the ``reconfigure()`` API (Python 3.7+). This fixes
|
||||||
|
``print("café")`` in the parent without a re-exec.
|
||||||
|
|
||||||
|
What this module does NOT do:
|
||||||
|
|
||||||
|
- It does not re-exec Python with ``-X utf8``, so ``open()`` calls in
|
||||||
|
the *current* process still default to locale encoding. Those need
|
||||||
|
an explicit ``encoding="utf-8"`` at the call site (lint rule
|
||||||
|
``PLW1514``, unspecified-encoding). Ruff is the right tool for that sweep.
|
||||||
|
|
||||||
|
What this module does on POSIX:
|
||||||
|
|
||||||
|
- Nothing. POSIX systems are already UTF-8 by default in 99% of cases,
|
||||||
|
and we don't want to touch ``LANG``/``LC_*`` behavior that users may
|
||||||
|
have configured intentionally. If someone hits a C/POSIX locale on
|
||||||
|
Linux, they can export ``PYTHONUTF8=1`` themselves — we won't override.
|
||||||
|
|
||||||
|
Idempotent: safe to call multiple times. ``_bootstrap_applied`` guards
|
||||||
|
against double-reconfigure.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
|
# Platform gate, evaluated once at import. Tests flip this module attribute
# to exercise the Windows path on other hosts.
_IS_WINDOWS = sys.platform == "win32"

# Set to True after the first successful bootstrap so later calls are no-ops.
_bootstrap_applied = False

# Standard streams rebound to UTF-8, in the order they are processed.
_STDIO_STREAM_NAMES = ("stdout", "stderr", "stdin")


def _reconfigure_stream_utf8(stream: object) -> bool:
    """Best-effort switch of one standard stream to UTF-8.

    Calls ``stream.reconfigure(encoding="utf-8", errors="replace")`` when
    the stream offers that method (a ``TextIOWrapper`` method since 3.7).

    ``errors="replace"`` means that if we ever *read* something from stdin
    that isn't UTF-8 (unlikely but possible with piped input from legacy
    tools), we get U+FFFD replacement chars rather than a crash.

    Returns True if the stream was reconfigured, False if it was skipped
    (``None`` or no ``reconfigure`` attribute) or the call failed.
    """
    if stream is None:
        return False

    reconfigure = getattr(stream, "reconfigure", None)
    if reconfigure is None:
        # Not a TextIOWrapper (could be redirected to a BytesIO in tests,
        # or a non-standard stream in some embedded cases). Skip silently:
        # the env-var fix is still in effect for child processes.
        return False

    try:
        reconfigure(encoding="utf-8", errors="replace")
    except (OSError, ValueError):
        # Already closed, or someone replaced it with something
        # non-reconfigurable. Non-fatal.
        return False
    return True


def apply_windows_utf8_bootstrap() -> bool:
    """Apply the Windows UTF-8 bootstrap if we're on Windows.

    On Windows, the first call:

    1. Defaults ``PYTHONUTF8=1`` and ``PYTHONIOENCODING=utf-8`` in
       ``os.environ`` so child processes inherit UTF-8 mode.
    2. Reconfigures ``sys.stdout``, ``sys.stderr`` and ``sys.stdin`` of the
       current process to UTF-8. Streams that cannot be reconfigured are
       skipped without raising.

    Returns True if the bootstrap was applied (i.e. we're on Windows and
    haven't already done this), False otherwise. The return value is
    advisory: callers normally don't need it, but tests may want to
    assert the path was taken.

    Idempotent: subsequent calls after the first are a no-op.
    """
    global _bootstrap_applied

    if not _IS_WINDOWS or _bootstrap_applied:
        return False

    # setdefault() rather than overwriting, so the user can explicitly opt
    # out by setting PYTHONUTF8=0 (or PYTHONIOENCODING=something-else) in
    # their environment if they really want to.
    os.environ.setdefault("PYTHONUTF8", "1")
    os.environ.setdefault("PYTHONIOENCODING", "utf-8")

    # os.environ changes don't retroactively rebind the stdio objects of
    # this process (they were bound at interpreter startup based on the
    # console code page), so rebind them explicitly.
    for stream_name in _STDIO_STREAM_NAMES:
        _reconfigure_stream_utf8(getattr(sys, stream_name, None))

    _bootstrap_applied = True
    return True


# Apply on import — entry points just need ``import hermes_bootstrap``
# (or ``from hermes_bootstrap import apply_windows_utf8_bootstrap``) at
# the very top of their module, before importing anything else. The
# import side effect does the right thing.
apply_windows_utf8_bootstrap()
|
||||||
|
|
@ -43,6 +43,11 @@ Usage:
|
||||||
hermes claw migrate --dry-run # Preview migration without changes
|
hermes claw migrate --dry-run # Preview migration without changes
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
# IMPORTANT: hermes_bootstrap must be the very first import — it sets up
|
||||||
|
# UTF-8 stdio on Windows so print()/subprocess children don't hit
|
||||||
|
# UnicodeEncodeError with non-ASCII characters. No-op on POSIX.
|
||||||
|
import hermes_bootstrap # noqa: F401
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
|
|
|
||||||
|
|
@ -160,7 +160,7 @@ hermes-agent = "run_agent:main"
|
||||||
hermes-acp = "acp_adapter.entry:main"
|
hermes-acp = "acp_adapter.entry:main"
|
||||||
|
|
||||||
[tool.setuptools]
|
[tool.setuptools]
|
||||||
py-modules = ["run_agent", "model_tools", "toolsets", "batch_runner", "trajectory_compressor", "toolset_distributions", "cli", "hermes_constants", "hermes_state", "hermes_time", "hermes_logging", "rl_cli", "utils"]
|
py-modules = ["run_agent", "model_tools", "toolsets", "batch_runner", "trajectory_compressor", "toolset_distributions", "cli", "hermes_bootstrap", "hermes_constants", "hermes_state", "hermes_time", "hermes_logging", "rl_cli", "utils"]
|
||||||
|
|
||||||
[tool.setuptools.package-data]
|
[tool.setuptools.package-data]
|
||||||
hermes_cli = ["web_dist/**/*"]
|
hermes_cli = ["web_dist/**/*"]
|
||||||
|
|
|
||||||
|
|
@ -20,6 +20,10 @@ Usage:
|
||||||
response = agent.run_conversation("Tell me about the latest Python updates")
|
response = agent.run_conversation("Tell me about the latest Python updates")
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
# IMPORTANT: hermes_bootstrap must be the very first import — UTF-8 stdio
|
||||||
|
# on Windows. No-op on POSIX. See hermes_bootstrap.py for full rationale.
|
||||||
|
import hermes_bootstrap # noqa: F401
|
||||||
|
|
||||||
import asyncio
|
import asyncio
|
||||||
import base64
|
import base64
|
||||||
import concurrent.futures
|
import concurrent.futures
|
||||||
|
|
|
||||||
297
tests/test_hermes_bootstrap.py
Normal file
297
tests/test_hermes_bootstrap.py
Normal file
|
|
@ -0,0 +1,297 @@
|
||||||
|
"""Tests for hermes_bootstrap — Windows UTF-8 stdio shim.
|
||||||
|
|
||||||
|
The bootstrap module is imported at the top of every Hermes entry point
|
||||||
|
(hermes, hermes-agent, hermes-acp, gateway, batch_runner, cli.py). It
|
||||||
|
fixes Python's Windows UTF-8 defaults so print("café") doesn't crash and
|
||||||
|
subprocess children inherit UTF-8 mode.
|
||||||
|
|
||||||
|
Key invariants covered by these tests:
|
||||||
|
|
||||||
|
1. Windows: env vars get set, stdio reconfigured, non-ASCII print works
|
||||||
|
2. POSIX: complete no-op (we don't touch LANG/LC_* or anything else)
|
||||||
|
3. Idempotent: safe to call multiple times
|
||||||
|
4. Respects user opt-out: if the user explicitly sets PYTHONUTF8=0 or
|
||||||
|
PYTHONIOENCODING=something-else, we leave those alone
|
||||||
|
5. Load order: every Hermes entry point imports hermes_bootstrap as its
|
||||||
|
first non-docstring import (before anything that might do file I/O
|
||||||
|
or print to stdout)
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import io
|
||||||
|
import os
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
import textwrap
|
||||||
|
import unittest.mock as mock
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
|
# The module under test does its work as an import-time side effect.
|
||||||
|
# We need to be able to reset its state between tests, so we import it
|
||||||
|
# fresh in each test that manipulates _IS_WINDOWS.
|
||||||
|
def _fresh_import():
    """Import ``hermes_bootstrap`` from scratch and return the new module.

    Any cached entry is evicted from ``sys.modules`` beforehand, which
    forces the module-level code (including the platform check) to
    execute again.
    """
    if "hermes_bootstrap" in sys.modules:
        del sys.modules["hermes_bootstrap"]

    import hermes_bootstrap as fresh_module  # noqa: WPS433

    return fresh_module
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skipif(
    sys.platform != "win32",
    reason="Windows-specific behavior",
)
class TestWindowsBehavior:
    """Windows: the bootstrap does its job.

    Every test here needs a real Windows interpreter, so the skip marker
    is applied once at class level.
    """

    def test_env_vars_set_on_windows(self, monkeypatch):
        """Importing the module defaults both env vars when they are unset."""
        # Clear any pre-existing values and re-run bootstrap.
        monkeypatch.delenv("PYTHONUTF8", raising=False)
        monkeypatch.delenv("PYTHONIOENCODING", raising=False)
        hb = _fresh_import()
        # Module-level apply_windows_utf8_bootstrap() ran during import.
        assert os.environ.get("PYTHONUTF8") == "1"
        assert os.environ.get("PYTHONIOENCODING") == "utf-8"
        assert hb._bootstrap_applied is True

    def test_stdout_reconfigured_to_utf8_on_windows(self):
        """After the bootstrap, the live ``sys.stdout`` reports UTF-8."""
        _fresh_import()
        out = sys.stdout
        # pytest may have swapped sys.stdout for a capture object; if that
        # object cannot be reconfigured, the bootstrap skipped it and there
        # is nothing to check.
        if getattr(out, "reconfigure", None) is None:
            pytest.skip("pytest replaced sys.stdout with a non-reconfigurable stream")
        # Either the bootstrap reconfigured the stream, or pytest's capture
        # had already set it to utf-8.
        assert out.encoding.lower() in {"utf-8", "utf8"}, (
            f"stdout encoding is {out.encoding!r} — bootstrap should have "
            "reconfigured it to UTF-8"
        )

    def test_child_process_inherits_utf8_mode(self, monkeypatch):
        """A subprocess spawned from this process should inherit
        PYTHONUTF8=1 and be able to print non-ASCII to stdout."""
        # Start from a clean slate: the bootstrap uses setdefault(), so an
        # ambient PYTHONUTF8=0 or PYTHONIOENCODING=cp1252 in the developer's
        # shell would otherwise survive and make this test fail for reasons
        # unrelated to the code under test.
        monkeypatch.delenv("PYTHONUTF8", raising=False)
        monkeypatch.delenv("PYTHONIOENCODING", raising=False)
        _fresh_import()
        # Non-ASCII chars that would crash under cp1252: arrow, emoji.
        script = textwrap.dedent("""
            import sys
            print("em-dash \\u2014 arrow \\u2192 emoji \\U0001f680")
            sys.exit(0)
        """).strip()
        # Don't pass env= — let the child inherit os.environ, which
        # now contains PYTHONUTF8=1 courtesy of the bootstrap.
        result = subprocess.run(
            [sys.executable, "-c", script],
            capture_output=True,
            timeout=15,
        )
        assert result.returncode == 0, (
            f"Child crashed printing non-ASCII despite UTF-8 bootstrap:\n"
            f"  stdout: {result.stdout!r}\n"
            f"  stderr: {result.stderr!r}"
        )
        decoded = result.stdout.decode("utf-8")
        assert "\u2014" in decoded
        assert "\u2192" in decoded
        assert "\U0001f680" in decoded
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skipif(
    sys.platform != "win32",
    reason="Only meaningful on Windows where we'd otherwise set these",
)
class TestUserOptOut:
    """Explicit user settings win.

    The bootstrap only fills in PYTHONUTF8 / PYTHONIOENCODING when they
    are absent (setdefault, not overwrite), so values the user exported
    must come through an import unchanged.
    """

    def test_user_pythonutf8_zero_preserved(self, monkeypatch):
        monkeypatch.setenv("PYTHONUTF8", "0")
        _fresh_import()
        preserved = os.environ["PYTHONUTF8"]
        assert preserved == "0", (
            "bootstrap must not overwrite an explicit user setting"
        )

    def test_user_pythonioencoding_preserved(self, monkeypatch):
        monkeypatch.setenv("PYTHONIOENCODING", "latin-1")
        _fresh_import()
        preserved = os.environ["PYTHONIOENCODING"]
        assert preserved == "latin-1"
|
||||||
|
|
||||||
|
|
||||||
|
class TestPosixNoOp:
    """POSIX: importing the module changes nothing.

    LANG, LC_* and stdio are left alone, so Linux/macOS behave the same
    before and after the import.
    """

    def test_noop_on_fake_posix(self, monkeypatch):
        """With ``_IS_WINDOWS`` forced to False, the bootstrap function
        returns False and does not add either env var."""
        hb = _fresh_import()
        # Reset + fake POSIX
        hb._bootstrap_applied = False
        hb._IS_WINDOWS = False
        for var in ("PYTHONUTF8", "PYTHONIOENCODING"):
            monkeypatch.delenv(var, raising=False)

        outcome = hb.apply_windows_utf8_bootstrap()

        assert outcome is False
        for var in ("PYTHONUTF8", "PYTHONIOENCODING"):
            assert var not in os.environ
        assert hb._bootstrap_applied is False

    @pytest.mark.skipif(
        sys.platform == "win32",
        reason="Real POSIX required for this check",
    )
    def test_real_posix_bootstrap_is_noop(self, monkeypatch):
        """On an actual Linux/macOS host, a fresh import leaves the env
        vars unset and the applied flag False."""
        for var in ("PYTHONUTF8", "PYTHONIOENCODING"):
            monkeypatch.delenv(var, raising=False)

        hb = _fresh_import()

        assert hb._bootstrap_applied is False
        for var in ("PYTHONUTF8", "PYTHONIOENCODING"):
            assert var not in os.environ
|
||||||
|
|
||||||
|
|
||||||
|
class TestIdempotence:
    """Repeated calls to apply_windows_utf8_bootstrap() are harmless."""

    def test_second_call_returns_false(self):
        # The import itself performed the first call.
        module = _fresh_import()
        second = module.apply_windows_utf8_bootstrap()
        assert second is False, (
            "Second call should return False (idempotent no-op)"
        )

    def test_no_exceptions_on_repeated_calls(self):
        bootstrap = _fresh_import().apply_windows_utf8_bootstrap
        remaining = 5
        while remaining:
            bootstrap()
            remaining -= 1
|
||||||
|
|
||||||
|
|
||||||
|
class TestStdioReconfigureErrorHandling:
    """If sys.stdout/stderr/stdin have been replaced with streams that
    don't support reconfigure (e.g. by a test harness), the bootstrap
    must degrade gracefully rather than crash.

    These tests force the Windows code path on every host, so each one
    pins the env vars through ``monkeypatch`` and swaps out all three
    standard streams. Without that, a POSIX run would leak
    PYTHONUTF8 / PYTHONIOENCODING into ``os.environ`` and reconfigure the
    real stderr/stdin of the test process.
    """

    @staticmethod
    def _forced_windows_module(monkeypatch):
        """Return a fresh module with the Windows path armed.

        The env vars are set via ``monkeypatch`` first, so the bootstrap's
        ``setdefault()`` calls change nothing and the previous environment
        is restored at teardown.
        """
        monkeypatch.setenv("PYTHONUTF8", "1")
        monkeypatch.setenv("PYTHONIOENCODING", "utf-8")
        hb = _fresh_import()
        monkeypatch.setattr(hb, "_IS_WINDOWS", True)
        monkeypatch.setattr(hb, "_bootstrap_applied", False)
        return hb

    def test_non_reconfigurable_stream_does_not_crash(self, monkeypatch):
        """Replace sys.stdout with a BytesIO (no reconfigure method),
        then run the bootstrap and make sure it doesn't raise and still
        reconfigures the remaining streams."""
        hb = self._forced_windows_module(monkeypatch)

        class _RecordingStream:
            """Stand-in stream that records each reconfigure() call."""

            def __init__(self):
                self.calls = []

            def reconfigure(self, **kwargs):
                self.calls.append(kwargs)

        fake_stderr = _RecordingStream()
        fake_stdin = _RecordingStream()
        monkeypatch.setattr(sys, "stdout", io.BytesIO())  # no .reconfigure
        monkeypatch.setattr(sys, "stderr", fake_stderr)
        monkeypatch.setattr(sys, "stdin", fake_stdin)

        # Must not raise, and must report that the bootstrap ran.
        assert hb.apply_windows_utf8_bootstrap() is True
        # The unusable stdout must not stop the other streams from being
        # reconfigured.
        assert [call.get("encoding") for call in fake_stderr.calls] == ["utf-8"]
        assert [call.get("encoding") for call in fake_stdin.calls] == ["utf-8"]

    def test_reconfigure_oserror_is_caught(self, monkeypatch):
        """If reconfigure() itself raises (closed stream, etc.), swallow
        the error — the env-var half of the fix still applies."""
        hb = self._forced_windows_module(monkeypatch)

        class _BrokenStream:
            encoding = "utf-8"

            def reconfigure(self, **kwargs):
                raise OSError("simulated: stream already closed")

        for stream_name in ("stdout", "stderr", "stdin"):
            monkeypatch.setattr(sys, stream_name, _BrokenStream())

        # Must not raise; the bootstrap still counts as applied.
        assert hb.apply_windows_utf8_bootstrap() is True
        assert hb._bootstrap_applied is True
|
||||||
|
|
||||||
|
|
||||||
|
class TestEntryPointsImportBootstrap:
    """Every Hermes entry point must import hermes_bootstrap as its
    first non-docstring import. We check this by parsing source files
    rather than invoking the entry points (which would require a full
    agent context)."""

    # Entry points that invoke Hermes as a process. Each one must
    # import hermes_bootstrap before doing any file I/O or stdout writes.
    ENTRY_POINTS = [
        "hermes_cli/main.py",      # hermes CLI (console_script)
        "run_agent.py",            # hermes-agent (console_script)
        "acp_adapter/entry.py",    # hermes-acp (console_script)
        "gateway/run.py",          # gateway
        "batch_runner.py",         # batch mode
        "cli.py",                  # legacy direct-launch CLI
    ]

    @staticmethod
    def _first_top_level_import(source, filename="<entry point>"):
        """Return the module named by the first top-level import in *source*.

        ``from __future__ import ...`` statements are skipped: Python
        requires them to come before every other import, so they cannot
        be a load-order violation.

        For ``import a, b`` the first name is returned; for a relative
        ``from . import x`` the result is ``""``. Returns ``None`` when
        there is no top-level import at all.
        """
        import ast

        for node in ast.parse(source, filename=filename).body:
            if isinstance(node, ast.ImportFrom):
                if node.module == "__future__":
                    continue
                return node.module or ""
            if isinstance(node, ast.Import):
                return node.names[0].name
        return None

    @pytest.mark.parametrize("path", ENTRY_POINTS)
    def test_entry_point_imports_bootstrap(self, path):
        """The first top-level import statement of the file must be
        'import hermes_bootstrap'.

        The check works on the AST, so the module docstring (however
        long) and comment lines are ignored automatically.
        """
        import pathlib

        # Tests live at tests/test_hermes_bootstrap.py, so the repo root
        # is one directory up from this file's directory.
        repo_root = pathlib.Path(__file__).resolve().parent.parent
        full_path = repo_root / path
        assert full_path.exists(), f"entry point missing: {full_path}"

        first_import_name = self._first_top_level_import(
            full_path.read_text(encoding="utf-8"),
            filename=str(full_path),
        )

        assert first_import_name is not None, (
            f"{path}: no top-level imports found at all"
        )
        assert first_import_name == "hermes_bootstrap", (
            f"{path}: first top-level import is {first_import_name!r}, "
            "but it must be 'hermes_bootstrap' so UTF-8 stdio is "
            "configured before anything else initializes. Move the "
            "'import hermes_bootstrap' line to be the first import."
        )
|
||||||
Loading…
Add table
Add a link
Reference in a new issue