fix(tests): pin UTF-8 encoding when reading source files on Windows

Three tests in tests/agent/test_auxiliary_config_bridge.py read
in-tree source files (gateway/run.py and cli.py) via
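Path.read_text() with no encoding argument.  The default falls
back to the system locale, which on Western Windows installs is
cp1252, and the read fails as soon as the source contains a byte
that cp1252 leaves undefined (0x81, 0x8d, 0x8f, 0x90 or 0x9d),
e.g. inside a multi-byte UTF-8 character such as an emoji in a
comment (an em-dash, by contrast, decodes without error but
yields mojibake):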

    UnicodeDecodeError: 'charmap' codec can't decode byte 0x8f
    in position 41190: character maps to <undefined>

Linux CI doesn't catch this because the default Linux locale is
UTF-8.  Windows contributors hit it on every run of the test suite.

Pin encoding="utf-8" on the three call sites that read repo
source files.  This matches the existing precedent in
hermes_cli/doctor.py:363, where the same pattern (with an
explanatory comment) was applied to fix the .env read on
non-UTF-8 Windows locales.

Affected tests now pass on Windows + Python 3.12:
  - TestGatewayBridgeCodeParity.test_gateway_has_auxiliary_bridge
  - TestGatewayBridgeCodeParity.test_gateway_no_compression_env_bridge
  - TestCLIDefaultsHaveAuxiliaryKeys.test_cli_defaults_can_merge_auxiliary
This commit is contained in:
obafemiferanmi1999 2026-05-07 19:56:53 +01:00 committed by kshitij
parent 5d2a75ddf2
commit 3801825efd

View file

@ -200,7 +200,11 @@ class TestGatewayBridgeCodeParity:
def test_gateway_has_auxiliary_bridge(self):
"""The gateway config bridge must include auxiliary.* bridging."""
gateway_path = Path(__file__).parent.parent.parent / "gateway" / "run.py"
content = gateway_path.read_text()
# Pin encoding to UTF-8: source files in this repo are UTF-8, but
# Path.read_text() defaults to the system locale — which is cp1252
# on most Western Windows installs and crashes as soon as the file
# contains any non-ASCII byte (e.g. an em-dash in a comment).
content = gateway_path.read_text(encoding="utf-8")
# Check for key patterns that indicate the bridge is present
assert "AUXILIARY_VISION_PROVIDER" in content
assert "AUXILIARY_VISION_MODEL" in content
@ -214,7 +218,9 @@ class TestGatewayBridgeCodeParity:
def test_gateway_no_compression_env_bridge(self):
"""Gateway should NOT bridge compression config to env vars (config-only)."""
gateway_path = Path(__file__).parent.parent.parent / "gateway" / "run.py"
content = gateway_path.read_text()
# See note in test_gateway_has_auxiliary_bridge — pin UTF-8 so the
# test runs on Windows where the default locale is cp1252.
content = gateway_path.read_text(encoding="utf-8")
assert "CONTEXT_COMPRESSION_PROVIDER" not in content
assert "CONTEXT_COMPRESSION_MODEL" not in content
@ -289,7 +295,9 @@ class TestCLIDefaultsHaveAuxiliaryKeys:
# So auxiliary config from config.yaml gets merged even though
# cli.py's defaults dict doesn't define it.
import cli as _cli_mod
source = Path(_cli_mod.__file__).read_text()
# See note in test_gateway_has_auxiliary_bridge — pin UTF-8 so the
# test runs on Windows where the default locale is cp1252.
source = Path(_cli_mod.__file__).read_text(encoding="utf-8")
assert "auxiliary_config = defaults.get(\"auxiliary\"" in source
assert "AUXILIARY_VISION_PROVIDER" in source
assert "AUXILIARY_VISION_MODEL" in source