From 3801825efd40465ec97e9b1f285cd0e009722dc8 Mon Sep 17 00:00:00 2001 From: obafemiferanmi1999 Date: Thu, 7 May 2026 19:56:53 +0100 Subject: [PATCH] fix(tests): pin UTF-8 encoding when reading source files on Windows Three tests in tests/agent/test_auxiliary_config_bridge.py read in-tree source files (gateway/run.py and cli.py) via Path.read_text() with no encoding argument. The default falls back to the system locale, which on Western Windows installs is cp1252, and the read fails as soon as the source contains any byte that isn't valid cp1252 (e.g. an em-dash in a comment): UnicodeDecodeError: 'charmap' codec can't decode byte 0x8f in position 41190: character maps to <undefined> Linux CI doesn't catch this because the default Linux locale is UTF-8. Windows contributors hit it on every run of the test suite. Pin encoding="utf-8" on the three call sites that read repo source files. This matches the existing precedent in hermes_cli/doctor.py:363, where the same pattern (with an explanatory comment) was applied to fix the .env read on non-UTF-8 Windows locales. 
Affected tests now pass on Windows + Python 3.12: - TestGatewayBridgeCodeParity.test_gateway_has_auxiliary_bridge - TestGatewayBridgeCodeParity.test_gateway_no_compression_env_bridge - TestCLIDefaultsHaveAuxiliaryKeys.test_cli_defaults_can_merge_auxiliary --- tests/agent/test_auxiliary_config_bridge.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/tests/agent/test_auxiliary_config_bridge.py b/tests/agent/test_auxiliary_config_bridge.py index 66350519b0b..11fe9f71c23 100644 --- a/tests/agent/test_auxiliary_config_bridge.py +++ b/tests/agent/test_auxiliary_config_bridge.py @@ -200,7 +200,11 @@ class TestGatewayBridgeCodeParity: def test_gateway_has_auxiliary_bridge(self): """The gateway config bridge must include auxiliary.* bridging.""" gateway_path = Path(__file__).parent.parent.parent / "gateway" / "run.py" - content = gateway_path.read_text() + # Pin encoding to UTF-8: source files in this repo are UTF-8, but + # Path.read_text() defaults to the system locale — which is cp1252 + # on most Western Windows installs and crashes as soon as the file + # contains any non-ASCII byte (e.g. an em-dash in a comment). + content = gateway_path.read_text(encoding="utf-8") # Check for key patterns that indicate the bridge is present assert "AUXILIARY_VISION_PROVIDER" in content assert "AUXILIARY_VISION_MODEL" in content @@ -214,7 +218,9 @@ class TestGatewayBridgeCodeParity: def test_gateway_no_compression_env_bridge(self): """Gateway should NOT bridge compression config to env vars (config-only).""" gateway_path = Path(__file__).parent.parent.parent / "gateway" / "run.py" - content = gateway_path.read_text() + # See note in test_gateway_has_auxiliary_bridge — pin UTF-8 so the + # test runs on Windows where the default locale is cp1252. 
+ content = gateway_path.read_text(encoding="utf-8") assert "CONTEXT_COMPRESSION_PROVIDER" not in content assert "CONTEXT_COMPRESSION_MODEL" not in content @@ -289,7 +295,9 @@ class TestCLIDefaultsHaveAuxiliaryKeys: # So auxiliary config from config.yaml gets merged even though # cli.py's defaults dict doesn't define it. import cli as _cli_mod - source = Path(_cli_mod.__file__).read_text() + # See note in test_gateway_has_auxiliary_bridge — pin UTF-8 so the + # test runs on Windows where the default locale is cp1252. + source = Path(_cli_mod.__file__).read_text(encoding="utf-8") assert "auxiliary_config = defaults.get(\"auxiliary\"" in source assert "AUXILIARY_VISION_PROVIDER" in source assert "AUXILIARY_VISION_MODEL" in source