codebase: add encoding='utf-8' to all bare open() calls (PLW1514)

Closes the last Python-on-Windows UTF-8 exposure by making every
text-mode open() call explicit about its encoding.

Before: on Windows, bare open(path, 'r') defaults to the system
locale encoding (cp1252 on US-locale installs).  That means reading
any config/yaml/markdown/json file with non-ASCII content either
crashes with UnicodeDecodeError or silently mis-decodes bytes.

After: all 89 affected call sites in production code now pass
encoding='utf-8' explicitly.  Works identically on every platform
and every locale, no surprise behavior.

Mechanical sweep via:
  ruff check --preview --extend-select PLW1514 --unsafe-fixes --fix     --exclude 'tests,venv,.venv,node_modules,website,optional-skills,               skills,tinker-atropos,plugins' .

All 89 fixes have the same shape: open(x) or open(x, mode) became
open(x, encoding='utf-8') or open(x, mode, encoding='utf-8').  Nothing
else changed.  Every modified file still parses and the Windows/sandbox
test suite is still green (85 passed, 14 skipped, 0 failed across
tests/tools/test_code_execution_windows_env.py +
tests/tools/test_code_execution_modes.py + tests/tools/test_env_passthrough.py +
tests/test_hermes_bootstrap.py).

Scope notes:
  - tests/ excluded: test fixtures can use locale encoding intentionally
    (exercising edge cases).  If we want to tighten tests later that's
    a separate PR.
  - plugins/ excluded: plugin-specific conventions may differ; plugin
    authors own their code.
  - optional-skills/ and skills/ excluded: skill scripts are user-authored
    and we don't want to mass-edit them.
  - website/ and tinker-atropos/ excluded: vendored / generated content.

46 files touched, 89 +/- lines (symmetric replacement).  No behavior
change on POSIX or on Windows when the file is ASCII; bug fix on
Windows when the file contains non-ASCII.
This commit is contained in:
Teknium 2026-05-07 19:24:45 -07:00
parent d94fb47717
commit cbce5e93fc
46 changed files with 89 additions and 89 deletions

View file

@ -81,7 +81,7 @@ def build_catalog() -> dict:
def main() -> int:
catalog = build_catalog()
os.makedirs(os.path.dirname(OUTPUT_PATH), exist_ok=True)
with open(OUTPUT_PATH, "w") as fh:
with open(OUTPUT_PATH, "w", encoding="utf-8") as fh:
json.dump(catalog, fh, indent=2)
fh.write("\n")

View file

@ -304,7 +304,7 @@ def main():
}
os.makedirs(os.path.dirname(OUTPUT_PATH), exist_ok=True)
with open(OUTPUT_PATH, "w") as f:
with open(OUTPUT_PATH, "w", encoding="utf-8") as f:
json.dump(index, f, separators=(",", ":"), ensure_ascii=False)
elapsed = time.time() - overall_start

View file

@ -291,7 +291,7 @@ def check_release_file(release_file, all_contributors):
missing: set of handles NOT found in the file
"""
try:
content = Path(release_file).read_text()
content = Path(release_file).read_text(encoding="utf-8")
except FileNotFoundError:
print(f" [error] Release file not found: {release_file}", file=sys.stderr)
return set(), set(all_contributors)

View file

@ -242,7 +242,7 @@ def check_config(groq_key, eleven_key):
if config_path.exists():
try:
import yaml
with open(config_path) as f:
with open(config_path, encoding="utf-8") as f:
cfg = yaml.safe_load(f) or {}
stt_provider = cfg.get("stt", {}).get("provider", "local")

View file

@ -111,7 +111,7 @@ def summarize(log: Path, since_ts_ms: int) -> dict[str, Any]:
frame_events: list[dict[str, Any]] = []
if not log.exists():
return {"error": f"no log at {log}", "react": [], "frame": []}
for line in log.read_text().splitlines():
for line in log.read_text(encoding="utf-8").splitlines():
line = line.strip()
if not line:
continue
@ -505,7 +505,7 @@ def main() -> int:
if args.save:
path = Path(f"/tmp/perf-{args.save}.json")
path.write_text(json.dumps(metrics, indent=2))
path.write_text(json.dumps(metrics, indent=2), encoding="utf-8")
print(f"\n• saved: {path}")
if args.compare:

View file

@ -1365,7 +1365,7 @@ def main():
)
if args.output:
Path(args.output).write_text(changelog)
Path(args.output).write_text(changelog, encoding="utf-8")
print(f"Changelog written to {args.output}")
else:
print(changelog)