codebase: add encoding='utf-8' to all bare open() calls (PLW1514)

Closes the last Python-on-Windows UTF-8 exposure by making every
text-mode open() call explicit about its encoding.

Before: on Windows, bare open(path, 'r') defaults to the system
locale encoding (cp1252 on US-locale installs).  That means reading
any config/yaml/markdown/json file with non-ASCII content either
crashes with UnicodeDecodeError or silently mis-decodes bytes.

After: all 89 affected call sites in production code now pass
encoding='utf-8' explicitly.  Works identically on every platform
and every locale, no surprise behavior.

Mechanical sweep via:
  ruff check --preview --extend-select PLW1514 --unsafe-fixes --fix \
    --exclude 'tests,venv,.venv,node_modules,website,optional-skills,skills,tinker-atropos,plugins' .

All 89 fixes have the same shape: open(x) or open(x, mode) became
open(x, encoding='utf-8') or open(x, mode, encoding='utf-8').  Nothing
else changed.  Every modified file still parses and the Windows/sandbox
test suite is still green (85 passed, 14 skipped, 0 failed across
tests/tools/test_code_execution_windows_env.py +
tests/tools/test_code_execution_modes.py + tests/tools/test_env_passthrough.py +
tests/test_hermes_bootstrap.py).

Scope notes:
  - tests/ excluded: test fixtures can use locale encoding intentionally
    (exercising edge cases).  If we want to tighten tests later that's
    a separate PR.
  - plugins/ excluded: plugin-specific conventions may differ; plugin
    authors own their code.
  - optional-skills/ and skills/ excluded: skill scripts are user-authored
    and we don't want to mass-edit them.
  - website/ and tinker-atropos/ excluded: vendored / generated content.

46 files touched, 89 +/- lines (symmetric replacement).  No behavior
change on POSIX or on Windows when the file is ASCII; bug fix on
Windows when the file contains non-ASCII.
This commit is contained in:
Teknium 2026-05-07 19:24:45 -07:00
parent 6098272454
commit 9c914c01c8
46 changed files with 89 additions and 89 deletions

View file

@ -573,7 +573,7 @@ def create_quick_snapshot(
"total_size": sum(manifest.values()),
"files": manifest,
}
with open(snap_dir / "manifest.json", "w") as f:
with open(snap_dir / "manifest.json", "w", encoding="utf-8") as f:
json.dump(meta, f, indent=2)
# Auto-prune
@ -599,7 +599,7 @@ def list_quick_snapshots(
manifest_path = d / "manifest.json"
if manifest_path.exists():
try:
with open(manifest_path) as f:
with open(manifest_path, encoding="utf-8") as f:
results.append(json.load(f))
except (json.JSONDecodeError, OSError):
results.append({"id": d.name, "file_count": 0, "total_size": 0})
@ -629,7 +629,7 @@ def restore_quick_snapshot(
if not manifest_path.exists():
return False
with open(manifest_path) as f:
with open(manifest_path, encoding="utf-8") as f:
meta = json.load(f)
restored = 0

View file

@ -212,7 +212,7 @@ def get_container_exec_info() -> Optional[dict]:
try:
info = {}
with open(container_mode_file, "r") as f:
with open(container_mode_file, "r", encoding="utf-8") as f:
for line in f:
line = line.strip()
if "=" in line and not line.startswith("#"):
@ -297,7 +297,7 @@ def _is_container() -> bool:
return True
# LXC / cgroup-based detection
try:
with open("/proc/1/cgroup", "r") as f:
with open("/proc/1/cgroup", "r", encoding="utf-8") as f:
cgroup_content = f.read()
if "docker" in cgroup_content or "lxc" in cgroup_content or "kubepods" in cgroup_content:
return True
@ -3452,7 +3452,7 @@ def migrate_config(interactive: bool = True, quiet: bool = False) -> Dict[str, A
if not manifest_file.exists():
continue
try:
with open(manifest_file) as _mf:
with open(manifest_file, encoding="utf-8") as _mf:
manifest = yaml.safe_load(_mf) or {}
except Exception:
manifest = {}

View file

@ -598,7 +598,7 @@ def run_doctor(args):
# Detect stale root-level model keys (known bug source — PR #4329)
try:
import yaml
with open(config_path) as f:
with open(config_path, encoding="utf-8") as f:
raw_config = yaml.safe_load(f) or {}
stale_root_keys = [k for k in ("provider", "base_url") if k in raw_config and isinstance(raw_config[k], str)]
if stale_root_keys:
@ -1399,7 +1399,7 @@ def run_doctor(args):
import yaml as _yaml
_mem_cfg_path = HERMES_HOME / "config.yaml"
if _mem_cfg_path.exists():
with open(_mem_cfg_path) as _f:
with open(_mem_cfg_path, encoding="utf-8") as _f:
_raw_cfg = _yaml.safe_load(_f) or {}
_active_memory_provider = (_raw_cfg.get("memory") or {}).get("provider", "")
except Exception:

View file

@ -205,7 +205,7 @@ def _cmd_test(args) -> None:
if getattr(args, "payload_file", None):
try:
custom = json.loads(Path(args.payload_file).read_text())
custom = json.loads(Path(args.payload_file).read_text(encoding="utf-8"))
if isinstance(custom, dict):
payload.update(custom)
else:

View file

@ -2835,7 +2835,7 @@ def _pid_alive(pid: Optional[int]) -> bool:
# where we have a cheap, deterministic process-state probe.
if sys.platform == "linux":
try:
with open(f"/proc/{int(pid)}/status", "r") as f:
with open(f"/proc/{int(pid)}/status", "r", encoding="utf-8") as f:
for line in f:
if line.startswith("State:"):
# "State:\tZ (zombie)" → dead

View file

@ -69,7 +69,7 @@ def _install_dependencies(provider_name: str) -> None:
try:
import yaml
with open(yaml_path) as f:
with open(yaml_path, encoding="utf-8") as f:
meta = yaml.safe_load(f) or {}
except Exception:
return
@ -377,7 +377,7 @@ def _write_env_vars(env_path: Path, env_writes: dict) -> None:
if key not in updated_keys:
new_lines.append(f"{key}={val}")
env_path.write_text("\n".join(new_lines) + "\n")
env_path.write_text("\n".join(new_lines) + "\n", encoding="utf-8")
# ---------------------------------------------------------------------------

View file

@ -173,7 +173,7 @@ def _read_disk_cache() -> tuple[dict[str, Any] | None, float]:
except (OSError, FileNotFoundError):
return (None, 0.0)
try:
with open(path) as fh:
with open(path, encoding="utf-8") as fh:
data = json.load(fh)
except (OSError, json.JSONDecodeError):
return (None, 0.0)
@ -187,7 +187,7 @@ def _write_disk_cache(data: dict[str, Any]) -> None:
try:
path.parent.mkdir(parents=True, exist_ok=True)
tmp = path.with_suffix(path.suffix + ".tmp")
with open(tmp, "w") as fh:
with open(tmp, "w", encoding="utf-8") as fh:
json.dump(data, fh, indent=2)
fh.write("\n")
atomic_replace(tmp, path)

View file

@ -174,7 +174,7 @@ def run_oneshot(
# Redirect stderr AND stdout to devnull for the entire call tree.
# We'll print the final response to the real stdout at the end.
real_stdout = sys.stdout
devnull = open(os.devnull, "w")
devnull = open(os.devnull, "w", encoding="utf-8")
try:
with redirect_stdout(devnull), redirect_stderr(devnull):

View file

@ -870,7 +870,7 @@ class PluginManager:
if yaml is None:
logger.warning("PyYAML not installed cannot load %s", manifest_file)
return None
data = yaml.safe_load(manifest_file.read_text()) or {}
data = yaml.safe_load(manifest_file.read_text(encoding="utf-8")) or {}
name = data.get("name", plugin_dir.name)
key = f"{prefix}/{plugin_dir.name}" if prefix else name

View file

@ -127,7 +127,7 @@ def _read_manifest(plugin_dir: Path) -> dict:
try:
import yaml
with open(manifest_file) as f:
with open(manifest_file, encoding="utf-8") as f:
return yaml.safe_load(f) or {}
except Exception as e:
logger.warning("Failed to read plugin.yaml in %s: %s", plugin_dir, e)
@ -703,7 +703,7 @@ def _discover_all_plugins() -> list:
description = ""
if yaml:
try:
with open(manifest_file) as f:
with open(manifest_file, encoding="utf-8") as f:
manifest = yaml.safe_load(f) or {}
name = manifest.get("name", d.name)
version = manifest.get("version", "")

View file

@ -354,7 +354,7 @@ def _read_config_model(profile_dir: Path) -> tuple:
return None, None
try:
import yaml
with open(config_path, "r") as f:
with open(config_path, "r", encoding="utf-8") as f:
cfg = yaml.safe_load(f) or {}
model_cfg = cfg.get("model", {})
if isinstance(model_cfg, str):

View file

@ -1257,7 +1257,7 @@ def do_snapshot_export(output_path: str, console: Optional[Console] = None) -> N
sys.stdout.write(payload)
else:
out = Path(output_path)
out.write_text(payload)
out.write_text(payload, encoding="utf-8")
c.print(f"[bold green]Snapshot exported:[/] {out}")
c.print(f"[dim]{len(installed)} skill(s), {len(tap_list)} tap(s)[/]\n")
@ -1274,7 +1274,7 @@ def do_snapshot_import(input_path: str, force: bool = False,
return
try:
snapshot = json.loads(inp.read_text())
snapshot = json.loads(inp.read_text(encoding="utf-8"))
except json.JSONDecodeError:
c.print(f"[bold red]Error:[/] Invalid JSON in {inp}\n")
return

View file

@ -692,7 +692,7 @@ def _tail_lines(path: Path, n: int) -> List[str]:
if not path.exists():
return []
try:
text = path.read_text(errors="replace")
text = path.read_text(encoding="utf-8", errors="replace")
except OSError:
return []
lines = text.splitlines()