codebase: add encoding='utf-8' to all bare open() calls (PLW1514)

Closes the last Python-on-Windows UTF-8 exposure by making every text-mode open() call explicit about its encoding.

Before: on Windows, a bare open(path, 'r') defaults to the system locale encoding (cp1252 on US-locale installs). That means reading any config/yaml/markdown/json file with non-ASCII content either crashes with UnicodeDecodeError or silently mis-decodes bytes.

After: all 89 affected call sites in production code now pass encoding='utf-8' explicitly. Works identically on every platform and every locale, no surprise behavior.

Mechanical sweep via:

    ruff check --preview --extend-select PLW1514 --unsafe-fixes --fix --exclude 'tests,venv,.venv,node_modules,website,optional-skills,skills,tinker-atropos,plugins' .

All 89 fixes have the same shape: open(x) or open(x, mode) became open(x, encoding='utf-8') or open(x, mode, encoding='utf-8'). Nothing else changed. Every modified file still parses and the Windows/sandbox test suite is still green (85 passed, 14 skipped, 0 failed across tests/tools/test_code_execution_windows_env.py + tests/tools/test_code_execution_modes.py + tests/tools/test_env_passthrough.py + tests/test_hermes_bootstrap.py).

Scope notes:
- tests/ excluded: test fixtures can use locale encoding intentionally (exercising edge cases). If we want to tighten tests later, that's a separate PR.
- plugins/ excluded: plugin-specific conventions may differ; plugin authors own their code.
- optional-skills/ and skills/ excluded: skill scripts are user-authored and we don't want to mass-edit them.
- website/ and tinker-atropos/ excluded: vendored / generated content.

46 files touched, 89 +/- lines (symmetric replacement). No behavior change on POSIX, or on Windows when the file is ASCII; bug fix on Windows when the file contains non-ASCII.
2026-05-11 03:31:55 +00:00 · 2026-05-07 19:24:45 -07:00 · 2026-05-07 19:24:45 -07:00 · 9c914c01c8
commit 9c914c01c8
parent 6098272454
46 changed files with 89 additions and 89 deletions
--- a/tools/rl_training_tool.py
+++ b/tools/rl_training_tool.py
@@ -169,7 +169,7 @@ def _scan_environments() -> List[EnvironmentInfo]:
            continue
        
        try:
-            with open(py_file, "r") as f:
+            with open(py_file, "r", encoding="utf-8") as f:
                tree = ast.parse(f.read())
            
            for node in ast.walk(tree):
@@ -333,7 +333,7 @@ async def _spawn_training_run(run_state: RunState, config_path: Path):
        
        # File must stay open while the subprocess runs; we store the handle
        # on run_state so _stop_training_run() can close it when done.
-        api_log_file = open(api_log, "w")  # closed by _stop_training_run
+        api_log_file = open(api_log, "w", encoding="utf-8")  # closed by _stop_training_run
        run_state.api_log_file = api_log_file
        run_state.api_process = subprocess.Popen(
            ["run-api"],
@@ -356,7 +356,7 @@ async def _spawn_training_run(run_state: RunState, config_path: Path):
        # Step 2: Start the Tinker trainer
        logger.info("[%s] Starting Tinker trainer: launch_training.py --config %s", run_id, config_path)
        
-        trainer_log_file = open(trainer_log, "w")  # closed by _stop_training_run
+        trainer_log_file = open(trainer_log, "w", encoding="utf-8")  # closed by _stop_training_run
        run_state.trainer_log_file = trainer_log_file
        run_state.trainer_process = subprocess.Popen(
            [sys.executable, "launch_training.py", "--config", str(config_path)],
@@ -397,7 +397,7 @@ async def _spawn_training_run(run_state: RunState, config_path: Path):
        
        logger.info("[%s] Starting environment: %s serve", run_id, env_info.file_path)
        
-        env_log_file = open(env_log, "w")  # closed by _stop_training_run
+        env_log_file = open(env_log, "w", encoding="utf-8")  # closed by _stop_training_run
        run_state.env_log_file = env_log_file
        run_state.env_process = subprocess.Popen(
            [sys.executable, str(env_info.file_path), "serve", "--config", str(config_path)],
@@ -777,7 +777,7 @@ async def rl_start_training() -> str:
    if "wandb_name" in _current_config and _current_config["wandb_name"]:
        run_config["env"]["wandb_name"] = _current_config["wandb_name"]
    
-    with open(config_path, "w") as f:
+    with open(config_path, "w", encoding="utf-8") as f:
        yaml.dump(run_config, f, default_flow_style=False)
    
    # Create run state
@@ -1206,7 +1206,7 @@ async def rl_test_inference(
            stderr_text = "\n".join(stderr_lines)
            
            # Write logs to files for inspection outside CLI
-            with open(log_file, "w") as f:
+            with open(log_file, "w", encoding="utf-8") as f:
                f.write(f"Command: {cmd_display}\n")
                f.write(f"Working dir: {TINKER_ATROPOS_ROOT}\n")
                f.write(f"Return code: {process.returncode}\n")
@@ -1238,7 +1238,7 @@ async def rl_test_inference(
                # Parse the output JSONL file
                if output_file.exists():
                    # Read JSONL file (one JSON object per line = one step)
-                    with open(output_file, "r") as f:
+                    with open(output_file, "r", encoding="utf-8") as f:
                        for line in f:
                            line = line.strip()
                            if not line: