codebase: add encoding='utf-8' to all bare open() calls (PLW1514)

Closes the last Python-on-Windows UTF-8 exposure by making every
text-mode open() call explicit about its encoding.

Before: on Windows, bare open(path, 'r') defaults to the system
locale encoding (cp1252 on US-locale installs).  That means reading
any config/yaml/markdown/json file with non-ASCII content either
crashes with UnicodeDecodeError or silently mis-decodes bytes.

After: all 89 affected call sites in production code now pass
encoding='utf-8' explicitly.  Works identically on every platform
and every locale, no surprise behavior.

Mechanical sweep via:
  ruff check --preview --extend-select PLW1514 --unsafe-fixes --fix \
    --exclude 'tests,venv,.venv,node_modules,website,optional-skills,skills,tinker-atropos,plugins' .

All 89 fixes have the same shape: open(x) or open(x, mode) became
open(x, encoding='utf-8') or open(x, mode, encoding='utf-8').  Nothing
else changed.  Every modified file still parses and the Windows/sandbox
test suite is still green (85 passed, 14 skipped, 0 failed across
tests/tools/test_code_execution_windows_env.py +
tests/tools/test_code_execution_modes.py + tests/tools/test_env_passthrough.py +
tests/test_hermes_bootstrap.py).

Scope notes:
  - tests/ excluded: test fixtures can use locale encoding intentionally
    (exercising edge cases).  If we want to tighten tests later that's
    a separate PR.
  - plugins/ excluded: plugin-specific conventions may differ; plugin
    authors own their code.
  - optional-skills/ and skills/ excluded: skill scripts are user-authored
    and we don't want to mass-edit them.
  - website/ and tinker-atropos/ excluded: vendored / generated content.

46 files touched, 89 +/- lines (symmetric replacement).  No behavior
change on POSIX or on Windows when the file is ASCII; bug fix on
Windows when the file contains non-ASCII.
This commit is contained in:
Teknium 2026-05-07 19:24:45 -07:00
parent d94fb47717
commit cbce5e93fc
46 changed files with 89 additions and 89 deletions

View file

@@ -751,7 +751,7 @@ def _run_chrome_fallback_command(
proc.wait()
return {"success": False, "error": f"Chrome fallback '{cmd}' timed out"}
try:
with open(stdout_path, "r") as f:
with open(stdout_path, "r", encoding="utf-8") as f:
stdout = f.read().strip()
if stdout:
return json.loads(stdout.split("\n")[-1])
@@ -1110,7 +1110,7 @@ def _write_owner_pid(socket_dir: str, session_name: str) -> None:
"""
try:
path = os.path.join(socket_dir, f"{session_name}.owner_pid")
with open(path, "w") as f:
with open(path, "w", encoding="utf-8") as f:
f.write(str(os.getpid()))
except OSError as exc:
logger.debug("Could not write owner_pid file for %s: %s",
@@ -1174,7 +1174,7 @@ def _reap_orphaned_browser_sessions():
owner_alive: Optional[bool] = None # None = owner_pid missing/unreadable
if os.path.isfile(owner_pid_file):
try:
owner_pid = int(Path(owner_pid_file).read_text().strip())
owner_pid = int(Path(owner_pid_file).read_text(encoding="utf-8").strip())
try:
os.kill(owner_pid, 0)
owner_alive = True
@@ -1209,7 +1209,7 @@ def _reap_orphaned_browser_sessions():
continue
try:
daemon_pid = int(Path(pid_file).read_text().strip())
daemon_pid = int(Path(pid_file).read_text(encoding="utf-8").strip())
except (ValueError, OSError):
shutil.rmtree(socket_dir, ignore_errors=True)
continue
@@ -1834,7 +1834,7 @@ def _run_browser_command(
# Detect AppArmor user namespace restrictions (Ubuntu 23.10+)
_userns_restrict = "/proc/sys/kernel/apparmor_restrict_unprivileged_userns"
try:
with open(_userns_restrict) as _f:
with open(_userns_restrict, encoding="utf-8") as _f:
if _f.read().strip() == "1":
_needs_sandbox_bypass = True
logger.debug(
@@ -1879,9 +1879,9 @@ def _run_browser_command(
result = {"success": False, "error": f"Command timed out after {timeout} seconds"}
# Fall through to fallback check below
else:
with open(stdout_path, "r") as f:
with open(stdout_path, "r", encoding="utf-8") as f:
stdout = f.read()
with open(stderr_path, "r") as f:
with open(stderr_path, "r", encoding="utf-8") as f:
stderr = f.read()
returncode = proc.returncode
@@ -3180,7 +3180,7 @@ def _cleanup_single_browser_session(task_id: str) -> None:
pid_file = os.path.join(socket_dir, f"{session_name}.pid")
if os.path.isfile(pid_file):
try:
daemon_pid = int(Path(pid_file).read_text().strip())
daemon_pid = int(Path(pid_file).read_text(encoding="utf-8").strip())
os.kill(daemon_pid, signal.SIGTERM)
logger.debug("Killed daemon pid %s for %s", daemon_pid, session_name)
except (ProcessLookupError, ValueError, PermissionError, OSError):
@@ -3323,7 +3323,7 @@ def _running_in_docker() -> bool:
if os.path.exists("/.dockerenv"):
return True
try:
with open("/proc/1/cgroup", "rt") as fp:
with open("/proc/1/cgroup", "rt", encoding="utf-8") as fp:
return "docker" in fp.read()
except OSError:
return False

View file

@@ -517,7 +517,7 @@ def _rpc_server_loop(
# their status prints don't leak into the CLI spinner.
try:
_real_stdout, _real_stderr = sys.stdout, sys.stderr
devnull = open(os.devnull, "w")
devnull = open(os.devnull, "w", encoding="utf-8")
try:
sys.stdout = devnull
sys.stderr = devnull
@@ -791,7 +791,7 @@ def _rpc_poll_loop(
# Dispatch through the standard tool handler
try:
_real_stdout, _real_stderr = sys.stdout, sys.stderr
devnull = open(os.devnull, "w")
devnull = open(os.devnull, "w", encoding="utf-8")
try:
sys.stdout = devnull
sys.stderr = devnull

View file

@@ -158,7 +158,7 @@ def _load_json_store(path: Path) -> dict:
"""Load a JSON file as a dict, returning ``{}`` on any error."""
if path.exists():
try:
return json.loads(path.read_text())
return json.loads(path.read_text(encoding="utf-8"))
except Exception:
pass
return {}
@@ -167,7 +167,7 @@ def _load_json_store(path: Path) -> dict:
def _save_json_store(path: Path, data: dict) -> None:
"""Write *data* as pretty-printed JSON to *path*."""
path.parent.mkdir(parents=True, exist_ok=True)
path.write_text(json.dumps(data, indent=2))
path.write_text(json.dumps(data, indent=2), encoding="utf-8")
def _file_mtime_key(host_path: str) -> tuple[float, int] | None:

View file

@@ -284,7 +284,7 @@ class FileSyncManager:
# Windows: no flock — run without serialization
self._sync_back_impl()
return
lock_fd = open(lock_path, "w")
lock_fd = open(lock_path, "w", encoding="utf-8")
try:
fcntl.flock(lock_fd, fcntl.LOCK_EX)
self._sync_back_impl()

View file

@@ -562,7 +562,7 @@ class LocalEnvironment(BaseEnvironment):
``_run_bash`` recovery path will resolve a safe fallback if needed.
"""
try:
with open(self._cwd_file) as f:
with open(self._cwd_file, encoding="utf-8") as f:
cwd_path = f.read().strip()
if cwd_path and os.path.isdir(cwd_path):
self.cwd = cwd_path

View file

@@ -1992,7 +1992,7 @@ def _snapshot_child_pids() -> set:
# Linux: read from /proc
try:
children_path = f"/proc/{my_pid}/task/{my_pid}/children"
with open(children_path) as f:
with open(children_path, encoding="utf-8") as f:
return {int(p) for p in f.read().split() if p.strip()}
except (FileNotFoundError, OSError, ValueError):
pass

View file

@@ -169,7 +169,7 @@ def _scan_environments() -> List[EnvironmentInfo]:
continue
try:
with open(py_file, "r") as f:
with open(py_file, "r", encoding="utf-8") as f:
tree = ast.parse(f.read())
for node in ast.walk(tree):
@@ -333,7 +333,7 @@ async def _spawn_training_run(run_state: RunState, config_path: Path):
# File must stay open while the subprocess runs; we store the handle
# on run_state so _stop_training_run() can close it when done.
api_log_file = open(api_log, "w") # closed by _stop_training_run
api_log_file = open(api_log, "w", encoding="utf-8") # closed by _stop_training_run
run_state.api_log_file = api_log_file
run_state.api_process = subprocess.Popen(
["run-api"],
@@ -356,7 +356,7 @@ async def _spawn_training_run(run_state: RunState, config_path: Path):
# Step 2: Start the Tinker trainer
logger.info("[%s] Starting Tinker trainer: launch_training.py --config %s", run_id, config_path)
trainer_log_file = open(trainer_log, "w") # closed by _stop_training_run
trainer_log_file = open(trainer_log, "w", encoding="utf-8") # closed by _stop_training_run
run_state.trainer_log_file = trainer_log_file
run_state.trainer_process = subprocess.Popen(
[sys.executable, "launch_training.py", "--config", str(config_path)],
@@ -397,7 +397,7 @@ async def _spawn_training_run(run_state: RunState, config_path: Path):
logger.info("[%s] Starting environment: %s serve", run_id, env_info.file_path)
env_log_file = open(env_log, "w") # closed by _stop_training_run
env_log_file = open(env_log, "w", encoding="utf-8") # closed by _stop_training_run
run_state.env_log_file = env_log_file
run_state.env_process = subprocess.Popen(
[sys.executable, str(env_info.file_path), "serve", "--config", str(config_path)],
@@ -777,7 +777,7 @@ async def rl_start_training() -> str:
if "wandb_name" in _current_config and _current_config["wandb_name"]:
run_config["env"]["wandb_name"] = _current_config["wandb_name"]
with open(config_path, "w") as f:
with open(config_path, "w", encoding="utf-8") as f:
yaml.dump(run_config, f, default_flow_style=False)
# Create run state
@@ -1206,7 +1206,7 @@ async def rl_test_inference(
stderr_text = "\n".join(stderr_lines)
# Write logs to files for inspection outside CLI
with open(log_file, "w") as f:
with open(log_file, "w", encoding="utf-8") as f:
f.write(f"Command: {cmd_display}\n")
f.write(f"Working dir: {TINKER_ATROPOS_ROOT}\n")
f.write(f"Return code: {process.returncode}\n")
@@ -1238,7 +1238,7 @@ async def rl_test_inference(
# Parse the output JSONL file
if output_file.exists():
# Read JSONL file (one JSON object per line = one step)
with open(output_file, "r") as f:
with open(output_file, "r", encoding="utf-8") as f:
for line in f:
line = line.strip()
if not line:

View file

@@ -219,7 +219,7 @@ class GitHubAuth:
key_file = Path(key_path)
if not key_file.exists():
return None
private_key = key_file.read_text()
private_key = key_file.read_text(encoding="utf-8")
now = int(time.time())
payload = {
@@ -2667,7 +2667,7 @@ def append_audit_log(action: str, skill_name: str, source: str,
parts.append(extra)
line = " ".join(parts) + "\n"
try:
with open(AUDIT_LOG, "a") as f:
with open(AUDIT_LOG, "a", encoding="utf-8") as f:
f.write(line)
except OSError as e:
logger.debug("Could not write audit log: %s", e)

View file

@@ -126,7 +126,7 @@ def _read_failure_reason() -> str | None:
mtime = os.path.getmtime(p)
if (time.time() - mtime) >= _MARKER_TTL:
return None
with open(p, "r") as f:
with open(p, "r", encoding="utf-8") as f:
return f.read().strip()
except OSError:
return None
@@ -160,7 +160,7 @@ def _mark_install_failed(reason: str = ""):
try:
p = _failure_marker_path()
os.makedirs(os.path.dirname(p), exist_ok=True)
with open(p, "w") as f:
with open(p, "w", encoding="utf-8") as f:
f.write(reason)
except OSError:
pass
@@ -257,7 +257,7 @@ def _verify_cosign(checksums_path: str, sig_path: str, cert_path: str) -> bool |
def _verify_checksum(archive_path: str, checksums_path: str, archive_name: str) -> bool:
"""Verify SHA-256 of the archive against checksums.txt."""
expected = None
with open(checksums_path) as f:
with open(checksums_path, encoding="utf-8") as f:
for line in f:
# Format: "<hash> <filename>"
parts = line.strip().split(" ", 1)

View file

@@ -110,7 +110,7 @@ def detect_audio_environment() -> dict:
# WSL detection — PulseAudio bridge makes audio work in WSL.
# Only block if PULSE_SERVER is not configured.
try:
with open('/proc/version', 'r') as f:
with open('/proc/version', 'r', encoding="utf-8") as f:
if 'microsoft' in f.read().lower():
if os.environ.get('PULSE_SERVER'):
notices.append("Running in WSL with PulseAudio bridge")