From 3af3c4eb8c65c42bdba1a7d9b11266ded5fa6d9f Mon Sep 17 00:00:00 2001 From: Gutslabs <128259593+Gutslabs@users.noreply.github.com> Date: Sun, 10 May 2026 22:20:25 -0700 Subject: [PATCH] fix(misc): three small defensive fixes from PR #1974 Salvages the three substantive low-severity fixes from Gutslabs' #1974 "misc bug fixes" bundle. The other 8 claims in that PR were either already fixed on main with superior implementations (state lock, firecrawl lazy import, fcntl/msvcrt guard, path normalization, schema migrations) or did not survive review. - run_agent: `_materialize_data_url_for_vision` uses `NamedTemporaryFile(delete=False)`; if `base64.b64decode` raises on a corrupt data URL the temp file would persist forever. Wrap the write in try/except and `os.unlink` the temp on failure. - gateway/session: `append_to_transcript` JSONL write had no error handling, so disk-full / read-only-fs / permission errors crashed the message handler. The SQLite write above is the primary store, so swallow OSError on the JSONL fallback with a debug log. - gateway/status: `_read_pid_record` reads `pid_path.read_text()` after an `exists()` check; if the PID file is deleted between the two calls (concurrent gateway restart) we hit an unhandled OSError. Catch it and return None. Adds a regression test for the tempfile cleanup; the other two paths are defensive try/excepts on infrequent OSError that don't warrant dedicated tests. Co-authored-by: Teknium <127238744+teknium1@users.noreply.github.com> --- gateway/session.py | 11 ++-- gateway/status.py | 6 ++- run_agent.py | 13 ++++- .../test_materialize_data_url_cleanup.py | 54 +++++++++++++++++++ 4 files changed, 78 insertions(+), 6 deletions(-) create mode 100644 tests/run_agent/test_materialize_data_url_cleanup.py diff --git a/gateway/session.py b/gateway/session.py index be393e48e6f..c145625ae44 100644 --- a/gateway/session.py +++ b/gateway/session.py @@ -1276,9 +1276,14 @@ class SessionStore: # Also write legacy JSONL (keeps existing tooling working during transition) transcript_path = self.get_transcript_path(session_id) - with self._lock: - with open(transcript_path, "a", encoding="utf-8") as f: - f.write(json.dumps(message, ensure_ascii=False) + "\n") + try: + with self._lock: + with open(transcript_path, "a", encoding="utf-8") as f: + f.write(json.dumps(message, ensure_ascii=False) + "\n") + except OSError as e: + # Disk full / read-only fs / permission errors must not crash the + # message handler — the SQLite write above is the primary store. + logger.debug("Failed to write JSONL transcript for %s: %s", session_id, e) def rewrite_transcript(self, session_id: str, messages: List[Dict[str, Any]]) -> None: """Replace the entire transcript for a session with new messages. diff --git a/gateway/status.py b/gateway/status.py index 78fec1a98cb..25df7dc02eb 100644 --- a/gateway/status.py +++ b/gateway/status.py @@ -218,7 +218,11 @@ def _read_pid_record(pid_path: Optional[Path] = None) -> Optional[dict]: if not pid_path.exists(): return None - raw = pid_path.read_text().strip() + try: + raw = pid_path.read_text().strip() + except OSError: + # File was deleted between exists() and read_text(), or permission flipped. + return None if not raw: return None diff --git a/run_agent.py b/run_agent.py index 1c49ebff0fe..5fdb73487a3 100644 --- a/run_agent.py +++ b/run_agent.py @@ -8793,8 +8793,17 @@ class AIAgent: "image/jpg": ".jpg", }.get(mime, ".jpg") tmp = tempfile.NamedTemporaryFile(prefix="anthropic_image_", suffix=suffix, delete=False) - with tmp: - tmp.write(base64.b64decode(data)) + try: + with tmp: + tmp.write(base64.b64decode(data)) + except Exception: + # delete=False means a corrupt/unsupported data URL would otherwise + # leak a zero-byte temp file on every failed materialization. + try: + os.unlink(tmp.name) + except OSError: + pass + raise path = Path(tmp.name) return str(path), path diff --git a/tests/run_agent/test_materialize_data_url_cleanup.py b/tests/run_agent/test_materialize_data_url_cleanup.py new file mode 100644 index 00000000000..a3327a4be47 --- /dev/null +++ b/tests/run_agent/test_materialize_data_url_cleanup.py @@ -0,0 +1,54 @@ +"""Regression test: temp file cleanup when materializing data URLs for vision. + +`_materialize_data_url_for_vision` creates a `NamedTemporaryFile(delete=False)` +so the path can be handed to vision backends. If `base64.b64decode` raises on +a corrupt/unsupported data URL the temp file would otherwise persist forever +on disk, leaking once per failed call. +""" + +from __future__ import annotations + +import base64 +import os +import tempfile +from pathlib import Path + +import pytest + +from run_agent import AIAgent + + +def _list_anthropic_tmpfiles(tmpdir: str) -> list[str]: + return [ + name for name in os.listdir(tmpdir) + if name.startswith("anthropic_image_") + ] + + +def test_b64decode_failure_does_not_leak_tempfile(monkeypatch, tmp_path): + monkeypatch.setattr(tempfile, "tempdir", str(tmp_path)) + + bad_url = "data:image/png;base64,!!!not-valid-base64!!!" + with pytest.raises(Exception): + AIAgent._materialize_data_url_for_vision(bad_url) + + leftovers = _list_anthropic_tmpfiles(str(tmp_path)) + assert leftovers == [], f"leaked temp files after decode failure: {leftovers}" + + +def test_successful_decode_returns_path_to_existing_file(monkeypatch, tmp_path): + monkeypatch.setattr(tempfile, "tempdir", str(tmp_path)) + + payload = b"\x89PNG\r\n\x1a\n" + b"\x00" * 16 # a few bytes is enough + encoded = base64.b64encode(payload).decode("ascii") + good_url = f"data:image/png;base64,{encoded}" + + path_str, path_obj = AIAgent._materialize_data_url_for_vision(good_url) + + assert isinstance(path_obj, Path) + assert path_obj.exists() + assert path_obj.read_bytes() == payload + assert path_str == str(path_obj) + # Caller is responsible for cleanup; mimic that here so the test leaves + # no artifacts behind. + path_obj.unlink()