From 2245879af0dcc48f93fa8d516b8748d65ac67c75 Mon Sep 17 00:00:00 2001 From: Wesley Simplicio Date: Sat, 9 May 2026 12:19:39 -0300 Subject: [PATCH] fix(checkpoint): guard _touch_project against non-dict project metadata MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Problem ======= `tools.checkpoint_manager._touch_project` reads the project metadata file with `json.loads(meta_path.read_text(...))`, then immediately does: meta["workdir"] = str(_normalize_path(working_dir)) The `except` block only catches `(OSError, ValueError)`. When the file parses successfully but returns a non-dict value (a list `[]`, `null`, or a scalar from a corrupted or hand-truncated write), `json.loads` succeeds without error and `meta` is set to, e.g., `[]`. The subsequent subscript assignment then raises `TypeError: list indices must be integers or slices, not str`, which is NOT caught by the narrow except clause. This TypeError propagates up through `_take` to `ensure_checkpoint`, where the broad `except Exception` safety net swallows it. The effect is that `ensure_checkpoint` silently returns False for the entire session — all checkpoints are skipped for the affected working directory without any user-visible error. Root cause ========== Missing `isinstance(meta, dict)` guard after `json.loads`, identical in pattern to bugs fixed in `cron/jobs.py` (#22569) and `tools/process_registry.py` (#22544). The same guard is already present one function below in `_list_projects` (line 506), but was inadvertently omitted in `_touch_project`. Fix === Add two lines after the try/except: ```python if not isinstance(meta, dict): meta = {} ``` This matches the existing guard in `_list_projects` and ensures a fresh empty dict is used whenever the persisted value is not a mapping — preserving the `created_at` semantics via `setdefault` on the next line. Tests ===== `TestTouchProjectMalformedMeta` covers four non-dict root values (`[]`, `null`, `42`, `"oops"`). Each writes a corrupted metadata file, calls `_touch_project`, and asserts: (a) no exception raised, (b) the metadata file is rewritten as a valid dict containing `last_touch` and `workdir`. All four fail on main with `TypeError`, pass with fix. Full `tests/tools/test_checkpoint_manager.py` regression: 77 passed. --- tests/tools/test_checkpoint_manager.py | 38 ++++++++++++++++++++++++++ tools/checkpoint_manager.py | 2 ++ 2 files changed, 40 insertions(+) diff --git a/tests/tools/test_checkpoint_manager.py b/tests/tools/test_checkpoint_manager.py index 2c87db0e5e3..84955f224de 100644 --- a/tests/tools/test_checkpoint_manager.py +++ b/tests/tools/test_checkpoint_manager.py @@ -21,6 +21,7 @@ from tools.checkpoint_manager import ( _store_path, _ref_name, _project_meta_path, + _touch_project, format_checkpoint_list, DEFAULT_EXCLUDES, CHECKPOINT_BASE, @@ -608,6 +609,43 @@ class TestErrorResilience: assert mgr.ensure_checkpoint(str(work_dir), "test") is False +class TestTouchProjectMalformedMeta: + """_touch_project must not raise when the project metadata file is corrupted. + + The try/except in _touch_project only catches ``(OSError, ValueError)``. + When ``json.load`` succeeds but returns a non-dict (e.g. a list ``[]``, + ``null``, or a scalar), the subsequent ``meta["workdir"] = ...`` raises + ``TypeError: list indices must be integers…``. This TypeError propagates + uncaught out of ``_touch_project`` and up through ``_take`` into + ``ensure_checkpoint``, where it is swallowed by the broad ``except + Exception`` safety net — but the effect is that the checkpoint is silently + skipped for the entire session. + + Fix: add ``if not isinstance(meta, dict): meta = {}`` after parsing, + mirroring the same guard already present in ``_list_projects``. + """ + + @pytest.mark.parametrize("payload", ["[]", "null", "42", '"oops"']) + def test_non_dict_meta_does_not_raise(self, tmp_path, payload): + store = tmp_path / "store" + workdir = str(tmp_path / "project") + _init_store(store, workdir) + + dir_hash = _project_hash(workdir) + meta_path = _project_meta_path(store, dir_hash) + meta_path.parent.mkdir(parents=True, exist_ok=True) + meta_path.write_text(payload, encoding="utf-8") + + # Must not raise TypeError + _touch_project(store, workdir) + + # Metadata file should now be a valid dict with last_touch updated + data = json.loads(meta_path.read_text(encoding="utf-8")) + assert isinstance(data, dict) + assert "last_touch" in data + assert "workdir" in data + + # ========================================================================= # Security / input validation # ========================================================================= diff --git a/tools/checkpoint_manager.py b/tools/checkpoint_manager.py index 15b106f512e..cab877bc623 100644 --- a/tools/checkpoint_manager.py +++ b/tools/checkpoint_manager.py @@ -482,6 +482,8 @@ def _touch_project(store: Path, working_dir: str) -> None: meta = json.loads(meta_path.read_text(encoding="utf-8")) except (OSError, ValueError): meta = {} + if not isinstance(meta, dict): + meta = {} meta["workdir"] = str(_normalize_path(working_dir)) meta["last_touch"] = time.time() meta.setdefault("created_at", meta["last_touch"])