From d12be46df8753931c21946fc0b0caccb83ff2209 Mon Sep 17 00:00:00 2001 From: LeonSGP43 Date: Sun, 3 May 2026 22:49:46 +0800 Subject: [PATCH] fix(skills): lock usage telemetry updates --- tests/tools/test_skill_usage.py | 33 ++++++++++++++++ tools/skill_usage.py | 69 +++++++++++++++++++++++++++------ 2 files changed, 91 insertions(+), 11 deletions(-) diff --git a/tests/tools/test_skill_usage.py b/tests/tools/test_skill_usage.py index 996aaa9d6d..8251e60999 100644 --- a/tests/tools/test_skill_usage.py +++ b/tests/tools/test_skill_usage.py @@ -1,12 +1,21 @@ """Tests for tools/skill_usage.py — sidecar telemetry + provenance filtering.""" import json +import multiprocessing as mp import os from pathlib import Path import pytest +def _bump_view_many(hermes_home: str, skill_name: str, iterations: int) -> None: + os.environ["HERMES_HOME"] = hermes_home + from tools.skill_usage import bump_view + + for _ in range(iterations): + bump_view(skill_name) + + @pytest.fixture def skills_home(tmp_path, monkeypatch): """Isolated HERMES_HOME with a clean skills/ dir for each test.""" @@ -139,6 +148,30 @@ def test_bumps_do_not_corrupt_other_skills(skills_home): assert get_record("skill-b")["use_count"] == 1 +def test_concurrent_bump_view_preserves_all_updates(skills_home): + from tools.skill_usage import get_record + + process_count = 6 + iterations = 25 + ctx = mp.get_context("spawn") + processes = [ + ctx.Process( + target=_bump_view_many, + args=(str(skills_home), "shared-skill", iterations), + ) + for _ in range(process_count) + ] + + for process in processes: + process.start() + for process in processes: + process.join(timeout=20) + + for process in processes: + assert process.exitcode == 0 + assert get_record("shared-skill")["view_count"] == process_count * iterations + + # --------------------------------------------------------------------------- # State transitions # --------------------------------------------------------------------------- diff --git a/tools/skill_usage.py b/tools/skill_usage.py index 9b94ca9a05..88bca75219 100644 --- a/tools/skill_usage.py +++ b/tools/skill_usage.py @@ -28,6 +28,7 @@ import json import logging import os import tempfile +from contextlib import contextmanager from datetime import datetime, timezone from pathlib import Path from typing import Any, Dict, Iterable, List, Optional, Set, Tuple @@ -36,6 +37,17 @@ from hermes_constants import get_hermes_home logger = logging.getLogger(__name__) +# fcntl is Unix-only; on Windows use msvcrt for file locking. +msvcrt = None +try: + import fcntl +except ImportError: # pragma: no cover - platform-specific fallback + fcntl = None + try: + import msvcrt + except ImportError: + pass + STATE_ACTIVE = "active" STATE_STALE = "stale" @@ -51,6 +63,39 @@ def _usage_file() -> Path: return _skills_dir() / ".usage.json" +@contextmanager +def _usage_file_lock(): + """Serialize .usage.json read-modify-write cycles across processes.""" + lock_path = _usage_file().with_suffix(".json.lock") + lock_path.parent.mkdir(parents=True, exist_ok=True) + + if fcntl is None and msvcrt is None: + yield + return + + if msvcrt and (not lock_path.exists() or lock_path.stat().st_size == 0): + lock_path.write_text(" ", encoding="utf-8") + + fd = open(lock_path, "r+" if msvcrt else "a+") + try: + if fcntl: + fcntl.flock(fd, fcntl.LOCK_EX) + else: + fd.seek(0) + msvcrt.locking(fd.fileno(), msvcrt.LK_LOCK, 1) + yield + finally: + if fcntl: + fcntl.flock(fd, fcntl.LOCK_UN) + elif msvcrt: + try: + fd.seek(0) + msvcrt.locking(fd.fileno(), msvcrt.LK_UNLCK, 1) + except (OSError, IOError): + pass + fd.close() + + def _archive_dir() -> Path: return _skills_dir() / ".archive" @@ -341,13 +386,14 @@ def _mutate(skill_name: str, mutator) -> None: try: if not is_agent_created(skill_name): return - data = load_usage() - rec = data.get(skill_name) - if not isinstance(rec, dict): - rec = _empty_record() - mutator(rec) - data[skill_name] = rec - save_usage(data) + with _usage_file_lock(): + data = load_usage() + rec = data.get(skill_name) + if not isinstance(rec, dict): + rec = _empty_record() + mutator(rec) + data[skill_name] = rec + save_usage(data) except Exception as e: logger.debug("skill_usage._mutate(%s) failed: %s", skill_name, e, exc_info=True) @@ -417,10 +463,11 @@ def forget(skill_name: str) -> None: if not skill_name: return try: - data = load_usage() - if skill_name in data: - del data[skill_name] - save_usage(data) + with _usage_file_lock(): + data = load_usage() + if skill_name in data: + del data[skill_name] + save_usage(data) except Exception as e: logger.debug("skill_usage.forget(%s) failed: %s", skill_name, e, exc_info=True)