fix(skills): lock usage telemetry updates

This commit is contained in:
LeonSGP43 2026-05-03 22:49:46 +08:00 committed by Teknium
parent c2d6b385f1
commit d12be46df8
2 changed files with 91 additions and 11 deletions

View file

@ -1,12 +1,21 @@
"""Tests for tools/skill_usage.py — sidecar telemetry + provenance filtering.""" """Tests for tools/skill_usage.py — sidecar telemetry + provenance filtering."""
import json import json
import multiprocessing as mp
import os import os
from pathlib import Path from pathlib import Path
import pytest import pytest
def _bump_view_many(hermes_home: str, skill_name: str, iterations: int) -> None:
    """Call ``bump_view(skill_name)`` ``iterations`` times against ``hermes_home``.

    Used as a ``multiprocessing`` target, so it lives at module level where a
    spawned child can import it.

    Args:
        hermes_home: Value exported as the ``HERMES_HOME`` environment variable.
        skill_name: Skill whose view counter is bumped.
        iterations: Number of ``bump_view`` calls to make.
    """
    # Export HERMES_HOME before importing the module under test.
    # NOTE(review): presumably so tools.skill_usage resolves its paths from
    # this value in the child process - confirm.
    os.environ["HERMES_HOME"] = hermes_home
    from tools.skill_usage import bump_view

    remaining = iterations
    while remaining > 0:
        bump_view(skill_name)
        remaining -= 1
@pytest.fixture @pytest.fixture
def skills_home(tmp_path, monkeypatch): def skills_home(tmp_path, monkeypatch):
"""Isolated HERMES_HOME with a clean skills/ dir for each test.""" """Isolated HERMES_HOME with a clean skills/ dir for each test."""
@ -139,6 +148,30 @@ def test_bumps_do_not_corrupt_other_skills(skills_home):
assert get_record("skill-b")["use_count"] == 1 assert get_record("skill-b")["use_count"] == 1
def test_concurrent_bump_view_preserves_all_updates(skills_home):
    """Concurrent ``bump_view`` calls from several processes lose no updates.

    Spawns ``process_count`` workers that each bump the same skill
    ``iterations`` times, then checks that every worker exited cleanly and
    that the recorded ``view_count`` equals the total number of bumps.
    """
    from tools.skill_usage import get_record

    process_count = 6
    iterations = 25
    # "spawn" starts each worker in a fresh interpreter, so nothing is
    # inherited from the test process except the arguments passed below.
    ctx = mp.get_context("spawn")
    processes = [
        ctx.Process(
            target=_bump_view_many,
            args=(str(skills_home), "shared-skill", iterations),
        )
        for _ in range(process_count)
    ]

    try:
        for process in processes:
            process.start()

        for process in processes:
            process.join(timeout=20)

        # exitcode is None for a worker that is still running after the
        # timeout, so a hang fails here as well as a crash.
        for process in processes:
            assert process.exitcode == 0
    finally:
        # Never leave workers behind: a child that outlived its join timeout
        # (or was never joined because an earlier step raised) would keep
        # writing into the temporary HERMES_HOME after the test has finished.
        for process in processes:
            if process.is_alive():
                process.terminate()
                process.join()

    assert get_record("shared-skill")["view_count"] == process_count * iterations
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# State transitions # State transitions
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------

View file

@ -28,6 +28,7 @@ import json
import logging import logging
import os import os
import tempfile import tempfile
from contextlib import contextmanager
from datetime import datetime, timezone from datetime import datetime, timezone
from pathlib import Path from pathlib import Path
from typing import Any, Dict, Iterable, List, Optional, Set, Tuple from typing import Any, Dict, Iterable, List, Optional, Set, Tuple
@ -36,6 +37,17 @@ from hermes_constants import get_hermes_home
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
# File locking is platform-specific: fcntl is Unix-only, and on Windows msvcrt
# is used instead. Whichever module is not in use is bound to None so callers
# can branch on it; both are None when neither can be imported.
try:
    import fcntl
except ImportError:  # pragma: no cover - platform-specific fallback
    fcntl = None
    try:
        import msvcrt
    except ImportError:
        msvcrt = None
else:
    # fcntl is available, so the msvcrt fallback is not attempted.
    msvcrt = None
STATE_ACTIVE = "active" STATE_ACTIVE = "active"
STATE_STALE = "stale" STATE_STALE = "stale"
@ -51,6 +63,39 @@ def _usage_file() -> Path:
return _skills_dir() / ".usage.json" return _skills_dir() / ".usage.json"
@contextmanager
def _usage_file_lock():
    """Serialize .usage.json read-modify-write cycles across processes.

    Holds an exclusive lock on a sidecar lock file (the usage file's path with
    its suffix replaced by ``.json.lock``) for the duration of the ``with``
    body. ``fcntl.flock`` is used when ``fcntl`` is available, otherwise
    ``msvcrt.locking`` on the first byte of the lock file. When neither module
    is available the body runs without any locking.

    Yields:
        None. The lock is held while the caller's ``with`` body executes.

    Raises:
        OSError: If the lock file cannot be created/opened or the lock cannot
            be acquired. Errors while *releasing* the lock are suppressed;
            closing the handle drops the lock regardless.
    """
    lock_path = _usage_file().with_suffix(".json.lock")
    lock_path.parent.mkdir(parents=True, exist_ok=True)

    if fcntl is None and msvcrt is None:
        # No locking primitive on this platform: proceed unlocked.
        yield
        return

    # The msvcrt path opens with "r+" (file must exist) and locks one byte at
    # offset 0, so make sure the file exists and is non-empty first.
    if msvcrt and (not lock_path.exists() or lock_path.stat().st_size == 0):
        lock_path.write_text(" ", encoding="utf-8")

    # ``with`` guarantees the handle is closed on every exit path, including a
    # failed acquire or a failed unlock.
    with open(lock_path, "r+" if msvcrt else "a+") as fd:
        if fcntl:
            fcntl.flock(fd, fcntl.LOCK_EX)
        else:
            # msvcrt.locking operates from the current file position.
            fd.seek(0)
            msvcrt.locking(fd.fileno(), msvcrt.LK_LOCK, 1)

        # Only reached once the lock is actually held, so the unlock below
        # never runs against a lock that was not acquired.
        try:
            yield
        finally:
            try:
                if fcntl:
                    fcntl.flock(fd, fcntl.LOCK_UN)
                else:
                    fd.seek(0)
                    msvcrt.locking(fd.fileno(), msvcrt.LK_UNLCK, 1)
            except OSError:
                # Best effort: closing the handle releases the lock, and an
                # unlock failure must not mask an exception from the body.
                pass
def _archive_dir() -> Path: def _archive_dir() -> Path:
return _skills_dir() / ".archive" return _skills_dir() / ".archive"
@ -341,13 +386,14 @@ def _mutate(skill_name: str, mutator) -> None:
try: try:
if not is_agent_created(skill_name): if not is_agent_created(skill_name):
return return
data = load_usage() with _usage_file_lock():
rec = data.get(skill_name) data = load_usage()
if not isinstance(rec, dict): rec = data.get(skill_name)
rec = _empty_record() if not isinstance(rec, dict):
mutator(rec) rec = _empty_record()
data[skill_name] = rec mutator(rec)
save_usage(data) data[skill_name] = rec
save_usage(data)
except Exception as e: except Exception as e:
logger.debug("skill_usage._mutate(%s) failed: %s", skill_name, e, exc_info=True) logger.debug("skill_usage._mutate(%s) failed: %s", skill_name, e, exc_info=True)
@ -417,10 +463,11 @@ def forget(skill_name: str) -> None:
if not skill_name: if not skill_name:
return return
try: try:
data = load_usage() with _usage_file_lock():
if skill_name in data: data = load_usage()
del data[skill_name] if skill_name in data:
save_usage(data) del data[skill_name]
save_usage(data)
except Exception as e: except Exception as e:
logger.debug("skill_usage.forget(%s) failed: %s", skill_name, e, exc_info=True) logger.debug("skill_usage.forget(%s) failed: %s", skill_name, e, exc_info=True)