fix(skills): lock usage telemetry updates

This commit is contained in:
LeonSGP43 2026-05-03 22:49:46 +08:00 committed by Teknium
parent c2d6b385f1
commit d12be46df8
2 changed files with 91 additions and 11 deletions

View file

@ -1,12 +1,21 @@
"""Tests for tools/skill_usage.py — sidecar telemetry + provenance filtering."""
import json
import multiprocessing as mp
import os
from pathlib import Path
import pytest
def _bump_view_many(hermes_home: str, skill_name: str, iterations: int) -> None:
os.environ["HERMES_HOME"] = hermes_home
from tools.skill_usage import bump_view
for _ in range(iterations):
bump_view(skill_name)
@pytest.fixture
def skills_home(tmp_path, monkeypatch):
"""Isolated HERMES_HOME with a clean skills/ dir for each test."""
@ -139,6 +148,30 @@ def test_bumps_do_not_corrupt_other_skills(skills_home):
assert get_record("skill-b")["use_count"] == 1
def test_concurrent_bump_view_preserves_all_updates(skills_home):
from tools.skill_usage import get_record
process_count = 6
iterations = 25
ctx = mp.get_context("spawn")
processes = [
ctx.Process(
target=_bump_view_many,
args=(str(skills_home), "shared-skill", iterations),
)
for _ in range(process_count)
]
for process in processes:
process.start()
for process in processes:
process.join(timeout=20)
for process in processes:
assert process.exitcode == 0
assert get_record("shared-skill")["view_count"] == process_count * iterations
# ---------------------------------------------------------------------------
# State transitions
# ---------------------------------------------------------------------------

View file

@ -28,6 +28,7 @@ import json
import logging
import os
import tempfile
from contextlib import contextmanager
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, Iterable, List, Optional, Set, Tuple
@ -36,6 +37,17 @@ from hermes_constants import get_hermes_home
logger = logging.getLogger(__name__)
# fcntl is Unix-only; on Windows use msvcrt for file locking.
msvcrt = None
try:
import fcntl
except ImportError: # pragma: no cover - platform-specific fallback
fcntl = None
try:
import msvcrt
except ImportError:
pass
STATE_ACTIVE = "active"
STATE_STALE = "stale"
@ -51,6 +63,39 @@ def _usage_file() -> Path:
return _skills_dir() / ".usage.json"
@contextmanager
def _usage_file_lock():
"""Serialize .usage.json read-modify-write cycles across processes."""
lock_path = _usage_file().with_suffix(".json.lock")
lock_path.parent.mkdir(parents=True, exist_ok=True)
if fcntl is None and msvcrt is None:
yield
return
if msvcrt and (not lock_path.exists() or lock_path.stat().st_size == 0):
lock_path.write_text(" ", encoding="utf-8")
fd = open(lock_path, "r+" if msvcrt else "a+")
try:
if fcntl:
fcntl.flock(fd, fcntl.LOCK_EX)
else:
fd.seek(0)
msvcrt.locking(fd.fileno(), msvcrt.LK_LOCK, 1)
yield
finally:
if fcntl:
fcntl.flock(fd, fcntl.LOCK_UN)
elif msvcrt:
try:
fd.seek(0)
msvcrt.locking(fd.fileno(), msvcrt.LK_UNLCK, 1)
except (OSError, IOError):
pass
fd.close()
def _archive_dir() -> Path:
return _skills_dir() / ".archive"
@ -341,13 +386,14 @@ def _mutate(skill_name: str, mutator) -> None:
try:
if not is_agent_created(skill_name):
return
data = load_usage()
rec = data.get(skill_name)
if not isinstance(rec, dict):
rec = _empty_record()
mutator(rec)
data[skill_name] = rec
save_usage(data)
with _usage_file_lock():
data = load_usage()
rec = data.get(skill_name)
if not isinstance(rec, dict):
rec = _empty_record()
mutator(rec)
data[skill_name] = rec
save_usage(data)
except Exception as e:
logger.debug("skill_usage._mutate(%s) failed: %s", skill_name, e, exc_info=True)
@ -417,10 +463,11 @@ def forget(skill_name: str) -> None:
if not skill_name:
return
try:
data = load_usage()
if skill_name in data:
del data[skill_name]
save_usage(data)
with _usage_file_lock():
data = load_usage()
if skill_name in data:
del data[skill_name]
save_usage(data)
except Exception as e:
logger.debug("skill_usage.forget(%s) failed: %s", skill_name, e, exc_info=True)