From abcaf0522905ff849cc8241037f42fb669bcb664 Mon Sep 17 00:00:00 2001
From: LeonSGP43 <cine.dreamer.one@gmail.com>
Date: Sun, 3 May 2026 21:44:04 +0800
Subject: [PATCH] fix(skills): keep manual skills out of curator

---
 tests/agent/test_curator.py            | 37 +++++++++++++++++++++++
 tests/tools/test_skill_manager_tool.py |  3 ++
 tests/tools/test_skill_usage.py        | 32 +++++++++++++++-----
 tools/skill_manager_tool.py            |  6 ++--
 tools/skill_usage.py                   | 42 ++++++++++++++++++++------
 5 files changed, 102 insertions(+), 18 deletions(-)

diff --git a/tests/agent/test_curator.py b/tests/agent/test_curator.py
index 45b9699456..69dc5f8578 100644
--- a/tests/agent/test_curator.py
+++ b/tests/agent/test_curator.py
@@ -154,6 +154,7 @@ def test_unused_skill_transitions_to_stale(curator_env):
     long_ago = (datetime.now(timezone.utc) - timedelta(days=45)).isoformat()
     data = u.load_usage()
     data["old-skill"] = u._empty_record()
+    data["old-skill"]["created_by"] = "agent"
     data["old-skill"]["last_used_at"] = long_ago
     data["old-skill"]["created_at"] = long_ago
     u.save_usage(data)
@@ -172,6 +173,7 @@ def test_very_old_skill_gets_archived(curator_env):
     super_old = (datetime.now(timezone.utc) - timedelta(days=120)).isoformat()
     data = u.load_usage()
     data["ancient"] = u._empty_record()
+    data["ancient"]["created_by"] = "agent"
     data["ancient"]["last_used_at"] = super_old
     data["ancient"]["created_at"] = super_old
     u.save_usage(data)
@@ -192,6 +194,7 @@ def test_pinned_skill_is_never_touched(curator_env):
     super_old = (datetime.now(timezone.utc) - timedelta(days=365)).isoformat()
     data = u.load_usage()
     data["precious"] = u._empty_record()
+    data["precious"]["created_by"] = "agent"
     data["precious"]["last_used_at"] = super_old
     data["precious"]["created_at"] = super_old
     data["precious"]["pinned"] = True
@@ -214,6 +217,7 @@ def test_stale_skill_reactivates_on_recent_use(curator_env):
     recent = datetime.now(timezone.utc).isoformat()
     data = u.load_usage()
     data["revived"] = u._empty_record()
+    data["revived"]["created_by"] = "agent"
     data["revived"]["state"] = "stale"
     data["revived"]["last_used_at"] = recent
     data["revived"]["created_at"] = recent
@@ -240,6 +244,27 @@ def test_new_skill_without_last_used_not_immediately_archived(curator_env):
     assert (skills_dir / "fresh").exists()
 
 
+def test_manual_skill_is_not_auto_archived(curator_env):
+    """Manual skills can have usage records, but without the agent-created
+    marker they must stay out of curator transitions."""
+    c = curator_env["curator"]
+    u = curator_env["usage"]
+    skills_dir = curator_env["home"] / "skills"
+    skill_dir = _write_skill(skills_dir, "manual")
+
+    super_old = (datetime.now(timezone.utc) - timedelta(days=365)).isoformat()
+    data = u.load_usage()
+    data["manual"] = u._empty_record()
+    data["manual"]["last_used_at"] = super_old
+    data["manual"]["created_at"] = super_old
+    u.save_usage(data)
+
+    counts = c.apply_automatic_transitions()
+    assert counts["checked"] == 0
+    assert counts["archived"] == 0
+    assert skill_dir.exists()
+
+
 def test_bundled_skill_not_touched_by_transitions(curator_env):
     c = curator_env["curator"]
     u = curator_env["usage"]
@@ -267,8 +292,10 @@ def test_bundled_skill_not_touched_by_transitions(curator_env):
 
 def test_run_review_records_state(curator_env):
     c = curator_env["curator"]
+    u = curator_env["usage"]
     skills_dir = curator_env["home"] / "skills"
     _write_skill(skills_dir, "a")
+    u.mark_agent_created("a")
 
     result = c.run_curator_review(synchronous=True)
     assert "started_at" in result
@@ -284,8 +311,10 @@ def test_dry_run_does_not_advance_state(curator_env, monkeypatch):
     `hermes curator status`. Fixes #18373.
     """
     c = curator_env["curator"]
+    u = curator_env["usage"]
     skills_dir = curator_env["home"] / "skills"
     _write_skill(skills_dir, "a")
+    u.mark_agent_created("a")
 
     # Stub the LLM so the test doesn't need a provider.
     monkeypatch.setattr(
@@ -311,8 +340,10 @@ def test_dry_run_injects_report_only_banner(curator_env, monkeypatch):
     skips automatic transitions — but the LLM prompt is the only guard
     against the model calling skill_manage directly."""
     c = curator_env["curator"]
+    u = curator_env["usage"]
     skills_dir = curator_env["home"] / "skills"
     _write_skill(skills_dir, "a")
+    u.mark_agent_created("a")
 
     captured = {}
     def _stub(prompt):
@@ -331,8 +362,10 @@ def test_dry_run_skips_automatic_transitions(curator_env, monkeypatch):
     archives skills deterministically, and a preview must not touch the
     filesystem."""
     c = curator_env["curator"]
+    u = curator_env["usage"]
     skills_dir = curator_env["home"] / "skills"
     _write_skill(skills_dir, "a")
+    u.mark_agent_created("a")
 
     called = {"n": 0}
     def _explode(*_a, **_kw):
@@ -351,8 +384,10 @@ def test_dry_run_skips_automatic_transitions(curator_env, monkeypatch):
 
 def test_run_review_synchronous_invokes_llm_stub(curator_env, monkeypatch):
     c = curator_env["curator"]
+    u = curator_env["usage"]
     skills_dir = curator_env["home"] / "skills"
     _write_skill(skills_dir, "a")
+    u.mark_agent_created("a")
 
     calls = []
     def _stub(prompt):
@@ -409,8 +444,10 @@ def test_maybe_run_curator_enforces_idle_gate(curator_env, monkeypatch):
 
 def test_maybe_run_curator_runs_when_eligible(curator_env, monkeypatch):
     c = curator_env["curator"]
+    u = curator_env["usage"]
     skills_dir = curator_env["home"] / "skills"
     _write_skill(skills_dir, "a")
+    u.mark_agent_created("a")
     # Seed last_run_at far in the past so the interval gate opens — the
     # "no state" path intentionally defers the first run now (#18373).
     long_ago = datetime.now(timezone.utc) - timedelta(hours=c.get_interval_hours() * 2)
diff --git a/tests/tools/test_skill_manager_tool.py b/tests/tools/test_skill_manager_tool.py
index 004924b9f4..934215d945 100644
--- a/tests/tools/test_skill_manager_tool.py
+++ b/tests/tools/test_skill_manager_tool.py
@@ -533,8 +533,11 @@ class TestSkillManageDispatcher:
     def test_full_create_via_dispatcher(self, tmp_path):
         with _skill_dir(tmp_path):
             raw = skill_manage(action="create", name="test-skill", content=VALID_SKILL_CONTENT)
+            from tools.skill_usage import load_usage
+            usage = load_usage()
         result = json.loads(raw)
         assert result["success"] is True
+        assert usage["test-skill"]["created_by"] == "agent"
 
     def test_delete_via_dispatcher_threads_absorbed_into(self, tmp_path):
         # Dispatcher must plumb absorbed_into through to _delete_skill so the
diff --git a/tests/tools/test_skill_usage.py b/tests/tools/test_skill_usage.py
index 7dd92eb18c..b66e2bba76 100644
--- a/tests/tools/test_skill_usage.py
+++ b/tests/tools/test_skill_usage.py
@@ -194,10 +194,11 @@ def test_forget_removes_record(skills_home):
 # ---------------------------------------------------------------------------
 
 def test_agent_created_excludes_bundled(skills_home):
-    from tools.skill_usage import list_agent_created_skill_names
+    from tools.skill_usage import list_agent_created_skill_names, mark_agent_created
     skills_dir = skills_home / "skills"
     _write_skill(skills_dir, "bundled-skill", category="github")
     _write_skill(skills_dir, "my-skill")
+    mark_agent_created("my-skill")
     # Seed a bundled manifest marking bundled-skill as upstream
     (skills_dir / ".bundled_manifest").write_text(
         "bundled-skill:abc123\n", encoding="utf-8",
@@ -208,10 +209,11 @@ def test_agent_created_excludes_bundled(skills_home):
 
 
 def test_agent_created_excludes_hub_installed(skills_home):
-    from tools.skill_usage import list_agent_created_skill_names
+    from tools.skill_usage import list_agent_created_skill_names, mark_agent_created
     skills_dir = skills_home / "skills"
     _write_skill(skills_dir, "hub-skill")
     _write_skill(skills_dir, "my-skill")
+    mark_agent_created("my-skill")
     hub_dir = skills_dir / ".hub"
     hub_dir.mkdir()
     (hub_dir / "lock.json").write_text(
@@ -238,9 +240,10 @@ def test_is_agent_created(skills_home):
 
 
 def test_agent_created_skips_archive_and_hub_dirs(skills_home):
-    from tools.skill_usage import list_agent_created_skill_names
+    from tools.skill_usage import list_agent_created_skill_names, mark_agent_created
     skills_dir = skills_home / "skills"
     _write_skill(skills_dir, "real-skill")
+    mark_agent_created("real-skill")
     # Dot-prefixed dirs must be ignored even if they contain SKILL.md
     archive = skills_dir / ".archive" / "old-skill"
     archive.mkdir(parents=True)
@@ -368,27 +371,41 @@ def test_archive_collision_gets_suffix(skills_home):
 # Reporting
 # ---------------------------------------------------------------------------
 
-def test_agent_created_report_includes_defaults(skills_home):
-    from tools.skill_usage import agent_created_report, bump_view
+def test_agent_created_report_includes_marked_skills_with_defaults(skills_home):
+    from tools.skill_usage import agent_created_report, bump_view, mark_agent_created
     skills_dir = skills_home / "skills"
     _write_skill(skills_dir, "a")
     _write_skill(skills_dir, "b")
+    mark_agent_created("a")
+    mark_agent_created("b")
     bump_view("a")
     rows = agent_created_report()
     by_name = {r["name"]: r for r in rows}
     assert "a" in by_name and "b" in by_name
     assert by_name["a"]["view_count"] == 1
-    # b has no usage record yet — must still appear with defaults
+    # b has only the provenance marker — activity fields still default.
     assert by_name["b"]["view_count"] == 0
     assert by_name["b"]["state"] == "active"
 
 
+def test_manual_skill_with_usage_is_not_curator_managed(skills_home):
+    from tools.skill_usage import agent_created_report, bump_view, list_agent_created_skill_names
+    skills_dir = skills_home / "skills"
+    _write_skill(skills_dir, "manual-skill")
+
+    bump_view("manual-skill")
+
+    assert "manual-skill" not in list_agent_created_skill_names()
+    assert "manual-skill" not in {r["name"] for r in agent_created_report()}
+
+
 def test_agent_created_report_excludes_bundled_and_hub(skills_home):
-    from tools.skill_usage import agent_created_report
+    from tools.skill_usage import agent_created_report, mark_agent_created
     skills_dir = skills_home / "skills"
     _write_skill(skills_dir, "mine")
     _write_skill(skills_dir, "bundled")
     _write_skill(skills_dir, "hubbed")
+    mark_agent_created("mine")
     (skills_dir / ".bundled_manifest").write_text("bundled:abc\n", encoding="utf-8")
     hub = skills_dir / ".hub"
     hub.mkdir()
@@ -414,6 +431,7 @@ def test_agent_created_report_derives_activity_from_view_and_patch(skills_home,
     ])
     monkeypatch.setattr(skill_usage, "_now_iso", lambda: next(timestamps))
 
+    skill_usage.mark_agent_created("mine")
     skill_usage.bump_view("mine")
     skill_usage.bump_patch("mine")
 
diff --git a/tools/skill_manager_tool.py b/tools/skill_manager_tool.py
index d8d44f1a8b..e7d264de67 100644
--- a/tools/skill_manager_tool.py
+++ b/tools/skill_manager_tool.py
@@ -786,8 +786,10 @@ def skill_manage(
         # that mutate an existing skill's guidance), drop the record on delete.
         # Best-effort; telemetry failures never break the tool.
         try:
-            from tools.skill_usage import bump_patch, forget
-            if action in ("patch", "edit", "write_file", "remove_file"):
+            from tools.skill_usage import bump_patch, forget, mark_agent_created
+            if action == "create":
+                mark_agent_created(name)
+            elif action in ("patch", "edit", "write_file", "remove_file"):
                 bump_patch(name)
             elif action == "delete":
                 forget(name)
diff --git a/tools/skill_usage.py b/tools/skill_usage.py
index 8a4a1aa425..0491f1d8b1 100644
--- a/tools/skill_usage.py
+++ b/tools/skill_usage.py
@@ -11,8 +11,9 @@ Design notes:
   - Atomic writes via tempfile + os.replace (same pattern as .bundled_manifest).
   - All counter bumps are best-effort: failures log at DEBUG and return silently.
     A broken sidecar never breaks the underlying tool call.
-  - Provenance filter: "agent-created" == not in .bundled_manifest AND not in
-    .hub/lock.json. The curator only ever mutates agent-created skills.
+  - Provenance filter: curator-managed skills are explicitly marked when
+    created through skill_manage. Bundled / hub-installed skills stay
+    off-limits, and manually authored skills are not inferred from location.
 
 Lifecycle states:
     active    -> default
@@ -149,11 +150,13 @@ def _read_hub_installed_names() -> Set[str]:
 
 
 def list_agent_created_skill_names() -> List[str]:
-    """Enumerate skills that were authored by the agent (or user), NOT by a
-    bundled or hub-installed source.
+    """Enumerate skills explicitly authored by the agent.
 
-    The curator operates exclusively on this set. Bundled / hub skills are
-    maintained by their upstream sources and must never be pruned here.
+    The curator operates exclusively on this set. Skills are only eligible
+    after ``skill_manage(action="create")`` marks them in ``.usage.json``;
+    manually authored skills must not be inferred from filesystem location.
+    Bundled / hub skills are maintained by their upstream sources and must
+    never be pruned here.
     """
     base = _skills_dir()
     if not base.exists():
@@ -161,6 +164,7 @@ def list_agent_created_skill_names() -> List[str]:
     bundled = _read_bundled_manifest_names()
     hub = _read_hub_installed_names()
     off_limits = bundled | hub
+    usage = load_usage()
 
     names: List[str] = []
     # Top-level SKILL.md files (flat layout) AND nested category/skill/SKILL.md
@@ -176,6 +180,8 @@ def list_agent_created_skill_names() -> List[str]:
         name = _read_skill_name(skill_md, fallback=skill_md.parent.name)
         if name in off_limits:
             continue
+        if not _is_curator_managed_record(usage.get(name)):
+            continue
         names.append(name)
     return sorted(set(names))
 
@@ -207,12 +213,20 @@ def is_agent_created(skill_name: str) -> bool:
     return skill_name not in off_limits
 
 
+def _is_curator_managed_record(record: Any) -> bool:
+    """Return True when a usage record opts a skill into curator management."""
+    if not isinstance(record, dict):
+        return False
+    return record.get("created_by") == "agent" or record.get("agent_created") is True
+
+
 # ---------------------------------------------------------------------------
 # Sidecar I/O
 # ---------------------------------------------------------------------------
 
 def _empty_record() -> Dict[str, Any]:
     return {
+        "created_by": None,
         "use_count": 0,
         "view_count": 0,
         "last_used_at": None,
@@ -287,9 +301,8 @@ def _mutate(skill_name: str, mutator) -> None:
     """Load, apply *mutator(record)* in place, save. Best-effort.
 
     Bundled and hub-installed skills are NEVER recorded in the sidecar.
-    This keeps .usage.json focused on agent-created skills (the only ones
-    the curator considers) and prevents stale counters from hanging around
-    for upstream-managed skills.
+    Local manual skills may still accrue usage telemetry, but they only
+    become curator-managed when ``created_by`` is explicitly marked.
     """
     if not skill_name:
         return
@@ -336,6 +349,17 @@ def bump_patch(skill_name: str) -> None:
     _mutate(skill_name, _apply)
 
 
+def mark_agent_created(skill_name: str) -> None:
+    """Opt a skill created by skill_manage into curator management.
+
+    Viewing or invoking a manually authored skill may still create telemetry,
+    but only this explicit marker makes it eligible for automatic curation.
+    """
+    def _apply(rec: Dict[str, Any]) -> None:
+        rec["created_by"] = "agent"
+    _mutate(skill_name, _apply)
+
+
 def set_state(skill_name: str, state: str) -> None:
     """Set lifecycle state. No-op if *state* is invalid."""
     if state not in _VALID_STATES: