fix(curator): prevent false-positive consolidation from substring matching

_classify_removed_skills used naive 'in' substring matching to detect whether a removed skill's name appeared in skill_manage arguments. Short/common skill names (api, git, test, foo, etc.) matched incorrectly when they appeared as substrings of longer words in file paths (references/api-design.md) or content (latest, testing). Replace with field-aware matching: - file_path: needle must match a complete filename stem or directory name, with -/_ normalised for variant tolerance - content fields: word-boundary regex (\b) prevents embedding in longer words Also add 3 regression tests covering the false-positive scenarios.
2026-05-07 02:51:50 +00:00 · 2026-05-03 21:02:03 +08:00 · 2026-05-03 21:02:03 +08:00 · 744079ffe6
commit 744079ffe6
parent c0300575c1
2 changed files with 112 additions and 4 deletions
--- a/agent/curator.py
+++ b/agent/curator.py
@ -24,6 +24,7 @@ from __future__ import annotations
 import json
 import logging
 import os
+import re
 import tempfile
 import threading
 from datetime import datetime, timedelta, timezone
@ -469,6 +470,24 @@ def _reports_root() -> Path:
    return root


+def _needle_in_path_component(needle: str, path: str) -> bool:
+    """Check if *needle* is a complete filename stem or directory name in *path*.
+
+    Unlike simple substring matching, this avoids false positives where short
+    skill names are embedded in longer filenames (e.g. "api" matching
+    "references/api-design.md").  Hyphens and underscores are normalised so
+    "open-webui-setup" matches "open_webui_setup.md".
+    """
+    norm_needle = needle.replace("-", "_")
+    for part in path.replace("\\", "/").split("/"):
+        if not part:
+            continue
+        stem = part.rsplit(".", 1)[0] if "." in part else part
+        if stem.replace("-", "_") == norm_needle:
+            return True
+    return False
+
+
 def _classify_removed_skills(
    removed: List[str],
    added: List[str],
@ -547,15 +566,29 @@ def _classify_removed_skills(
                continue

            # Look for the removed skill's name in file_path / content / raw.
-            haystacks: List[str] = []
+            # Matching strategy differs by field type:
+            #   file_path — needle must be a complete path component
+            #     (filename stem or directory name), so "api" does NOT
+            #     falsely match "references/api-design.md".
+            #   content fields — word-boundary regex so "test" does NOT
+            #     falsely match "latest" or "testing".
+            haystacks: List[tuple[str, str]] = []
            for key in ("file_path", "file_content", "content", "new_string", "_raw"):
                v = args.get(key)
                if isinstance(v, str):
-                    haystacks.append(v)
+                    haystacks.append((key, v))
            hit = False
-            for hay in haystacks:
+            for key, hay in haystacks:
                for needle in needles:
-                    if needle and needle in hay:
+                    if not needle:
+                        continue
+                    if key == "file_path":
+                        matched = _needle_in_path_component(needle, hay)
+                    else:
+                        matched = bool(
+                            re.search(rf'\b{re.escape(needle)}\b', hay)
+                        )
+                    if matched:
                        hit = True
                        evidence = (
                            f"skill_manage action={args.get('action', '?')} "
--- a/tests/agent/test_curator_classification.py
+++ b/tests/agent/test_curator_classification.py
@ -220,6 +220,81 @@ def test_classify_handles_malformed_arguments_string(curator_env):
    assert len(result["pruned"]) == 1


+def test_classify_no_false_positive_short_name_in_file_path(curator_env):
+    """Short skill name that is a substring of another filename = pruned, not consolidated."""
+    # e.g. "api" should NOT match "references/api-design.md"
+    result = curator_env._classify_removed_skills(
+        removed=["api"],
+        added=[],
+        after_names={"conventions"},
+        tool_calls=[
+            {
+                "name": "skill_manage",
+                "arguments": json.dumps({
+                    "action": "write_file",
+                    "name": "conventions",
+                    "file_path": "references/api-design.md",
+                    "file_content": "# API Design\n...",
+                }),
+            },
+        ],
+    )
+    assert result["consolidated"] == [], (
+        f"Short name 'api' should NOT match file_path 'references/api-design.md'"
+    )
+    assert len(result["pruned"]) == 1
+    assert result["pruned"][0]["name"] == "api"
+
+
+def test_classify_no_false_positive_short_name_in_content(curator_env):
+    """Short skill name embedded in longer word in content = pruned, not consolidated."""
+    # e.g. "test" should NOT match content "running latest tests"
+    result = curator_env._classify_removed_skills(
+        removed=["test"],
+        added=[],
+        after_names={"umbrella"},
+        tool_calls=[
+            {
+                "name": "skill_manage",
+                "arguments": json.dumps({
+                    "action": "patch",
+                    "name": "umbrella",
+                    "old_string": "old",
+                    "new_string": "running latest tests with pytest",
+                }),
+            },
+        ],
+    )
+    assert result["consolidated"] == [], (
+        f"Short name 'test' should NOT match 'latest' via word boundary"
+    )
+    assert len(result["pruned"]) == 1
+
+
+def test_classify_still_matches_exact_word_in_content(curator_env):
+    """Word-boundary match still works for exact word occurrences."""
+    # "api" SHOULD match content "use the api gateway"
+    result = curator_env._classify_removed_skills(
+        removed=["api"],
+        added=[],
+        after_names={"gateway"},
+        tool_calls=[
+            {
+                "name": "skill_manage",
+                "arguments": json.dumps({
+                    "action": "edit",
+                    "name": "gateway",
+                    "content": "# Gateway\n\nUse the api gateway for all requests.\n",
+                }),
+            },
+        ],
+    )
+    assert len(result["consolidated"]) == 1, (
+        f"'api' should match as a standalone word in content"
+    )
+    assert result["consolidated"][0]["into"] == "gateway"
+
+
 def test_report_md_splits_consolidated_and_pruned_sections(curator_env):
    """End-to-end: REPORT.md shows both sections distinctly."""
    curator = curator_env