fix(curator): prevent false-positive consolidation from substring matching

_classify_removed_skills used naive 'in' substring matching to detect whether a removed skill's name appeared in skill_manage arguments. Short/common skill names (api, git, test, foo, etc.) matched incorrectly when they appeared as substrings of longer words in file paths (references/api-design.md) or content (latest, testing). Replace with field-aware matching: - file_path: needle must match a complete filename stem or directory name, with -/_ normalised for variant tolerance - content fields: word-boundary regex (\b) prevents embedding in longer words Also add 3 regression tests covering the false-positive scenarios.
2026-05-07 02:51:50 +00:00 · 2026-05-03 21:02:03 +08:00 · 2026-05-03 21:02:03 +08:00 · 744079ffe6
commit 744079ffe6
parent c0300575c1
2 changed files with 112 additions and 4 deletions
--- a/agent/curator.py
+++ b/agent/curator.py
@ -24,6 +24,7 @@ from __future__ import annotations
 import json
 import logging
 import os
 import re
 import tempfile
 import threading
 from datetime import datetime, timedelta, timezone
@ -469,6 +470,24 @@ def _reports_root() -> Path:
    return root
 def _needle_in_path_component(needle: str, path: str) -> bool:
    """Check if *needle* is a complete filename stem or directory name in *path*.
    Unlike simple substring matching, this avoids false positives where short
    skill names are embedded in longer filenames (e.g. "api" matching
    "references/api-design.md").  Hyphens and underscores are normalised so
    "open-webui-setup" matches "open_webui_setup.md".
    """
    norm_needle = needle.replace("-", "_")
    for part in path.replace("\\", "/").split("/"):
        if not part:
            continue
        stem = part.rsplit(".", 1)[0] if "." in part else part
        if stem.replace("-", "_") == norm_needle:
            return True
    return False
 def _classify_removed_skills(
    removed: List[str],
    added: List[str],
@ -547,15 +566,29 @@ def _classify_removed_skills(
                continue
            # Look for the removed skill's name in file_path / content / raw.
-            haystacks: List[str] = []
+            # Matching strategy differs by field type:
            #   file_path — needle must be a complete path component
            #     (filename stem or directory name), so "api" does NOT
            #     falsely match "references/api-design.md".
            #   content fields — word-boundary regex so "test" does NOT
            #     falsely match "latest" or "testing".
            haystacks: List[tuple[str, str]] = []
            for key in ("file_path", "file_content", "content", "new_string", "_raw"):
                v = args.get(key)
                if isinstance(v, str):
-                    haystacks.append(v)
+                    haystacks.append((key, v))
            hit = False
-            for hay in haystacks:
+            for key, hay in haystacks:
                for needle in needles:
-                    if needle and needle in hay:
+                    if not needle:
                        continue
                    if key == "file_path":
                        matched = _needle_in_path_component(needle, hay)
                    else:
                        matched = bool(
                            re.search(rf'\b{re.escape(needle)}\b', hay)
                        )
                    if matched:
                        hit = True
                        evidence = (
                            f"skill_manage action={args.get('action', '?')} "
--- a/tests/agent/test_curator_classification.py
+++ b/tests/agent/test_curator_classification.py
@ -220,6 +220,81 @@ def test_classify_handles_malformed_arguments_string(curator_env):
    assert len(result["pruned"]) == 1
 def test_classify_no_false_positive_short_name_in_file_path(curator_env):
    """Short skill name that is a substring of another filename = pruned, not consolidated."""
    # e.g. "api" should NOT match "references/api-design.md"
    result = curator_env._classify_removed_skills(
        removed=["api"],
        added=[],
        after_names={"conventions"},
        tool_calls=[
            {
                "name": "skill_manage",
                "arguments": json.dumps({
                    "action": "write_file",
                    "name": "conventions",
                    "file_path": "references/api-design.md",
                    "file_content": "# API Design\n...",
                }),
            },
        ],
    )
    assert result["consolidated"] == [], (
        f"Short name 'api' should NOT match file_path 'references/api-design.md'"
    )
    assert len(result["pruned"]) == 1
    assert result["pruned"][0]["name"] == "api"
 def test_classify_no_false_positive_short_name_in_content(curator_env):
    """Short skill name embedded in longer word in content = pruned, not consolidated."""
    # e.g. "test" should NOT match content "running latest tests"
    result = curator_env._classify_removed_skills(
        removed=["test"],
        added=[],
        after_names={"umbrella"},
        tool_calls=[
            {
                "name": "skill_manage",
                "arguments": json.dumps({
                    "action": "patch",
                    "name": "umbrella",
                    "old_string": "old",
                    "new_string": "running latest tests with pytest",
                }),
            },
        ],
    )
    assert result["consolidated"] == [], (
        f"Short name 'test' should NOT match 'latest' via word boundary"
    )
    assert len(result["pruned"]) == 1
 def test_classify_still_matches_exact_word_in_content(curator_env):
    """Word-boundary match still works for exact word occurrences."""
    # "api" SHOULD match content "use the api gateway"
    result = curator_env._classify_removed_skills(
        removed=["api"],
        added=[],
        after_names={"gateway"},
        tool_calls=[
            {
                "name": "skill_manage",
                "arguments": json.dumps({
                    "action": "edit",
                    "name": "gateway",
                    "content": "# Gateway\n\nUse the api gateway for all requests.\n",
                }),
            },
        ],
    )
    assert len(result["consolidated"]) == 1, (
        f"'api' should match as a standalone word in content"
    )
    assert result["consolidated"][0]["into"] == "gateway"
 def test_report_md_splits_consolidated_and_pruned_sections(curator_env):
    """End-to-end: REPORT.md shows both sections distinctly."""
    curator = curator_env