fix(curator): prevent false-positive consolidation from substring matching

_classify_removed_skills used naive 'in' substring matching to detect
whether a removed skill's name appeared in skill_manage arguments.
Short/common skill names (api, git, test, foo, etc.) matched
incorrectly when they appeared as substrings of longer words in file
paths (references/api-design.md) or content (latest, testing).

Replace with field-aware matching:
- file_path: needle must match a complete filename stem or directory
  name, with -/_ normalised for variant tolerance
- content fields: word-boundary regex (\b) prevents embedding in
  longer words

Also add 3 regression tests covering the false-positive scenarios.
This commit is contained in:
daixin1204 2026-05-03 21:02:03 +08:00 committed by Teknium
parent c0300575c1
commit 744079ffe6
2 changed files with 112 additions and 4 deletions

View file

@ -24,6 +24,7 @@ from __future__ import annotations
import json import json
import logging import logging
import os import os
import re
import tempfile import tempfile
import threading import threading
from datetime import datetime, timedelta, timezone from datetime import datetime, timedelta, timezone
@ -469,6 +470,24 @@ def _reports_root() -> Path:
return root return root
def _needle_in_path_component(needle: str, path: str) -> bool:
"""Check if *needle* is a complete filename stem or directory name in *path*.
Unlike simple substring matching, this avoids false positives where short
skill names are embedded in longer filenames (e.g. "api" matching
"references/api-design.md"). Hyphens and underscores are normalised so
"open-webui-setup" matches "open_webui_setup.md".
"""
norm_needle = needle.replace("-", "_")
for part in path.replace("\\", "/").split("/"):
if not part:
continue
stem = part.rsplit(".", 1)[0] if "." in part else part
if stem.replace("-", "_") == norm_needle:
return True
return False
def _classify_removed_skills( def _classify_removed_skills(
removed: List[str], removed: List[str],
added: List[str], added: List[str],
@ -547,15 +566,29 @@ def _classify_removed_skills(
continue continue
# Look for the removed skill's name in file_path / content / raw. # Look for the removed skill's name in file_path / content / raw.
haystacks: List[str] = [] # Matching strategy differs by field type:
# file_path — needle must be a complete path component
# (filename stem or directory name), so "api" does NOT
# falsely match "references/api-design.md".
# content fields — word-boundary regex so "test" does NOT
# falsely match "latest" or "testing".
haystacks: List[tuple[str, str]] = []
for key in ("file_path", "file_content", "content", "new_string", "_raw"): for key in ("file_path", "file_content", "content", "new_string", "_raw"):
v = args.get(key) v = args.get(key)
if isinstance(v, str): if isinstance(v, str):
haystacks.append(v) haystacks.append((key, v))
hit = False hit = False
for hay in haystacks: for key, hay in haystacks:
for needle in needles: for needle in needles:
if needle and needle in hay: if not needle:
continue
if key == "file_path":
matched = _needle_in_path_component(needle, hay)
else:
matched = bool(
re.search(rf'\b{re.escape(needle)}\b', hay)
)
if matched:
hit = True hit = True
evidence = ( evidence = (
f"skill_manage action={args.get('action', '?')} " f"skill_manage action={args.get('action', '?')} "

View file

@ -220,6 +220,81 @@ def test_classify_handles_malformed_arguments_string(curator_env):
assert len(result["pruned"]) == 1 assert len(result["pruned"]) == 1
def test_classify_no_false_positive_short_name_in_file_path(curator_env):
"""Short skill name that is a substring of another filename = pruned, not consolidated."""
# e.g. "api" should NOT match "references/api-design.md"
result = curator_env._classify_removed_skills(
removed=["api"],
added=[],
after_names={"conventions"},
tool_calls=[
{
"name": "skill_manage",
"arguments": json.dumps({
"action": "write_file",
"name": "conventions",
"file_path": "references/api-design.md",
"file_content": "# API Design\n...",
}),
},
],
)
assert result["consolidated"] == [], (
f"Short name 'api' should NOT match file_path 'references/api-design.md'"
)
assert len(result["pruned"]) == 1
assert result["pruned"][0]["name"] == "api"
def test_classify_no_false_positive_short_name_in_content(curator_env):
"""Short skill name embedded in longer word in content = pruned, not consolidated."""
# e.g. "test" should NOT match content "running latest tests"
result = curator_env._classify_removed_skills(
removed=["test"],
added=[],
after_names={"umbrella"},
tool_calls=[
{
"name": "skill_manage",
"arguments": json.dumps({
"action": "patch",
"name": "umbrella",
"old_string": "old",
"new_string": "running latest tests with pytest",
}),
},
],
)
assert result["consolidated"] == [], (
f"Short name 'test' should NOT match 'latest' via word boundary"
)
assert len(result["pruned"]) == 1
def test_classify_still_matches_exact_word_in_content(curator_env):
"""Word-boundary match still works for exact word occurrences."""
# "api" SHOULD match content "use the api gateway"
result = curator_env._classify_removed_skills(
removed=["api"],
added=[],
after_names={"gateway"},
tool_calls=[
{
"name": "skill_manage",
"arguments": json.dumps({
"action": "edit",
"name": "gateway",
"content": "# Gateway\n\nUse the api gateway for all requests.\n",
}),
},
],
)
assert len(result["consolidated"]) == 1, (
f"'api' should match as a standalone word in content"
)
assert result["consolidated"][0]["into"] == "gateway"
def test_report_md_splits_consolidated_and_pruned_sections(curator_env): def test_report_md_splits_consolidated_and_pruned_sections(curator_env):
"""End-to-end: REPORT.md shows both sections distinctly.""" """End-to-end: REPORT.md shows both sections distinctly."""
curator = curator_env curator = curator_env