diff --git a/agent/curator.py b/agent/curator.py index 8dee0acbba..a726e875b6 100644 --- a/agent/curator.py +++ b/agent/curator.py @@ -24,6 +24,7 @@ from __future__ import annotations import json import logging import os +import re import tempfile import threading from datetime import datetime, timedelta, timezone @@ -469,6 +470,24 @@ def _reports_root() -> Path: return root +def _needle_in_path_component(needle: str, path: str) -> bool: + """Check if *needle* is a complete filename stem or directory name in *path*. + + Unlike simple substring matching, this avoids false positives where short + skill names are embedded in longer filenames (e.g. "api" matching + "references/api-design.md"). Hyphens and underscores are normalised so + "open-webui-setup" matches "open_webui_setup.md". + """ + norm_needle = needle.replace("-", "_") + for part in path.replace("\\", "/").split("/"): + if not part: + continue + stem = part.rsplit(".", 1)[0] if "." in part else part + if stem.replace("-", "_") == norm_needle: + return True + return False + + def _classify_removed_skills( removed: List[str], added: List[str], @@ -547,15 +566,29 @@ def _classify_removed_skills( continue # Look for the removed skill's name in file_path / content / raw. - haystacks: List[str] = [] + # Matching strategy differs by field type: + # file_path — needle must be a complete path component + # (filename stem or directory name), so "api" does NOT + # falsely match "references/api-design.md". + # content fields — word-boundary regex so "test" does NOT + # falsely match "latest" or "testing". + haystacks: List[tuple[str, str]] = [] for key in ("file_path", "file_content", "content", "new_string", "_raw"): v = args.get(key) if isinstance(v, str): - haystacks.append(v) + haystacks.append((key, v)) hit = False - for hay in haystacks: + for key, hay in haystacks: for needle in needles: - if needle and needle in hay: + if not needle: + continue + if key == "file_path": + matched = _needle_in_path_component(needle, hay) + else: + matched = bool( + re.search(rf'\b{re.escape(needle)}\b', hay) + ) + if matched: hit = True evidence = ( f"skill_manage action={args.get('action', '?')} " diff --git a/tests/agent/test_curator_classification.py b/tests/agent/test_curator_classification.py index 031d66529b..625776f537 100644 --- a/tests/agent/test_curator_classification.py +++ b/tests/agent/test_curator_classification.py @@ -220,6 +220,81 @@ def test_classify_handles_malformed_arguments_string(curator_env): assert len(result["pruned"]) == 1 +def test_classify_no_false_positive_short_name_in_file_path(curator_env): + """Short skill name that is a substring of another filename = pruned, not consolidated.""" + # e.g. "api" should NOT match "references/api-design.md" + result = curator_env._classify_removed_skills( + removed=["api"], + added=[], + after_names={"conventions"}, + tool_calls=[ + { + "name": "skill_manage", + "arguments": json.dumps({ + "action": "write_file", + "name": "conventions", + "file_path": "references/api-design.md", + "file_content": "# API Design\n...", + }), + }, + ], + ) + assert result["consolidated"] == [], ( + f"Short name 'api' should NOT match file_path 'references/api-design.md'" + ) + assert len(result["pruned"]) == 1 + assert result["pruned"][0]["name"] == "api" + + +def test_classify_no_false_positive_short_name_in_content(curator_env): + """Short skill name embedded in longer word in content = pruned, not consolidated.""" + # e.g. "test" should NOT match content "running latest tests" + result = curator_env._classify_removed_skills( + removed=["test"], + added=[], + after_names={"umbrella"}, + tool_calls=[ + { + "name": "skill_manage", + "arguments": json.dumps({ + "action": "patch", + "name": "umbrella", + "old_string": "old", + "new_string": "running latest tests with pytest", + }), + }, + ], + ) + assert result["consolidated"] == [], ( + f"Short name 'test' should NOT match 'latest' via word boundary" + ) + assert len(result["pruned"]) == 1 + + +def test_classify_still_matches_exact_word_in_content(curator_env): + """Word-boundary match still works for exact word occurrences.""" + # "api" SHOULD match content "use the api gateway" + result = curator_env._classify_removed_skills( + removed=["api"], + added=[], + after_names={"gateway"}, + tool_calls=[ + { + "name": "skill_manage", + "arguments": json.dumps({ + "action": "edit", + "name": "gateway", + "content": "# Gateway\n\nUse the api gateway for all requests.\n", + }), + }, + ], + ) + assert len(result["consolidated"]) == 1, ( + f"'api' should match as a standalone word in content" + ) + assert result["consolidated"][0]["into"] == "gateway" + + def test_report_md_splits_consolidated_and_pruned_sections(curator_env): """End-to-end: REPORT.md shows both sections distinctly.""" curator = curator_env