mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-07 02:51:50 +00:00
fix(curator): prevent false-positive consolidation from substring matching
_classify_removed_skills used naive 'in' substring matching to detect whether a removed skill's name appeared in skill_manage arguments. Short/common skill names (api, git, test, foo, etc.) matched incorrectly when they appeared as substrings of longer words in file paths (references/api-design.md) or content (latest, testing). Replace with field-aware matching: - file_path: needle must match a complete filename stem or directory name, with -/_ normalised for variant tolerance - content fields: word-boundary regex (\b) prevents embedding in longer words Also add 3 regression tests covering the false-positive scenarios.
This commit is contained in:
parent
c0300575c1
commit
744079ffe6
2 changed files with 112 additions and 4 deletions
|
|
@ -24,6 +24,7 @@ from __future__ import annotations
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
|
import re
|
||||||
import tempfile
|
import tempfile
|
||||||
import threading
|
import threading
|
||||||
from datetime import datetime, timedelta, timezone
|
from datetime import datetime, timedelta, timezone
|
||||||
|
|
@ -469,6 +470,24 @@ def _reports_root() -> Path:
|
||||||
return root
|
return root
|
||||||
|
|
||||||
|
|
||||||
|
def _needle_in_path_component(needle: str, path: str) -> bool:
|
||||||
|
"""Check if *needle* is a complete filename stem or directory name in *path*.
|
||||||
|
|
||||||
|
Unlike simple substring matching, this avoids false positives where short
|
||||||
|
skill names are embedded in longer filenames (e.g. "api" matching
|
||||||
|
"references/api-design.md"). Hyphens and underscores are normalised so
|
||||||
|
"open-webui-setup" matches "open_webui_setup.md".
|
||||||
|
"""
|
||||||
|
norm_needle = needle.replace("-", "_")
|
||||||
|
for part in path.replace("\\", "/").split("/"):
|
||||||
|
if not part:
|
||||||
|
continue
|
||||||
|
stem = part.rsplit(".", 1)[0] if "." in part else part
|
||||||
|
if stem.replace("-", "_") == norm_needle:
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
def _classify_removed_skills(
|
def _classify_removed_skills(
|
||||||
removed: List[str],
|
removed: List[str],
|
||||||
added: List[str],
|
added: List[str],
|
||||||
|
|
@ -547,15 +566,29 @@ def _classify_removed_skills(
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Look for the removed skill's name in file_path / content / raw.
|
# Look for the removed skill's name in file_path / content / raw.
|
||||||
haystacks: List[str] = []
|
# Matching strategy differs by field type:
|
||||||
|
# file_path — needle must be a complete path component
|
||||||
|
# (filename stem or directory name), so "api" does NOT
|
||||||
|
# falsely match "references/api-design.md".
|
||||||
|
# content fields — word-boundary regex so "test" does NOT
|
||||||
|
# falsely match "latest" or "testing".
|
||||||
|
haystacks: List[tuple[str, str]] = []
|
||||||
for key in ("file_path", "file_content", "content", "new_string", "_raw"):
|
for key in ("file_path", "file_content", "content", "new_string", "_raw"):
|
||||||
v = args.get(key)
|
v = args.get(key)
|
||||||
if isinstance(v, str):
|
if isinstance(v, str):
|
||||||
haystacks.append(v)
|
haystacks.append((key, v))
|
||||||
hit = False
|
hit = False
|
||||||
for hay in haystacks:
|
for key, hay in haystacks:
|
||||||
for needle in needles:
|
for needle in needles:
|
||||||
if needle and needle in hay:
|
if not needle:
|
||||||
|
continue
|
||||||
|
if key == "file_path":
|
||||||
|
matched = _needle_in_path_component(needle, hay)
|
||||||
|
else:
|
||||||
|
matched = bool(
|
||||||
|
re.search(rf'\b{re.escape(needle)}\b', hay)
|
||||||
|
)
|
||||||
|
if matched:
|
||||||
hit = True
|
hit = True
|
||||||
evidence = (
|
evidence = (
|
||||||
f"skill_manage action={args.get('action', '?')} "
|
f"skill_manage action={args.get('action', '?')} "
|
||||||
|
|
|
||||||
|
|
@ -220,6 +220,81 @@ def test_classify_handles_malformed_arguments_string(curator_env):
|
||||||
assert len(result["pruned"]) == 1
|
assert len(result["pruned"]) == 1
|
||||||
|
|
||||||
|
|
||||||
|
def test_classify_no_false_positive_short_name_in_file_path(curator_env):
    """Short skill name that is a substring of another filename = pruned, not consolidated."""
    # e.g. "api" should NOT match "references/api-design.md"
    result = curator_env._classify_removed_skills(
        removed=["api"],
        added=[],
        after_names={"conventions"},
        tool_calls=[
            {
                "name": "skill_manage",
                "arguments": json.dumps({
                    "action": "write_file",
                    "name": "conventions",
                    "file_path": "references/api-design.md",
                    "file_content": "# API Design\n...",
                }),
            },
        ],
    )
    # F541 fix: the assertion message had a pointless f-string prefix
    # (no placeholders); a plain string literal is the correct form.
    assert result["consolidated"] == [], (
        "Short name 'api' should NOT match file_path 'references/api-design.md'"
    )
    assert len(result["pruned"]) == 1
    assert result["pruned"][0]["name"] == "api"
|
||||||
|
|
||||||
|
|
||||||
|
def test_classify_no_false_positive_short_name_in_content(curator_env):
    """Short skill name embedded in longer word in content = pruned, not consolidated."""
    # e.g. "test" should NOT match content "running latest tests"
    result = curator_env._classify_removed_skills(
        removed=["test"],
        added=[],
        after_names={"umbrella"},
        tool_calls=[
            {
                "name": "skill_manage",
                "arguments": json.dumps({
                    "action": "patch",
                    "name": "umbrella",
                    "old_string": "old",
                    "new_string": "running latest tests with pytest",
                }),
            },
        ],
    )
    # F541 fix: the assertion message had a pointless f-string prefix
    # (no placeholders); a plain string literal is the correct form.
    assert result["consolidated"] == [], (
        "Short name 'test' should NOT match 'latest' via word boundary"
    )
    assert len(result["pruned"]) == 1
|
||||||
|
|
||||||
|
|
||||||
|
def test_classify_still_matches_exact_word_in_content(curator_env):
    """Word-boundary match still works for exact word occurrences."""
    # "api" SHOULD match content "use the api gateway"
    result = curator_env._classify_removed_skills(
        removed=["api"],
        added=[],
        after_names={"gateway"},
        tool_calls=[
            {
                "name": "skill_manage",
                "arguments": json.dumps({
                    "action": "edit",
                    "name": "gateway",
                    "content": "# Gateway\n\nUse the api gateway for all requests.\n",
                }),
            },
        ],
    )
    # F541 fix: the assertion message had a pointless f-string prefix
    # (no placeholders); a plain string literal is the correct form.
    assert len(result["consolidated"]) == 1, (
        "'api' should match as a standalone word in content"
    )
    assert result["consolidated"][0]["into"] == "gateway"
|
||||||
|
|
||||||
|
|
||||||
def test_report_md_splits_consolidated_and_pruned_sections(curator_env):
|
def test_report_md_splits_consolidated_and_pruned_sections(curator_env):
|
||||||
"""End-to-end: REPORT.md shows both sections distinctly."""
|
"""End-to-end: REPORT.md shows both sections distinctly."""
|
||||||
curator = curator_env
|
curator = curator_env
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue