feat(skills): size limits for agent writes + fuzzy matching for patch (#4414)

* feat(skills): add content size limits for agent-created skills

Agent writes via skill_manage (create/edit/patch/write_file) are now
constrained to prevent unbounded growth:

- SKILL.md and supporting files: 100,000 character limit
- Supporting files: additional 1 MiB byte limit
- Patches on oversized hand-placed skills that reduce the size are
  allowed (shrink path), but patches that grow beyond the limit are
  rejected

Hand-placed skills and hub-installed skills have NO hard limit —
they load and function normally regardless of size. Hub installs
get a warning in the log if SKILL.md exceeds 100k chars.

This mirrors the memory system's char_limit pattern. Without this,
the agent auto-grows skills indefinitely through iterative patches
(hermes-agent-dev reached 197k chars / 72k tokens — 40x larger than
the largest skill in the entire skills.sh ecosystem).

Constants: MAX_SKILL_CONTENT_CHARS (100k), MAX_SKILL_FILE_BYTES (1MiB)
Tests: 14 new tests covering all write paths and edge cases

* feat(skills): add fuzzy matching to skill patch

_patch_skill now uses the same 8-strategy fuzzy matching engine
(tools/fuzzy_match.py) as the file patch tool. Handles whitespace
normalization, indentation differences, escape sequences, and
block-anchor matching. Eliminates exact-match failures when agents
patch skills with minor formatting mismatches.
This commit is contained in:
Teknium 2026-04-01 04:19:19 -07:00 committed by GitHub
parent 70744add15
commit f6ada27d1c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 464 additions and 18 deletions

View file

@ -82,6 +82,8 @@ SKILLS_DIR = HERMES_HOME / "skills"
MAX_NAME_LENGTH = 64
MAX_DESCRIPTION_LENGTH = 1024
MAX_SKILL_CONTENT_CHARS = 100_000 # ~36k tokens at 2.75 chars/token
MAX_SKILL_FILE_BYTES = 1_048_576 # 1 MiB per supporting file
# Characters allowed in skill names (filesystem-safe, URL-friendly)
VALID_NAME_RE = re.compile(r'^[a-z0-9][a-z0-9._-]*$')
@ -177,6 +179,21 @@ def _validate_frontmatter(content: str) -> Optional[str]:
return None
def _validate_content_size(content: str, label: str = "SKILL.md") -> Optional[str]:
"""Check that content doesn't exceed the character limit for agent writes.
Returns an error message or None if within bounds.
"""
if len(content) > MAX_SKILL_CONTENT_CHARS:
return (
f"{label} content is {len(content):,} characters "
f"(limit: {MAX_SKILL_CONTENT_CHARS:,}). "
f"Consider splitting into a smaller SKILL.md with supporting files "
f"in references/ or templates/."
)
return None
def _resolve_skill_dir(name: str, category: str = None) -> Path:
"""Build the directory path for a new skill, optionally under a category."""
if category:
@ -275,6 +292,10 @@ def _create_skill(name: str, content: str, category: str = None) -> Dict[str, An
if err:
return {"success": False, "error": err}
err = _validate_content_size(content)
if err:
return {"success": False, "error": err}
# Check for name collisions across all directories
existing = _find_skill(name)
if existing:
@ -318,6 +339,10 @@ def _edit_skill(name: str, content: str) -> Dict[str, Any]:
if err:
return {"success": False, "error": err}
err = _validate_content_size(content)
if err:
return {"success": False, "error": err}
existing = _find_skill(name)
if not existing:
return {"success": False, "error": f"Skill '{name}' not found. Use skills_list() to see available skills."}
@ -379,27 +404,29 @@ def _patch_skill(
content = target.read_text(encoding="utf-8")
count = content.count(old_string)
if count == 0:
# Use the same fuzzy matching engine as the file patch tool.
# This handles whitespace normalization, indentation differences,
# escape sequences, and block-anchor matching — saving the agent
# from exact-match failures on minor formatting mismatches.
from tools.fuzzy_match import fuzzy_find_and_replace
new_content, match_count, match_error = fuzzy_find_and_replace(
content, old_string, new_string, replace_all
)
if match_error:
# Show a short preview of the file so the model can self-correct
preview = content[:500] + ("..." if len(content) > 500 else "")
return {
"success": False,
"error": "old_string not found in the file.",
"error": match_error,
"file_preview": preview,
}
if count > 1 and not replace_all:
return {
"success": False,
"error": (
f"old_string matched {count} times. Provide more surrounding context "
f"to make the match unique, or set replace_all=true to replace all occurrences."
),
"match_count": count,
}
new_content = content.replace(old_string, new_string) if replace_all else content.replace(old_string, new_string, 1)
# Check size limit on the result
target_label = "SKILL.md" if not file_path else file_path
err = _validate_content_size(new_content, label=target_label)
if err:
return {"success": False, "error": err}
# If patching SKILL.md, validate frontmatter is still intact
if not file_path:
@ -419,10 +446,9 @@ def _patch_skill(
_atomic_write_text(target, original_content)
return {"success": False, "error": scan_error}
replacements = count if replace_all else 1
return {
"success": True,
"message": f"Patched {'SKILL.md' if not file_path else file_path} in skill '{name}' ({replacements} replacement{'s' if replacements > 1 else ''}).",
"message": f"Patched {'SKILL.md' if not file_path else file_path} in skill '{name}' ({match_count} replacement{'s' if match_count > 1 else ''}).",
}
@ -455,6 +481,21 @@ def _write_file(name: str, file_path: str, file_content: str) -> Dict[str, Any]:
if not file_content and file_content != "":
return {"success": False, "error": "file_content is required."}
# Check size limits
content_bytes = len(file_content.encode("utf-8"))
if content_bytes > MAX_SKILL_FILE_BYTES:
return {
"success": False,
"error": (
f"File content is {content_bytes:,} bytes "
f"(limit: {MAX_SKILL_FILE_BYTES:,} bytes / 1 MiB). "
f"Consider splitting into smaller files."
),
}
err = _validate_content_size(file_content, label=file_path)
if err:
return {"success": False, "error": err}
existing = _find_skill(name)
if not existing:
return {"success": False, "error": f"Skill '{name}' not found. Create it first with action='create'."}