feat(skills): size limits for agent writes + fuzzy matching for patch (#4414)

* feat(skills): add content size limits for agent-created skills Agent writes via skill_manage (create/edit/patch/write_file) are now constrained to prevent unbounded growth: - SKILL.md and supporting files: 100,000 character limit - Supporting files: additional 1 MiB byte limit - Patches on oversized hand-placed skills that reduce the size are allowed (shrink path), but patches that grow beyond the limit are rejected Hand-placed skills and hub-installed skills have NO hard limit — they load and function normally regardless of size. Hub installs get a warning in the log if SKILL.md exceeds 100k chars. This mirrors the memory system's char_limit pattern. Without this, the agent auto-grows skills indefinitely through iterative patches (hermes-agent-dev reached 197k chars / 72k tokens — 40x larger than the largest skill in the entire skills.sh ecosystem). Constants: MAX_SKILL_CONTENT_CHARS (100k), MAX_SKILL_FILE_BYTES (1MiB) Tests: 14 new tests covering all write paths and edge cases * feat(skills): add fuzzy matching to skill patch _patch_skill now uses the same 8-strategy fuzzy matching engine (tools/fuzzy_match.py) as the file patch tool. Handles whitespace normalization, indentation differences, escape sequences, and block-anchor matching. Eliminates exact-match failures when agents patch skills with minor formatting mismatches.
2026-04-25 00:51:20 +00:00 · 2026-04-01 04:19:19 -07:00 · 2026-04-01 04:19:19 -07:00 · f6ada27d1c
commit f6ada27d1c
parent 70744add15
5 changed files with 464 additions and 18 deletions
--- a/tools/skill_manager_tool.py
+++ b/tools/skill_manager_tool.py
@ -82,6 +82,8 @@ SKILLS_DIR = HERMES_HOME / "skills"

 MAX_NAME_LENGTH = 64
 MAX_DESCRIPTION_LENGTH = 1024
+MAX_SKILL_CONTENT_CHARS = 100_000   # ~36k tokens at 2.75 chars/token
+MAX_SKILL_FILE_BYTES = 1_048_576    # 1 MiB per supporting file

 # Characters allowed in skill names (filesystem-safe, URL-friendly)
 VALID_NAME_RE = re.compile(r'^[a-z0-9][a-z0-9._-]*$')
@ -177,6 +179,21 @@ def _validate_frontmatter(content: str) -> Optional[str]:
    return None


+def _validate_content_size(content: str, label: str = "SKILL.md") -> Optional[str]:
+    """Check that content doesn't exceed the character limit for agent writes.
+
+    Returns an error message or None if within bounds.
+    """
+    if len(content) > MAX_SKILL_CONTENT_CHARS:
+        return (
+            f"{label} content is {len(content):,} characters "
+            f"(limit: {MAX_SKILL_CONTENT_CHARS:,}). "
+            f"Consider splitting into a smaller SKILL.md with supporting files "
+            f"in references/ or templates/."
+        )
+    return None
+
+
 def _resolve_skill_dir(name: str, category: str = None) -> Path:
    """Build the directory path for a new skill, optionally under a category."""
    if category:
@ -275,6 +292,10 @@ def _create_skill(name: str, content: str, category: str = None) -> Dict[str, An
    if err:
        return {"success": False, "error": err}

+    err = _validate_content_size(content)
+    if err:
+        return {"success": False, "error": err}
+
    # Check for name collisions across all directories
    existing = _find_skill(name)
    if existing:
@ -318,6 +339,10 @@ def _edit_skill(name: str, content: str) -> Dict[str, Any]:
    if err:
        return {"success": False, "error": err}

+    err = _validate_content_size(content)
+    if err:
+        return {"success": False, "error": err}
+
    existing = _find_skill(name)
    if not existing:
        return {"success": False, "error": f"Skill '{name}' not found. Use skills_list() to see available skills."}
@ -379,27 +404,29 @@ def _patch_skill(

    content = target.read_text(encoding="utf-8")

-    count = content.count(old_string)
-    if count == 0:
+    # Use the same fuzzy matching engine as the file patch tool.
+    # This handles whitespace normalization, indentation differences,
+    # escape sequences, and block-anchor matching — saving the agent
+    # from exact-match failures on minor formatting mismatches.
+    from tools.fuzzy_match import fuzzy_find_and_replace
+
+    new_content, match_count, match_error = fuzzy_find_and_replace(
+        content, old_string, new_string, replace_all
+    )
+    if match_error:
        # Show a short preview of the file so the model can self-correct
        preview = content[:500] + ("..." if len(content) > 500 else "")
        return {
            "success": False,
-            "error": "old_string not found in the file.",
+            "error": match_error,
            "file_preview": preview,
        }

-    if count > 1 and not replace_all:
-        return {
-            "success": False,
-            "error": (
-                f"old_string matched {count} times. Provide more surrounding context "
-                f"to make the match unique, or set replace_all=true to replace all occurrences."
-            ),
-            "match_count": count,
-        }
-
-    new_content = content.replace(old_string, new_string) if replace_all else content.replace(old_string, new_string, 1)
+    # Check size limit on the result
+    target_label = "SKILL.md" if not file_path else file_path
+    err = _validate_content_size(new_content, label=target_label)
+    if err:
+        return {"success": False, "error": err}

    # If patching SKILL.md, validate frontmatter is still intact
    if not file_path:
@ -419,10 +446,9 @@ def _patch_skill(
        _atomic_write_text(target, original_content)
        return {"success": False, "error": scan_error}

-    replacements = count if replace_all else 1
    return {
        "success": True,
-        "message": f"Patched {'SKILL.md' if not file_path else file_path} in skill '{name}' ({replacements} replacement{'s' if replacements > 1 else ''}).",
+        "message": f"Patched {'SKILL.md' if not file_path else file_path} in skill '{name}' ({match_count} replacement{'s' if match_count > 1 else ''}).",
    }


@ -455,6 +481,21 @@ def _write_file(name: str, file_path: str, file_content: str) -> Dict[str, Any]:
    if not file_content and file_content != "":
        return {"success": False, "error": "file_content is required."}

+    # Check size limits
+    content_bytes = len(file_content.encode("utf-8"))
+    if content_bytes > MAX_SKILL_FILE_BYTES:
+        return {
+            "success": False,
+            "error": (
+                f"File content is {content_bytes:,} bytes "
+                f"(limit: {MAX_SKILL_FILE_BYTES:,} bytes / 1 MiB). "
+                f"Consider splitting into smaller files."
+            ),
+        }
+    err = _validate_content_size(file_content, label=file_path)
+    if err:
+        return {"success": False, "error": err}
+
    existing = _find_skill(name)
    if not existing:
        return {"success": False, "error": f"Skill '{name}' not found. Create it first with action='create'."}