From 28f4d6db63f450828ffe419964719d35bbeedc58 Mon Sep 17 00:00:00 2001
From: Chris Danis <cdanis@gmail.com>
Date: Tue, 5 May 2026 04:21:17 -0700
Subject: [PATCH] fix(tool-schemas): reactive strip of pattern/format on
 llama.cpp grammar 400s
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

MCP servers commonly emit JSON Schema `pattern` (e.g. `\d{4}-\d{2}-\d{2}`
for date params) and `format` keywords. llama.cpp's
`json-schema-to-grammar` converter rejects regex escape classes
(`\d`/`\w`/`\s`) and most `format` values, returning HTTP 400
"parse: error parsing grammar: unknown escape at \d" — the whole request
fails.

Cloud providers (OpenAI, Anthropic, OpenRouter, Gemini) accept these
keywords fine and use them as prompting hints. Stripping them
unconditionally would throw away useful hints for every cloud user just
to work around a llama.cpp-only limitation.

Approach: classify the llama.cpp grammar-parse 400 in the error
classifier, and on match do a one-shot in-place strip of pattern/format
from `self.tools`, then retry. Follows the existing
`thinking_signature` recovery pattern. Cloud users hit zero overhead;
llama.cpp users pay one failed request per session.

Changes
- agent/error_classifier.py: new `FailoverReason.llama_cpp_grammar_pattern`
  + narrow HTTP-400 branch matching "error parsing grammar",
  "json-schema-to-grammar", or "unable to generate parser ... template".
- tools/schema_sanitizer.py: new `strip_pattern_and_format()` helper —
  reactive, walks schema nodes, skips property names (search_files.pattern
  survives). Returns strip count for logging.
- run_agent.py: new one-shot recovery block in the retry loop. Strips,
  logs, continues. Falls through to normal retry if nothing to strip.
- tests: 4 classifier tests (3 variants + 1 non-400 negative), 7 strip
  tests including the property-name preservation and idempotency checks.

Co-authored-by: Chris Danis <cdanis@gmail.com>
---
 agent/error_classifier.py            |  26 +++++++
 run_agent.py                         |  44 ++++++++++++
 tests/agent/test_error_classifier.py |  38 ++++++++++
 tests/tools/test_schema_sanitizer.py | 101 ++++++++++++++++++++++++++-
 tools/schema_sanitizer.py            |  72 +++++++++++++++++++
 5 files changed, 280 insertions(+), 1 deletion(-)

diff --git a/agent/error_classifier.py b/agent/error_classifier.py
index 67feaa4304..419a984b75 100644
--- a/agent/error_classifier.py
+++ b/agent/error_classifier.py
@@ -55,6 +55,7 @@ class FailoverReason(enum.Enum):
     thinking_signature = "thinking_signature"  # Anthropic thinking block sig invalid
     long_context_tier = "long_context_tier"    # Anthropic "extra usage" tier gate
     oauth_long_context_beta_forbidden = "oauth_long_context_beta_forbidden"  # Anthropic OAuth subscription rejects 1M context beta — disable beta and retry
+    llama_cpp_grammar_pattern = "llama_cpp_grammar_pattern"  # llama.cpp json-schema-to-grammar rejects regex escapes in `pattern` / `format` — strip from tools and retry
 
     # Catch-all
     unknown = "unknown"                  # Unclassifiable — retry with backoff
@@ -470,6 +471,31 @@ def classify_api_error(
             should_compress=False,
         )
 
+    # llama.cpp's ``json-schema-to-grammar`` converter (used by its OAI
+    # server to build GBNF tool-call parsers) rejects regex escape classes
+    # like ``\d``/``\w``/``\s`` and most ``format`` values. MCP servers
+    # routinely emit ``"pattern": "\\d{4}-\\d{2}-\\d{2}"`` for date/phone/
+    # email params. llama.cpp surfaces this as HTTP 400 with one of a few
+    # recognizable phrases; on match we strip ``pattern``/``format`` from
+    # ``self.tools`` in the retry loop and retry once. Cloud providers are
+    # unaffected — they accept these keywords and we never hit this branch.
+    if (
+        status_code == 400
+        and (
+            "error parsing grammar" in error_msg
+            or "json-schema-to-grammar" in error_msg
+            or (
+                "unable to generate parser" in error_msg
+                and "template" in error_msg
+            )
+        )
+    ):
+        return _result(
+            FailoverReason.llama_cpp_grammar_pattern,
+            retryable=True,
+            should_compress=False,
+        )
+
     # ── 2. HTTP status code classification ──────────────────────────
 
     if status_code is not None:
diff --git a/run_agent.py b/run_agent.py
index 8e1549925b..4d8ffa1908 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -11116,6 +11116,7 @@ class AIAgent:
             thinking_sig_retry_attempted = False
             image_shrink_retry_attempted = False
             oauth_1m_beta_retry_attempted = False
+            llama_cpp_grammar_retry_attempted = False
             has_retried_429 = False
             restart_with_compressed_messages = False
             restart_with_length_continuation = False
@@ -12206,6 +12207,49 @@ class AIAgent:
                         )
                         continue
 
+                    # ── llama.cpp grammar-parse recovery ──────────────────
+                    # llama.cpp's ``json-schema-to-grammar`` converter rejects
+                    # regex escape classes (``\d``, ``\w``, ``\s``) and most
+                    # ``format`` values in tool schemas.  MCP servers emit
+                    # these routinely for date/phone/email params.  Recovery:
+                    # strip ``pattern``/``format`` from ``self.tools`` and
+                    # retry once.  We keep the keywords by default so cloud
+                    # providers get the full prompting hints; this branch
+                    # fires only for users on llama.cpp's OAI server.
+                    if (
+                        classified.reason == FailoverReason.llama_cpp_grammar_pattern
+                        and not llama_cpp_grammar_retry_attempted
+                    ):
+                        llama_cpp_grammar_retry_attempted = True
+                        try:
+                            from tools.schema_sanitizer import strip_pattern_and_format
+                            _, _stripped = strip_pattern_and_format(self.tools)
+                        except Exception as _strip_exc:  # pragma: no cover — defensive
+                            logging.warning(
+                                "%sllama.cpp grammar recovery: strip helper failed: %s",
+                                self.log_prefix, _strip_exc,
+                            )
+                            _stripped = 0
+                        if _stripped:
+                            self._vprint(
+                                f"{self.log_prefix}⚠️  llama.cpp rejected tool schema grammar — "
+                                f"stripped {_stripped} pattern/format keyword(s), retrying...",
+                                force=True,
+                            )
+                            logging.warning(
+                                "%sllama.cpp grammar recovery: stripped %d "
+                                "pattern/format keyword(s) from tool schemas",
+                                self.log_prefix, _stripped,
+                            )
+                            continue
+                        # No keywords found to strip — fall through to normal
+                        # retry path rather than loop forever on the same error.
+                        logging.warning(
+                            "%sllama.cpp grammar error but no pattern/format "
+                            "keywords to strip — falling through to normal retry",
+                            self.log_prefix,
+                        )
+
                     retry_count += 1
                     elapsed_time = time.time() - api_start_time
                     self._touch_activity(
diff --git a/tests/agent/test_error_classifier.py b/tests/agent/test_error_classifier.py
index 5a28797349..d3f62c847c 100644
--- a/tests/agent/test_error_classifier.py
+++ b/tests/agent/test_error_classifier.py
@@ -59,6 +59,7 @@ class TestFailoverReason:
             "provider_policy_blocked",
             "thinking_signature", "long_context_tier",
             "oauth_long_context_beta_forbidden",
+            "llama_cpp_grammar_pattern",
             "unknown",
         }
         actual = {r.value for r in FailoverReason}
@@ -475,6 +476,43 @@ class TestClassifyApiError:
         # Without "thinking" in the message, it shouldn't be thinking_signature
         assert result.reason != FailoverReason.thinking_signature
 
+    # ── Provider-specific: llama.cpp grammar-parse ──
+
+    def test_llama_cpp_grammar_parse_error(self):
+        """llama.cpp rejects regex escapes in JSON Schema `pattern`."""
+        e = MockAPIError(
+            "parse: error parsing grammar: unknown escape at \\d",
+            status_code=400,
+        )
+        result = classify_api_error(e, provider="openai-compatible")
+        assert result.reason == FailoverReason.llama_cpp_grammar_pattern
+        assert result.retryable is True
+        assert result.should_compress is False
+
+    def test_llama_cpp_unable_to_generate_parser(self):
+        """Older llama.cpp builds surface the error as 'unable to generate parser'."""
+        e = MockAPIError(
+            "Unable to generate parser for this template",
+            status_code=400,
+        )
+        result = classify_api_error(e, provider="openai-compatible")
+        assert result.reason == FailoverReason.llama_cpp_grammar_pattern
+
+    def test_llama_cpp_json_schema_to_grammar_phrase(self):
+        """Some builds mention the module name explicitly."""
+        e = MockAPIError(
+            "json-schema-to-grammar failed to convert schema",
+            status_code=400,
+        )
+        result = classify_api_error(e, provider="openai-compatible")
+        assert result.reason == FailoverReason.llama_cpp_grammar_pattern
+
+    def test_llama_cpp_grammar_requires_400(self):
+        """A 500 with the same phrase isn't the llama.cpp grammar case."""
+        e = MockAPIError("error parsing grammar", status_code=500)
+        result = classify_api_error(e, provider="openai-compatible")
+        assert result.reason != FailoverReason.llama_cpp_grammar_pattern
+
     # ── Provider-specific: Anthropic long-context tier ──
 
     def test_anthropic_long_context_tier(self):
diff --git a/tests/tools/test_schema_sanitizer.py b/tests/tools/test_schema_sanitizer.py
index 171651ca7a..cc54fbfeb0 100644
--- a/tests/tools/test_schema_sanitizer.py
+++ b/tests/tools/test_schema_sanitizer.py
@@ -9,7 +9,7 @@ from __future__ import annotations
 
 import copy
 
-from tools.schema_sanitizer import sanitize_tool_schemas
+from tools.schema_sanitizer import sanitize_tool_schemas, strip_pattern_and_format
 
 
 def _tool(name: str, parameters: dict) -> dict:
@@ -203,3 +203,102 @@ def test_empty_tools_list_returns_empty():
 
 def test_none_tools_returns_none():
     assert sanitize_tool_schemas(None) is None
+
+
+# ─────────────────────────────────────────────────────────────────────────
+# strip_pattern_and_format — reactive recovery when llama.cpp rejects a
+# schema with an HTTP 400 grammar-parse error. Must be opt-in (only
+# invoked on recovery) and must not damage property names.
+# ─────────────────────────────────────────────────────────────────────────
+
+
+def test_strip_pattern_removes_schema_pattern_keyword():
+    """`pattern` as a sibling of `type` → stripped."""
+    tools = [_tool("t", {
+        "type": "object",
+        "properties": {
+            "date": {"type": "string", "pattern": "\\d{4,4}-\\d{2,2}-\\d{2,2}"},
+        },
+    })]
+    _, stripped = strip_pattern_and_format(tools)
+    assert stripped == 1
+    prop = tools[0]["function"]["parameters"]["properties"]["date"]
+    assert "pattern" not in prop
+    assert prop["type"] == "string"
+
+
+def test_strip_format_removes_schema_format_keyword():
+    """`format` as a sibling of `type` → stripped."""
+    tools = [_tool("t", {
+        "type": "object",
+        "properties": {
+            "ts": {"type": "string", "format": "date-time"},
+        },
+    })]
+    _, stripped = strip_pattern_and_format(tools)
+    assert stripped == 1
+    assert "format" not in tools[0]["function"]["parameters"]["properties"]["ts"]
+
+
+def test_strip_preserves_property_named_pattern():
+    """Property literally *named* 'pattern' (search_files) must survive."""
+    tools = [_tool("search_files", {
+        "type": "object",
+        "properties": {
+            "pattern": {"type": "string", "description": "Regex pattern..."},
+            "limit": {"type": "integer"},
+        },
+        "required": ["pattern"],
+    })]
+    _, stripped = strip_pattern_and_format(tools)
+    assert stripped == 0
+    params = tools[0]["function"]["parameters"]
+    # Property named "pattern" still exists with its schema intact
+    assert "pattern" in params["properties"]
+    assert params["properties"]["pattern"]["type"] == "string"
+    assert params["required"] == ["pattern"]
+
+
+def test_strip_recurses_into_anyof_variants():
+    """Pattern/format inside anyOf variant schemas are also stripped."""
+    tools = [_tool("t", {
+        "type": "object",
+        "properties": {
+            "value": {
+                "anyOf": [
+                    {"type": "string", "pattern": "[A-Z]+", "format": "uuid"},
+                    {"type": "integer"},
+                ],
+            },
+        },
+    })]
+    _, stripped = strip_pattern_and_format(tools)
+    assert stripped == 2
+    variants = tools[0]["function"]["parameters"]["properties"]["value"]["anyOf"]
+    assert "pattern" not in variants[0]
+    assert "format" not in variants[0]
+    assert variants[0]["type"] == "string"
+
+
+def test_strip_is_idempotent():
+    """Second call on already-stripped tools is a no-op."""
+    tools = [_tool("t", {
+        "type": "object",
+        "properties": {"d": {"type": "string", "pattern": "\\d+"}},
+    })]
+    _, first = strip_pattern_and_format(tools)
+    _, second = strip_pattern_and_format(tools)
+    assert first == 1
+    assert second == 0
+
+
+def test_strip_empty_tools_returns_zero():
+    tools, stripped = strip_pattern_and_format([])
+    assert tools == []
+    assert stripped == 0
+
+
+def test_strip_none_returns_zero():
+    tools, stripped = strip_pattern_and_format(None)
+    assert tools is None
+    assert stripped == 0
diff --git a/tools/schema_sanitizer.py b/tools/schema_sanitizer.py
index de43b131b6..8c0a915aca 100644
--- a/tools/schema_sanitizer.py
+++ b/tools/schema_sanitizer.py
@@ -255,3 +255,75 @@ def _sanitize_node(node: Any, path: str) -> Any:
             out["required"] = valid
 
     return out
+
+
+# =============================================================================
+# Reactive strip — only invoked when llama.cpp rejects a schema
+# =============================================================================
+
+_STRIP_ON_RECOVERY_KEYS = frozenset({"pattern", "format"})
+# Keywords whose value maps caller-chosen *names* to subschemas (the map's keys
+# are never schema keywords), and keywords whose value is literal instance data.
+_NAME_MAP_KEYS = frozenset({"properties", "patternProperties", "$defs", "definitions"})
+_DATA_KEYS = frozenset({"enum", "const", "default", "examples"})
+
+
+def strip_pattern_and_format(tools: list[dict]) -> tuple[list[dict], int]:
+    """Strip ``pattern`` and ``format`` JSON Schema keywords from tool schemas.
+
+    Reactive sanitizer, invoked only after llama.cpp's
+    ``json-schema-to-grammar`` converter has rejected a tool schema with an
+    HTTP 400 grammar-parse error (it rejects regex escape classes such as
+    ``\\d``/``\\w``/``\\s`` and most ``format`` values).  Cloud providers
+    accept these keywords and use them as prompting hints, so they stay in
+    the default schema and are only stripped on demand.
+
+    The walk tracks *position* rather than guessing from sibling keys: keys
+    of ``properties``-style maps are names, so a property literally named
+    ``pattern`` or ``format`` survives even when a sibling property is named
+    ``type``.  Everywhere else the two keywords are removed, including on
+    schema nodes that carry no ``type``.  Literal data under ``enum`` /
+    ``const`` / ``default`` / ``examples`` is left untouched.
+
+    Args:
+        tools: OpenAI-format tool list, mutated in place.  Callers that need
+            the original should deep-copy first.  ``None`` or an empty list
+            is returned unchanged.
+
+    Returns:
+        ``(tools, stripped_count)`` — the same list reference plus the number
+        of ``pattern``/``format`` keywords removed across all tools.
+    """
+    if not tools:
+        return tools, 0
+
+    stripped = 0
+
+    def _walk(node: Any, keys_are_names: bool = False) -> None:
+        nonlocal stripped
+        if isinstance(node, list):
+            for item in node:
+                _walk(item)
+        elif isinstance(node, dict):
+            for key in list(node):
+                if keys_are_names:
+                    _walk(node[key])  # key is a name; its value is a schema
+                elif key in _STRIP_ON_RECOVERY_KEYS:
+                    del node[key]
+                    stripped += 1
+                elif key not in _DATA_KEYS:
+                    _walk(node[key], key in _NAME_MAP_KEYS)
+
+    for tool in tools:
+        fn = tool.get("function") if isinstance(tool, dict) else None
+        params = fn.get("parameters") if isinstance(fn, dict) else None
+        if isinstance(params, dict):
+            _walk(params)
+
+    if stripped:
+        logger.info(
+            "schema_sanitizer: stripped %d pattern/format keyword(s) from "
+            "tool schemas (llama.cpp grammar-parse recovery)",
+            stripped,
+        )
+    return tools, stripped