fix(tool-schemas): reactive strip of pattern/format on llama.cpp grammar 400s

MCP servers commonly emit JSON Schema `pattern` and `format` keywords
(e.g. `\\d{4}-\\d{2}-\\d{2}` for date params). llama.cpp's
`json-schema-to-grammar` converter rejects regex escape classes
(\\d/\\w/\\s) and most format values, returning HTTP 400
"parse: error parsing grammar: unknown escape at \\d" — the whole request
fails.
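A minimal illustration of the failing shape (hypothetical tool parameter, not taken from a real MCP server):

```python
# Hypothetical MCP tool-parameter schema of the kind described above.
# Cloud providers treat `pattern`/`format` as prompting hints; llama.cpp
# tries to compile `pattern` into a GBNF grammar and rejects the \d
# escape class with HTTP 400.
offending_schema = {
    "type": "object",
    "properties": {
        "date": {
            "type": "string",
            "format": "date",
            "pattern": r"\d{4}-\d{2}-\d{2}",
        },
    },
    "required": ["date"],
}
```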

Cloud providers (OpenAI, Anthropic, OpenRouter, Gemini) accept these
keywords fine and use them as prompting hints. Stripping them
unconditionally would cost every cloud user those hints just to fix a
llama.cpp-only bug.

Approach: classify the llama.cpp grammar-parse 400 in the error
classifier, and on match do a one-shot in-place strip of pattern/format
from `self.tools`, then retry. Follows the existing
`thinking_signature` recovery pattern. Cloud users hit zero overhead;
llama.cpp users pay one failed request per session.
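A hedged sketch of the narrow classification step, using the enum member name from this commit; the real classifier's structure and signatures may differ:

```python
from enum import Enum

class FailoverReason(Enum):
    # Only the members needed for this sketch; the real enum has more.
    llama_cpp_grammar_pattern = "llama_cpp_grammar_pattern"
    unknown = "unknown"

# Phrases observed across llama.cpp builds for grammar-conversion failures.
_GRAMMAR_PHRASES = ("error parsing grammar", "json-schema-to-grammar")

def classify_grammar_error(status_code: int, message: str) -> FailoverReason:
    """Match only the llama.cpp grammar-parse HTTP 400; anything else
    (including a 500 carrying the same phrase) falls through to unknown."""
    msg = message.lower()
    if status_code == 400:
        if any(p in msg for p in _GRAMMAR_PHRASES):
            return FailoverReason.llama_cpp_grammar_pattern
        if "unable to generate parser" in msg and "template" in msg:
            return FailoverReason.llama_cpp_grammar_pattern
    return FailoverReason.unknown
```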

Changes
- agent/error_classifier.py: new `FailoverReason.llama_cpp_grammar_pattern`
  + narrow HTTP-400 branch matching "error parsing grammar",
  "json-schema-to-grammar", or "unable to generate parser ... template".
- tools/schema_sanitizer.py: new `strip_pattern_and_format()` helper —
  reactive, walks schema nodes, skips property names (search_files.pattern
  survives). Returns strip count for logging.
- run_agent.py: new one-shot recovery block in the retry loop. Strips,
  logs, continues. Falls through to normal retry if nothing to strip.
- tests: 4 classifier tests (3 variants + 1 non-400 negative), 7 strip
  tests including the property-name preservation and idempotency checks.
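A sketch of what `strip_pattern_and_format()` could look like under the constraints listed above (in-place walk, property names preserved, strip count returned); the actual implementation in tools/schema_sanitizer.py may differ:

```python
from typing import Any

def strip_pattern_and_format(schema: Any) -> int:
    """Recursively remove `pattern` and `format` keywords from a JSON
    Schema tree, in place. Returns the number of keywords removed.

    Property names survive: the keys of a `properties` dict are property
    names, not schema keywords, so we recurse into its values without
    examining its keys. A tool parameter literally named "pattern"
    (e.g. search_files.pattern) is therefore preserved.
    """
    removed = 0
    if isinstance(schema, dict):
        # Delete schema-level keywords first (values are always strings).
        for key in ("pattern", "format"):
            if key in schema and isinstance(schema[key], str):
                del schema[key]
                removed += 1
        for key, value in schema.items():
            if key == "properties" and isinstance(value, dict):
                # Recurse into sub-schemas only, never into the
                # property-name keys themselves.
                for sub in value.values():
                    removed += strip_pattern_and_format(sub)
            else:
                removed += strip_pattern_and_format(value)
    elif isinstance(schema, list):
        for item in schema:
            removed += strip_pattern_and_format(item)
    return removed
```

A second call on the same tree removes nothing, which is what makes the one-shot recovery safe to fall through to normal retries.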

Co-authored-by: Chris Danis <cdanis@gmail.com>
Author: Chris Danis <cdanis@gmail.com>
Date:   2026-05-05 04:21:17 -07:00
Committed-by: Teknium
Parent: 542e06c789
Commit: 28f4d6db63
5 changed files with 280 additions and 1 deletion


@@ -59,6 +59,7 @@ class TestFailoverReason:
             "provider_policy_blocked",
             "thinking_signature", "long_context_tier",
             "oauth_long_context_beta_forbidden",
+            "llama_cpp_grammar_pattern",
             "unknown",
         }
         actual = {r.value for r in FailoverReason}
@@ -475,6 +476,43 @@ class TestClassifyApiError:
         # Without "thinking" in the message, it shouldn't be thinking_signature
         assert result.reason != FailoverReason.thinking_signature
 
+    # ── Provider-specific: llama.cpp grammar-parse ──
+
+    def test_llama_cpp_grammar_parse_error(self):
+        """llama.cpp rejects regex escapes in JSON Schema `pattern`."""
+        e = MockAPIError(
+            "parse: error parsing grammar: unknown escape at \\d",
+            status_code=400,
+        )
+        result = classify_api_error(e, provider="openai-compatible")
+        assert result.reason == FailoverReason.llama_cpp_grammar_pattern
+        assert result.retryable is True
+        assert result.should_compress is False
+
+    def test_llama_cpp_unable_to_generate_parser(self):
+        """Older llama.cpp builds surface the error as 'unable to generate parser'."""
+        e = MockAPIError(
+            "Unable to generate parser for this template",
+            status_code=400,
+        )
+        result = classify_api_error(e, provider="openai-compatible")
+        assert result.reason == FailoverReason.llama_cpp_grammar_pattern
+
+    def test_llama_cpp_json_schema_to_grammar_phrase(self):
+        """Some builds mention the module name explicitly."""
+        e = MockAPIError(
+            "json-schema-to-grammar failed to convert schema",
+            status_code=400,
+        )
+        result = classify_api_error(e, provider="openai-compatible")
+        assert result.reason == FailoverReason.llama_cpp_grammar_pattern
+
+    def test_llama_cpp_grammar_requires_400(self):
+        """A 500 with the same phrase isn't the llama.cpp grammar case."""
+        e = MockAPIError("error parsing grammar", status_code=500)
+        result = classify_api_error(e, provider="openai-compatible")
+        assert result.reason != FailoverReason.llama_cpp_grammar_pattern
+
     # ── Provider-specific: Anthropic long-context tier ──
     def test_anthropic_long_context_tier(self):