diff --git a/agent/error_classifier.py b/agent/error_classifier.py
index 67feaa4304..419a984b75 100644
--- a/agent/error_classifier.py
+++ b/agent/error_classifier.py
@@ -55,6 +55,7 @@ class FailoverReason(enum.Enum):
     thinking_signature = "thinking_signature"  # Anthropic thinking block sig invalid
     long_context_tier = "long_context_tier"  # Anthropic "extra usage" tier gate
     oauth_long_context_beta_forbidden = "oauth_long_context_beta_forbidden"  # Anthropic OAuth subscription rejects 1M context beta — disable beta and retry
+    llama_cpp_grammar_pattern = "llama_cpp_grammar_pattern"  # llama.cpp json-schema-to-grammar rejects regex escapes in `pattern` / `format` — strip from tools and retry
 
     # Catch-all
     unknown = "unknown"  # Unclassifiable — retry with backoff
@@ -470,6 +471,31 @@ def classify_api_error(
             should_compress=False,
         )
 
+    # llama.cpp's ``json-schema-to-grammar`` converter (used by its OAI
+    # server to build GBNF tool-call parsers) rejects regex escape classes
+    # like ``\d``/``\w``/``\s`` and most ``format`` values. MCP servers
+    # routinely emit ``"pattern": "\\d{4}-\\d{2}-\\d{2}"`` for date/phone/
+    # email params. llama.cpp surfaces this as HTTP 400 with one of a few
+    # recognizable phrases; on match we strip ``pattern``/``format`` from
+    # ``self.tools`` in the retry loop and retry once. Cloud providers are
+    # unaffected — they accept these keywords and we never hit this branch.
+    if (
+        status_code == 400
+        and (
+            "error parsing grammar" in error_msg
+            or "json-schema-to-grammar" in error_msg
+            or (
+                "unable to generate parser" in error_msg
+                and "template" in error_msg
+            )
+        )
+    ):
+        return _result(
+            FailoverReason.llama_cpp_grammar_pattern,
+            retryable=True,
+            should_compress=False,
+        )
+
     # ── 2. HTTP status code classification ──────────────────────────
     if status_code is not None:
diff --git a/run_agent.py b/run_agent.py
index 8e1549925b..4d8ffa1908 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -11116,6 +11116,7 @@ class AIAgent:
         thinking_sig_retry_attempted = False
         image_shrink_retry_attempted = False
         oauth_1m_beta_retry_attempted = False
+        llama_cpp_grammar_retry_attempted = False
         has_retried_429 = False
         restart_with_compressed_messages = False
         restart_with_length_continuation = False
@@ -12206,6 +12207,49 @@
                     )
                     continue
 
+                # ── llama.cpp grammar-parse recovery ──────────────────
+                # llama.cpp's ``json-schema-to-grammar`` converter rejects
+                # regex escape classes (``\d``, ``\w``, ``\s``) and most
+                # ``format`` values in tool schemas. MCP servers emit
+                # these routinely for date/phone/email params. Recovery:
+                # strip ``pattern``/``format`` from ``self.tools`` and
+                # retry once. We keep the keywords by default so cloud
+                # providers get the full prompting hints; this branch
+                # fires only for users on llama.cpp's OAI server.
+                if (
+                    classified.reason == FailoverReason.llama_cpp_grammar_pattern
+                    and not llama_cpp_grammar_retry_attempted
+                ):
+                    llama_cpp_grammar_retry_attempted = True
+                    try:
+                        from tools.schema_sanitizer import strip_pattern_and_format
+                        _, _stripped = strip_pattern_and_format(self.tools)
+                    except Exception as _strip_exc:  # pragma: no cover — defensive
+                        logging.warning(
+                            "%sllama.cpp grammar recovery: strip helper failed: %s",
+                            self.log_prefix, _strip_exc,
+                        )
+                        _stripped = 0
+                    if _stripped:
+                        self._vprint(
+                            f"{self.log_prefix}⚠️ llama.cpp rejected tool schema grammar — "
+                            f"stripped {_stripped} pattern/format keyword(s), retrying...",
+                            force=True,
+                        )
+                        logging.warning(
+                            "%sllama.cpp grammar recovery: stripped %d "
+                            "pattern/format keyword(s) from tool schemas",
+                            self.log_prefix, _stripped,
+                        )
+                        continue
+                    # No keywords found to strip — fall through to normal
+                    # retry path rather than loop forever on the same error.
+                    logging.warning(
+                        "%sllama.cpp grammar error but no pattern/format "
+                        "keywords to strip — falling through to normal retry",
+                        self.log_prefix,
+                    )
+
                 retry_count += 1
                 elapsed_time = time.time() - api_start_time
                 self._touch_activity(
diff --git a/tests/agent/test_error_classifier.py b/tests/agent/test_error_classifier.py
index 5a28797349..d3f62c847c 100644
--- a/tests/agent/test_error_classifier.py
+++ b/tests/agent/test_error_classifier.py
@@ -59,6 +59,7 @@ class TestFailoverReason:
            "provider_policy_blocked",
            "thinking_signature", "long_context_tier",
            "oauth_long_context_beta_forbidden",
+           "llama_cpp_grammar_pattern",
            "unknown",
        }
        actual = {r.value for r in FailoverReason}
@@ -475,6 +476,43 @@
        # Without "thinking" in the message, it shouldn't be thinking_signature
        assert result.reason != FailoverReason.thinking_signature
 
+    # ── Provider-specific: llama.cpp grammar-parse ──
+
+    def test_llama_cpp_grammar_parse_error(self):
+        """llama.cpp rejects regex escapes in JSON Schema `pattern`."""
+        e = MockAPIError(
+            "parse: error parsing grammar: unknown escape at \\d",
+            status_code=400,
+        )
+        result = classify_api_error(e, provider="openai-compatible")
+        assert result.reason == FailoverReason.llama_cpp_grammar_pattern
+        assert result.retryable is True
+        assert result.should_compress is False
+
+    def test_llama_cpp_unable_to_generate_parser(self):
+        """Older llama.cpp builds surface the error as 'unable to generate parser'."""
+        e = MockAPIError(
+            "Unable to generate parser for this template",
+            status_code=400,
+        )
+        result = classify_api_error(e, provider="openai-compatible")
+        assert result.reason == FailoverReason.llama_cpp_grammar_pattern
+
+    def test_llama_cpp_json_schema_to_grammar_phrase(self):
+        """Some builds mention the module name explicitly."""
+        e = MockAPIError(
+            "json-schema-to-grammar failed to convert schema",
+            status_code=400,
+        )
+        result = classify_api_error(e, provider="openai-compatible")
+        assert result.reason == FailoverReason.llama_cpp_grammar_pattern
+
+    def test_llama_cpp_grammar_requires_400(self):
+        """A 500 with the same phrase isn't the llama.cpp grammar case."""
+        e = MockAPIError("error parsing grammar", status_code=500)
+        result = classify_api_error(e, provider="openai-compatible")
+        assert result.reason != FailoverReason.llama_cpp_grammar_pattern
+
     # ── Provider-specific: Anthropic long-context tier ──
 
     def test_anthropic_long_context_tier(self):
diff --git a/tests/tools/test_schema_sanitizer.py b/tests/tools/test_schema_sanitizer.py
index 171651ca7a..cc54fbfeb0 100644
--- a/tests/tools/test_schema_sanitizer.py
+++ b/tests/tools/test_schema_sanitizer.py
@@ -9,7 +9,7 @@
 from __future__ import annotations
 
 import copy
-from tools.schema_sanitizer import sanitize_tool_schemas
+from tools.schema_sanitizer import sanitize_tool_schemas, strip_pattern_and_format
 
 
 def _tool(name: str, parameters: dict) -> dict:
@@ -203,3 +203,102 @@ def test_empty_tools_list_returns_empty():
 
 def test_none_tools_returns_none():
     assert sanitize_tool_schemas(None) is None
+
+
+# ─────────────────────────────────────────────────────────────────────────
+# strip_pattern_and_format — reactive recovery when llama.cpp rejects a
+# schema with an HTTP 400 grammar-parse error. Must be opt-in (only
+# invoked on recovery) and must not damage property names.
+# ─────────────────────────────────────────────────────────────────────────
+
+
+def test_strip_pattern_removes_schema_pattern_keyword():
+    """`pattern` as a sibling of `type` → stripped."""
+    tools = [_tool("t", {
+        "type": "object",
+        "properties": {
+            "date": {"type": "string", "pattern": "\\d{4,4}-\\d{2,2}-\\d{2,2}"},
+        },
+    })]
+    _, stripped = strip_pattern_and_format(tools)
+    assert stripped == 1
+    prop = tools[0]["function"]["parameters"]["properties"]["date"]
+    assert "pattern" not in prop
+    assert prop["type"] == "string"
+
+
+def test_strip_format_removes_schema_format_keyword():
+    """`format` as a sibling of `type` → stripped."""
+    tools = [_tool("t", {
+        "type": "object",
+        "properties": {
+            "ts": {"type": "string", "format": "date-time"},
+        },
+    })]
+    _, stripped = strip_pattern_and_format(tools)
+    assert stripped == 1
+    assert "format" not in tools[0]["function"]["parameters"]["properties"]["ts"]
+
+
+def test_strip_preserves_property_named_pattern():
+    """Property literally *named* 'pattern' (search_files) must survive."""
+    tools = [_tool("search_files", {
+        "type": "object",
+        "properties": {
+            "pattern": {"type": "string", "description": "Regex pattern..."},
+            "limit": {"type": "integer"},
+        },
+        "required": ["pattern"],
+    })]
+    _, stripped = strip_pattern_and_format(tools)
+    assert stripped == 0
+    params = tools[0]["function"]["parameters"]
+    # Property named "pattern" still exists with its schema intact
+    assert "pattern" in params["properties"]
+    assert params["properties"]["pattern"]["type"] == "string"
+    assert params["required"] == ["pattern"]
+
+
+def test_strip_recurses_into_anyof_variants():
+    """Pattern/format inside anyOf variant schemas are also stripped."""
+    tools = [_tool("t", {
+        "type": "object",
+        "properties": {
+            "value": {
+                "anyOf": [
+                    {"type": "string", "pattern": "[A-Z]+", "format": "uuid"},
+                    {"type": "integer"},
+                ],
+            },
+        },
+    })]
+    _, stripped = strip_pattern_and_format(tools)
+    assert stripped == 2
+    variants = tools[0]["function"]["parameters"]["properties"]["value"]["anyOf"]
+    assert "pattern" not in variants[0]
+    assert "format" not in variants[0]
+    assert variants[0]["type"] == "string"
+
+
+def test_strip_is_idempotent():
+    """Second call on already-stripped tools is a no-op."""
+    tools = [_tool("t", {
+        "type": "object",
+        "properties": {"d": {"type": "string", "pattern": "\\d+"}},
+    })]
+    _, first = strip_pattern_and_format(tools)
+    _, second = strip_pattern_and_format(tools)
+    assert first == 1
+    assert second == 0
+
+
+def test_strip_empty_tools_returns_zero():
+    tools, stripped = strip_pattern_and_format([])
+    assert tools == []
+    assert stripped == 0
+
+
+def test_strip_none_returns_zero():
+    tools, stripped = strip_pattern_and_format(None)
+    assert tools is None
+    assert stripped == 0
diff --git a/tools/schema_sanitizer.py b/tools/schema_sanitizer.py
index de43b131b6..8c0a915aca 100644
--- a/tools/schema_sanitizer.py
+++ b/tools/schema_sanitizer.py
@@ -255,3 +255,75 @@ def _sanitize_node(node: Any, path: str) -> Any:
         out["required"] = valid
 
     return out
+
+
+# =============================================================================
+# Reactive strip — only invoked when llama.cpp rejects a schema
+# =============================================================================
+
+_STRIP_ON_RECOVERY_KEYS = frozenset({"pattern", "format"})
+
+
+def strip_pattern_and_format(tools: list[dict] | None) -> tuple[list[dict] | None, int]:
+    """Strip ``pattern`` and ``format`` JSON Schema keywords from tool schemas.
+
+    This is a *reactive* sanitizer invoked only when llama.cpp's
+    ``json-schema-to-grammar`` converter has rejected a tool schema with an
+    HTTP 400 grammar-parse error. llama.cpp's regex engine supports only a
+    small subset of ECMAScript regex (literals, ``.``, ``[...]``, ``|``,
+    ``*``, ``+``, ``?``, ``{n,m}``) — it rejects escape classes like ``\\d``,
+    ``\\w``, ``\\s`` and most ``format`` values. Cloud providers (OpenAI,
+    Anthropic, OpenRouter, Gemini) accept these keywords fine and rely on
+    them as prompting hints, so we keep them in the default schema and only
+    strip on demand.
+
+    The strip operates on a sibling of ``type`` (so schema keywords are
+    removed) — a property literally *named* ``pattern`` (e.g. the first arg
+    of the built-in ``search_files`` tool) is not affected because property
+    names live in the ``properties`` dict, not as siblings of ``type``.
+
+    Args:
+        tools: OpenAI-format tool list (``None``/empty passes through),
+            mutated in place; callers needing the original should deep-copy.
+
+    Returns:
+        ``(tools, stripped_count)`` — the same list reference plus a count of
+        how many ``pattern``/``format`` keywords were removed across all tools.
+    """
+    if not tools:
+        return tools, 0
+
+    stripped = 0
+
+    def _walk(node: Any) -> None:
+        nonlocal stripped
+        if isinstance(node, dict):
+            # Only strip as a sibling of ``type`` — i.e. when this node is
+            # itself a schema. This avoids stripping literal property keys
+            # named "pattern" (search_files.pattern, etc.) because those live
+            # inside a ``properties`` dict, not as siblings of ``type``.
+            is_schema_node = "type" in node or "anyOf" in node or "oneOf" in node or "allOf" in node
+            for key in list(node.keys()):
+                if is_schema_node and key in _STRIP_ON_RECOVERY_KEYS:
+                    node.pop(key, None)
+                    stripped += 1
+                    continue
+                _walk(node[key])
+        elif isinstance(node, list):
+            for item in node:
+                _walk(item)
+
+    for tool in tools:
+        fn = tool.get("function") if isinstance(tool, dict) else None
+        if isinstance(fn, dict):
+            params = fn.get("parameters")
+            if isinstance(params, dict):
+                _walk(params)
+
+    if stripped:
+        logger.info(
+            "schema_sanitizer: stripped %d pattern/format keyword(s) from "
+            "tool schemas (llama.cpp grammar-parse recovery)",
+            stripped,
+        )
+    return tools, stripped