From 2ce3ae3d16decaea907d44203f9ea2a6d4c7bae7 Mon Sep 17 00:00:00 2001 From: teknium <127238744+teknium1@users.noreply.github.com> Date: Tue, 9 Jun 2026 22:38:39 -0700 Subject: [PATCH] fix(error-classifier): don't misclassify unsupported-param 400s as context overflow MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A GPT-5 model rejecting max_tokens returns a 400 whose message contains the literal substring 'max_tokens' — one of the _CONTEXT_OVERFLOW_PATTERNS. The 400 path in _classify_400 checked overflow patterns before any request-validation check (which only existed on the 5xx path), so the parameter error was routed into the compression loop, re-sent with the same bad param, and ended in 'Cannot compress further' on a tiny context. Hoist a request-validation guard (unsupported/unknown parameter) above the context-overflow check in _classify_400. Deliberately excludes the generic invalid_request_error code, which OpenAI also stamps on real overflow 400s, so genuine overflows still compress. Pairs with the max_completion_tokens param fix that stops the bad request at the source. Also adds AUTHOR_MAP entry for the salvaged PR #13902 commit. --- agent/error_classifier.py | 28 +++++++++++++++ scripts/release.py | 1 + tests/agent/test_error_classifier.py | 51 ++++++++++++++++++++++++++++ 3 files changed, 80 insertions(+) diff --git a/agent/error_classifier.py b/agent/error_classifier.py index b5656232e39..a2045b5f8cd 100644 --- a/agent/error_classifier.py +++ b/agent/error_classifier.py @@ -966,6 +966,34 @@ def _classify_400( should_fallback=False, ) + # Request-validation errors (unsupported / unknown parameter) MUST be + # checked BEFORE context_overflow. A GPT-5 model rejecting max_tokens + # returns: + # "Unsupported parameter: 'max_tokens' is not supported with this model. + # Use 'max_completion_tokens' instead." + # That string contains the literal substring "max_tokens", which is one of + # the _CONTEXT_OVERFLOW_PATTERNS — so without this guard the 400 is + # misclassified as context_overflow, routed into the compression loop, + # re-sent with the same bad parameter, and ends in "Cannot compress + # further". These errors are deterministic (every retry gets the identical + # rejection), so classify as a non-retryable format_error and fall back. + # + # NOTE: we deliberately do NOT key off the generic ``invalid_request_error`` + # code here — OpenAI stamps that same code on genuine context-overflow 400s, + # so matching it would mis-route real overflows away from compression. The + # unambiguous signals are the explicit "unsupported/unknown parameter" + # message text and the specific parameter-level error codes. + if ( + any(p in error_msg for p in _REQUEST_VALIDATION_PATTERNS + if p != "invalid_request_error") + or error_code_lower in {"unknown_parameter", "unsupported_parameter"} + ): + return result_fn( + FailoverReason.format_error, + retryable=False, + should_fallback=True, + ) + # Context overflow from 400 if any(p in error_msg for p in _CONTEXT_OVERFLOW_PATTERNS): return result_fn( diff --git a/scripts/release.py b/scripts/release.py index 3477c259cc7..3e177815f3d 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -1040,6 +1040,7 @@ AUTHOR_MAP = { "zhang9w0v5@qq.com": "zhang9w0v5", "fuleinist@outlook.com": "fuleinist", "43494187+Llugaes@users.noreply.github.com": "Llugaes", + "xiangji.chen@centurygame.com": "Llugaes", "fengtianyu88@users.noreply.github.com": "fengtianyu88", "l.moncany@gmail.com": "lmoncany", "fatinghenji@users.noreply.github.com": "fatinghenji", diff --git a/tests/agent/test_error_classifier.py b/tests/agent/test_error_classifier.py index b98fbe5beb9..ab6f27d6965 100644 --- a/tests/agent/test_error_classifier.py +++ b/tests/agent/test_error_classifier.py @@ -964,6 +964,57 @@ class TestClassifyApiError: assert result.reason == FailoverReason.format_error assert result.retryable is False + def test_400_unsupported_max_tokens_param_not_context_overflow(self): + """A GPT-5 model rejecting max_tokens must NOT be misclassified as + context overflow. The OpenAI error string contains the literal + 'max_tokens' (a _CONTEXT_OVERFLOW_PATTERNS entry), so without the + request-validation guard it was routed into the compression loop, + re-sent with the same bad param, and ended in "Cannot compress + further". Regression for gpt-5-context-overflow-misclassification.""" + msg = ("Unsupported parameter: 'max_tokens' is not supported with this " + "model. Use 'max_completion_tokens' instead.") + e = MockAPIError( + msg, + status_code=400, + body={"error": {"message": msg, "type": "invalid_request_error", + "code": "unsupported_parameter"}}, + ) + # Tiny context against a huge window — definitely not a real overflow. + result = classify_api_error(e, model="gpt-5.4", + approx_tokens=6962, context_length=1050000) + assert result.reason == FailoverReason.format_error + assert result.retryable is False + assert result.should_compress is False + + def test_400_unknown_parameter_not_context_overflow(self): + """'Unknown parameter' 400s are deterministic request-validation + failures, not overflows.""" + e = MockAPIError( + "Unknown parameter: 'foo'.", + status_code=400, + body={"error": {"message": "Unknown parameter: 'foo'.", + "code": "unknown_parameter"}}, + ) + result = classify_api_error(e, approx_tokens=1000) + assert result.reason == FailoverReason.format_error + assert result.should_compress is False + + def test_400_real_overflow_with_invalid_request_error_code_still_compresses(self): + """Guard the guard: OpenAI stamps genuine context-overflow 400s with + the generic 'invalid_request_error' code. The request-validation guard + must NOT key off that code, or real overflows stop compressing.""" + msg = ("This model's maximum context length is 128000 tokens, however " + "you requested 150000 tokens.") + e = MockAPIError( + msg, + status_code=400, + body={"error": {"message": msg, "type": "invalid_request_error"}}, + ) + result = classify_api_error(e, model="gpt-5.4", + approx_tokens=150000, context_length=128000) + assert result.reason == FailoverReason.context_overflow + assert result.should_compress is True + def test_422_format_error(self): e = MockAPIError("Unprocessable Entity", status_code=422) result = classify_api_error(e)