mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-10 08:32:09 +00:00
fix(error-classifier): don't misclassify unsupported-param 400s as context overflow
A GPT-5 model rejecting max_tokens returns a 400 whose message contains the literal substring 'max_tokens' — one of the _CONTEXT_OVERFLOW_PATTERNS. The 400 path in _classify_400 checked overflow patterns before any request-validation check (which only existed on the 5xx path), so the parameter error was routed into the compression loop, re-sent with the same bad param, and ended in 'Cannot compress further' on a tiny context. Hoist a request-validation guard (unsupported/unknown parameter) above the context-overflow check in _classify_400. Deliberately excludes the generic invalid_request_error code, which OpenAI also stamps on real overflow 400s, so genuine overflows still compress. Pairs with the max_completion_tokens param fix that stops the bad request at the source. Also adds AUTHOR_MAP entry for the salvaged PR #13902 commit.
This commit is contained in:
parent
19c07c4037
commit
2ce3ae3d16
3 changed files with 80 additions and 0 deletions
|
|
@@ -966,6 +966,34 @@ def _classify_400(
|
|||
should_fallback=False,
|
||||
)
|
||||
|
||||
# Request-validation errors (unsupported / unknown parameter) MUST be
|
||||
# checked BEFORE context_overflow. A GPT-5 model rejecting max_tokens
|
||||
# returns:
|
||||
# "Unsupported parameter: 'max_tokens' is not supported with this model.
|
||||
# Use 'max_completion_tokens' instead."
|
||||
# That string contains the literal substring "max_tokens", which is one of
|
||||
# the _CONTEXT_OVERFLOW_PATTERNS — so without this guard the 400 is
|
||||
# misclassified as context_overflow, routed into the compression loop,
|
||||
# re-sent with the same bad parameter, and ends in "Cannot compress
|
||||
# further". These errors are deterministic (every retry gets the identical
|
||||
# rejection), so classify as a non-retryable format_error and fall back.
|
||||
#
|
||||
# NOTE: we deliberately do NOT key off the generic ``invalid_request_error``
|
||||
# code here — OpenAI stamps that same code on genuine context-overflow 400s,
|
||||
# so matching it would mis-route real overflows away from compression. The
|
||||
# unambiguous signals are the explicit "unsupported/unknown parameter"
|
||||
# message text and the specific parameter-level error codes.
|
||||
if (
|
||||
any(p in error_msg for p in _REQUEST_VALIDATION_PATTERNS
|
||||
if p != "invalid_request_error")
|
||||
or error_code_lower in {"unknown_parameter", "unsupported_parameter"}
|
||||
):
|
||||
return result_fn(
|
||||
FailoverReason.format_error,
|
||||
retryable=False,
|
||||
should_fallback=True,
|
||||
)
|
||||
|
||||
# Context overflow from 400
|
||||
if any(p in error_msg for p in _CONTEXT_OVERFLOW_PATTERNS):
|
||||
return result_fn(
|
||||
|
|
|
|||
|
|
@@ -1040,6 +1040,7 @@ AUTHOR_MAP = {
|
|||
"zhang9w0v5@qq.com": "zhang9w0v5",
|
||||
"fuleinist@outlook.com": "fuleinist",
|
||||
"43494187+Llugaes@users.noreply.github.com": "Llugaes",
|
||||
"xiangji.chen@centurygame.com": "Llugaes",
|
||||
"fengtianyu88@users.noreply.github.com": "fengtianyu88",
|
||||
"l.moncany@gmail.com": "lmoncany",
|
||||
"fatinghenji@users.noreply.github.com": "fatinghenji",
|
||||
|
|
|
|||
|
|
@@ -964,6 +964,57 @@ class TestClassifyApiError:
|
|||
assert result.reason == FailoverReason.format_error
|
||||
assert result.retryable is False
|
||||
|
||||
def test_400_unsupported_max_tokens_param_not_context_overflow(self):
|
||||
"""A GPT-5 model rejecting max_tokens must NOT be misclassified as
|
||||
context overflow. The OpenAI error string contains the literal
|
||||
'max_tokens' (a _CONTEXT_OVERFLOW_PATTERNS entry), so without the
|
||||
request-validation guard it was routed into the compression loop,
|
||||
re-sent with the same bad param, and ended in "Cannot compress
|
||||
further". Regression for gpt-5-context-overflow-misclassification."""
|
||||
msg = ("Unsupported parameter: 'max_tokens' is not supported with this "
|
||||
"model. Use 'max_completion_tokens' instead.")
|
||||
e = MockAPIError(
|
||||
msg,
|
||||
status_code=400,
|
||||
body={"error": {"message": msg, "type": "invalid_request_error",
|
||||
"code": "unsupported_parameter"}},
|
||||
)
|
||||
# Tiny context against a huge window — definitely not a real overflow.
|
||||
result = classify_api_error(e, model="gpt-5.4",
|
||||
approx_tokens=6962, context_length=1050000)
|
||||
assert result.reason == FailoverReason.format_error
|
||||
assert result.retryable is False
|
||||
assert result.should_compress is False
|
||||
|
||||
def test_400_unknown_parameter_not_context_overflow(self):
|
||||
"""'Unknown parameter' 400s are deterministic request-validation
|
||||
failures, not overflows."""
|
||||
e = MockAPIError(
|
||||
"Unknown parameter: 'foo'.",
|
||||
status_code=400,
|
||||
body={"error": {"message": "Unknown parameter: 'foo'.",
|
||||
"code": "unknown_parameter"}},
|
||||
)
|
||||
result = classify_api_error(e, approx_tokens=1000)
|
||||
assert result.reason == FailoverReason.format_error
|
||||
assert result.should_compress is False
|
||||
|
||||
def test_400_real_overflow_with_invalid_request_error_code_still_compresses(self):
|
||||
"""Guard the guard: OpenAI stamps genuine context-overflow 400s with
|
||||
the generic 'invalid_request_error' code. The request-validation guard
|
||||
must NOT key off that code, or real overflows stop compressing."""
|
||||
msg = ("This model's maximum context length is 128000 tokens, however "
|
||||
"you requested 150000 tokens.")
|
||||
e = MockAPIError(
|
||||
msg,
|
||||
status_code=400,
|
||||
body={"error": {"message": msg, "type": "invalid_request_error"}},
|
||||
)
|
||||
result = classify_api_error(e, model="gpt-5.4",
|
||||
approx_tokens=150000, context_length=128000)
|
||||
assert result.reason == FailoverReason.context_overflow
|
||||
assert result.should_compress is True
|
||||
|
||||
def test_422_format_error(self):
|
||||
e = MockAPIError("Unprocessable Entity", status_code=422)
|
||||
result = classify_api_error(e)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue