mirror of https://github.com/NousResearch/hermes-agent.git
synced 2026-05-07 02:51:50 +00:00
fix(error_classifier): avoid large-context false overflow heuristics
Generic 400 and server-disconnect heuristics used absolute token/message-count fallbacks that are too aggressive for 1M context sessions. Gate those absolute fallbacks to smaller context windows while preserving relative pressure checks. Fixes #16351
parent 026a5e47df
commit d29f90e89d
2 changed files with 44 additions and 2 deletions
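The heart of the change is the shape of the "large session" test at both call sites. Below is a minimal standalone sketch of the new rule, with hypothetical names: the threshold values match the diff, but the function name, signature, and ABSOLUTE_GATE constant are illustrative, not the repo's API (the real code inlines this logic inside classify_api_error and _classify_400).

# Sketch only: thresholds from the diff; names are illustrative.
ABSOLUTE_GATE = 256_000  # absolute fallbacks apply only at or below this window

def is_large_session(
    approx_tokens: int,
    context_length: int,
    num_messages: int,
    rel: float,       # relative pressure: 0.6 (disconnect) or 0.4 (generic 400)
    abs_tokens: int,  # absolute token fallback: 120000 or 80000
    abs_msgs: int,    # absolute message fallback: 200 or 80
) -> bool:
    # The relative check stays unconditional, so sessions genuinely near
    # their budget still classify as overflow at any window size.
    if approx_tokens > context_length * rel:
        return True
    # Absolute fallbacks are only a proxy for smaller windows; gate them.
    return context_length <= ABSOLUTE_GATE and (
        approx_tokens > abs_tokens or num_messages > abs_msgs
    )

# The regression case from the new tests: 432 messages, ~74k tokens, 1M window.
# The old rule (num_messages > 200, ungated) would have flagged this as overflow.
assert not is_large_session(74_320, 1_000_000, 432, rel=0.6, abs_tokens=120_000, abs_msgs=200)
# A session actually near its budget still trips the relative check.
assert is_large_session(700_000, 1_000_000, 432, rel=0.6, abs_tokens=120_000, abs_msgs=200)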
@@ -520,7 +520,12 @@ def classify_api_error(
     is_disconnect = any(p in error_msg for p in _SERVER_DISCONNECT_PATTERNS)

     if is_disconnect and not status_code:
-        is_large = approx_tokens > context_length * 0.6 or approx_tokens > 120000 or num_messages > 200
+        # Absolute token/message-count thresholds are only a proxy for smaller
+        # context windows. Large-context sessions can have hundreds of
+        # messages while still being far below their actual token budget.
+        is_large = approx_tokens > context_length * 0.6 or (
+            context_length <= 256000 and (approx_tokens > 120000 or num_messages > 200)
+        )
         if is_large:
             return _result(
                 FailoverReason.context_overflow,
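The generic-400 hunk below applies the same gate with tighter thresholds (0.4 relative pressure, 80000 tokens, 80 messages), presumably because a vague 400 body is weaker evidence of overflow than a mid-stream disconnect. Reusing the sketch above to check the boundary:

# 100 messages in a 200k window still trip the gated message fallback,
assert is_large_session(10_000, 200_000, 100, rel=0.4, abs_tokens=80_000, abs_msgs=80)
# but the same conversation shape in a 1M window no longer does.
assert not is_large_session(10_000, 1_000_000, 100, rel=0.4, abs_tokens=80_000, abs_msgs=80)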
@@ -766,7 +771,12 @@ def _classify_400(
     if not err_body_msg:
         err_body_msg = str(body.get("message") or "").strip().lower()
     is_generic = len(err_body_msg) < 30 or err_body_msg in ("error", "")
-    is_large = approx_tokens > context_length * 0.4 or approx_tokens > 80000 or num_messages > 80
+    # Absolute token/message-count thresholds are only a proxy for smaller
+    # context windows. Large-context sessions can have many messages while
+    # still being far below their actual token budget.
+    is_large = approx_tokens > context_length * 0.4 or (
+        context_length <= 256000 and (approx_tokens > 80000 or num_messages > 80)
+    )

     if is_generic and is_large:
         return result_fn(
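Each gated call site gets a matching regression test below. The two new test names share the fragment below_large_context_pressure, so the pair can be targeted with a keyword filter (assuming pytest drives the suite, which the bare assert style suggests):

pytest -k below_large_context_pressure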
@@ -410,6 +410,24 @@ class TestClassifyApiError:
         result = classify_api_error(e, approx_tokens=1000, context_length=200000)
         assert result.reason == FailoverReason.format_error

+    def test_400_generic_many_messages_below_large_context_pressure_is_format_error(self):
+        """Large-context sessions should not overflow solely due to message count."""
+        e = MockAPIError(
+            "Error",
+            status_code=400,
+            body={"error": {"message": "Error"}},
+        )
+        result = classify_api_error(
+            e,
+            provider="openai-codex",
+            model="gpt-5.5",
+            approx_tokens=74320,
+            context_length=1_000_000,
+            num_messages=432,
+        )
+        assert result.reason == FailoverReason.format_error
+        assert result.should_compress is False
+
     # ── Server disconnect + large session ──

     def test_disconnect_large_session_context_overflow(self):
@@ -425,6 +443,20 @@ class TestClassifyApiError:
         result = classify_api_error(e, approx_tokens=5000, context_length=200000)
         assert result.reason == FailoverReason.timeout

+    def test_disconnect_many_messages_below_large_context_pressure_is_timeout(self):
+        """Large-context disconnects should not overflow solely due to message count."""
+        e = Exception("server disconnected without sending complete message")
+        result = classify_api_error(
+            e,
+            provider="openai-codex",
+            model="gpt-5.5",
+            approx_tokens=74320,
+            context_length=1_000_000,
+            num_messages=432,
+        )
+        assert result.reason == FailoverReason.timeout
+        assert result.should_compress is False
+
     # ── Provider-specific: Anthropic thinking signature ──

     def test_anthropic_thinking_signature(self):
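Both new tests also pin should_compress to False: once the gated heuristic stops classifying these sessions as overflow, the classifier should likewise stop recommending compression for a conversation using roughly 7% of its 1M window.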