mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
fix(agent): preserve MiniMax context length on delta-only overflow
This commit is contained in:
parent
a884f6d5d8
commit
e020f46bec
3 changed files with 109 additions and 1 deletions
23
run_agent.py
23
run_agent.py
|
|
@ -10575,9 +10575,30 @@ class AIAgent:
|
||||||
# Error is about the INPUT being too large — reduce context_length.
|
# Error is about the INPUT being too large — reduce context_length.
|
||||||
# Try to parse the actual limit from the error message
|
# Try to parse the actual limit from the error message
|
||||||
parsed_limit = parse_context_limit_from_error(error_msg)
|
parsed_limit = parse_context_limit_from_error(error_msg)
|
||||||
|
_provider_lower = (getattr(self, "provider", "") or "").lower()
|
||||||
|
_base_lower = (getattr(self, "base_url", "") or "").rstrip("/").lower()
|
||||||
|
is_minimax_provider = (
|
||||||
|
_provider_lower in {"minimax", "minimax-cn"}
|
||||||
|
or _base_lower.startswith((
|
||||||
|
"https://api.minimax.io/anthropic",
|
||||||
|
"https://api.minimaxi.com/anthropic",
|
||||||
|
))
|
||||||
|
)
|
||||||
|
minimax_delta_only_overflow = (
|
||||||
|
is_minimax_provider
|
||||||
|
and parsed_limit is None
|
||||||
|
and "context window exceeds limit (" in error_msg
|
||||||
|
)
|
||||||
if parsed_limit and parsed_limit < old_ctx:
|
if parsed_limit and parsed_limit < old_ctx:
|
||||||
new_ctx = parsed_limit
|
new_ctx = parsed_limit
|
||||||
self._vprint(f"{self.log_prefix}⚠️ Context limit detected from API: {new_ctx:,} tokens (was {old_ctx:,})", force=True)
|
self._vprint(f"{self.log_prefix}Context limit detected from API: {new_ctx:,} tokens (was {old_ctx:,})", force=True)
|
||||||
|
elif minimax_delta_only_overflow:
|
||||||
|
new_ctx = old_ctx
|
||||||
|
self._vprint(
|
||||||
|
f"{self.log_prefix}Provider reported overflow amount only; "
|
||||||
|
f"keeping context_length at {old_ctx:,} tokens and compressing.",
|
||||||
|
force=True,
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
# Step down to the next probe tier
|
# Step down to the next probe tier
|
||||||
new_ctx = get_next_probe_tier(old_ctx)
|
new_ctx = get_next_probe_tier(old_ctx)
|
||||||
|
|
|
||||||
|
|
@ -621,6 +621,10 @@ class TestParseContextLimitFromError:
|
||||||
msg = "Error: context window of 4096 tokens exceeded"
|
msg = "Error: context window of 4096 tokens exceeded"
|
||||||
assert parse_context_limit_from_error(msg) == 4096
|
assert parse_context_limit_from_error(msg) == 4096
|
||||||
|
|
||||||
|
def test_minimax_delta_only_message_returns_none(self):
    """The parser yields no limit for a MiniMax 'exceeds limit (N)' message."""
    delta_only_error = "invalid params, context window exceeds limit (2013)"
    parsed = parse_context_limit_from_error(delta_only_error)
    assert parsed is None
|
||||||
|
|
||||||
def test_completely_unrelated_error(self):
|
def test_completely_unrelated_error(self):
|
||||||
assert parse_context_limit_from_error("Invalid API key") is None
|
assert parse_context_limit_from_error("Invalid API key") is None
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -2575,6 +2575,89 @@ class TestRunConversation:
|
||||||
assert result["final_response"] == "Recovered after compression"
|
assert result["final_response"] == "Recovered after compression"
|
||||||
assert result["completed"] is True
|
assert result["completed"] is True
|
||||||
|
|
||||||
|
def test_minimax_delta_overflow_keeps_known_context_length(self, agent):
    """A MiniMax 'limit (2013)' overflow must not shrink the known window.

    The number in the message is an overflow delta rather than the real
    context window, so the agent is expected to hold context_length at
    204,800 tokens and compress instead of probing down to the generic
    128K fallback tier.
    """
    self._setup_agent(agent)
    agent.provider = "minimax"
    agent.model = "MiniMax-M2.7-highspeed"
    agent.base_url = "https://api.minimax.io/anthropic"

    # Pin the known window and recompute the compression threshold from it.
    compressor = agent.context_compressor
    compressor.context_length = 204_800
    compressor.threshold_tokens = int(
        compressor.context_length * compressor.threshold_percent
    )

    # First API call overflows; the retry after compression succeeds.
    overflow_error = Exception(
        "HTTP 400: invalid params, context window exceeds limit (2013)"
    )
    overflow_error.status_code = 400
    recovered = _mock_response(content="Recovered after compression", finish_reason="stop")
    agent.client.chat.completions.create.side_effect = [overflow_error, recovered]

    history = [
        {"role": "user", "content": "previous question"},
        {"role": "assistant", "content": "previous answer"},
    ]
    compressed = (
        [{"role": "user", "content": "hello"}],
        "compressed system prompt",
    )

    with (
        patch.object(agent, "_compress_context", return_value=compressed) as compress_mock,
        patch.object(agent, "_persist_session"),
        patch.object(agent, "_save_trajectory"),
        patch.object(agent, "_cleanup_task_resources"),
    ):
        result = agent.run_conversation("hello", conversation_history=history)

    compress_mock.assert_called_once()
    # Re-read the compressor from the agent so the checks see its final state.
    assert agent.context_compressor.context_length == 204_800
    assert agent.context_compressor._context_probed is False
    assert result["final_response"] == "Recovered after compression"
    assert result["completed"] is True
|
||||||
|
|
||||||
|
def test_non_minimax_delta_overflow_still_probes_down(self, agent):
    """The same overflow text from a non-MiniMax provider still probes down.

    With provider "openrouter" the test expects context_length to move from
    200,000 to 128,000 after the overflow error, with one compression pass.
    """
    self._setup_agent(agent)
    agent.provider = "openrouter"
    agent.model = "some/unknown-model"
    agent.base_url = "https://openrouter.ai/api/v1"

    # Start from a 200K window and recompute the compression threshold.
    compressor = agent.context_compressor
    compressor.context_length = 200_000
    compressor.threshold_tokens = int(
        compressor.context_length * compressor.threshold_percent
    )

    # First API call overflows; the retry after compression succeeds.
    overflow_error = Exception(
        "HTTP 400: invalid params, context window exceeds limit (2013)"
    )
    overflow_error.status_code = 400
    recovered = _mock_response(content="Recovered after compression", finish_reason="stop")
    agent.client.chat.completions.create.side_effect = [overflow_error, recovered]

    history = [
        {"role": "user", "content": "previous question"},
        {"role": "assistant", "content": "previous answer"},
    ]
    compressed = (
        [{"role": "user", "content": "hello"}],
        "compressed system prompt",
    )

    with (
        patch.object(agent, "_compress_context", return_value=compressed) as compress_mock,
        patch.object(agent, "_persist_session"),
        patch.object(agent, "_save_trajectory"),
        patch.object(agent, "_cleanup_task_resources"),
    ):
        result = agent.run_conversation("hello", conversation_history=history)

    compress_mock.assert_called_once()
    # Re-read the compressor from the agent so the check sees its final state.
    assert agent.context_compressor.context_length == 128_000
    assert result["final_response"] == "Recovered after compression"
    assert result["completed"] is True
|
||||||
|
|
||||||
def test_length_finish_reason_requests_continuation(self, agent):
|
def test_length_finish_reason_requests_continuation(self, agent):
|
||||||
"""Normal truncation (partial real content) triggers continuation."""
|
"""Normal truncation (partial real content) triggers continuation."""
|
||||||
self._setup_agent(agent)
|
self._setup_agent(agent)
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue