fix(agent): honor configured model max tokens

This commit is contained in:
LeonSGP43 2026-05-04 09:36:43 +08:00 committed by Teknium
parent 52e2777821
commit a78e622dfe
4 changed files with 102 additions and 2 deletions

View file

@ -127,6 +127,21 @@ class TestAgentConfigSignature:
)
assert sig1 != sig2
def test_max_tokens_change_busts_cache(self):
    """Two configs differing only in model.max_tokens must not share a signature."""
    from gateway.run import GatewayRunner

    runtime_cfg = {"api_key": "k", "base_url": "u", "provider": "p"}
    # Everything except the max_tokens cache key is held constant, so any
    # difference between the signatures is attributable to that key alone.
    small_cap_sig, large_cap_sig = [
        GatewayRunner._agent_config_signature(
            "m", runtime_cfg, [], "",
            cache_keys={"model.max_tokens": cap},
        )
        for cap in (4096, 8192)
    ]
    assert small_cap_sig != large_cap_sig
def test_compression_threshold_change_busts_cache(self):
from gateway.run import GatewayRunner
@ -195,9 +210,16 @@ class TestExtractCacheBustingConfig:
from gateway.run import GatewayRunner
out = GatewayRunner._extract_cache_busting_config(
{"model": {"context_length": 272_000, "provider": "openrouter"}}
{
"model": {
"context_length": 272_000,
"max_tokens": 4096,
"provider": "openrouter",
}
}
)
assert out["model.context_length"] == 272_000
assert out["model.max_tokens"] == 4096
def test_reads_compression_subkeys(self):
from gateway.run import GatewayRunner

View file

@ -724,6 +724,56 @@ class TestInit:
)
assert a._cache_ttl == "1h"
def test_model_max_tokens_from_config(self):
    """A max_tokens value under the config's model section reaches the request.

    load_config is patched to return {"model": {"max_tokens": 4096}} and the
    constructor receives no max_tokens argument; the agent attribute and the
    kwargs built for the API call must both carry 4096.
    """
    fake_config = {"model": {"max_tokens": 4096}}
    tool_defs_patch = patch(
        "run_agent.get_tool_definitions",
        return_value=_make_tool_defs("terminal"),
    )
    requirements_patch = patch("run_agent.check_toolset_requirements", return_value={})
    client_patch = patch("run_agent.OpenAI")
    config_patch = patch("hermes_cli.config.load_config", return_value=fake_config)

    # Patchers are entered in the same order as they were created above.
    with tool_defs_patch, requirements_patch, client_patch, config_patch:
        agent = AIAgent(
            api_key="test-k...7890",
            provider="custom",
            model="claude-opus-4-6-thinking",
            base_url="http://proxy.example/v1",
            quiet_mode=True,
            skip_context_files=True,
            skip_memory=True,
        )
        request_kwargs = agent._build_api_kwargs([{"role": "user", "content": "Hi"}])
        assert agent.max_tokens == 4096
        assert request_kwargs["max_tokens"] == 4096
def test_constructor_max_tokens_wins_over_config(self):
    """A max_tokens passed to the constructor overrides the config value.

    The patched config supplies 4096 while the constructor is given 8192;
    the agent must end up with 8192.
    """
    fake_config = {"model": {"max_tokens": 4096}}
    tool_defs_patch = patch("run_agent.get_tool_definitions", return_value=[])
    requirements_patch = patch("run_agent.check_toolset_requirements", return_value={})
    client_patch = patch("run_agent.OpenAI")
    config_patch = patch("hermes_cli.config.load_config", return_value=fake_config)

    # Patchers are entered in the same order as they were created above.
    with tool_defs_patch, requirements_patch, client_patch, config_patch:
        agent = AIAgent(
            api_key="test-k...7890",
            provider="custom",
            model="claude-opus-4-6-thinking",
            base_url="http://proxy.example/v1",
            max_tokens=8192,
            quiet_mode=True,
            skip_context_files=True,
            skip_memory=True,
        )
        assert agent.max_tokens == 8192
def test_prompt_caching_cache_ttl_invalid_falls_back(self):
"""Non-Anthropic TTL values keep default 5m without raising."""
with (