mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-12 08:51:53 +00:00
Third-party OpenAI-compatible endpoints (self-hosted gateways, OpenRouter,
Azure proxies) fronting gpt-4o / gpt-4.1 / gpt-5+ / o1-o4 models silently
received max_tokens and 400'd with unsupported_parameter, because the three
kwarg-selection sites only checked base_url_hostname(...) == "api.openai.com"
and fell through to max_tokens on every other host. The constraint is
enforced server-side by the model family, not by the URL, so name-based
detection is required as a fallback.
Changes:
- utils.py: new shared helper model_forces_max_completion_tokens(model) that
prefix-matches gpt-4o, gpt-4.1, gpt-5, o1, o3, o4 families on normalized
(lowercased, vendor-prefix-stripped) names.
- run_agent.py: _max_tokens_param ORs the helper into the URL check.
- agent/auxiliary_client.py:
  - auxiliary_max_tokens_param gains an optional keyword-only model arg.
  - _build_call_kwargs inline branch applies the same check for both
    provider == "custom" and non-custom paths.
Tests:
- tests/test_model_forces_max_completion_tokens.py: 31 new cases covering
positive families, negatives (classic gpt-4, claude, llama, mistral, qwen,
deepseek), vendor prefixes, case-insensitivity, whitespace, None/empty,
and substring-not-prefix guards.
- tests/run_agent/test_run_agent.py::TestMaxTokensParam: 5 new model-based
cases (custom + gpt-5.4, openrouter + gpt-4o-mini, custom + o1-preview,
classic gpt-4-turbo keeps max_tokens, llama3 keeps max_tokens).
- tests/agent/test_auxiliary_client.py::TestAuxiliaryMaxTokensParam: new
class, 7 tests covering the URL x model matrix.
137 lines
5.5 KiB
Python
137 lines
5.5 KiB
Python
"""Targeted tests for ``utils.model_forces_max_completion_tokens``.

This helper decides whether a given model name requires the newer
``max_completion_tokens`` kwarg (rather than the legacy ``max_tokens``) on
``/v1/chat/completions``. It protects against the 400 ``unsupported_parameter``
error seen when third-party OpenAI-compatible endpoints serve gpt-4o / 4.1 /
5.x / o-series models by name and the caller only checks the URL host.
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from utils import model_forces_max_completion_tokens
|
|
|
|
|
|
# ─── Positive cases: families that require max_completion_tokens ────────────
|
|
|
|
|
|
class TestPositiveCases:
    """Model names for which the helper must answer ``True``."""

    def test_gpt_5_bare(self):
        """The bare ``gpt-5`` name is recognised."""
        verdict = model_forces_max_completion_tokens("gpt-5")
        assert verdict is True

    def test_gpt_5_point_release(self):
        """A dotted point release of gpt-5 is recognised."""
        # This is the originally reported failure: gpt-5.4 served from a
        # custom OpenAI-compatible endpoint was sent max_tokens and the
        # server answered with a 400.
        verdict = model_forces_max_completion_tokens("gpt-5.4")
        assert verdict is True

    def test_gpt_5_mini(self):
        """The ``-mini`` variant of gpt-5 is recognised."""
        verdict = model_forces_max_completion_tokens("gpt-5-mini")
        assert verdict is True

    def test_gpt_5_nano(self):
        """The ``-nano`` variant of gpt-5 is recognised."""
        verdict = model_forces_max_completion_tokens("gpt-5-nano")
        assert verdict is True

    def test_gpt_4o(self):
        """The bare ``gpt-4o`` name is recognised."""
        verdict = model_forces_max_completion_tokens("gpt-4o")
        assert verdict is True

    def test_gpt_4o_mini(self):
        """The ``-mini`` variant of gpt-4o is recognised."""
        verdict = model_forces_max_completion_tokens("gpt-4o-mini")
        assert verdict is True

    def test_gpt_4_1(self):
        """The bare ``gpt-4.1`` name is recognised."""
        verdict = model_forces_max_completion_tokens("gpt-4.1")
        assert verdict is True

    def test_gpt_4_1_mini(self):
        """The ``-mini`` variant of gpt-4.1 is recognised."""
        verdict = model_forces_max_completion_tokens("gpt-4.1-mini")
        assert verdict is True

    def test_o1(self):
        """The bare ``o1`` name is recognised."""
        verdict = model_forces_max_completion_tokens("o1")
        assert verdict is True

    def test_o1_preview(self):
        """The ``-preview`` variant of o1 is recognised."""
        verdict = model_forces_max_completion_tokens("o1-preview")
        assert verdict is True

    def test_o1_mini(self):
        """The ``-mini`` variant of o1 is recognised."""
        verdict = model_forces_max_completion_tokens("o1-mini")
        assert verdict is True

    def test_o3(self):
        """The bare ``o3`` name is recognised."""
        verdict = model_forces_max_completion_tokens("o3")
        assert verdict is True

    def test_o3_mini(self):
        """The ``-mini`` variant of o3 is recognised."""
        verdict = model_forces_max_completion_tokens("o3-mini")
        assert verdict is True

    def test_o4_mini(self):
        """The ``-mini`` variant of o4 is recognised."""
        # Forward-looking coverage: o4 has already been announced publicly.
        verdict = model_forces_max_completion_tokens("o4-mini")
        assert verdict is True
|
|
|
|
|
|
# ─── Negative cases: older or non-OpenAI families still use max_tokens ──────
|
|
|
|
|
|
class TestNegativeCases:
    """Model names for which the helper must answer ``False``."""

    def test_gpt_3_5_turbo(self):
        """``gpt-3.5-turbo`` is not flagged."""
        verdict = model_forces_max_completion_tokens("gpt-3.5-turbo")
        assert verdict is False

    def test_gpt_4(self):
        """Plain ``gpt-4`` is not flagged."""
        # The original (non-omni) gpt-4 keeps accepting max_tokens on the
        # chat completions endpoint.
        verdict = model_forces_max_completion_tokens("gpt-4")
        assert verdict is False

    def test_gpt_4_turbo(self):
        """``gpt-4-turbo`` is not flagged."""
        verdict = model_forces_max_completion_tokens("gpt-4-turbo")
        assert verdict is False

    def test_claude_family(self):
        """Claude model names are not flagged."""
        for name in ("claude-3-opus", "claude-sonnet-4-6"):
            assert model_forces_max_completion_tokens(name) is False

    def test_llama_family(self):
        """Llama model names are not flagged."""
        for name in ("llama3", "llama-3-70b-instruct"):
            assert model_forces_max_completion_tokens(name) is False

    def test_mistral_family(self):
        """A Mistral model name is not flagged."""
        verdict = model_forces_max_completion_tokens("mistral-7b-instruct")
        assert verdict is False

    def test_qwen_family(self):
        """A Qwen model name is not flagged."""
        verdict = model_forces_max_completion_tokens("qwen2.5-72b")
        assert verdict is False

    def test_deepseek_family(self):
        """A DeepSeek model name is not flagged."""
        verdict = model_forces_max_completion_tokens("deepseek-chat")
        assert verdict is False
|
|
|
|
|
|
# ─── Edge cases ─────────────────────────────────────────────────────────────
|
|
|
|
|
|
class TestEdgeCases:
    """Degenerate inputs, normalisation, and prefix-vs-substring guards."""

    def test_empty_string(self):
        """An empty model name yields ``False``."""
        verdict = model_forces_max_completion_tokens("")
        assert verdict is False

    def test_none(self):
        """``None`` in place of a model name yields ``False``."""
        verdict = model_forces_max_completion_tokens(None)  # type: ignore[arg-type]
        assert verdict is False

    def test_whitespace_only(self):
        """A name made only of whitespace yields ``False``."""
        verdict = model_forces_max_completion_tokens(" ")
        assert verdict is False

    def test_case_insensitive(self):
        """Upper- and mixed-case spellings still yield ``True``."""
        for name in ("GPT-5.4", "Gpt-4o-Mini", "O3-MINI"):
            assert model_forces_max_completion_tokens(name) is True

    def test_leading_trailing_whitespace(self):
        """Whitespace around a matching name does not prevent the match."""
        verdict = model_forces_max_completion_tokens(" gpt-5 ")
        assert verdict is True

    def test_vendor_prefix_stripped(self):
        """``vendor/model`` names are judged by the part after the slash."""
        # OpenRouter-style identifiers carry a vendor prefix before the model.
        for name in ("openai/gpt-5.4", "openai/gpt-4o-mini", "openai/o3-mini"):
            assert model_forces_max_completion_tokens(name) is True

    def test_vendor_prefix_with_non_matching_tail(self):
        """A vendor prefix alone does not make a non-matching tail match."""
        for name in ("openai/gpt-3.5-turbo", "anthropic/claude-3-opus"):
            assert model_forces_max_completion_tokens(name) is False

    def test_fake_prefix_not_matched(self):
        """Names starting with ``o`` but not o1/o3/o4 yield ``False``."""
        # None of these begin with an o-series prefix, so the leading "o"
        # must not be enough to collide with the o1/o3/o4 families.
        for name in ("omni-chat", "ox-large", "opus-3"):
            assert model_forces_max_completion_tokens(name) is False

    def test_gpt_5_substring_in_middle_not_matched(self):
        """A family name appearing mid-string yields ``False``."""
        # Matching is by prefix only; "local-gpt-5-clone" names another model.
        verdict = model_forces_max_completion_tokens("local-gpt-5-clone")
        assert verdict is False
|