hermes-agent/tests/test_model_forces_max_completion_tokens.py
Xiangji 19c07c4037 fix(params): send max_completion_tokens for newer OpenAI families on custom endpoints
Requests to third-party OpenAI-compatible endpoints (self-hosted gateways,
OpenRouter, Azure proxies) fronting gpt-4o / gpt-4.1 / gpt-5+ / o1-o4 models
were silently sent max_tokens and rejected with HTTP 400
unsupported_parameter, because the three kwarg-selection sites only checked
base_url_hostname(...) == "api.openai.com" and fell through to max_tokens on
every other host. The constraint is enforced server-side by the model family,
not by the URL, so name-based detection is required as a fallback.

Changes:
- utils.py: new shared helper model_forces_max_completion_tokens(model) that
  prefix-matches gpt-4o, gpt-4.1, gpt-5, o1, o3, o4 families on normalized
  (lowercased, vendor-prefix-stripped) names.
- run_agent.py: _max_tokens_param ORs the helper into the URL check.
- agent/auxiliary_client.py:
  - auxiliary_max_tokens_param gains an optional keyword-only model arg.
  - _build_call_kwargs inline branch applies the same check for both
    provider == "custom" and non-custom paths.

Tests:
- tests/test_model_forces_max_completion_tokens.py: 31 new cases covering
  positive families, negatives (classic gpt-4, claude, llama, mistral, qwen,
  deepseek), vendor prefixes, case-insensitivity, whitespace, None/empty,
  and substring-not-prefix guards.
- tests/run_agent/test_run_agent.py::TestMaxTokensParam: 5 new model-based
  cases (custom + gpt-5.4, openrouter + gpt-4o-mini, custom + o1-preview,
  classic gpt-4-turbo keeps max_tokens, llama3 keeps max_tokens).
- tests/agent/test_auxiliary_client.py::TestAuxiliaryMaxTokensParam: new
  class, 7 tests covering the URL x model matrix.
2026-06-09 23:22:10 -07:00

137 lines
5.5 KiB
Python

"""Targeted tests for ``utils.model_forces_max_completion_tokens``.
This helper decides whether a given model name requires the newer
``max_completion_tokens`` kwarg (rather than the legacy ``max_tokens``) on
``/v1/chat/completions``. It protects against the 400 ``unsupported_parameter``
error seen when third-party OpenAI-compatible endpoints serve gpt-4o / 4.1 /
5.x / o-series models by name and the caller only checks the URL host.
"""
from __future__ import annotations
from utils import model_forces_max_completion_tokens
# ─── Positive cases: families that require max_completion_tokens ────────────
class TestPositiveCases:
    """Model names the helper must flag as needing ``max_completion_tokens``."""

    def test_gpt_5_bare(self):
        """The bare ``gpt-5`` name is flagged."""
        assert model_forces_max_completion_tokens("gpt-5") is True

    def test_gpt_5_point_release(self):
        """A dotted gpt-5 point release is flagged."""
        # Regression case from the original report: gpt-5.4 served by a
        # custom OpenAI-compatible endpoint was sent max_tokens and got a
        # 400 response back.
        assert model_forces_max_completion_tokens("gpt-5.4") is True

    def test_gpt_5_mini(self):
        """The ``-mini`` gpt-5 variant is flagged."""
        assert model_forces_max_completion_tokens("gpt-5-mini") is True

    def test_gpt_5_nano(self):
        """The ``-nano`` gpt-5 variant is flagged."""
        assert model_forces_max_completion_tokens("gpt-5-nano") is True

    def test_gpt_4o(self):
        """The bare ``gpt-4o`` name is flagged."""
        assert model_forces_max_completion_tokens("gpt-4o") is True

    def test_gpt_4o_mini(self):
        """The ``-mini`` gpt-4o variant is flagged."""
        assert model_forces_max_completion_tokens("gpt-4o-mini") is True

    def test_gpt_4_1(self):
        """The bare ``gpt-4.1`` name is flagged."""
        assert model_forces_max_completion_tokens("gpt-4.1") is True

    def test_gpt_4_1_mini(self):
        """The ``-mini`` gpt-4.1 variant is flagged."""
        assert model_forces_max_completion_tokens("gpt-4.1-mini") is True

    def test_o1(self):
        """The bare ``o1`` name is flagged."""
        assert model_forces_max_completion_tokens("o1") is True

    def test_o1_preview(self):
        """The ``o1-preview`` name is flagged."""
        assert model_forces_max_completion_tokens("o1-preview") is True

    def test_o1_mini(self):
        """The ``o1-mini`` name is flagged."""
        assert model_forces_max_completion_tokens("o1-mini") is True

    def test_o3(self):
        """The bare ``o3`` name is flagged."""
        assert model_forces_max_completion_tokens("o3") is True

    def test_o3_mini(self):
        """The ``o3-mini`` name is flagged."""
        assert model_forces_max_completion_tokens("o3-mini") is True

    def test_o4_mini(self):
        """The ``o4-mini`` name is flagged."""
        # Future-proofing: o4 is already listed publicly.
        assert model_forces_max_completion_tokens("o4-mini") is True
# ─── Negative cases: older or non-OpenAI families still use max_tokens ──────
class TestNegativeCases:
    """Model names the helper must NOT flag (they keep ``max_tokens``)."""

    def test_gpt_3_5_turbo(self):
        """``gpt-3.5-turbo`` is not flagged."""
        assert model_forces_max_completion_tokens("gpt-3.5-turbo") is False

    def test_gpt_4(self):
        """Classic ``gpt-4`` is not flagged."""
        # The non-omni gpt-4 still takes max_tokens on chat completions.
        assert model_forces_max_completion_tokens("gpt-4") is False

    def test_gpt_4_turbo(self):
        """``gpt-4-turbo`` is not flagged."""
        assert model_forces_max_completion_tokens("gpt-4-turbo") is False

    def test_claude_family(self):
        """Claude model names are not flagged."""
        for name in ("claude-3-opus", "claude-sonnet-4-6"):
            assert model_forces_max_completion_tokens(name) is False

    def test_llama_family(self):
        """Llama model names are not flagged."""
        for name in ("llama3", "llama-3-70b-instruct"):
            assert model_forces_max_completion_tokens(name) is False

    def test_mistral_family(self):
        """A Mistral model name is not flagged."""
        assert model_forces_max_completion_tokens("mistral-7b-instruct") is False

    def test_qwen_family(self):
        """A Qwen model name is not flagged."""
        assert model_forces_max_completion_tokens("qwen2.5-72b") is False

    def test_deepseek_family(self):
        """A DeepSeek model name is not flagged."""
        assert model_forces_max_completion_tokens("deepseek-chat") is False
# ─── Edge cases ─────────────────────────────────────────────────────────────
class TestEdgeCases:
    """Degenerate inputs and name-normalization behavior of the helper."""

    def test_empty_string(self):
        """An empty model name is not flagged."""
        assert model_forces_max_completion_tokens("") is False

    def test_none(self):
        """``None`` is tolerated and not flagged."""
        assert model_forces_max_completion_tokens(None) is False  # type: ignore[arg-type]

    def test_whitespace_only(self):
        """A whitespace-only name is not flagged."""
        assert model_forces_max_completion_tokens(" ") is False

    def test_case_insensitive(self):
        """Upper- and mixed-case spellings are flagged like lowercase ones."""
        for name in ("GPT-5.4", "Gpt-4o-Mini", "O3-MINI"):
            assert model_forces_max_completion_tokens(name) is True

    def test_leading_trailing_whitespace(self):
        """Surrounding whitespace does not prevent a match."""
        assert model_forces_max_completion_tokens(" gpt-5 ") is True

    def test_vendor_prefix_stripped(self):
        """``vendor/model`` names are judged by the part after the slash."""
        # OpenRouter-style names carry a vendor prefix before the model.
        for name in ("openai/gpt-5.4", "openai/gpt-4o-mini", "openai/o3-mini"):
            assert model_forces_max_completion_tokens(name) is True

    def test_vendor_prefix_with_non_matching_tail(self):
        """A vendor prefix alone does not cause a match."""
        for name in ("openai/gpt-3.5-turbo", "anthropic/claude-3-opus"):
            assert model_forces_max_completion_tokens(name) is False

    def test_fake_prefix_not_matched(self):
        """Names that merely begin with ``o`` are not treated as o-series."""
        # None of these start with o1/o3/o4, so they must not collide with
        # the o-series prefixes.
        for name in ("omni-chat", "ox-large", "opus-3"):
            assert model_forces_max_completion_tokens(name) is False

    def test_gpt_5_substring_in_middle_not_matched(self):
        """``gpt-5`` appearing mid-name does not count as a match."""
        # Only a prefix should match; "local-gpt-5-clone" is a different model.
        assert model_forces_max_completion_tokens("local-gpt-5-clone") is False