hermes-agent/tests/agent/test_arcee_trinity_overrides.py
Teknium 0524c9b34e
feat(compression): raise compaction trigger to 85% for gpt-5.5 on Codex OAuth (#40957)
The ChatGPT Codex OAuth backend hard-caps gpt-5.5 at a 272K context window
(verified live: a ~330K-token request to chatgpt.com/backend-api/codex/responses
is rejected with context_length_exceeded while ~250K succeeds; the same slug
exposes 1.05M on the direct OpenAI API / OpenRouter and 400K on Copilot). At the
default 50% trigger, auto-compaction fires at ~136K — half the usable window.

Raise the trigger to 85% (~231K) on this exact route only, gated by a new
compression.codex_gpt55_autoraise config flag (default true). When it fires,
emit a one-time notice (CLI inline print + gateway status_callback replay) with
the exact opt-back-out command. gpt-5.5 on any other provider keeps the user's
global threshold.

- _is_codex_gpt55() matches the 5.5 family only on provider=openai-codex
- _compression_threshold_for_model() now provider-aware + opt-out param
- config key + _config_version bump (27->28) for backfill
- docs + tests (40 cases in test_arcee_trinity_overrides.py)
2026-06-07 01:40:50 -07:00

159 lines
5.9 KiB
Python

"""Tests for Arcee Trinity Large Thinking per-model overrides.
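Arcee Trinity Large Thinking is a reasoning model that wants:

- Fixed temperature=0.5 (vs the global default)
- Compression threshold=0.75 (delay compression to preserve reasoning context)

The helpers must match the bare model name, including when it arrives via
OpenRouter as ``arcee-ai/trinity-large-thinking``, but must NOT hit sibling
Arcee models like trinity-large-preview or trinity-mini.

This module also covers the Codex gpt-5.5 compaction-threshold autoraise:
gpt-5.5 on the ``openai-codex`` provider gets a compression threshold of 0.85,
while the same slug on any other provider gets no override.
"""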
from __future__ import annotations
import pytest
from agent.auxiliary_client import (
_compression_threshold_for_model,
_fixed_temperature_for_model,
_is_arcee_trinity_thinking,
_is_codex_gpt55,
)
@pytest.mark.parametrize(
    "model",
    (
        "trinity-large-thinking",
        "arcee-ai/trinity-large-thinking",
        # Matching is case-insensitive.
        "Arcee-AI/Trinity-Large-Thinking",
        # Surrounding whitespace is tolerated.
        " trinity-large-thinking ",
    ),
)
def test_is_arcee_trinity_thinking_matches(model: str) -> None:
    """Every accepted spelling of the Trinity Large Thinking slug matches."""
    matched = _is_arcee_trinity_thinking(model)
    assert matched is True
@pytest.mark.parametrize(
    "model",
    (
        None,
        "",
        "trinity-large-preview",
        "arcee-ai/trinity-large-preview:free",
        "trinity-mini",
        "arcee-ai/trinity-mini",
        # A bare prefix of the slug is not enough to match.
        "trinity-large",
        "claude-sonnet-4.6",
        "gpt-5.4",
    ),
)
def test_is_arcee_trinity_thinking_rejects_non_matches(model) -> None:
    """Missing values, sibling Arcee models and unrelated models do not match."""
    matched = _is_arcee_trinity_thinking(model)
    assert matched is False
def test_fixed_temperature_for_trinity_thinking() -> None:
    """Bare and ``arcee-ai/``-prefixed slugs both pin the temperature to 0.5."""
    for slug in ("trinity-large-thinking", "arcee-ai/trinity-large-thinking"):
        assert _fixed_temperature_for_model(slug) == 0.5
def test_fixed_temperature_sibling_arcee_models_unaffected() -> None:
    """Sibling Arcee models get no pinned temperature.

    ``None`` leaves the choice of temperature to the caller's own default.
    """
    for sibling in ("trinity-large-preview", "trinity-mini"):
        assert _fixed_temperature_for_model(sibling) is None
def test_compression_threshold_for_trinity_thinking() -> None:
    """Bare and ``arcee-ai/``-prefixed slugs both get a 0.75 threshold."""
    for slug in ("trinity-large-thinking", "arcee-ai/trinity-large-thinking"):
        assert _compression_threshold_for_model(slug) == 0.75
def test_compression_threshold_default_none_for_other_models() -> None:
    """Models without an override yield ``None``.

    ``None`` means "leave the user's config value unchanged".
    """
    without_override = (
        None,
        "",
        "trinity-large-preview",
        "claude-sonnet-4.6",
        "kimi-k2",
    )
    for candidate in without_override:
        assert _compression_threshold_for_model(candidate) is None
# ---------------------------------------------------------------------------
# Codex gpt-5.5 compaction-threshold autoraise
#
# ChatGPT's Codex OAuth backend caps gpt-5.5 at a 272K window (verified live:
# ~330K-token request rejected with context_length_exceeded, ~250K accepted).
# The default 50% compaction trigger would fire at ~136K — half the usable
# window — so this route raises the trigger to 85%. Only the Codex OAuth route
# is affected; the same slug on OpenAI direct / OpenRouter / Copilot exposes a
# larger window and keeps the user's global threshold.
# ---------------------------------------------------------------------------
@pytest.mark.parametrize(
    "model",
    (
        "gpt-5.5",
        "gpt-5.5-pro",
        # Dated snapshot.
        "gpt-5.5-2026-04-23",
        # Codex variant of the 5.5 family (also 272K-capped).
        "gpt-5.5-codex-mini",
        # Aggregator-prefixed slug, still on the codex route.
        "openai/gpt-5.5",
        # Matching is case-insensitive.
        "GPT-5.5",
        # Surrounding whitespace is tolerated.
        " gpt-5.5 ",
    ),
)
def test_is_codex_gpt55_matches_on_codex_provider(model: str) -> None:
    """Every gpt-5.5 family spelling matches on the ``openai-codex`` provider."""
    matched = _is_codex_gpt55(model, "openai-codex")
    assert matched is True
@pytest.mark.parametrize(
    "provider",
    (
        "openrouter",
        "openai",
        "copilot",
        "openai-api",
        "",
        None,
    ),
)
def test_is_codex_gpt55_rejects_non_codex_providers(provider) -> None:
    """gpt-5.5 on a non-Codex route keeps the larger window, so no override."""
    matched = _is_codex_gpt55("gpt-5.5", provider)
    assert matched is False
@pytest.mark.parametrize(
    "model",
    (
        "gpt-5.4",
        "gpt-5",
        "gpt-5.55",
        "gpt-5.50",
        "",
        None,
    ),
)
def test_is_codex_gpt55_rejects_non_55_models(model) -> None:
    """Models outside the gpt-5.5 family do not match, even on the Codex route.

    gpt-5.55 / gpt-5.50 are different families and must NOT match — the
    "gpt-5.5-" / "gpt-5.5." prefix guards require a separator after "5.5".
    """
    matched = _is_codex_gpt55(model, "openai-codex")
    assert matched is False
def test_compression_threshold_for_codex_gpt55() -> None:
    """gpt-5.5 slugs on the ``openai-codex`` provider get a 0.85 threshold."""
    for slug in ("gpt-5.5", "gpt-5.5-pro", "openai/gpt-5.5"):
        assert _compression_threshold_for_model(slug, "openai-codex") == 0.85
def test_compression_threshold_codex_gpt55_other_routes_unaffected() -> None:
    """Same slug on a different route gets no override.

    ``None`` keeps the user's configured threshold in place.
    """
    for route in ("openrouter", "openai", "copilot"):
        assert _compression_threshold_for_model("gpt-5.5", route) is None

    # No provider passed at all.
    assert _compression_threshold_for_model("openai/gpt-5.5") is None
def test_compression_threshold_codex_gpt55_opt_out() -> None:
    """``allow_codex_gpt55_autoraise=False`` reverts to the global default (None)."""
    threshold = _compression_threshold_for_model(
        "gpt-5.5",
        "openai-codex",
        allow_codex_gpt55_autoraise=False,
    )
    assert threshold is None
def test_compression_threshold_opt_out_does_not_disable_trinity() -> None:
    """The opt-out flag is scoped to the Codex gpt-5.5 autoraise only.

    The Arcee Trinity override must still apply when the flag is False.
    """
    threshold = _compression_threshold_for_model(
        "trinity-large-thinking",
        "openrouter",
        allow_codex_gpt55_autoraise=False,
    )
    assert threshold == 0.75