From dbe5d8497200b766baab4ed1a2bbd4e2e02d3fbc Mon Sep 17 00:00:00 2001 From: wysie Date: Mon, 25 May 2026 01:57:34 -0700 Subject: [PATCH] fix(auxiliary): universal main-model fallback for aux tasks (#31845) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Aux callers (title generation, vision, session search, etc.) can reach resolve_provider_client() without an explicit model when the user picked their main provider via 'hermes model' and didn't bother configuring a per-task auxiliary..model override. The expectation in that case is universal: 'use my main model for side tasks too.' Before, the OAuth providers (xai-oauth, openai-codex) silently returned (None, None) on an empty model — both lack a catalog default because their accepted-model lists drift on the backend. That caused _resolve_auto to drop to its Step-2 fallback chain (OpenRouter / Nous / etc.), so aux tasks billed against the wrong subscription without warning. The fix is at the top of resolve_provider_client() — a single 3-step universal fallback that runs before any provider branch, so no provider-specific empty-model guards are needed (now or for any future provider we add): 1. caller-passed model (caller knew what they wanted) 2. provider's catalog default (cheap aux model, if registered) 3. user's main model from config.yaml Behaviour by provider class: - OAuth providers (xai-oauth, openai-codex) — no catalog default, so step 3 applies. Title gen runs on grok-4.3 / gpt-5.4 against the user's actual subscription instead of leaking to OpenRouter. - API-key providers (anthropic, gemini, kimi-coding, etc.) — catalog default wins at step 2, preserving the original 'cheap aux model' behaviour. Anthropic users still get claude-haiku-4-5 for titles, not opus. - Explicit-model callers (auxiliary..model config, programmatic callers) — caller wins at step 1, no surprise switching. Salvaged from @wysie's PR #31845 which fixed the xai-oauth branch specifically. The universal shape supersedes the per-branch fix and covers openai-codex (same bug class) plus any future OAuth providers. 4 new tests in TestResolveProviderClientUniversalModelFallback: - empty_model_for_oauth_provider_falls_back_to_main_model - empty_model_for_codex_also_uses_main_model - empty_model_for_catalog_provider_uses_catalog_default - explicit_model_takes_precedence_over_fallbacks 365/365 across tests/agent/test_auxiliary_*, tests/run_agent/test_codex_xai_oauth_recovery.py, tests/hermes_cli/test_auth_xai_oauth_provider.py, and tests/hermes_cli/test_plugin_auxiliary_tasks.py. Co-authored-by: wysie --- agent/auxiliary_client.py | 28 +++++ tests/agent/test_auxiliary_client.py | 149 +++++++++++++++++++++++++++ 2 files changed, 177 insertions(+) diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index 78ec00d3f16..bc98e9d8593 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -3116,6 +3116,34 @@ def resolve_provider_client( # Normalise aliases provider = _normalize_aux_provider(provider) + # Universal model-resolution fallback chain. Callers (notably title + # generation, vision, session search, and other auxiliary tasks) can + # reach this function without an explicit model — the user picked their + # main provider, didn't bother configuring a per-task ``auxiliary..model``, + # and just expects "use my main model for side tasks too." Resolve in + # this order, stopping at the first non-empty answer: + # + # 1. ``model`` argument (caller knew what they wanted) + # 2. Provider's catalog default — cheap/fast model the provider + # registered via ``ProviderProfile.default_aux_model`` or the + # legacy ``_API_KEY_PROVIDER_AUX_MODELS_FALLBACK`` dict. Empty + # string for OAuth-gated providers (openai-codex, xai-oauth) + # whose accepted-model lists drift on the backend, so we don't + # pin a default that can silently rot. + # 3. User's main model from ``model.model`` in config.yaml. This is + # the load-bearing step for OAuth providers: an xai-oauth user + # with grok-4.3 configured gets grok-4.3 for title generation + # instead of silently dropping to whatever Step-2 fallback (#31845). + # + # Each provider branch below sees a non-empty ``model`` whenever the + # user has *anything* configured — no provider-specific empty-model + # guards needed. When the user has NOTHING configured (fresh install, + # main_model also empty), the branches still hit their own + # missing-credentials returns and ``_resolve_auto`` falls through to + # the Step-2 chain as before. + if not model: + model = _get_aux_model_for_provider(provider) or _read_main_model() or model + def _needs_codex_wrap(client_obj, base_url_str: str, model_str: str) -> bool: """Decide if a plain OpenAI client should be wrapped for Responses API. diff --git a/tests/agent/test_auxiliary_client.py b/tests/agent/test_auxiliary_client.py index 221d2725a91..20c30c7ea9e 100644 --- a/tests/agent/test_auxiliary_client.py +++ b/tests/agent/test_auxiliary_client.py @@ -430,6 +430,155 @@ class TestBuildCodexClient: assert mock_openai.call_count == 2 +class TestResolveProviderClientUniversalModelFallback: + """resolve_provider_client() picks a sensible model when callers pass none (#31845). + + Aux tasks (title generation, vision, session search, etc.) routinely + reach this function without an explicit model — the user's main + provider was picked via ``hermes model``, no per-task override is + set, and the expectation is "just use my main model for side tasks + too." The resolver fills in ``model`` from a 3-step universal + fallback before any provider branch runs: + + 1. ``model`` argument (caller knew what they wanted) + 2. provider's catalog default (cheap aux model, if registered) + 3. user's main model (``model.model`` in config.yaml) + + Pre-fix the OAuth providers (xai-oauth, openai-codex) returned + ``(None, None)`` on an empty model — both lack a catalog default + because their accepted-model lists drift on the backend. That + silent failure caused ``_resolve_auto`` to drop to its Step-2 + fallback chain (OpenRouter / Nous / etc.), so aux tasks billed + against the wrong subscription. + """ + + def test_empty_model_for_oauth_provider_falls_back_to_main_model(self): + """xai-oauth: no catalog default → uses main model.""" + from agent.auxiliary_client import resolve_provider_client + + with ( + patch( + "agent.auxiliary_client._read_main_model", + return_value="grok-4.3", + ), + patch( + "agent.auxiliary_client._get_aux_model_for_provider", + return_value="", # xai-oauth has no catalog default + ), + patch( + "agent.auxiliary_client._build_xai_oauth_aux_client", + return_value=(MagicMock(), "grok-4.3"), + ) as mock_build, + ): + client, model = resolve_provider_client("xai-oauth", "") + + assert client is not None, ( + "should not fall through when main model is set" + ) + assert model == "grok-4.3" + # The builder receives the main-model fallback, never the empty + # string the caller passed. + assert mock_build.call_args.args[0] == "grok-4.3" + + def test_empty_model_for_codex_also_uses_main_model(self): + """openai-codex: symmetric with xai-oauth — same universal fallback.""" + from agent.auxiliary_client import resolve_provider_client + + with ( + patch( + "agent.auxiliary_client._read_main_model", + return_value="gpt-5.4", + ), + patch( + "agent.auxiliary_client._get_aux_model_for_provider", + return_value="", # openai-codex has no catalog default either + ), + patch( + "agent.auxiliary_client._build_codex_client", + return_value=(MagicMock(), "gpt-5.4"), + ) as mock_build, + patch( + "agent.auxiliary_client._select_pool_entry", + return_value=(True, None), + ), + ): + client, model = resolve_provider_client("openai-codex", "") + + assert client is not None + assert model == "gpt-5.4" + assert mock_build.call_args.args[0] == "gpt-5.4" + + def test_empty_model_for_catalog_provider_uses_catalog_default(self): + """anthropic / nous / openrouter / etc.: catalog default wins + over main model when no explicit model is passed. + + This preserves the original \"cheap aux model for direct API + providers\" behaviour — users on anthropic for their main chat + still get claude-haiku-4-5 for title generation, NOT their + expensive chat model. Step 2 of the universal fallback chain. + """ + from agent.auxiliary_client import resolve_provider_client + + with ( + patch( + "agent.auxiliary_client._read_main_model", + # Main model is the expensive opus; if this leaks into + # aux it costs real money. + return_value="claude-opus-4-6", + ) as mock_read_main, + patch( + "agent.auxiliary_client._get_aux_model_for_provider", + return_value="claude-haiku-4-5-20251001", + ), + patch( + "agent.anthropic_adapter.build_anthropic_client", + return_value=MagicMock(), + ), + patch( + "agent.anthropic_adapter.resolve_anthropic_token", + return_value="sk-ant-***", + ), + patch( + "agent.auxiliary_client._read_nous_auth", return_value=None + ), + ): + client, model = resolve_provider_client("anthropic", "") + + # Catalog default takes precedence — main_model was a no-op + # because step 2 of the fallback chain already produced a model. + assert client is not None + assert model == "claude-haiku-4-5-20251001" + mock_read_main.assert_not_called() + + def test_explicit_model_takes_precedence_over_fallbacks(self): + """Step 1: caller-passed model wins. Per-task config + (``auxiliary..model``) routes here — when the user + explicitly picks gemini-3-flash for title generation, that's + what runs, not their main model. + """ + from agent.auxiliary_client import resolve_provider_client + + with ( + patch("agent.auxiliary_client._read_main_model") as mock_read_main, + patch( + "agent.auxiliary_client._get_aux_model_for_provider", + return_value="catalog-default-should-not-be-used", + ), + patch( + "agent.auxiliary_client._build_xai_oauth_aux_client", + return_value=(MagicMock(), "grok-4.20-multi-agent"), + ) as mock_build, + ): + client, model = resolve_provider_client( + "xai-oauth", "grok-4.20-multi-agent", + ) + + assert client is not None + assert model == "grok-4.20-multi-agent" + mock_read_main.assert_not_called() + assert mock_build.call_args.args[0] == "grok-4.20-multi-agent" + + class TestExpiredCodexFallback: """Test that expired Codex tokens don't block the auto chain."""