From a8841e2a6877f7b5e7b4b6ab0122ef2fde229fe2 Mon Sep 17 00:00:00 2001 From: Gille <4317663+helix4u@users.noreply.github.com> Date: Tue, 30 Jun 2026 03:54:25 -0700 Subject: [PATCH] fix(aux): preserve provider identity for resolved endpoints _resolve_task_provider_model() flattened any explicit base_url to provider=custom. Correct for bare/custom endpoints, but wrong for provider-backed routes (anthropic, qwen-oauth, minimax-oauth, openai-codex, etc.) whose provider branch adds auth refresh, transport, or request shaping. MoA reference slots resolved through those providers lost their identity before the aux call, so e.g. a Codex reference hit chatgpt.com/backend-api/codex without its Cloudflare headers and got HTML back (surfacing as a spurious rate-limit). Keep first-class providers intact when paired with a resolved base_url via _preserve_provider_with_base_url(); bare/custom/auto/unknown and the direct openai alias still route through custom. Co-authored-by: Hermes Agent <127238744+teknium1@users.noreply.github.com> --- agent/auxiliary_client.py | 31 ++++++++++++-- tests/agent/test_auxiliary_client.py | 60 +++++++++++++++++++++++++++ tests/run_agent/test_moa_loop_mode.py | 43 +++++++++++++++++++ 3 files changed, 131 insertions(+), 3 deletions(-) diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index 295984905ae..39ae9a759c3 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -5238,9 +5238,10 @@ def _resolve_task_provider_model( 3. "auto" (full auto-detection chain) Returns (provider, model, base_url, api_key, api_mode) where model may - be None (use provider default). When base_url is set, provider is forced - to "custom" and the task uses that direct endpoint. api_mode is one of - "chat_completions", "codex_responses", or None (auto-detect). + be None (use provider default). A bare base_url is treated as custom, but + a first-class provider plus base_url keeps the provider identity so its + auth, transport, and request-shaping behavior still apply. api_mode is one + of "chat_completions", "codex_responses", or None (auto-detect). """ cfg_provider = None cfg_model = None @@ -5273,11 +5274,35 @@ def _resolve_task_provider_model( return prov, existing_base return "custom", existing_base or target_base + def _preserve_provider_with_base_url(prov: Optional[str]) -> bool: + normalized = str(prov or "").strip().lower() + if normalized in {"", "auto", "custom"} or normalized.startswith("custom:"): + return False + try: + from hermes_cli.providers import get_provider + + return get_provider(normalized) is not None + except Exception: + # Keep the high-risk provider-backed routes safe even if provider + # catalog loading is unavailable during early import/test paths. + return normalized in { + "anthropic", + "copilot", + "copilot-acp", + "minimax-oauth", + "nous", + "openai-codex", + "qwen-oauth", + "xai-oauth", + } + if provider: provider, base_url = _expand_direct_api_alias(provider, base_url) if cfg_provider: cfg_provider, cfg_base_url = _expand_direct_api_alias(cfg_provider, cfg_base_url) + if base_url and _preserve_provider_with_base_url(provider): + return provider, resolved_model, base_url, api_key, resolved_api_mode if base_url: return "custom", resolved_model, base_url, api_key, resolved_api_mode if provider: diff --git a/tests/agent/test_auxiliary_client.py b/tests/agent/test_auxiliary_client.py index ac8cddd83c0..06bd800abda 100644 --- a/tests/agent/test_auxiliary_client.py +++ b/tests/agent/test_auxiliary_client.py @@ -31,6 +31,7 @@ from agent.auxiliary_client import ( _OPENROUTER_MODEL, OPENROUTER_BASE_URL, _resolve_auto, + _resolve_task_provider_model, _resolve_xai_oauth_for_aux, _CodexCompletionsAdapter, ) @@ -108,6 +109,65 @@ class TestAuxiliaryMaxTokensParam: assert auxiliary_max_tokens_param(2048) == {"max_completion_tokens": 2048} +class TestResolveTaskProviderModel: + @pytest.mark.parametrize( + "provider", + [ + "anthropic", + "minimax-oauth", + "nous", + "openai-codex", + "qwen-oauth", + "xai-oauth", + ], + ) + def test_explicit_base_url_preserves_first_class_provider_identity(self, provider): + resolved_provider, model, base_url, api_key, api_mode = _resolve_task_provider_model( + task="moa_reference", + provider=provider, + model="test-model", + base_url="https://provider.example/v1", + api_key="resolved-token", + ) + + assert resolved_provider == provider + assert model == "test-model" + assert base_url == "https://provider.example/v1" + assert api_key == "resolved-token" + assert api_mode is None + + @pytest.mark.parametrize("provider", ["", "auto", "custom", "custom:local", "unknown-provider"]) + def test_explicit_base_url_without_first_class_provider_routes_as_custom(self, provider): + resolved_provider, model, base_url, api_key, api_mode = _resolve_task_provider_model( + task="moa_reference", + provider=provider, + model="test-model", + base_url="https://provider.example/v1", + api_key="resolved-token", + ) + + assert resolved_provider == "custom" + assert model == "test-model" + assert base_url == "https://provider.example/v1" + assert api_key == "resolved-token" + assert api_mode is None + + def test_direct_openai_alias_with_base_url_still_routes_as_custom(self): + resolved_provider, model, base_url, api_key, api_mode = _resolve_task_provider_model( + task="vision", + provider="openai", + model="gpt-4o-mini", + base_url="https://proxy.example/v1", + api_key="sk-test", + ) + + assert resolved_provider == "custom" + assert model == "gpt-4o-mini" + assert base_url == "https://proxy.example/v1" + assert api_key == "sk-test" + assert api_mode is None + + class TestBuildCallKwargsMaxTokens: """_build_call_kwargs should not cap output by default (#34530). diff --git a/tests/run_agent/test_moa_loop_mode.py b/tests/run_agent/test_moa_loop_mode.py index 3e4b8de93a9..c2dc887e605 100644 --- a/tests/run_agent/test_moa_loop_mode.py +++ b/tests/run_agent/test_moa_loop_mode.py @@ -1,6 +1,8 @@ from types import SimpleNamespace from unittest.mock import MagicMock +import pytest + from run_agent import AIAgent @@ -198,6 +200,47 @@ def test_moa_codex_slot_preserves_provider_identity(monkeypatch): assert rt == {"provider": "openai-codex", "model": "gpt-5.5"} +@pytest.mark.parametrize("provider", ["anthropic", "minimax-oauth", "qwen-oauth"]) +def test_moa_provider_backed_slot_survives_aux_resolution(monkeypatch, provider): + """MoA can pass resolved endpoints for provider-backed slots without + call_llm flattening them to generic custom endpoints. + + ``_slot_runtime`` resolves a provider-backed slot to ``provider`` plus a + concrete ``base_url``/``api_key``/``api_mode``; ``_run_reference`` then + forwards that dict to ``call_llm``. ``call_llm`` resolves the routing tuple + via ``_resolve_task_provider_model`` (which takes everything except + ``api_mode``, handled separately). The provider identity must survive that + resolution rather than being flattened to ``custom``. + """ + from agent import moa_loop + from agent.auxiliary_client import _resolve_task_provider_model + + def fake_resolve(*, requested, target_model=None): + return { + "provider": requested, + "api_mode": "anthropic_messages", + "base_url": f"https://{requested}.example/v1", + "api_key": f"token-for-{requested}", + } + + monkeypatch.setattr( + "hermes_cli.runtime_provider.resolve_runtime_provider", fake_resolve + ) + + rt = moa_loop._slot_runtime({"provider": provider, "model": "test-model"}) + # api_mode is forwarded to call_llm directly, not to _resolve_task_provider_model. + resolver_kwargs = {k: v for k, v in rt.items() if k != "api_mode"} + resolved_provider, model, base_url, api_key, _mode = _resolve_task_provider_model( + task="moa_reference", + **resolver_kwargs, + ) + + assert resolved_provider == provider + assert model == "test-model" + assert base_url == f"https://{provider}.example/v1" + assert api_key == f"token-for-{provider}" + + def test_moa_slot_runtime_falls_back_on_resolution_error(monkeypatch): """A slot whose provider can't be resolved still attempts the call with the bare provider/model rather than aborting the whole MoA turn."""