From a8841e2a6877f7b5e7b4b6ab0122ef2fde229fe2 Mon Sep 17 00:00:00 2001
From: Gille <4317663+helix4u@users.noreply.github.com>
Date: Tue, 30 Jun 2026 03:54:25 -0700
Subject: [PATCH] fix(aux): preserve provider identity for resolved endpoints

_resolve_task_provider_model() flattened any explicit base_url to
provider=custom. Correct for bare/custom endpoints, but wrong for
provider-backed routes (anthropic, qwen-oauth, minimax-oauth,
openai-codex, etc.) whose provider branch adds auth refresh, transport,
or request shaping. MoA reference slots resolved through those providers
lost their identity before the aux call, so e.g. a Codex reference hit
chatgpt.com/backend-api/codex without its Cloudflare headers and got
HTML back (surfacing as a spurious rate-limit).

Keep first-class providers intact when paired with a resolved base_url
via _preserve_provider_with_base_url(); bare/custom/auto/unknown and the
direct openai alias still route through custom.

Co-authored-by: Hermes Agent <127238744+teknium1@users.noreply.github.com>
---
 agent/auxiliary_client.py             | 31 ++++++++++++--
 tests/agent/test_auxiliary_client.py  | 60 +++++++++++++++++++++++++++
 tests/run_agent/test_moa_loop_mode.py | 43 +++++++++++++++++++
 3 files changed, 131 insertions(+), 3 deletions(-)

diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py
index 295984905ae..39ae9a759c3 100644
--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@@ -5238,9 +5238,10 @@ def _resolve_task_provider_model(
       3. "auto" (full auto-detection chain)
 
     Returns (provider, model, base_url, api_key, api_mode) where model may
-    be None (use provider default). When base_url is set, provider is forced
-    to "custom" and the task uses that direct endpoint. api_mode is one of
-    "chat_completions", "codex_responses", or None (auto-detect).
+    be None (use provider default). A bare base_url is treated as custom, but
+    a first-class provider plus base_url keeps the provider identity so its
+    auth, transport, and request-shaping behavior still apply. api_mode is one
+    of "chat_completions", "codex_responses", or None (auto-detect).
     """
     cfg_provider = None
     cfg_model = None
@@ -5273,11 +5274,35 @@ def _resolve_task_provider_model(
             return prov, existing_base
         return "custom", existing_base or target_base
 
+    def _preserve_provider_with_base_url(prov: Optional[str]) -> bool:
+        normalized = str(prov or "").strip().lower()
+        if normalized in {"", "auto", "custom"} or normalized.startswith("custom:"):
+            return False
+        try:
+            from hermes_cli.providers import get_provider
+
+            return get_provider(normalized) is not None
+        except Exception:
+            # Keep the high-risk provider-backed routes safe even if provider
+            # catalog loading is unavailable during early import/test paths.
+            return normalized in {
+                "anthropic",
+                "copilot",
+                "copilot-acp",
+                "minimax-oauth",
+                "nous",
+                "openai-codex",
+                "qwen-oauth",
+                "xai-oauth",
+            }
+
     if provider:
         provider, base_url = _expand_direct_api_alias(provider, base_url)
     if cfg_provider:
         cfg_provider, cfg_base_url = _expand_direct_api_alias(cfg_provider, cfg_base_url)
 
+    if base_url and _preserve_provider_with_base_url(provider):
+        return provider, resolved_model, base_url, api_key, resolved_api_mode
     if base_url:
         return "custom", resolved_model, base_url, api_key, resolved_api_mode
     if provider:
diff --git a/tests/agent/test_auxiliary_client.py b/tests/agent/test_auxiliary_client.py
index ac8cddd83c0..06bd800abda 100644
--- a/tests/agent/test_auxiliary_client.py
+++ b/tests/agent/test_auxiliary_client.py
@@ -31,6 +31,7 @@ from agent.auxiliary_client import (
     _OPENROUTER_MODEL,
     OPENROUTER_BASE_URL,
     _resolve_auto,
+    _resolve_task_provider_model,
     _resolve_xai_oauth_for_aux,
     _CodexCompletionsAdapter,
 )
@@ -108,6 +109,65 @@ class TestAuxiliaryMaxTokensParam:
             assert auxiliary_max_tokens_param(2048) == {"max_completion_tokens": 2048}
 
 
+class TestResolveTaskProviderModel:
+    @pytest.mark.parametrize(
+        "provider",
+        [
+            "anthropic",
+            "minimax-oauth",
+            "nous",
+            "openai-codex",
+            "qwen-oauth",
+            "xai-oauth",
+        ],
+    )
+    def test_explicit_base_url_preserves_first_class_provider_identity(self, provider):
+        resolved_provider, model, base_url, api_key, api_mode = _resolve_task_provider_model(
+            task="moa_reference",
+            provider=provider,
+            model="test-model",
+            base_url="https://provider.example/v1",
+            api_key="resolved-token",
+        )
+
+        assert resolved_provider == provider
+        assert model == "test-model"
+        assert base_url == "https://provider.example/v1"
+        assert api_key == "resolved-token"
+        assert api_mode is None
+
+    @pytest.mark.parametrize("provider", ["", "auto", "custom", "custom:local", "unknown-provider"])
+    def test_explicit_base_url_without_first_class_provider_routes_as_custom(self, provider):
+        resolved_provider, model, base_url, api_key, api_mode = _resolve_task_provider_model(
+            task="moa_reference",
+            provider=provider,
+            model="test-model",
+            base_url="https://provider.example/v1",
+            api_key="resolved-token",
+        )
+
+        assert resolved_provider == "custom"
+        assert model == "test-model"
+        assert base_url == "https://provider.example/v1"
+        assert api_key == "resolved-token"
+        assert api_mode is None
+
+    def test_direct_openai_alias_with_base_url_still_routes_as_custom(self):
+        resolved_provider, model, base_url, api_key, api_mode = _resolve_task_provider_model(
+            task="vision",
+            provider="openai",
+            model="gpt-4o-mini",
+            base_url="https://proxy.example/v1",
+            api_key="sk-test",
+        )
+
+        assert resolved_provider == "custom"
+        assert model == "gpt-4o-mini"
+        assert base_url == "https://proxy.example/v1"
+        assert api_key == "sk-test"
+        assert api_mode is None
+
+
 class TestBuildCallKwargsMaxTokens:
     """_build_call_kwargs should not cap output by default (#34530).
 
diff --git a/tests/run_agent/test_moa_loop_mode.py b/tests/run_agent/test_moa_loop_mode.py
index 3e4b8de93a9..c2dc887e605 100644
--- a/tests/run_agent/test_moa_loop_mode.py
+++ b/tests/run_agent/test_moa_loop_mode.py
@@ -1,6 +1,8 @@
 from types import SimpleNamespace
 from unittest.mock import MagicMock
 
+import pytest
+
 from run_agent import AIAgent
 
 
@@ -198,6 +200,47 @@ def test_moa_codex_slot_preserves_provider_identity(monkeypatch):
     assert rt == {"provider": "openai-codex", "model": "gpt-5.5"}
 
 
+@pytest.mark.parametrize("provider", ["anthropic", "minimax-oauth", "qwen-oauth"])
+def test_moa_provider_backed_slot_survives_aux_resolution(monkeypatch, provider):
+    """MoA can pass resolved endpoints for provider-backed slots without
+    call_llm flattening them to generic custom endpoints.
+
+    ``_slot_runtime`` resolves a provider-backed slot to ``provider`` plus a
+    concrete ``base_url``/``api_key``/``api_mode``; ``_run_reference`` then
+    forwards that dict to ``call_llm``. ``call_llm`` resolves the routing tuple
+    via ``_resolve_task_provider_model`` (which takes everything except
+    ``api_mode``, handled separately). The provider identity must survive that
+    resolution rather than being flattened to ``custom``.
+    """
+    from agent import moa_loop
+    from agent.auxiliary_client import _resolve_task_provider_model
+
+    def fake_resolve(*, requested, target_model=None):
+        return {
+            "provider": requested,
+            "api_mode": "anthropic_messages",
+            "base_url": f"https://{requested}.example/v1",
+            "api_key": f"token-for-{requested}",
+        }
+
+    monkeypatch.setattr(
+        "hermes_cli.runtime_provider.resolve_runtime_provider", fake_resolve
+    )
+
+    rt = moa_loop._slot_runtime({"provider": provider, "model": "test-model"})
+    # api_mode is forwarded to call_llm directly, not to _resolve_task_provider_model.
+    resolver_kwargs = {k: v for k, v in rt.items() if k != "api_mode"}
+    resolved_provider, model, base_url, api_key, _mode = _resolve_task_provider_model(
+        task="moa_reference",
+        **resolver_kwargs,
+    )
+
+    assert resolved_provider == provider
+    assert model == "test-model"
+    assert base_url == f"https://{provider}.example/v1"
+    assert api_key == f"token-for-{provider}"
+
+
 def test_moa_slot_runtime_falls_back_on_resolution_error(monkeypatch):
     """A slot whose provider can't be resolved still attempts the call with the
     bare provider/model rather than aborting the whole MoA turn."""