fix(aux): preserve provider identity for resolved endpoints

_resolve_task_provider_model() flattened any explicit base_url to
provider=custom. Correct for bare/custom endpoints, but wrong for
provider-backed routes (anthropic, qwen-oauth, minimax-oauth,
openai-codex, etc.) whose provider branch adds auth refresh, transport,
or request shaping. MoA reference slots resolved through those providers
lost their identity before the aux call, so e.g. a Codex reference hit
chatgpt.com/backend-api/codex without its Cloudflare headers and got
HTML back (surfacing as a spurious rate-limit).

Keep first-class providers intact when paired with a resolved base_url
via _preserve_provider_with_base_url(); bare/custom/auto/unknown and the
direct openai alias still route through custom.

Co-authored-by: Hermes Agent <127238744+teknium1@users.noreply.github.com>
This commit is contained in:
Gille 2026-06-30 03:54:25 -07:00 committed by Teknium
parent 1cae1bd0de
commit a8841e2a68
3 changed files with 131 additions and 3 deletions

View file

@ -5238,9 +5238,10 @@ def _resolve_task_provider_model(
3. "auto" (full auto-detection chain)
Returns (provider, model, base_url, api_key, api_mode) where model may
be None (use provider default). When base_url is set, provider is forced
to "custom" and the task uses that direct endpoint. api_mode is one of
"chat_completions", "codex_responses", or None (auto-detect).
be None (use provider default). A bare base_url is treated as custom, but
a first-class provider plus base_url keeps the provider identity so its
auth, transport, and request-shaping behavior still apply. api_mode is one
of "chat_completions", "codex_responses", or None (auto-detect).
"""
cfg_provider = None
cfg_model = None
@ -5273,11 +5274,35 @@ def _resolve_task_provider_model(
return prov, existing_base
return "custom", existing_base or target_base
def _preserve_provider_with_base_url(prov: Optional[str]) -> bool:
normalized = str(prov or "").strip().lower()
if normalized in {"", "auto", "custom"} or normalized.startswith("custom:"):
return False
try:
from hermes_cli.providers import get_provider
return get_provider(normalized) is not None
except Exception:
# Keep the high-risk provider-backed routes safe even if provider
# catalog loading is unavailable during early import/test paths.
return normalized in {
"anthropic",
"copilot",
"copilot-acp",
"minimax-oauth",
"nous",
"openai-codex",
"qwen-oauth",
"xai-oauth",
}
if provider:
provider, base_url = _expand_direct_api_alias(provider, base_url)
if cfg_provider:
cfg_provider, cfg_base_url = _expand_direct_api_alias(cfg_provider, cfg_base_url)
if base_url and _preserve_provider_with_base_url(provider):
return provider, resolved_model, base_url, api_key, resolved_api_mode
if base_url:
return "custom", resolved_model, base_url, api_key, resolved_api_mode
if provider:

View file

@ -31,6 +31,7 @@ from agent.auxiliary_client import (
_OPENROUTER_MODEL,
OPENROUTER_BASE_URL,
_resolve_auto,
_resolve_task_provider_model,
_resolve_xai_oauth_for_aux,
_CodexCompletionsAdapter,
)
@ -108,6 +109,65 @@ class TestAuxiliaryMaxTokensParam:
assert auxiliary_max_tokens_param(2048) == {"max_completion_tokens": 2048}
class TestResolveTaskProviderModel:
    """Checks on the tuple ``_resolve_task_provider_model`` returns when an
    explicit ``base_url`` is passed together with a provider name."""

    @pytest.mark.parametrize(
        "provider",
        (
            "anthropic",
            "minimax-oauth",
            "nous",
            "openai-codex",
            "qwen-oauth",
            "xai-oauth",
        ),
    )
    def test_explicit_base_url_preserves_first_class_provider_identity(self, provider):
        """The listed providers come back under their own name, not ``custom``."""
        outcome = _resolve_task_provider_model(
            task="moa_reference",
            provider=provider,
            model="test-model",
            base_url="https://provider.example/v1",
            api_key="resolved-token",
        )
        got_provider, got_model, got_url, got_key, got_mode = outcome

        assert got_provider == provider
        assert got_model == "test-model"
        assert got_url == "https://provider.example/v1"
        assert got_key == "resolved-token"
        assert got_mode is None

    @pytest.mark.parametrize(
        "provider",
        ("", "auto", "custom", "custom:local", "unknown-provider"),
    )
    def test_explicit_base_url_without_first_class_provider_routes_as_custom(self, provider):
        """Empty, auto, custom and unrecognised names resolve to ``custom``."""
        outcome = _resolve_task_provider_model(
            task="moa_reference",
            provider=provider,
            model="test-model",
            base_url="https://provider.example/v1",
            api_key="resolved-token",
        )
        got_provider, got_model, got_url, got_key, got_mode = outcome

        assert got_provider == "custom"
        assert got_model == "test-model"
        assert got_url == "https://provider.example/v1"
        assert got_key == "resolved-token"
        assert got_mode is None

    def test_direct_openai_alias_with_base_url_still_routes_as_custom(self):
        """``openai`` paired with a base_url resolves to ``custom``."""
        outcome = _resolve_task_provider_model(
            task="vision",
            provider="openai",
            model="gpt-4o-mini",
            base_url="https://proxy.example/v1",
            api_key="sk-test",
        )
        got_provider, got_model, got_url, got_key, got_mode = outcome

        assert got_provider == "custom"
        assert got_model == "gpt-4o-mini"
        assert got_url == "https://proxy.example/v1"
        assert got_key == "sk-test"
        assert got_mode is None
class TestBuildCallKwargsMaxTokens:
"""_build_call_kwargs should not cap output by default (#34530).

View file

@ -1,6 +1,8 @@
from types import SimpleNamespace
from unittest.mock import MagicMock
import pytest
from run_agent import AIAgent
@ -198,6 +200,47 @@ def test_moa_codex_slot_preserves_provider_identity(monkeypatch):
assert rt == {"provider": "openai-codex", "model": "gpt-5.5"}
@pytest.mark.parametrize("provider", ["anthropic", "minimax-oauth", "qwen-oauth"])
def test_moa_provider_backed_slot_survives_aux_resolution(monkeypatch, provider):
    """A provider-backed MoA slot must not be flattened to ``custom``.

    ``_slot_runtime`` turns a provider-backed slot into ``provider`` plus a
    concrete ``base_url``/``api_key``/``api_mode``, and ``_run_reference``
    forwards that dict to ``call_llm``. ``call_llm`` derives its routing
    tuple from ``_resolve_task_provider_model``, which receives everything
    except ``api_mode`` (handled separately). This test replays that hand-off
    and checks the provider name comes out unchanged.
    """
    from agent import moa_loop
    from agent.auxiliary_client import _resolve_task_provider_model

    def fake_resolve(*, requested, target_model=None):
        # Stand-in for the runtime resolver: echo the provider back with a
        # provider-specific endpoint and token.
        endpoint = f"https://{requested}.example/v1"
        token = f"token-for-{requested}"
        return {
            "provider": requested,
            "api_mode": "anthropic_messages",
            "base_url": endpoint,
            "api_key": token,
        }

    monkeypatch.setattr(
        "hermes_cli.runtime_provider.resolve_runtime_provider", fake_resolve
    )

    slot = {"provider": provider, "model": "test-model"}
    rt = moa_loop._slot_runtime(slot)

    # api_mode is forwarded to call_llm directly, not to _resolve_task_provider_model.
    resolver_kwargs = dict(rt)
    resolver_kwargs.pop("api_mode", None)

    routing = _resolve_task_provider_model(task="moa_reference", **resolver_kwargs)
    resolved_provider, model, base_url, api_key, _mode = routing

    assert resolved_provider == provider
    assert model == "test-model"
    assert base_url == f"https://{provider}.example/v1"
    assert api_key == f"token-for-{provider}"
def test_moa_slot_runtime_falls_back_on_resolution_error(monkeypatch):
"""A slot whose provider can't be resolved still attempts the call with the
bare provider/model rather than aborting the whole MoA turn."""