mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-07-01 12:02:05 +00:00
fix(aux): preserve provider identity for resolved endpoints
_resolve_task_provider_model() flattened any explicit base_url to provider=custom. That is correct for bare/custom endpoints, but wrong for provider-backed routes (anthropic, qwen-oauth, minimax-oauth, openai-codex, etc.) whose provider branch adds auth refresh, transport, or request shaping. MoA reference slots resolved through those providers lost their identity before the aux call, so e.g. a Codex reference hit chatgpt.com/backend-api/codex without its Cloudflare headers and got HTML back (surfacing as a spurious rate-limit). Keep first-class providers intact when paired with a resolved base_url via _preserve_provider_with_base_url(); bare/custom/auto/unknown providers and the direct openai alias still route through custom.
Co-authored-by: Hermes Agent <127238744+teknium1@users.noreply.github.com>
This commit is contained in:
parent
1cae1bd0de
commit
a8841e2a68
3 changed files with 131 additions and 3 deletions
|
|
@ -5238,9 +5238,10 @@ def _resolve_task_provider_model(
|
|||
3. "auto" (full auto-detection chain)
|
||||
|
||||
Returns (provider, model, base_url, api_key, api_mode) where model may
|
||||
be None (use provider default). When base_url is set, provider is forced
|
||||
to "custom" and the task uses that direct endpoint. api_mode is one of
|
||||
"chat_completions", "codex_responses", or None (auto-detect).
|
||||
be None (use provider default). A bare base_url is treated as custom, but
|
||||
a first-class provider plus base_url keeps the provider identity so its
|
||||
auth, transport, and request-shaping behavior still apply. api_mode is one
|
||||
of "chat_completions", "codex_responses", or None (auto-detect).
|
||||
"""
|
||||
cfg_provider = None
|
||||
cfg_model = None
|
||||
|
|
@ -5273,11 +5274,35 @@ def _resolve_task_provider_model(
|
|||
return prov, existing_base
|
||||
return "custom", existing_base or target_base
|
||||
|
||||
def _preserve_provider_with_base_url(prov: Optional[str]) -> bool:
|
||||
normalized = str(prov or "").strip().lower()
|
||||
if normalized in {"", "auto", "custom"} or normalized.startswith("custom:"):
|
||||
return False
|
||||
try:
|
||||
from hermes_cli.providers import get_provider
|
||||
|
||||
return get_provider(normalized) is not None
|
||||
except Exception:
|
||||
# Keep the high-risk provider-backed routes safe even if provider
|
||||
# catalog loading is unavailable during early import/test paths.
|
||||
return normalized in {
|
||||
"anthropic",
|
||||
"copilot",
|
||||
"copilot-acp",
|
||||
"minimax-oauth",
|
||||
"nous",
|
||||
"openai-codex",
|
||||
"qwen-oauth",
|
||||
"xai-oauth",
|
||||
}
|
||||
|
||||
if provider:
|
||||
provider, base_url = _expand_direct_api_alias(provider, base_url)
|
||||
if cfg_provider:
|
||||
cfg_provider, cfg_base_url = _expand_direct_api_alias(cfg_provider, cfg_base_url)
|
||||
|
||||
if base_url and _preserve_provider_with_base_url(provider):
|
||||
return provider, resolved_model, base_url, api_key, resolved_api_mode
|
||||
if base_url:
|
||||
return "custom", resolved_model, base_url, api_key, resolved_api_mode
|
||||
if provider:
|
||||
|
|
|
|||
|
|
@ -31,6 +31,7 @@ from agent.auxiliary_client import (
|
|||
_OPENROUTER_MODEL,
|
||||
OPENROUTER_BASE_URL,
|
||||
_resolve_auto,
|
||||
_resolve_task_provider_model,
|
||||
_resolve_xai_oauth_for_aux,
|
||||
_CodexCompletionsAdapter,
|
||||
)
|
||||
|
|
@ -108,6 +109,65 @@ class TestAuxiliaryMaxTokensParam:
|
|||
assert auxiliary_max_tokens_param(2048) == {"max_completion_tokens": 2048}
|
||||
|
||||
|
||||
class TestResolveTaskProviderModel:
    """Routing of ``_resolve_task_provider_model`` with an explicit base_url."""

    @pytest.mark.parametrize(
        "provider",
        [
            "anthropic",
            "minimax-oauth",
            "nous",
            "openai-codex",
            "qwen-oauth",
            "xai-oauth",
        ],
    )
    def test_explicit_base_url_preserves_first_class_provider_identity(self, provider):
        # A first-class provider paired with a base_url keeps its own name.
        endpoint = "https://provider.example/v1"
        token = "resolved-token"

        resolved_provider, model, base_url, api_key, api_mode = _resolve_task_provider_model(
            task="moa_reference",
            provider=provider,
            model="test-model",
            base_url=endpoint,
            api_key=token,
        )

        assert (resolved_provider, model, base_url, api_key) == (
            provider,
            "test-model",
            endpoint,
            token,
        )
        assert api_mode is None

    @pytest.mark.parametrize("provider", ["", "auto", "custom", "custom:local", "unknown-provider"])
    def test_explicit_base_url_without_first_class_provider_routes_as_custom(self, provider):
        # Bare, auto, custom, and unrecognized providers are routed as "custom".
        endpoint = "https://provider.example/v1"
        token = "resolved-token"

        resolved_provider, model, base_url, api_key, api_mode = _resolve_task_provider_model(
            task="moa_reference",
            provider=provider,
            model="test-model",
            base_url=endpoint,
            api_key=token,
        )

        assert (resolved_provider, model, base_url, api_key) == (
            "custom",
            "test-model",
            endpoint,
            token,
        )
        assert api_mode is None

    def test_direct_openai_alias_with_base_url_still_routes_as_custom(self):
        # The direct "openai" alias plus a base_url is still reported as "custom".
        endpoint = "https://proxy.example/v1"
        token = "sk-test"

        resolved_provider, model, base_url, api_key, api_mode = _resolve_task_provider_model(
            task="vision",
            provider="openai",
            model="gpt-4o-mini",
            base_url=endpoint,
            api_key=token,
        )

        assert (resolved_provider, model, base_url, api_key) == (
            "custom",
            "gpt-4o-mini",
            endpoint,
            token,
        )
        assert api_mode is None
|
||||
|
||||
|
||||
class TestBuildCallKwargsMaxTokens:
|
||||
"""_build_call_kwargs should not cap output by default (#34530).
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
from types import SimpleNamespace
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
from run_agent import AIAgent
|
||||
|
||||
|
||||
|
|
@ -198,6 +200,47 @@ def test_moa_codex_slot_preserves_provider_identity(monkeypatch):
|
|||
assert rt == {"provider": "openai-codex", "model": "gpt-5.5"}
|
||||
|
||||
|
||||
@pytest.mark.parametrize("provider", ["anthropic", "minimax-oauth", "qwen-oauth"])
def test_moa_provider_backed_slot_survives_aux_resolution(monkeypatch, provider):
    """A provider-backed MoA slot keeps its provider through aux resolution.

    ``_slot_runtime`` turns a provider-backed slot into ``provider`` plus a
    concrete ``base_url``/``api_key``/``api_mode``, and ``_run_reference``
    hands that dict to ``call_llm``. ``call_llm`` derives its routing tuple
    from ``_resolve_task_provider_model``, which receives every field except
    ``api_mode`` (that one is handled separately). The resolved provider
    must come back unchanged instead of being flattened to ``custom``.
    """
    from agent import moa_loop
    from agent.auxiliary_client import _resolve_task_provider_model

    def fake_resolve(*, requested, target_model=None):
        # Stand-in runtime resolver: echoes the requested provider with a
        # deterministic endpoint and token derived from its name.
        endpoint = f"https://{requested}.example/v1"
        token = f"token-for-{requested}"
        return {
            "provider": requested,
            "api_mode": "anthropic_messages",
            "base_url": endpoint,
            "api_key": token,
        }

    monkeypatch.setattr(
        "hermes_cli.runtime_provider.resolve_runtime_provider", fake_resolve
    )

    slot_runtime = moa_loop._slot_runtime({"provider": provider, "model": "test-model"})
    # api_mode is forwarded to call_llm directly, not to _resolve_task_provider_model.
    routing_kwargs = dict(slot_runtime)
    routing_kwargs.pop("api_mode", None)

    resolved_provider, model, base_url, api_key, _mode = _resolve_task_provider_model(
        task="moa_reference",
        **routing_kwargs,
    )

    assert (resolved_provider, model, base_url, api_key) == (
        provider,
        "test-model",
        f"https://{provider}.example/v1",
        f"token-for-{provider}",
    )
|
||||
|
||||
|
||||
def test_moa_slot_runtime_falls_back_on_resolution_error(monkeypatch):
|
||||
"""A slot whose provider can't be resolved still attempts the call with the
|
||||
bare provider/model rather than aborting the whole MoA turn."""
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue