mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-08 03:01:47 +00:00
feat: provider modules — ProviderProfile ABC, 33 providers, fetch_models, transport single-path
Introduces providers/ package — single source of truth for every inference provider. Adding a simple api-key provider now requires one providers/<name>.py file with zero edits anywhere else.

What this PR ships:
- providers/ package (ProviderProfile ABC + 33 profiles across 4 api_modes)
- ProviderProfile declarative fields: name, api_mode, aliases, display_name, env_vars, base_url, models_url, auth_type, fallback_models, hostname, default_headers, fixed_temperature, default_max_tokens, default_aux_model
- 4 overridable hooks: prepare_messages, build_extra_body, build_api_kwargs_extras, fetch_models
- chat_completions.build_kwargs: profile path via _build_kwargs_from_profile; legacy flag path retained for lmstudio/tencent-tokenhub (which have session-aware reasoning probing that doesn't map cleanly to hooks yet)
- run_agent.py: profile path for all registered providers; legacy path variable scoping fixed (all flags defined before branching)
- Auto-wires: auth.PROVIDER_REGISTRY, models.CANONICAL_PROVIDERS, doctor health checks, config.OPTIONAL_ENV_VARS, model_metadata._URL_TO_PROVIDER
- GeminiProfile: thinking_config translation (native + openai-compat nested)
- New tests/providers/ (79 tests covering profile declarations, transport parity, hook overrides, e2e kwargs assembly)

Deltas vs original PR (salvaged onto current main):
- Added profiles: alibaba-coding-plan, azure-foundry, minimax-oauth (were added to main since original PR)
- Skipped profiles: lmstudio and tencent-tokenhub stay on the legacy path (their reasoning_effort probing has no clean hook equivalent yet)
- Removed lmstudio alias from the custom profile (it's a separate provider now)
- Skipped openrouter/custom from PROVIDER_REGISTRY auto-extension (resolve_provider special-cases them; adding breaks runtime resolution)
- runtime_provider: profile.api_mode used only as a fallback when URL detection finds nothing (was breaking the minimax /v1 override)
- Preserved main's legacy-path improvements: deepseek reasoning_content preserve, gemini Gemma skip, OpenRouter response caching, Anthropic 1M beta recovery, etc.
- Kept agent/copilot_acp_client.py in place (rejected the PR's relocation — main has 7 fixes landed since; relocation would revert them)
- _API_KEY_PROVIDER_AUX_MODELS alias kept for backward compat with existing test imports

Co-authored-by: kshitijk4poor <82637225+kshitijk4poor@users.noreply.github.com>
Closes #14418
This commit is contained in:
parent
2b500ed68a
commit
20a4f79ed1
57 changed files with 3149 additions and 177 deletions
|
|
@ -1117,6 +1117,7 @@ class TestBuildApiKwargs:
|
|||
assert "temperature" not in kwargs
|
||||
|
||||
def test_kimi_coding_endpoint_omits_temperature(self, agent):
|
||||
agent.provider = "kimi-coding"
|
||||
agent.base_url = "https://api.kimi.com/coding/v1"
|
||||
agent._base_url_lower = agent.base_url.lower()
|
||||
agent.model = "kimi-k2.5"
|
||||
|
|
@ -1129,6 +1130,7 @@ class TestBuildApiKwargs:
|
|||
def test_kimi_coding_endpoint_sends_max_tokens_and_reasoning(self, agent):
|
||||
"""Kimi endpoint should send max_tokens=32000 and reasoning_effort as
|
||||
top-level params, matching Kimi CLI's default behavior."""
|
||||
agent.provider = "kimi-coding"
|
||||
agent.base_url = "https://api.kimi.com/coding/v1"
|
||||
agent._base_url_lower = agent.base_url.lower()
|
||||
agent.model = "kimi-for-coding"
|
||||
|
|
@ -1141,6 +1143,7 @@ class TestBuildApiKwargs:
|
|||
|
||||
def test_kimi_coding_endpoint_respects_custom_effort(self, agent):
|
||||
"""reasoning_effort should reflect reasoning_config.effort when set."""
|
||||
agent.provider = "kimi-coding"
|
||||
agent.base_url = "https://api.kimi.com/coding/v1"
|
||||
agent._base_url_lower = agent.base_url.lower()
|
||||
agent.model = "kimi-for-coding"
|
||||
|
|
@ -1154,6 +1157,7 @@ class TestBuildApiKwargs:
|
|||
def test_kimi_coding_endpoint_sends_thinking_extra_body(self, agent):
|
||||
"""Kimi endpoint should send extra_body.thinking={"type":"enabled"}
|
||||
to activate reasoning mode, mirroring Kimi CLI's with_thinking()."""
|
||||
agent.provider = "kimi-coding"
|
||||
agent.base_url = "https://api.kimi.com/coding/v1"
|
||||
agent._base_url_lower = agent.base_url.lower()
|
||||
agent.model = "kimi-for-coding"
|
||||
|
|
@ -1167,6 +1171,7 @@ class TestBuildApiKwargs:
|
|||
"""When reasoning_config.enabled=False, thinking should be disabled
|
||||
and reasoning_effort should be omitted entirely — mirroring Kimi
|
||||
CLI's with_thinking("off") which maps to reasoning_effort=None."""
|
||||
agent.provider = "kimi-coding"
|
||||
agent.base_url = "https://api.kimi.com/coding/v1"
|
||||
agent._base_url_lower = agent.base_url.lower()
|
||||
agent.model = "kimi-for-coding"
|
||||
|
|
@ -1180,6 +1185,7 @@ class TestBuildApiKwargs:
|
|||
|
||||
def test_moonshot_endpoint_sends_max_tokens_and_reasoning(self, agent):
|
||||
"""api.moonshot.ai should get the same Kimi-compatible params."""
|
||||
agent.provider = "kimi-coding"
|
||||
agent.base_url = "https://api.moonshot.ai/v1"
|
||||
agent._base_url_lower = agent.base_url.lower()
|
||||
agent.model = "kimi-k2.5"
|
||||
|
|
@ -1193,6 +1199,7 @@ class TestBuildApiKwargs:
|
|||
|
||||
def test_moonshot_cn_endpoint_sends_max_tokens_and_reasoning(self, agent):
|
||||
"""api.moonshot.cn (China endpoint) should get the same params."""
|
||||
agent.provider = "kimi-coding-cn"
|
||||
agent.base_url = "https://api.moonshot.cn/v1"
|
||||
agent._base_url_lower = agent.base_url.lower()
|
||||
agent.model = "kimi-k2.5"
|
||||
|
|
@ -1205,6 +1212,7 @@ class TestBuildApiKwargs:
|
|||
assert kwargs["extra_body"]["thinking"] == {"type": "enabled"}
|
||||
|
||||
def test_provider_preferences_injected(self, agent):
|
||||
agent.provider = "openrouter"
|
||||
agent.base_url = "https://openrouter.ai/api/v1"
|
||||
agent.providers_allowed = ["Anthropic"]
|
||||
messages = [{"role": "user", "content": "hi"}]
|
||||
|
|
@ -1213,6 +1221,7 @@ class TestBuildApiKwargs:
|
|||
|
||||
def test_reasoning_config_default_openrouter(self, agent):
|
||||
"""Default reasoning config for OpenRouter should be medium."""
|
||||
agent.provider = "openrouter"
|
||||
agent.base_url = "https://openrouter.ai/api/v1"
|
||||
agent.model = "anthropic/claude-sonnet-4-20250514"
|
||||
messages = [{"role": "user", "content": "hi"}]
|
||||
|
|
@ -1222,6 +1231,7 @@ class TestBuildApiKwargs:
|
|||
assert reasoning["effort"] == "medium"
|
||||
|
||||
def test_reasoning_config_custom(self, agent):
|
||||
agent.provider = "openrouter"
|
||||
agent.base_url = "https://openrouter.ai/api/v1"
|
||||
agent.model = "anthropic/claude-sonnet-4-20250514"
|
||||
agent.reasoning_config = {"enabled": False}
|
||||
|
|
@ -1237,6 +1247,7 @@ class TestBuildApiKwargs:
|
|||
assert "reasoning" not in kwargs.get("extra_body", {})
|
||||
|
||||
def test_reasoning_sent_for_supported_openrouter_model(self, agent):
|
||||
agent.provider = "openrouter"
|
||||
agent.base_url = "https://openrouter.ai/api/v1"
|
||||
agent.model = "qwen/qwen3.5-plus-02-15"
|
||||
messages = [{"role": "user", "content": "hi"}]
|
||||
|
|
@ -1244,6 +1255,7 @@ class TestBuildApiKwargs:
|
|||
assert kwargs["extra_body"]["reasoning"]["effort"] == "medium"
|
||||
|
||||
def test_reasoning_sent_for_nous_route(self, agent):
|
||||
agent.provider = "nous"
|
||||
agent.base_url = "https://inference-api.nousresearch.com/v1"
|
||||
agent.model = "minimax/minimax-m2.5"
|
||||
messages = [{"role": "user", "content": "hi"}]
|
||||
|
|
@ -1251,18 +1263,38 @@ class TestBuildApiKwargs:
|
|||
assert kwargs["extra_body"]["reasoning"]["effort"] == "medium"
|
||||
|
||||
def test_reasoning_sent_for_copilot_gpt5(self, agent):
    """Copilot/GitHub Models: GPT-5 reasoning goes in extra_body.reasoning."""
    from agent.transports import get_transport
    from providers import get_provider_profile

    transport = get_transport("chat_completions")
    profile = get_provider_profile("copilot")
    msgs = [{"role": "user", "content": "hi"}]
    kwargs = transport.build_kwargs(
        model="gpt-5.4",
        messages=msgs,
        tools=None,
        supports_reasoning=True,
        provider_profile=profile,
    )
    # No reasoning_config supplied, so the transport's default effort
    # ("medium") should be nested under extra_body.reasoning.
    assert kwargs["extra_body"]["reasoning"] == {"effort": "medium"}
|
||||
|
||||
def test_reasoning_xhigh_normalized_for_copilot(self, agent):
    """xhigh effort should normalize to high for Copilot GitHub Models."""
    from agent.transports import get_transport
    from providers import get_provider_profile

    transport = get_transport("chat_completions")
    profile = get_provider_profile("copilot")
    msgs = [{"role": "user", "content": "hi"}]
    kwargs = transport.build_kwargs(
        model="gpt-5.4",
        messages=msgs,
        tools=None,
        supports_reasoning=True,
        reasoning_config={"enabled": True, "effort": "xhigh"},
        provider_profile=profile,
    )
    # "xhigh" is not accepted by the Copilot endpoint; the profile should
    # clamp it down to the highest supported tier, "high".
    assert kwargs["extra_body"]["reasoning"] == {"effort": "high"}
|
||||
|
||||
def test_reasoning_omitted_for_non_reasoning_copilot_model(self, agent):
|
||||
|
|
@ -1280,6 +1312,7 @@ class TestBuildApiKwargs:
|
|||
|
||||
|
||||
def test_qwen_portal_formats_messages_and_metadata(self, agent):
|
||||
agent.provider = "qwen-oauth"
|
||||
agent.base_url = "https://portal.qwen.ai/v1"
|
||||
agent._base_url_lower = agent.base_url.lower()
|
||||
agent.session_id = "sess-123"
|
||||
|
|
@ -1296,6 +1329,7 @@ class TestBuildApiKwargs:
|
|||
assert kwargs["messages"][2]["content"][0]["text"] == "hi"
|
||||
|
||||
def test_qwen_portal_normalizes_bare_string_content_parts(self, agent):
|
||||
agent.provider = "qwen-oauth"
|
||||
agent.base_url = "https://portal.qwen.ai/v1"
|
||||
agent._base_url_lower = agent.base_url.lower()
|
||||
messages = [
|
||||
|
|
@ -1308,6 +1342,7 @@ class TestBuildApiKwargs:
|
|||
assert user_content[1] == {"type": "text", "text": "world"}
|
||||
|
||||
def test_qwen_portal_no_system_message(self, agent):
|
||||
agent.provider = "qwen-oauth"
|
||||
agent.base_url = "https://portal.qwen.ai/v1"
|
||||
agent._base_url_lower = agent.base_url.lower()
|
||||
messages = [{"role": "user", "content": "hi"}]
|
||||
|
|
@ -1328,6 +1363,7 @@ class TestBuildApiKwargs:
|
|||
def test_qwen_portal_default_max_tokens(self, agent):
|
||||
"""When max_tokens is None, Qwen Portal gets a default of 65536
|
||||
to prevent reasoning models from exhausting their output budget."""
|
||||
agent.provider = "qwen-oauth"
|
||||
agent.base_url = "https://portal.qwen.ai/v1"
|
||||
agent._base_url_lower = agent.base_url.lower()
|
||||
agent.max_tokens = None
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue