mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-07-01 12:02:05 +00:00
fix(moa): propagate api_mode from slot runtime to call_llm
_slot_runtime resolved the provider's real API surface (including api_mode) but forwarded only base_url and api_key to call_llm, dropping api_mode. This caused Copilot GPT-5.x reference slots to hit /chat/completions instead of the Responses API, returning 400 unsupported_api_for_model.
- _slot_runtime: forward api_mode from resolve_runtime_provider
- call_llm: accept explicit api_mode param, override task config
- 4 regression tests for propagation, omission, and signature
This commit is contained in:
parent
da4f15cddc
commit
d76ca3a7f2
3 changed files with 82 additions and 0 deletions
|
|
@ -5685,6 +5685,7 @@ def call_llm(
|
|||
tools: list = None,
|
||||
timeout: float = None,
|
||||
extra_body: dict = None,
|
||||
api_mode: str = None,
|
||||
) -> Any:
|
||||
"""Centralized synchronous LLM call.
|
||||
|
||||
|
|
@ -5697,6 +5698,8 @@ def call_llm(
|
|||
Reads provider:model from config/env. Ignored if provider is set.
|
||||
provider: Explicit provider override.
|
||||
model: Explicit model override.
|
||||
api_mode: Explicit API mode override (e.g. "codex_responses",
|
||||
"anthropic_messages"). Takes precedence over task config.
|
||||
messages: Chat messages list.
|
||||
temperature: Sampling temperature (None = provider default).
|
||||
max_tokens: Max output tokens (handles max_tokens vs max_completion_tokens).
|
||||
|
|
@ -5712,6 +5715,8 @@ def call_llm(
|
|||
"""
|
||||
resolved_provider, resolved_model, resolved_base_url, resolved_api_key, resolved_api_mode = _resolve_task_provider_model(
|
||||
task, provider, model, base_url, api_key)
|
||||
if api_mode:
|
||||
resolved_api_mode = api_mode
|
||||
effective_extra_body = _get_task_extra_body(task)
|
||||
effective_extra_body.update(extra_body or {})
|
||||
|
||||
|
|
|
|||
|
|
@ -109,6 +109,8 @@ def _slot_runtime(slot: dict[str, str]) -> dict[str, Any]:
|
|||
out["base_url"] = rt["base_url"]
|
||||
if rt.get("api_key"):
|
||||
out["api_key"] = rt["api_key"]
|
||||
if rt.get("api_mode"):
|
||||
out["api_mode"] = rt["api_mode"]
|
||||
except Exception as exc: # pragma: no cover - defensive
|
||||
logger.debug("MoA slot runtime resolution failed for %s: %s", _slot_label(slot), exc)
|
||||
return out
|
||||
|
|
|
|||
75
tests/agent/test_moa_slot_api_mode.py
Normal file
75
tests/agent/test_moa_slot_api_mode.py
Normal file
|
|
@ -0,0 +1,75 @@
|
|||
"""Tests for MoA slot_runtime api_mode propagation (issue #54379).
|
||||
|
||||
Verify that _slot_runtime passes the resolved api_mode through to call_llm,
|
||||
so reference slots using providers that require a specific API surface
|
||||
(e.g. Copilot GPT-5.x → codex_responses) get routed correctly.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
class TestSlotRuntimeApiMode:
    """Check what _slot_runtime does with the api_mode reported by resolve_runtime_provider."""

    @staticmethod
    def _resolve_slot(mock_resolve, provider, model, base_url, **extra):
        """Stub the resolver's return value, then run _slot_runtime for the slot.

        ``extra`` entries (e.g. ``api_mode``) are appended to the stubbed
        resolver result after the common provider/model/base_url/api_key keys.
        """
        stubbed = {
            "provider": provider,
            "model": model,
            "base_url": base_url,
            "api_key": "test-key",
        }
        stubbed.update(extra)
        mock_resolve.return_value = stubbed

        # Imported inside the call, after the stub is configured, mirroring
        # the per-test local import used throughout this module.
        from agent.moa_loop import _slot_runtime

        return _slot_runtime({"provider": provider, "model": model})

    @patch("hermes_cli.runtime_provider.resolve_runtime_provider")
    def test_slot_runtime_includes_api_mode(self, mock_resolve):
        """A non-empty api_mode from the resolver is present in the output dict."""
        runtime = self._resolve_slot(
            mock_resolve,
            "copilot",
            "gpt-5.5",
            "https://api.githubcopilot.com",
            api_mode="codex_responses",
        )

        assert runtime["api_mode"] == "codex_responses"
        assert runtime["base_url"] == "https://api.githubcopilot.com"
        assert runtime["api_key"] == "test-key"

    @patch("hermes_cli.runtime_provider.resolve_runtime_provider")
    def test_slot_runtime_omits_api_mode_when_absent(self, mock_resolve):
        """No api_mode key in the resolver result means none in the output."""
        runtime = self._resolve_slot(
            mock_resolve,
            "openai",
            "gpt-4o",
            "https://api.openai.com/v1",
        )

        assert "api_mode" not in runtime

    @patch("hermes_cli.runtime_provider.resolve_runtime_provider")
    def test_slot_runtime_omits_api_mode_when_empty(self, mock_resolve):
        """An empty-string api_mode is handled the same as a missing one."""
        runtime = self._resolve_slot(
            mock_resolve,
            "copilot",
            "gpt-5.5",
            "https://api.githubcopilot.com",
            api_mode="",
        )

        assert "api_mode" not in runtime
|
||||
|
||||
|
||||
class TestCallLlmApiMode:
    """Signature-level check that call_llm exposes an optional api_mode parameter."""

    def test_call_llm_accepts_api_mode_kwarg(self):
        """api_mode is a parameter of call_llm and defaults to None."""
        from inspect import signature

        from agent.auxiliary_client import call_llm

        api_mode_param = signature(call_llm).parameters.get("api_mode")
        assert api_mode_param is not None
        assert api_mode_param.default is None
|
||||
Loading…
Add table
Add a link
Reference in a new issue