fix(moa): propagate api_mode from slot runtime to call_llm

_slot_runtime resolved the provider's real API surface (including api_mode)
but only forwarded base_url and api_key to call_llm, dropping api_mode.
This caused Copilot GPT-5.x reference slots to hit /chat/completions
instead of the Responses API, returning 400 unsupported_api_for_model.

- _slot_runtime: forward api_mode from resolve_runtime_provider
- call_llm: accept explicit api_mode param, override task config
- 4 regression tests for propagation, omission, and signature
This commit is contained in:
liuhao1024 2026-06-29 02:48:22 +08:00 committed by Teknium
parent da4f15cddc
commit d76ca3a7f2
3 changed files with 82 additions and 0 deletions

View file

@ -5685,6 +5685,7 @@ def call_llm(
tools: list = None,
timeout: float = None,
extra_body: dict = None,
api_mode: str = None,
) -> Any:
"""Centralized synchronous LLM call.
@ -5697,6 +5698,8 @@ def call_llm(
Reads provider:model from config/env. Ignored if provider is set.
provider: Explicit provider override.
model: Explicit model override.
api_mode: Explicit API mode override (e.g. "codex_responses",
"anthropic_messages"). Takes precedence over task config.
messages: Chat messages list.
temperature: Sampling temperature (None = provider default).
max_tokens: Max output tokens (handles max_tokens vs max_completion_tokens).
@ -5712,6 +5715,8 @@ def call_llm(
"""
resolved_provider, resolved_model, resolved_base_url, resolved_api_key, resolved_api_mode = _resolve_task_provider_model(
task, provider, model, base_url, api_key)
if api_mode:
resolved_api_mode = api_mode
effective_extra_body = _get_task_extra_body(task)
effective_extra_body.update(extra_body or {})

View file

@ -109,6 +109,8 @@ def _slot_runtime(slot: dict[str, str]) -> dict[str, Any]:
out["base_url"] = rt["base_url"]
if rt.get("api_key"):
out["api_key"] = rt["api_key"]
if rt.get("api_mode"):
out["api_mode"] = rt["api_mode"]
except Exception as exc: # pragma: no cover - defensive
logger.debug("MoA slot runtime resolution failed for %s: %s", _slot_label(slot), exc)
return out

View file

@ -0,0 +1,75 @@
"""Tests for MoA slot_runtime api_mode propagation (issue #54379).
Verify that _slot_runtime passes the resolved api_mode through to call_llm,
so reference slots using providers that require a specific API surface
(e.g. Copilot GPT-5.x codex_responses) get routed correctly.
"""
from __future__ import annotations
from unittest.mock import MagicMock, patch
import pytest
class TestSlotRuntimeApiMode:
    """Check how ``_slot_runtime`` treats the ``api_mode`` of a resolved runtime.

    ``resolve_runtime_provider`` is replaced by a mock in every test.
    ``_slot_runtime`` logs and swallows any exception raised during
    resolution, so the omission tests also assert that the mock was called
    and that the resolved ``api_key`` was forwarded.  Without those checks,
    ``"api_mode" not in result`` would hold even if resolution had failed and
    nothing had been forwarded at all.
    """

    @patch("hermes_cli.runtime_provider.resolve_runtime_provider")
    def test_slot_runtime_includes_api_mode(self, mock_resolve):
        """api_mode from resolve_runtime_provider is forwarded in output dict."""
        mock_resolve.return_value = {
            "provider": "copilot",
            "model": "gpt-5.5",
            "base_url": "https://api.githubcopilot.com",
            "api_key": "test-key",
            "api_mode": "codex_responses",
        }
        from agent.moa_loop import _slot_runtime

        result = _slot_runtime({"provider": "copilot", "model": "gpt-5.5"})

        assert result["api_mode"] == "codex_responses"
        assert result["base_url"] == "https://api.githubcopilot.com"
        assert result["api_key"] == "test-key"

    @patch("hermes_cli.runtime_provider.resolve_runtime_provider")
    def test_slot_runtime_omits_api_mode_when_absent(self, mock_resolve):
        """When resolve_runtime_provider does not return api_mode, output omits it."""
        mock_resolve.return_value = {
            "provider": "openai",
            "model": "gpt-4o",
            "base_url": "https://api.openai.com/v1",
            "api_key": "test-key",
        }
        from agent.moa_loop import _slot_runtime

        result = _slot_runtime({"provider": "openai", "model": "gpt-4o"})

        # Guard against a vacuous pass: resolution must actually have run and
        # succeeded, otherwise the swallowed-exception path also omits api_mode.
        assert mock_resolve.called
        assert result.get("api_key") == "test-key"
        assert "api_mode" not in result

    @patch("hermes_cli.runtime_provider.resolve_runtime_provider")
    def test_slot_runtime_omits_api_mode_when_empty(self, mock_resolve):
        """Empty string api_mode is treated as absent."""
        mock_resolve.return_value = {
            "provider": "copilot",
            "model": "gpt-5.5",
            "base_url": "https://api.githubcopilot.com",
            "api_key": "test-key",
            "api_mode": "",
        }
        from agent.moa_loop import _slot_runtime

        result = _slot_runtime({"provider": "copilot", "model": "gpt-5.5"})

        # Same guard as above: prove the resolved runtime was consumed before
        # trusting the absence of api_mode.
        assert mock_resolve.called
        assert result.get("api_key") == "test-key"
        assert "api_mode" not in result
class TestCallLlmApiMode:
    """Signature-level checks for the ``api_mode`` argument of ``call_llm``."""

    def test_call_llm_accepts_api_mode_kwarg(self):
        """``call_llm`` exposes an ``api_mode`` parameter defaulting to ``None``."""
        from inspect import signature

        from agent.auxiliary_client import call_llm

        # Inspect the declared parameters only; call_llm itself is never called.
        params = signature(call_llm).parameters
        assert "api_mode" in params
        assert params["api_mode"].default is None