fix(moa): propagate api_mode from slot runtime to call_llm

_slot_runtime resolved the provider's real API surface (including api_mode) but forwarded only base_url and api_key to call_llm, dropping api_mode. As a result, Copilot GPT-5.x reference slots hit /chat/completions instead of the Responses API and failed with 400 unsupported_api_for_model.

- _slot_runtime: forward api_mode from resolve_runtime_provider
- call_llm: accept an explicit api_mode param that overrides the task config
- 4 regression tests covering propagation, omission, and the call_llm signature
2026-07-01 12:02:05 +00:00 · 2026-06-29 02:48:22 +08:00 · 2026-06-29 02:48:22 +08:00 · d76ca3a7f2
commit d76ca3a7f2
parent da4f15cddc
3 changed files with 82 additions and 0 deletions
--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@ -5685,6 +5685,7 @@ def call_llm(
    tools: list = None,
    timeout: float = None,
    extra_body: dict = None,
+    api_mode: str = None,
 ) -> Any:
    """Centralized synchronous LLM call.

@ -5697,6 +5698,8 @@ def call_llm(
              Reads provider:model from config/env. Ignored if provider is set.
        provider: Explicit provider override.
        model: Explicit model override.
+        api_mode: Explicit API mode override (e.g. "codex_responses",
+              "anthropic_messages"). Takes precedence over task config.
        messages: Chat messages list.
        temperature: Sampling temperature (None = provider default).
        max_tokens: Max output tokens (handles max_tokens vs max_completion_tokens).
@ -5712,6 +5715,8 @@ def call_llm(
    """
    resolved_provider, resolved_model, resolved_base_url, resolved_api_key, resolved_api_mode = _resolve_task_provider_model(
        task, provider, model, base_url, api_key)
+    if api_mode:
+        resolved_api_mode = api_mode
    effective_extra_body = _get_task_extra_body(task)
    effective_extra_body.update(extra_body or {})

--- a/agent/moa_loop.py
+++ b/agent/moa_loop.py
@ -109,6 +109,8 @@ def _slot_runtime(slot: dict[str, str]) -> dict[str, Any]:
            out["base_url"] = rt["base_url"]
        if rt.get("api_key"):
            out["api_key"] = rt["api_key"]
+        if rt.get("api_mode"):
+            out["api_mode"] = rt["api_mode"]
    except Exception as exc:  # pragma: no cover - defensive
        logger.debug("MoA slot runtime resolution failed for %s: %s", _slot_label(slot), exc)
    return out
--- a/tests/agent/test_moa_slot_api_mode.py
+++ b/tests/agent/test_moa_slot_api_mode.py
@ -0,0 +1,75 @@
+"""Tests for MoA slot_runtime api_mode propagation (issue #54379).
+
+Verify that _slot_runtime passes the resolved api_mode through to call_llm,
+so reference slots using providers that require a specific API surface
+(e.g. Copilot GPT-5.x → codex_responses) get routed correctly.
+"""
+
+from __future__ import annotations
+
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+
+class TestSlotRuntimeApiMode:
+    """_slot_runtime should include api_mode when resolve_runtime_provider returns it."""
+
+    @patch("hermes_cli.runtime_provider.resolve_runtime_provider")
+    def test_slot_runtime_includes_api_mode(self, mock_resolve):
+        """api_mode from resolve_runtime_provider is forwarded in output dict."""
+        mock_resolve.return_value = {
+            "provider": "copilot",
+            "model": "gpt-5.5",
+            "base_url": "https://api.githubcopilot.com",
+            "api_key": "test-key",
+            "api_mode": "codex_responses",
+        }
+        from agent.moa_loop import _slot_runtime
+
+        result = _slot_runtime({"provider": "copilot", "model": "gpt-5.5"})
+        assert result["api_mode"] == "codex_responses"
+        assert result["base_url"] == "https://api.githubcopilot.com"
+        assert result["api_key"] == "test-key"
+
+    @patch("hermes_cli.runtime_provider.resolve_runtime_provider")
+    def test_slot_runtime_omits_api_mode_when_absent(self, mock_resolve):
+        """When resolve_runtime_provider does not return api_mode, output omits it."""
+        mock_resolve.return_value = {
+            "provider": "openai",
+            "model": "gpt-4o",
+            "base_url": "https://api.openai.com/v1",
+            "api_key": "test-key",
+        }
+        from agent.moa_loop import _slot_runtime
+
+        result = _slot_runtime({"provider": "openai", "model": "gpt-4o"})
+        assert "api_mode" not in result
+
+    @patch("hermes_cli.runtime_provider.resolve_runtime_provider")
+    def test_slot_runtime_omits_api_mode_when_empty(self, mock_resolve):
+        """Empty string api_mode is treated as absent."""
+        mock_resolve.return_value = {
+            "provider": "copilot",
+            "model": "gpt-5.5",
+            "base_url": "https://api.githubcopilot.com",
+            "api_key": "test-key",
+            "api_mode": "",
+        }
+        from agent.moa_loop import _slot_runtime
+
+        result = _slot_runtime({"provider": "copilot", "model": "gpt-5.5"})
+        assert "api_mode" not in result
+
+
+class TestCallLlmApiMode:
+    """call_llm should accept an optional api_mode parameter (signature check only)."""
+
+    def test_call_llm_accepts_api_mode_kwarg(self):
+        """call_llm signature includes api_mode parameter."""
+        import inspect
+        from agent.auxiliary_client import call_llm
+
+        sig = inspect.signature(call_llm)
+        assert "api_mode" in sig.parameters
+        assert sig.parameters["api_mode"].default is None