fix(moa): preserve Codex slot routing

2026-07-01 12:02:05 +00:00 · 2026-06-27 14:31:24 -06:00 · 2026-06-27 14:31:24 -06:00 · e7bb67332d
commit e7bb67332d
parent 66aeda3550
2 changed files with 33 additions and 0 deletions
--- a/agent/moa_loop.py
+++ b/agent/moa_loop.py
@@ -55,6 +55,13 @@ def _slot_runtime(slot: dict[str, str]) -> dict[str, Any]:
        from hermes_cli.runtime_provider import resolve_runtime_provider

        rt = resolve_runtime_provider(requested=provider, target_model=model)
+        resolved_provider = str(rt.get("provider") or provider).strip().lower()
+        # call_llm treats an explicit base_url as a custom endpoint. That is
+        # correct for ordinary OpenAI-compatible targets, but wrong for OAuth /
+        # adapter-backed providers whose provider branch adds auth headers and
+        # request-shape adapters. Return early so those stay identified by name.
+        if resolved_provider in {"openai-codex", "xai-oauth"}:
+            return out
        # Pass the resolved endpoint through so call_llm builds the request for
        # the provider's actual API surface instead of auto-detecting. base_url
        # routes call_llm to the right adapter (incl. anthropic_messages mode);
--- a/tests/run_agent/test_moa_loop_mode.py
+++ b/tests/run_agent/test_moa_loop_mode.py
@@ -172,6 +172,32 @@ def test_moa_slots_routed_through_resolve_runtime_provider(monkeypatch):
    assert rt["api_key"] == "key-for-minimax"


+def test_moa_codex_slot_preserves_provider_identity(monkeypatch):
+    """Codex slots must not become custom chat-completions endpoints.
+
+    _resolve_task_provider_model treats any explicit base_url as provider=custom.
+    For openai-codex that bypasses the Codex auxiliary branch, losing the
+    Cloudflare headers and Responses adapter required for chatgpt.com/backend-api/codex.
+    """
+    from agent import moa_loop
+
+    def fake_resolve(*, requested, target_model=None):  # stub resolver: echoes the requested provider
+        return {
+            "provider": requested,
+            "api_mode": "codex_responses",
+            "base_url": "https://chatgpt.com/backend-api/codex",  # must not appear in the slot runtime
+            "api_key": "codex-oauth-token",
+        }
+
+    monkeypatch.setattr(  # patch the source module: _slot_runtime imports the resolver at call time
+        "hermes_cli.runtime_provider.resolve_runtime_provider", fake_resolve
+    )
+
+    rt = moa_loop._slot_runtime({"provider": "openai-codex", "model": "gpt-5.5"})
+
+    assert rt == {"provider": "openai-codex", "model": "gpt-5.5"}  # resolved endpoint fields are dropped
+
+
 def test_moa_slot_runtime_falls_back_on_resolution_error(monkeypatch):
    """A slot whose provider can't be resolved still attempts the call with the
    bare provider/model rather than aborting the whole MoA turn."""