From e7bb67332d758c7093e58efa5caa30fef96051b2 Mon Sep 17 00:00:00 2001
From: Gille <4317663+helix4u@users.noreply.github.com>
Date: Sat, 27 Jun 2026 14:31:24 -0600
Subject: [PATCH] fix(moa): preserve Codex slot routing

call_llm treats an explicit base_url as a custom endpoint. For the
OAuth / adapter-backed providers openai-codex and xai-oauth that skips
the provider branch which adds auth headers and request-shape adapters,
so _slot_runtime now returns early for them and keeps the slot
identified by provider name instead of passing the resolved endpoint
through.

The new test pins the openai-codex case: the slot runtime must come
back as the bare provider/model pair.

---
 agent/moa_loop.py                     |  7 +++++++
 tests/run_agent/test_moa_loop_mode.py | 26 ++++++++++++++++++++++++++
 2 files changed, 33 insertions(+)

diff --git a/agent/moa_loop.py b/agent/moa_loop.py
index bab36fb4010..ed566325eeb 100644
--- a/agent/moa_loop.py
+++ b/agent/moa_loop.py
@@ -55,6 +55,13 @@ def _slot_runtime(slot: dict[str, str]) -> dict[str, Any]:
         from hermes_cli.runtime_provider import resolve_runtime_provider
 
         rt = resolve_runtime_provider(requested=provider, target_model=model)
+        resolved_provider = str(rt.get("provider") or provider).strip().lower()
+        # call_llm treats an explicit base_url as a custom endpoint. That is
+        # correct for ordinary OpenAI-compatible targets, but wrong for OAuth /
+        # adapter-backed providers whose provider branch adds auth headers and
+        # request-shape adapters. Keep those providers identified by name.
+        if resolved_provider in {"openai-codex", "xai-oauth"}:
+            return out
         # Pass the resolved endpoint through so call_llm builds the request for
         # the provider's actual API surface instead of auto-detecting. base_url
         # routes call_llm to the right adapter (incl. anthropic_messages mode);
diff --git a/tests/run_agent/test_moa_loop_mode.py b/tests/run_agent/test_moa_loop_mode.py
index 2bf5624df69..c05dd3b267f 100644
--- a/tests/run_agent/test_moa_loop_mode.py
+++ b/tests/run_agent/test_moa_loop_mode.py
@@ -172,6 +172,32 @@ def test_moa_slots_routed_through_resolve_runtime_provider(monkeypatch):
     assert rt["api_key"] == "key-for-minimax"
 
 
+def test_moa_codex_slot_preserves_provider_identity(monkeypatch):
+    """Codex slots must not become custom chat-completions endpoints.
+
+    _resolve_task_provider_model treats any explicit base_url as provider=custom.
+    For openai-codex that bypasses the Codex auxiliary branch, losing the
+    Cloudflare headers and Responses adapter required for chatgpt.com/backend-api/codex.
+    """
+    from agent import moa_loop
+
+    def fake_resolve(*, requested, target_model=None):
+        return {
+            "provider": requested,
+            "api_mode": "codex_responses",
+            "base_url": "https://chatgpt.com/backend-api/codex",
+            "api_key": "codex-oauth-token",
+        }
+
+    monkeypatch.setattr(
+        "hermes_cli.runtime_provider.resolve_runtime_provider", fake_resolve
+    )
+
+    rt = moa_loop._slot_runtime({"provider": "openai-codex", "model": "gpt-5.5"})
+
+    assert rt == {"provider": "openai-codex", "model": "gpt-5.5"}
+
+
 def test_moa_slot_runtime_falls_back_on_resolution_error(monkeypatch):
     """A slot whose provider can't be resolved still attempts the call with
     the bare provider/model rather than aborting the whole MoA turn."""