mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
fix: correct Copilot API mode selection to match opencode
The previous copilot_model_api_mode() checked the catalog's supported_endpoints first and picked /chat/completions when a model supported both endpoints. This is wrong — GPT-5+ models should use the Responses API even when the catalog lists both. Replicate opencode's shouldUseCopilotResponsesApi() logic: - GPT-5+ models (gpt-5.4, gpt-5.3-codex, etc.) → Responses API - gpt-5-mini → Chat Completions (explicit exception) - Everything else (gpt-4o, claude, gemini, etc.) → Chat Completions - Model ID pattern is the primary signal, catalog is secondary The catalog fallback now only matters for non-GPT-5 models that might exclusively support /v1/messages (e.g. Claude via Copilot). Models are auto-detected from the live catalog at api.githubcopilot.com/models — no hardcoded list required for supported models, only a static fallback for when the API is unreachable.
This commit is contained in:
parent
21c45ba0ac
commit
36921a3e98
3 changed files with 98 additions and 17 deletions
|
|
@ -276,7 +276,37 @@ class TestCopilotNormalization:
|
|||
catalog = [{"id": "gpt-4.1"}, {"id": "gpt-5.4"}]
|
||||
assert normalize_copilot_model_id("openai/gpt-4.1-mini", catalog=catalog) == "gpt-4.1"
|
||||
|
||||
def test_copilot_api_mode_gpt5_uses_responses(self):
    """GPT-5+ models should use Responses API (matching opencode)."""
    # Model-ID pattern is the primary signal: every GPT-5.x variant
    # (plain, -mini suffix, -codex suffix) must route to the Responses API.
    gpt5_family = (
        "gpt-5.4",
        "gpt-5.4-mini",
        "gpt-5.3-codex",
        "gpt-5.2-codex",
        "gpt-5.2",
    )
    for model_id in gpt5_family:
        assert copilot_model_api_mode(model_id) == "codex_responses"
|
||||
|
||||
def test_copilot_api_mode_gpt5_mini_uses_chat(self):
    """gpt-5-mini is the exception — uses Chat Completions."""
    # Unlike the rest of the GPT-5 family, this id is explicitly pinned
    # to the Chat Completions endpoint.
    mode = copilot_model_api_mode("gpt-5-mini")
    assert mode == "chat_completions"
|
||||
|
||||
def test_copilot_api_mode_non_gpt5_uses_chat(self):
    """Non-GPT-5 models use Chat Completions."""
    # Anything outside the GPT-5 family — older OpenAI models, Claude,
    # Gemini — stays on the Chat Completions endpoint.
    non_gpt5_models = (
        "gpt-4.1",
        "gpt-4o",
        "gpt-4o-mini",
        "claude-sonnet-4.6",
        "claude-opus-4.6",
        "gemini-2.5-pro",
    )
    for model_id in non_gpt5_models:
        assert copilot_model_api_mode(model_id) == "chat_completions"
|
||||
|
||||
def test_copilot_api_mode_with_catalog_both_endpoints(self):
    """When catalog shows both endpoints, model ID pattern wins."""
    # Catalog advertises both endpoints for gpt-5.4; the id-based rule
    # must still pick the Responses API over /chat/completions.
    entry = {
        "id": "gpt-5.4",
        "supported_endpoints": ["/chat/completions", "/responses"],
    }
    result = copilot_model_api_mode("gpt-5.4", catalog=[entry])
    assert result == "codex_responses"
|
||||
|
||||
def test_copilot_api_mode_with_catalog_only_responses(self):
|
||||
catalog = [{
|
||||
"id": "gpt-5.4",
|
||||
"supported_endpoints": ["/responses"],
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue