diff --git a/hermes_cli/models.py b/hermes_cli/models.py
index 77acb99ab..bbf35abaa 100644
--- a/hermes_cli/models.py
+++ b/hermes_cli/models.py
@@ -802,38 +802,59 @@ def _github_reasoning_efforts_for_model_id(model_id: str) -> list[str]:
     return []
 
 
+def _should_use_copilot_responses_api(model_id: str) -> bool:
+    """Decide whether a Copilot model should use the Responses API.
+
+    Replicates opencode's ``shouldUseCopilotResponsesApi`` logic:
+    GPT-5+ models use Responses API, except ``gpt-5-mini`` which uses
+    Chat Completions. All non-GPT models (Claude, Gemini, etc.) use
+    Chat Completions.
+    """
+    import re
+
+    match = re.match(r"^gpt-(\d+)", model_id)
+    if not match:
+        return False
+    major = int(match.group(1))
+    return major >= 5 and not model_id.startswith("gpt-5-mini")
+
+
 def copilot_model_api_mode(
     model_id: Optional[str],
     *,
     catalog: Optional[list[dict[str, Any]]] = None,
     api_key: Optional[str] = None,
 ) -> str:
+    """Determine the API mode for a Copilot model.
+
+    Uses the model ID pattern (matching opencode's approach) as the
+    primary signal. Falls back to the catalog's ``supported_endpoints``
+    only for models not covered by the pattern check.
+    """
     normalized = normalize_copilot_model_id(model_id, catalog=catalog, api_key=api_key)
     if not normalized:
         return "chat_completions"
+
+    # Primary: model ID pattern (matches opencode's shouldUseCopilotResponsesApi)
+    if _should_use_copilot_responses_api(normalized):
+        return "codex_responses"
+
+    # Secondary: check catalog for non-GPT-5 models (Claude via /v1/messages, etc.)
     if catalog is None and api_key:
         catalog = fetch_github_model_catalog(api_key=api_key)
-    catalog_entry = None
     if catalog:
         catalog_entry = next((item for item in catalog if item.get("id") == normalized), None)
+        if isinstance(catalog_entry, dict):
+            supported_endpoints = {
+                str(endpoint).strip()
+                for endpoint in (catalog_entry.get("supported_endpoints") or [])
+                if str(endpoint).strip()
+            }
+            # For non-GPT-5 models, check if they only support messages API
+            if "/v1/messages" in supported_endpoints and "/chat/completions" not in supported_endpoints:
+                return "anthropic_messages"
 
-    if isinstance(catalog_entry, dict):
-        supported_endpoints = {
-            str(endpoint).strip()
-            for endpoint in (catalog_entry.get("supported_endpoints") or [])
-            if str(endpoint).strip()
-        }
-        if "/chat/completions" in supported_endpoints:
-            return "chat_completions"
-        if "/responses" in supported_endpoints:
-            return "codex_responses"
-        if "/v1/messages" in supported_endpoints:
-            return "anthropic_messages"
-
-    if normalized.startswith(("gpt-5.4", "gpt-5.3-codex", "gpt-5.2-codex", "gpt-5.1-codex")):
-        return "codex_responses"
     return "chat_completions"
diff --git a/tests/hermes_cli/test_copilot_auth.py b/tests/hermes_cli/test_copilot_auth.py
index b28afeddc..7bceec9bf 100644
--- a/tests/hermes_cli/test_copilot_auth.py
+++ b/tests/hermes_cli/test_copilot_auth.py
@@ -160,6 +160,36 @@ class TestCopilotDefaultHeaders:
         assert "x-initiator" in headers
 
 
+class TestApiModeSelection:
+    """API mode selection matching opencode's shouldUseCopilotResponsesApi."""
+
+    def test_gpt5_uses_responses(self):
+        from hermes_cli.models import _should_use_copilot_responses_api
+        assert _should_use_copilot_responses_api("gpt-5.4") is True
+        assert _should_use_copilot_responses_api("gpt-5.4-mini") is True
+        assert _should_use_copilot_responses_api("gpt-5.3-codex") is True
+        assert _should_use_copilot_responses_api("gpt-5.2-codex") is True
+        assert _should_use_copilot_responses_api("gpt-5.2") is True
+        assert _should_use_copilot_responses_api("gpt-5.1-codex-max") is True
+
+    def test_gpt5_mini_excluded(self):
+        from hermes_cli.models import _should_use_copilot_responses_api
+        assert _should_use_copilot_responses_api("gpt-5-mini") is False
+
+    def test_gpt4_uses_chat(self):
+        from hermes_cli.models import _should_use_copilot_responses_api
+        assert _should_use_copilot_responses_api("gpt-4.1") is False
+        assert _should_use_copilot_responses_api("gpt-4o") is False
+        assert _should_use_copilot_responses_api("gpt-4o-mini") is False
+
+    def test_non_gpt_uses_chat(self):
+        from hermes_cli.models import _should_use_copilot_responses_api
+        assert _should_use_copilot_responses_api("claude-sonnet-4.6") is False
+        assert _should_use_copilot_responses_api("claude-opus-4.6") is False
+        assert _should_use_copilot_responses_api("gemini-2.5-pro") is False
+        assert _should_use_copilot_responses_api("grok-code-fast-1") is False
+
+
 class TestEnvVarOrder:
     """PROVIDER_REGISTRY has correct env var order."""
 
diff --git a/tests/hermes_cli/test_model_validation.py b/tests/hermes_cli/test_model_validation.py
index 5f26e401f..9d4b670b0 100644
--- a/tests/hermes_cli/test_model_validation.py
+++ b/tests/hermes_cli/test_model_validation.py
@@ -276,7 +276,37 @@ class TestCopilotNormalization:
         catalog = [{"id": "gpt-4.1"}, {"id": "gpt-5.4"}]
         assert normalize_copilot_model_id("openai/gpt-4.1-mini", catalog=catalog) == "gpt-4.1"
 
-    def test_copilot_api_mode_prefers_responses(self):
+    def test_copilot_api_mode_gpt5_uses_responses(self):
+        """GPT-5+ models should use Responses API (matching opencode)."""
+        assert copilot_model_api_mode("gpt-5.4") == "codex_responses"
+        assert copilot_model_api_mode("gpt-5.4-mini") == "codex_responses"
+        assert copilot_model_api_mode("gpt-5.3-codex") == "codex_responses"
+        assert copilot_model_api_mode("gpt-5.2-codex") == "codex_responses"
+        assert copilot_model_api_mode("gpt-5.2") == "codex_responses"
+
+    def test_copilot_api_mode_gpt5_mini_uses_chat(self):
+        """gpt-5-mini is the exception — uses Chat Completions."""
+        assert copilot_model_api_mode("gpt-5-mini") == "chat_completions"
+
+    def test_copilot_api_mode_non_gpt5_uses_chat(self):
+        """Non-GPT-5 models use Chat Completions."""
+        assert copilot_model_api_mode("gpt-4.1") == "chat_completions"
+        assert copilot_model_api_mode("gpt-4o") == "chat_completions"
+        assert copilot_model_api_mode("gpt-4o-mini") == "chat_completions"
+        assert copilot_model_api_mode("claude-sonnet-4.6") == "chat_completions"
+        assert copilot_model_api_mode("claude-opus-4.6") == "chat_completions"
+        assert copilot_model_api_mode("gemini-2.5-pro") == "chat_completions"
+
+    def test_copilot_api_mode_with_catalog_both_endpoints(self):
+        """When catalog shows both endpoints, model ID pattern wins."""
+        catalog = [{
+            "id": "gpt-5.4",
+            "supported_endpoints": ["/chat/completions", "/responses"],
+        }]
+        # GPT-5.4 should use responses even though chat/completions is listed
+        assert copilot_model_api_mode("gpt-5.4", catalog=catalog) == "codex_responses"
+
+    def test_copilot_api_mode_with_catalog_only_responses(self):
         catalog = [{
             "id": "gpt-5.4",
             "supported_endpoints": ["/responses"],