fix(copilot): preserve base URL and gpt-5-mini routing

This commit is contained in:
helix4u 2026-04-14 19:05:03 -06:00 committed by Teknium
parent 3b4ecf8ee7
commit 96cc556055
4 changed files with 56 additions and 10 deletions

View file

@ -1162,6 +1162,7 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
if token:
source_name = "gh_cli" if "gh" in source.lower() else f"env:{source}"
active_sources.add(source_name)
pconfig = PROVIDER_REGISTRY.get(provider)
changed |= _upsert_entry(
entries,
provider,
@ -1170,6 +1171,7 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
"source": source_name,
"auth_type": AUTH_TYPE_API_KEY,
"access_token": token,
"base_url": pconfig.inference_base_url if pconfig else "",
"label": source,
},
)

View file

@ -714,12 +714,13 @@ class AIAgent:
except Exception:
pass
# GPT-5.x models require the Responses API path — they are rejected
# on /v1/chat/completions by both OpenAI and OpenRouter. Also
# auto-upgrade for direct OpenAI URLs (api.openai.com) since all
# newer tool-calling models prefer Responses there.
# ACP runtimes are excluded: CopilotACPClient handles its own
# routing and does not implement the Responses API surface.
# GPT-5.x models usually require the Responses API path, but some
# providers have exceptions (for example Copilot's gpt-5-mini still
# uses chat completions). Also auto-upgrade for direct OpenAI URLs
# (api.openai.com) since all newer tool-calling models prefer
# Responses there. ACP runtimes are excluded: CopilotACPClient
# handles its own routing and does not implement the Responses API
# surface.
if (
self.api_mode == "chat_completions"
and self.provider != "copilot-acp"
@ -727,7 +728,10 @@ class AIAgent:
and not str(self.base_url or "").lower().startswith("acp+tcp://")
and (
self._is_direct_openai_url()
or self._model_requires_responses_api(self.model)
or self._provider_model_requires_responses_api(
self.model,
provider=self.provider,
)
)
):
self.api_mode = "codex_responses"
@ -1960,6 +1964,24 @@ class AIAgent:
m = m.rsplit("/", 1)[-1]
return m.startswith("gpt-5")
@staticmethod
def _provider_model_requires_responses_api(
    model: str,
    *,
    provider: Optional[str] = None,
) -> bool:
    """Decide whether this provider/model pair must use the Responses API.

    Most GPT-5.x models require the Responses API, but some providers
    carve out exceptions (e.g. Copilot's gpt-5-mini stays on chat
    completions), so Copilot defers to its own helper when available.
    """
    if (provider or "").strip().lower() != "copilot":
        # Non-Copilot providers follow the generic GPT-5 rule.
        return AIAgent._model_requires_responses_api(model)
    try:
        from hermes_cli.models import _should_use_copilot_responses_api
        return _should_use_copilot_responses_api(model)
    except Exception:
        # Copilot-specific routing is unavailable for any reason —
        # best-effort fallback to the generic GPT-5 rule.
        return AIAgent._model_requires_responses_api(model)
def _max_tokens_param(self, value: int) -> dict:
"""Return the correct max tokens kwarg for the current provider.
@ -5729,9 +5751,13 @@ class AIAgent:
fb_api_mode = "anthropic_messages"
elif self._is_direct_openai_url(fb_base_url):
fb_api_mode = "codex_responses"
elif self._model_requires_responses_api(fb_model):
# GPT-5.x models need Responses API on every provider
# (OpenRouter, Copilot, direct OpenAI, etc.)
elif self._provider_model_requires_responses_api(
fb_model,
provider=fb_provider,
):
# GPT-5.x models usually need Responses API, but keep
# provider-specific exceptions like Copilot gpt-5-mini on
# chat completions.
fb_api_mode = "codex_responses"
old_model = self.model

View file

@ -1091,6 +1091,7 @@ def test_load_pool_seeds_copilot_via_gh_auth_token(tmp_path, monkeypatch):
assert len(entries) == 1
assert entries[0].source == "gh_cli"
assert entries[0].access_token == "gho_fake_token_abc123"
assert entries[0].base_url == "https://api.githubcopilot.com"
def test_load_pool_does_not_seed_copilot_when_no_token(tmp_path, monkeypatch):

View file

@ -259,6 +259,23 @@ def test_copilot_acp_stays_on_chat_completions_for_gpt_5_models(monkeypatch):
assert agent.api_mode == "chat_completions"
def test_copilot_gpt_5_mini_stays_on_chat_completions(monkeypatch):
    """Copilot's gpt-5-mini must not be auto-upgraded to the Responses API."""
    _patch_agent_bootstrap(monkeypatch)
    agent_kwargs = dict(
        model="gpt-5-mini",
        base_url="https://api.githubcopilot.com",
        provider="copilot",
        api_key="gh-token",
        api_mode="chat_completions",
        quiet_mode=True,
        max_iterations=1,
        skip_context_files=True,
        skip_memory=True,
    )
    agent = run_agent.AIAgent(**agent_kwargs)
    # Provider must be preserved and the api_mode left on chat completions.
    assert agent.provider == "copilot"
    assert agent.api_mode == "chat_completions"
def test_build_api_kwargs_codex(monkeypatch):
agent = _build_agent(monkeypatch)
kwargs = agent._build_api_kwargs(