fix(openai): route api.openai.com to Responses API for GPT-5.x

Based on PR #1859 by @magi-morph (too stale to cherry-pick, so it was reimplemented).

GPT-5.x models reject tool calls combined with reasoning_effort on /v1/chat/completions, returning a 400 error that directs the client to /v1/responses. This change auto-detects api.openai.com in the base URL and switches to codex_responses mode in three places:

- AIAgent.__init__: upgrades chat_completions → codex_responses
- _try_activate_fallback(): same routing for the fallback model
- runtime_provider.py: _detect_api_mode_for_url() for both the custom provider and openrouter runtime resolution paths

It also extracts an _is_direct_openai_url() helper to replace the inline check in _max_tokens_param().
2026-04-25 00:51:20 +00:00 · 2026-03-20 05:09:41 -07:00 · 2026-03-20 05:09:41 -07:00 · b1d05dfe8b
commit b1d05dfe8b
parent 4ad0083118
2 changed files with 33 additions and 8 deletions
--- a/run_agent.py
+++ b/run_agent.py
@ -501,6 +501,12 @@ class AIAgent:
        else:
            self.api_mode = "chat_completions"

+        # Direct OpenAI sessions use the Responses API path.  GPT-5.x tool
+        # calls with reasoning are rejected on /v1/chat/completions, and
+        # Hermes is a tool-using client by default.
+        if self.api_mode == "chat_completions" and self._is_direct_openai_url():
+            self.api_mode = "codex_responses"
+
        # Pre-warm OpenRouter model metadata cache in a background thread.
        # fetch_model_metadata() is cached for 1 hour; this avoids a blocking
        # HTTP request on the first API response when pricing is estimated.
@ -1080,6 +1086,11 @@ class AIAgent:
            return
        self._safe_print(*args, **kwargs)

+    def _is_direct_openai_url(self, base_url: str = None) -> bool:
+        """Return True when a base URL targets OpenAI's native API."""
+        url = (base_url or self._base_url_lower).lower()
+        return "api.openai.com" in url and "openrouter" not in url
+
    def _max_tokens_param(self, value: int) -> dict:
        """Return the correct max tokens kwarg for the current provider.
        
@ -1087,11 +1098,7 @@ class AIAgent:
        'max_completion_tokens'. OpenRouter, local models, and older
        OpenAI models use 'max_tokens'.
        """
-        _is_direct_openai = (
-            "api.openai.com" in self._base_url_lower
-            and "openrouter" not in self._base_url_lower
-        )
-        if _is_direct_openai:
+        if self._is_direct_openai_url():
            return {"max_completion_tokens": value}
        return {"max_tokens": value}

@ -3553,13 +3560,15 @@ class AIAgent:
                    fb_provider)
                return False

-            # Determine api_mode from provider
+            # Determine api_mode from provider / base URL
            fb_api_mode = "chat_completions"
            fb_base_url = str(fb_client.base_url)
            if fb_provider == "openai-codex":
                fb_api_mode = "codex_responses"
            elif fb_provider == "anthropic" or fb_base_url.rstrip("/").lower().endswith("/anthropic"):
                fb_api_mode = "anthropic_messages"
+            elif self._is_direct_openai_url(fb_base_url):
+                fb_api_mode = "codex_responses"

            old_model = self.model
            self.model = fb_model