mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
fix: wrap copilot Responses-API models in CodexAuxiliaryClient for auxiliary tasks
GPT-5+ models (except gpt-5-mini) are only accessible via the Responses
API on Copilot. When these models were configured as the compression
summary_model (or any auxiliary task), the plain OpenAI client sent them
to /chat/completions which returned a 400 error:
model "gpt-5.4-mini" is not accessible via the /chat/completions endpoint
resolve_provider_client() now checks _should_use_copilot_responses_api()
for the copilot provider and wraps the client in CodexAuxiliaryClient
when needed, routing calls through responses.stream() transparently.
Adds tests for both the wrapping (gpt-5.4-mini) and non-wrapping
(gpt-4.1-mini) paths.
This commit is contained in:
parent
718e8ad6fa
commit
c1af614289
2 changed files with 80 additions and 0 deletions
|
|
@@ -1425,6 +1425,23 @@ def resolve_provider_client(
|
|||
|
||||
client = OpenAI(api_key=api_key, base_url=base_url,
|
||||
**({"default_headers": headers} if headers else {}))
|
||||
|
||||
# Copilot GPT-5+ models (except gpt-5-mini) require the Responses
|
||||
# API — they are not accessible via /chat/completions. Wrap the
|
||||
# plain client in CodexAuxiliaryClient so call_llm() transparently
|
||||
# routes through responses.stream().
|
||||
if provider == "copilot" and final_model and not raw_codex:
|
||||
try:
|
||||
from hermes_cli.models import _should_use_copilot_responses_api
|
||||
if _should_use_copilot_responses_api(final_model):
|
||||
logger.debug(
|
||||
"resolve_provider_client: copilot model %s needs "
|
||||
"Responses API — wrapping with CodexAuxiliaryClient",
|
||||
final_model)
|
||||
client = CodexAuxiliaryClient(client, final_model)
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
logger.debug("resolve_provider_client: %s (%s)", provider, final_model)
|
||||
return (_to_async_client(client, final_model) if async_mode
|
||||
else (client, final_model))
|
||||
|
|
|
|||
|
|
@@ -756,6 +756,69 @@ class TestAuxiliaryPoolAwareness:
|
|||
assert call_kwargs["base_url"] == "https://api.githubcopilot.com"
|
||||
assert call_kwargs["default_headers"]["Editor-Version"]
|
||||
|
||||
def test_copilot_responses_api_model_wrapped_in_codex_client(self, monkeypatch):
    """Copilot GPT-5+ models (needing Responses API) are wrapped in CodexAuxiliaryClient."""
    # Make sure no ambient GitHub token leaks into credential resolution.
    monkeypatch.delenv("GITHUB_TOKEN", raising=False)
    monkeypatch.delenv("GH_TOKEN", raising=False)

    fake_credentials = {
        "provider": "copilot",
        "api_key": "test-token",
        "base_url": "https://api.githubcopilot.com",
        "source": "gh auth token",
    }
    with (
        patch(
            "hermes_cli.auth.resolve_api_key_provider_credentials",
            return_value=fake_credentials,
        ),
        patch("agent.auxiliary_client.OpenAI"),
    ):
        client, model = resolve_provider_client("copilot", model="gpt-5.4-mini")

    from agent.auxiliary_client import CodexAuxiliaryClient

    # The Responses-API-only model must come back wrapped; the model name
    # itself is passed through untouched.
    assert isinstance(client, CodexAuxiliaryClient)
    assert model == "gpt-5.4-mini"
|
||||
|
||||
def test_copilot_chat_completions_model_not_wrapped(self, monkeypatch):
    """Copilot models using Chat Completions are returned as plain OpenAI clients."""
    # Make sure no ambient GitHub token leaks into credential resolution.
    monkeypatch.delenv("GITHUB_TOKEN", raising=False)
    monkeypatch.delenv("GH_TOKEN", raising=False)

    fake_credentials = {
        "provider": "copilot",
        "api_key": "test-token",
        "base_url": "https://api.githubcopilot.com",
        "source": "gh auth token",
    }
    with (
        patch(
            "hermes_cli.auth.resolve_api_key_provider_credentials",
            return_value=fake_credentials,
        ),
        patch("agent.auxiliary_client.OpenAI") as mock_openai,
    ):
        client, model = resolve_provider_client("copilot", model="gpt-4.1-mini")

    from agent.auxiliary_client import CodexAuxiliaryClient

    # A Chat Completions model must stay on the unwrapped client path.
    assert not isinstance(client, CodexAuxiliaryClient)
    assert model == "gpt-4.1-mini"
    # Should be the raw mock OpenAI client
    assert client is mock_openai.return_value
|
||||
|
||||
def test_vision_auto_uses_active_provider_as_fallback(self, monkeypatch):
    """When no OpenRouter/Nous available, vision auto falls back to active provider."""
    monkeypatch.setenv("ANTHROPIC_API_KEY", "***")
    with (
        patch("agent.auxiliary_client._read_nous_auth", return_value=None),
        patch("agent.auxiliary_client._read_main_provider", return_value="anthropic"),
        patch("agent.auxiliary_client._read_main_model", return_value="claude-sonnet-4"),
        patch("agent.anthropic_adapter.build_anthropic_client", return_value=MagicMock()),
        patch("agent.anthropic_adapter.resolve_anthropic_token", return_value="***"),
    ):
        vision_client, _vision_model = get_vision_auxiliary_client()

    # Fallback should hand back the Anthropic auxiliary wrapper, never None.
    assert vision_client is not None
    assert type(vision_client).__name__ == "AnthropicAuxiliaryClient"
|
||||
|
||||
def test_vision_auto_prefers_active_provider_over_openrouter(self, monkeypatch):
|
||||
"""Active provider is tried before OpenRouter in vision auto."""
|
||||
monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue