fix: use configured model for gateway auth fallback

This commit is contained in:
Luciano Pacheco 2026-05-03 23:02:23 +00:00 committed by Teknium
parent 8aa30407c2
commit f7b71aa0da
2 changed files with 69 additions and 1 deletions

View file

@ -643,7 +643,11 @@ def _try_resolve_fallback_provider() -> dict | None:
explicit_base_url=entry.get("base_url"),
explicit_api_key=entry.get("api_key"),
)
logger.info("Fallback provider resolved: %s", runtime.get("provider"))
logger.info(
"Fallback provider resolved: %s model=%s",
runtime.get("provider"),
entry.get("model"),
)
return {
"api_key": runtime.get("api_key"),
"base_url": runtime.get("base_url"),
@ -652,6 +656,7 @@ def _try_resolve_fallback_provider() -> dict | None:
"command": runtime.get("command"),
"args": list(runtime.get("args") or []),
"credential_pool": runtime.get("credential_pool"),
"model": entry.get("model"),
}
except Exception as fb_exc:
logger.debug("Fallback entry %s failed: %s", entry.get("provider"), fb_exc)
@ -1661,6 +1666,14 @@ class GatewayRunner:
)
runtime_kwargs = _resolve_runtime_agent_kwargs()
runtime_model = runtime_kwargs.pop("model", None)
if runtime_model:
logger.info(
"Runtime provider supplied explicit model override: %s -> %s",
model,
runtime_model,
)
model = runtime_model
if override and resolved_session_key:
model, runtime_kwargs = self._apply_session_model_override(
resolved_session_key, model, runtime_kwargs

View file

@ -163,3 +163,58 @@ async def test_background_task_prefers_session_override_over_global_runtime(monk
assert _CapturingAgent.last_init["base_url"] == "https://chatgpt.com/backend-api/codex"
assert _CapturingAgent.last_init["api_key"] == "***"
assert _CapturingAgent.last_init["reasoning_config"] == {"enabled": True, "effort": "high"}
def test_gateway_auth_fallback_uses_fallback_model_from_config(tmp_path, monkeypatch):
    """Regression: a fallback provider must not inherit the primary model.

    When primary openai-codex auth fails and ``fallback_providers`` selects
    OpenRouter/minimax, the gateway has to instantiate AIAgent with the
    fallback entry's model — not the primary config model (e.g. gpt-5.5) —
    otherwise OpenRouter receives an unintended GPT request.
    """
    # Write a config whose primary provider will fail auth and whose
    # fallback entry carries its own model.
    config_path = tmp_path / "config.yaml"
    config_path.write_text(
        "model:\n"
        "  default: gpt-5.5\n"
        "  provider: openai-codex\n"
        "fallback_providers:\n"
        "  - provider: openrouter\n"
        "    model: minimax/minimax-m2.7\n",
        encoding="utf-8",
    )
    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)

    openrouter_runtime = {
        "api_key": "sk-openrouter",
        "base_url": "https://openrouter.ai/api/v1",
        "provider": "openrouter",
        "api_mode": "chat_completions",
        "command": None,
        "args": [],
        "credential_pool": None,
    }

    def stub_resolve_runtime_provider(*, requested=None, explicit_base_url=None, explicit_api_key=None):
        # The primary provider (and the unspecified default) must fail auth
        # so the gateway falls through to the fallback entry.
        if requested in (None, "", "openai-codex"):
            from hermes_cli.auth import AuthError

            raise AuthError("No Codex credentials stored. Run `hermes auth` to authenticate.")
        # Only the fallback provider should ever be requested after that.
        assert requested == "openrouter"
        return dict(openrouter_runtime)

    import hermes_cli.runtime_provider as runtime_provider

    monkeypatch.setattr(runtime_provider, "resolve_runtime_provider", stub_resolve_runtime_provider)

    runner = _make_runner()
    model, runtime_kwargs = runner._resolve_session_agent_runtime(
        session_key="agent:main:telegram:group:-1003715515980:63",
        user_config={
            "model": {"default": "gpt-5.5", "provider": "openai-codex"},
            "fallback_providers": [{"provider": "openrouter", "model": "minimax/minimax-m2.7"}],
        },
    )

    # The fallback entry's model must win over the primary config model.
    assert model == "minimax/minimax-m2.7"
    assert runtime_kwargs["provider"] == "openrouter"
    assert runtime_kwargs["api_key"] == "sk-openrouter"