fix(auth): explicit provider intent beats stale OAuth active_provider (#29285)

`resolve_provider("auto")` checked `auth.json` `active_provider` BEFORE the
config.yaml `model.provider` and env-var API-key checks. So a user who was
OAuth-logged-into one provider (e.g. Anthropic) but had set an explicit
`model.provider` or exported an API key (e.g. `OPENAI_API_KEY`) was silently
routed to the stale OAuth provider — the override was invisible and surprising.

Reorder the auto-path so explicit intent wins (the order the issue asks for):

  1. explicit CLI api_key/base_url
  2. config.yaml `model.provider`            (safety net — see below)
  3. OPENAI_API_KEY / OPENROUTER_API_KEY env
  4. OpenRouter credential pool
  5. provider-specific API-key env vars
  6. auth.json `active_provider` (OAuth)      ← demoted to last-resort
  7. AWS Bedrock credential chain
  8. error

`active_provider` is still honored — it's just a last-resort fallback chosen
only when the user expressed no other preference, instead of overriding one.

The normal chat/gateway/TUI/ACP/status path already resolves config.provider
upstream in `resolve_requested_provider()` before "auto" is reached, so this
duplicate config check is the safety net for the lone direct caller
(`main.py` `resolve_provider("auto")`) and any future bypass. Because every
surface funnels through this one resolver, the fix propagates everywhere with
a single edit — no sibling path re-implements precedence.

Also add a one-shot WARN when resolution lands on `active_provider` while a
populated `model` config dict lacks a `provider` key — surfacing the silent
override the issue reported without breaking first-install.

Synthesizes the two competing PRs: #29615 (LifeJiggy — config-before-auth +
the silent-override framing) and #29809 (Minksgo — the env-before-auth
reorder). #29809 could not be merged directly (bundled unrelated, un-opt-in
cost-tagging telemetry); its reorder idea is incorporated here and credited.

Tests: tests/hermes_cli/test_provider_precedence.py — config/env beat stale
OAuth, OAuth still used as last resort, explicit request short-circuits, WARN
fires on silent fall-through. Full provider-resolution suites: 374 passed.

Fixes #29285

Co-authored-by: LifeJiggy <141562589+LifeJiggy@users.noreply.github.com>
Co-authored-by: Minksgo <153416856+Minksgo@users.noreply.github.com>
This commit is contained in:
kshitijk4poor 2026-06-22 23:02:08 +05:30 committed by Teknium
parent 2b73dd1ca6
commit 2af1678bfc
2 changed files with 186 additions and 14 deletions

View file

@ -1519,12 +1519,16 @@ def resolve_provider(
"""
Determine which inference provider to use.
Priority (when requested="auto" or None):
1. active_provider in auth.json with valid credentials
2. Explicit CLI api_key/base_url -> "openrouter"
3. OPENAI_API_KEY or OPENROUTER_API_KEY env vars -> "openrouter"
4. Provider-specific API keys (GLM, Kimi, MiniMax) -> that provider
5. Fallback: "openrouter"
Priority (when requested="auto" or None) explicit user intent wins over a
stale logged-in OAuth provider (#29285):
1. Explicit CLI api_key/base_url -> "openrouter"
2. config.yaml `model.provider`
3. OPENAI_API_KEY / OPENROUTER_API_KEY env vars -> "openrouter"
4. OpenRouter credential pool
5. Provider-specific API keys (GLM, Kimi, MiniMax, ...) -> that provider
6. auth.json `active_provider` (logged-in OAuth) last-resort fallback
7. AWS Bedrock credential chain
8. Error (no provider configured)
"""
normalized = (requested or "auto").strip().lower()
@ -1596,16 +1600,26 @@ def resolve_provider(
if explicit_api_key or explicit_base_url:
return "openrouter"
# Check auth store for an active OAuth provider
# Provider precedence for the auto-path (#29285): explicit user intent must
# win over a stale logged-in OAuth `active_provider`. Order matches the
# docstring: 1. explicit CLI creds 2. config.yaml `model.provider`
# 3. OPENAI/OPENROUTER env keys 4. OpenRouter pool 5. provider-specific
# env keys 6. auth.json `active_provider` (OAuth) 7. Bedrock 8. error.
# The normal chat/gateway path resolves config.provider upstream in
# resolve_requested_provider() before ever reaching "auto"; this duplicate
# check is the safety net for the lone direct caller (main.py resolve_provider
# ("auto")) and any future bypass of that stage.
_model_cfg: Any = None
try:
auth_store = _load_auth_store()
active = auth_store.get("active_provider")
if active and active in PROVIDER_REGISTRY:
status = get_auth_status(active)
if status.get("logged_in"):
return active
from hermes_cli.config import load_config
_model_cfg = (load_config() or {}).get("model")
if isinstance(_model_cfg, dict):
_cfg_provider = _model_cfg.get("provider")
if isinstance(_cfg_provider, str) and _cfg_provider.strip().lower() in PROVIDER_REGISTRY:
return _cfg_provider.strip().lower()
except Exception as e:
logger.debug("Could not detect active auth provider: %s", e)
logger.debug("Could not read config.yaml model.provider for auto-resolution: %s", e)
if has_usable_secret(os.getenv("OPENAI_API_KEY")) or has_usable_secret(os.getenv("OPENROUTER_API_KEY")):
return "openrouter"
@ -1625,6 +1639,18 @@ def resolve_provider(
except Exception as e:
logger.debug("Could not check OpenRouter credential pool: %s", e)
# Determine the logged-in OAuth provider up front so the env-key loop below
# can WARN when an exported API key preempts it (#29285 transparency). The
# actual OAuth fallback (tier 6) still happens later if nothing else matches.
_oauth_active: Optional[str] = None
try:
_store = _load_auth_store()
_maybe = _store.get("active_provider")
if _maybe and _maybe in PROVIDER_REGISTRY and get_auth_status(_maybe).get("logged_in"):
_oauth_active = _maybe
except Exception as e:
logger.debug("Could not pre-read active auth provider: %s", e)
# Auto-detect API-key providers by checking their env vars
for pid, pconfig in PROVIDER_REGISTRY.items():
if pconfig.auth_type != "api_key":
@ -1639,8 +1665,37 @@ def resolve_provider(
continue
for env_var in pconfig.api_key_env_vars:
if has_usable_secret(os.getenv(env_var, "")):
# An exported API key now wins over a logged-in OAuth provider
# (the #29285 fix). Surface that so a user who deliberately uses
# OAuth but has a stale key in ~/.hermes/.env isn't silently
# switched without knowing why.
if _oauth_active and _oauth_active != pid:
logger.warning(
"Provider resolved to %r via %s, preempting your "
"logged-in OAuth provider %r. If you meant to use the "
"OAuth login, unset %s or set `model.provider` "
"explicitly.",
pid, env_var, _oauth_active, env_var,
)
return pid
# Logged-in OAuth provider (auth.json `active_provider`) — a LAST-RESORT
# fallback, chosen only when the user expressed no other preference above.
# Previously this sat ABOVE the env-var/config checks, so a stale OAuth
# login silently overrode an explicit `model.provider` or an exported API
# key (#29285). Demoted here so explicit intent always wins.
if _oauth_active:
# Surface the silent-override case the issue reported: a populated
# `model` config that lacks a `provider` key falls through to OAuth.
if isinstance(_model_cfg, dict) and _model_cfg and not _model_cfg.get("provider"):
logger.warning(
"Provider resolved to logged-in OAuth provider %r because "
"config.yaml `model` has no `provider` key. If you meant a "
"different provider, set `model.provider` explicitly.",
_oauth_active,
)
return _oauth_active
# AWS Bedrock — detect via boto3 credential chain (IAM roles, SSO, env vars).
# This runs after API-key providers so explicit keys always win.
try:

View file

@ -0,0 +1,117 @@
"""Regression tests for #29285 — provider precedence in resolve_provider("auto").
Explicit user intent (config.yaml model.provider, env-var API keys) must win
over a stale logged-in OAuth `active_provider` in auth.json. Before the fix,
`active_provider` sat above the env/config checks and silently overrode an
explicit choice e.g. a user OAuth-logged-into Anthropic but with
OPENAI_API_KEY exported (or model.provider set) got routed to Anthropic.
"""
import pytest
from hermes_cli.auth import resolve_provider, AuthError
def _login(monkeypatch, provider_id):
"""Simulate a logged-in OAuth active_provider in auth.json."""
monkeypatch.setattr("hermes_cli.auth._load_auth_store",
lambda: {"active_provider": provider_id})
monkeypatch.setattr("hermes_cli.auth.get_auth_status",
lambda p: {"logged_in": p == provider_id})
def _config(monkeypatch, model_cfg):
monkeypatch.setattr("hermes_cli.config.load_config", lambda: {"model": model_cfg})
def _no_aws(monkeypatch):
# Neutralize any ambient AWS creds so Bedrock auto-detect can't interfere.
monkeypatch.setattr("agent.bedrock_adapter.has_aws_credentials", lambda: False)
def _clear_provider_env(monkeypatch):
for var in ("OPENAI_API_KEY", "OPENROUTER_API_KEY", "GLM_API_KEY", "ZAI_API_KEY",
"KIMI_API_KEY", "MINIMAX_API_KEY", "HERMES_INFERENCE_PROVIDER"):
monkeypatch.delenv(var, raising=False)
class TestProviderPrecedence:
def test_config_provider_beats_stale_oauth(self, monkeypatch):
"""config.yaml model.provider wins over a logged-in OAuth active_provider."""
_clear_provider_env(monkeypatch)
_no_aws(monkeypatch)
_login(monkeypatch, "anthropic") # stale OAuth login
_config(monkeypatch, {"provider": "zai", "default": "glm-4.6"})
assert resolve_provider("auto") == "zai"
def test_env_key_beats_stale_oauth(self, monkeypatch):
"""An exported provider API key wins over a logged-in OAuth active_provider."""
_clear_provider_env(monkeypatch)
_no_aws(monkeypatch)
_login(monkeypatch, "anthropic")
_config(monkeypatch, {"default": "some-model"}) # dict, NO provider key
monkeypatch.setenv("OPENAI_API_KEY", "sk-test-key")
assert resolve_provider("auto") == "openrouter"
def test_provider_specific_env_key_beats_stale_oauth(self, monkeypatch):
"""A provider-specific env key (GLM) wins over a logged-in OAuth provider."""
_clear_provider_env(monkeypatch)
_no_aws(monkeypatch)
_login(monkeypatch, "anthropic")
_config(monkeypatch, {})
monkeypatch.setenv("GLM_API_KEY", "test-glm-key")
assert resolve_provider("auto") == "zai"
def test_oauth_used_as_last_resort(self, monkeypatch):
"""With NO config provider and NO env keys, the logged-in OAuth provider
is still used (it's the last-resort fallback, not removed)."""
_clear_provider_env(monkeypatch)
_no_aws(monkeypatch)
_login(monkeypatch, "anthropic")
_config(monkeypatch, {}) # empty model config, no provider
assert resolve_provider("auto") == "anthropic"
def test_explicit_request_unaffected(self, monkeypatch):
"""An explicit requested provider short-circuits everything."""
_clear_provider_env(monkeypatch)
_login(monkeypatch, "anthropic")
assert resolve_provider("zai") == "zai"
def test_warns_on_silent_oauth_fallthrough(self, monkeypatch, caplog):
"""A populated model dict lacking `provider` that falls through to OAuth
emits a WARN so the silent override is visible (#29285)."""
import logging
_clear_provider_env(monkeypatch)
_no_aws(monkeypatch)
_login(monkeypatch, "anthropic")
_config(monkeypatch, {"default": "claude-x"}) # populated, no provider
with caplog.at_level(logging.WARNING, logger="hermes_cli.auth"):
assert resolve_provider("auto") == "anthropic"
assert any("no `provider` key" in r.message for r in caplog.records)
def test_warns_when_env_key_preempts_oauth(self, monkeypatch, caplog):
"""When an exported API key preempts a logged-in OAuth provider, a WARN
makes the silent routing switch visible (#29285)."""
import logging
_clear_provider_env(monkeypatch)
_no_aws(monkeypatch)
_login(monkeypatch, "anthropic") # OAuth into anthropic
_config(monkeypatch, {})
monkeypatch.setenv("GLM_API_KEY", "test-glm-key") # unrelated key present
with caplog.at_level(logging.WARNING, logger="hermes_cli.auth"):
assert resolve_provider("auto") == "zai"
assert any("preempting your" in r.message for r in caplog.records)
def test_openrouter_pool_beats_stale_oauth(self, monkeypatch):
"""An OpenRouter credential-pool entry (no env var) wins over a logged-in
OAuth provider the pool rung sits above OAuth (#42130 + #29285)."""
_clear_provider_env(monkeypatch)
_no_aws(monkeypatch)
_login(monkeypatch, "anthropic")
_config(monkeypatch, {})
class _Pool:
def has_credentials(self):
return True
monkeypatch.setattr("agent.credential_pool.load_pool", lambda name: _Pool())
assert resolve_provider("auto") == "openrouter"