Fix custom provider identity loss in session persistence

_runtime_model_config persisted the live agent's RESOLVED provider into
the session row's model_config JSON. For any named providers:/
custom_providers: entry, agent.provider is the literal string "custom",
so the entry name was lost (and the api_key is deliberately never
persisted). On session.resume or _reset_session_agent the stored
provider="custom" fed resolve_runtime_provider(requested="custom"),
which cannot match a named entry — the rebuild either raised "No LLM
provider configured" or silently resolved placeholder credentials
against the patched-back base_url.

Persist the REQUESTED/entry identity instead: a new reverse lookup
find_custom_provider_identity(base_url) maps the endpoint URL back to
the canonical custom:<name> menu key. _runtime_model_config stores that
key; _make_agent performs the same recovery for rows persisted before
the fix, falling back to passing the stored base_url as
explicit_base_url so the direct-alias branch still targets the
session's endpoint when no entry matches.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
Adalsteinn Helgason 2026-06-11 07:47:27 +00:00 committed by Teknium
parent e256f4aae4
commit 643dc82793
4 changed files with 392 additions and 0 deletions

View file

@ -660,6 +660,61 @@ def has_named_custom_provider(requested_provider: str) -> bool:
return False
def find_custom_provider_identity(base_url: str) -> Optional[str]:
"""Map an endpoint URL back to its canonical ``custom:<name>`` menu key.
Returns the ``custom:<normalized-name>`` slug of the first ``providers:``
/ ``custom_providers:`` entry whose base_url matches, or ``None`` when no
entry owns the URL.
Session persistence stores the agent's *resolved* provider, and for every
named custom endpoint that is the literal string ``"custom"`` the entry
name is lost, and the api_key is deliberately never persisted. The
endpoint URL is the one durable fact that survives the round-trip, so
this reverse lookup lets persist/rebuild paths recover the entry identity
(and with it key_env/api_key/api_mode resolution via
:func:`_get_named_custom_provider`) instead of failing with
``auth_unavailable`` or silently rebuilding with placeholder credentials.
"""
target = _normalize_base_url_for_match(base_url)
if not target:
return None
try:
config = load_config()
except Exception:
return None
providers = config.get("providers")
if isinstance(providers, dict):
for ep_name, entry in providers.items():
if not isinstance(entry, dict):
continue
entry_url = (
entry.get("api") or entry.get("url") or entry.get("base_url") or ""
)
if _normalize_base_url_for_match(entry_url) == target:
return f"custom:{_normalize_custom_provider_name(str(ep_name))}"
try:
custom_providers = get_compatible_custom_providers(config)
except Exception:
custom_providers = None
for entry in custom_providers or []:
if not isinstance(entry, dict):
continue
name = entry.get("name")
if not isinstance(name, str) or not name.strip():
continue
if _normalize_base_url_for_match(entry.get("base_url")) == target:
return f"custom:{_normalize_custom_provider_name(name)}"
return None
def _normalize_base_url_for_match(value) -> str:
return str(value or "").strip().rstrip("/").lower()
def _custom_provider_request_overrides(custom_provider: Dict[str, Any]) -> Dict[str, Any]:
extra_body = custom_provider.get("extra_body")
if not isinstance(extra_body, dict) or not extra_body:

View file

@ -0,0 +1,99 @@
"""Unit tests for find_custom_provider_identity (base_url → custom:<name>).
Reverse lookup used by tui_gateway session persistence to recover a named
``providers:`` / ``custom_providers:`` entry from the only durable fact the
session row keeps once the provider has been resolved to the literal string
"custom": the endpoint URL. See
tests/tui_gateway/test_custom_provider_session_persistence.py for the
end-to-end persist/resume round-trip.
"""
import hermes_cli.runtime_provider as rp
def test_matches_legacy_custom_providers_list(monkeypatch):
monkeypatch.setattr(
rp,
"load_config",
lambda: {
"custom_providers": [
{"name": "MiMo v2.5 Pro", "base_url": "https://api.mimo.example/v1"}
]
},
)
assert (
rp.find_custom_provider_identity("https://api.mimo.example/v1")
== "custom:mimo-v2.5-pro"
)
def test_matches_providers_dict_by_key(monkeypatch):
monkeypatch.setattr(
rp,
"load_config",
lambda: {"providers": {"local": {"api": "http://127.0.0.1:8000/v1"}}},
)
assert (
rp.find_custom_provider_identity("http://127.0.0.1:8000/v1")
== "custom:local"
)
def test_match_ignores_trailing_slash_and_case(monkeypatch):
monkeypatch.setattr(
rp,
"load_config",
lambda: {
"custom_providers": [
{"name": "local", "base_url": "http://Localhost:8000/v1/"}
]
},
)
assert (
rp.find_custom_provider_identity("http://localhost:8000/v1")
== "custom:local"
)
def test_no_match_returns_none(monkeypatch):
monkeypatch.setattr(
rp,
"load_config",
lambda: {
"custom_providers": [
{"name": "other", "base_url": "https://elsewhere.example/v1"}
]
},
)
assert rp.find_custom_provider_identity("https://api.mimo.example/v1") is None
def test_empty_base_url_returns_none(monkeypatch):
monkeypatch.setattr(
rp, "load_config", lambda: {"custom_providers": [{"name": "x"}]}
)
assert rp.find_custom_provider_identity("") is None
assert rp.find_custom_provider_identity(None) is None
def test_identity_resolves_back_through_named_lookup(monkeypatch):
"""The returned slug must be accepted by _get_named_custom_provider —
that is the whole point of persisting it."""
config = {
"custom_providers": [
{
"name": "mimo-v2.5-pro",
"base_url": "https://api.mimo.example/v1",
"api_key": "sk-entry",
}
]
}
monkeypatch.setattr(rp, "load_config", lambda: config)
slug = rp.find_custom_provider_identity("https://api.mimo.example/v1")
assert slug == "custom:mimo-v2.5-pro"
entry = rp._get_named_custom_provider(slug)
assert entry is not None
assert entry["base_url"] == "https://api.mimo.example/v1"
assert entry["api_key"] == "sk-entry"

View file

@ -0,0 +1,198 @@
"""Session persistence must not strip a custom provider's identity.
``_runtime_model_config`` persists the live agent's RESOLVED provider into
the session row's ``model_config`` JSON. For any named ``providers:`` /
``custom_providers:`` entry (e.g. one called "mimo-v2.5-pro"),
``agent.provider`` is the literal string "custom", so the entry name was
lost and the api_key is deliberately never persisted. On ``session.resume``
or ``_reset_session_agent``, ``_stored_session_runtime_overrides`` fed
provider="custom" back into ``_make_agent``
``resolve_runtime_provider(requested="custom")``, which cannot match an entry
named "mimo-v2.5-pro". Depending on config the rebuild either raised
"No LLM provider configured. Run `hermes model`..." (resume failed) or
silently resolved placeholder credentials ("no-key-required") against the
patched-back base_url.
Fix: persist the REQUESTED/entry identity ``_runtime_model_config`` maps
the agent's base_url back to the canonical ``custom:<name>`` menu key via
``find_custom_provider_identity``; ``_make_agent`` performs the same
recovery for rows persisted before the fix (and falls back to handing the
stored base_url to the direct-alias branch when no entry matches).
Related investigation: GH #44070 / PR #44099 (credential-pool base_url
pinning); same family of resolved-vs-requested identity loss.
"""
import json
import types
from unittest.mock import MagicMock, patch
import hermes_cli.runtime_provider as rp
MIMO_URL = "https://token-plan-cn.xiaomimimo.com/v1"
MIMO_KEY = "sk-mimo-entry-key"
LEGACY_LIST_CONFIG = {
"custom_providers": [
{
"name": "mimo-v2.5-pro",
"base_url": MIMO_URL,
"api_key": MIMO_KEY,
"api_mode": "chat_completions",
}
]
}
PROVIDERS_DICT_CONFIG = {
"providers": {
"mimo-v2.5-pro": {
"api": MIMO_URL,
"api_key": MIMO_KEY,
}
}
}
def _custom_agent(base_url=MIMO_URL):
return types.SimpleNamespace(
model="mimo-v2.5-pro",
provider="custom",
base_url=base_url,
api_mode="chat_completions",
reasoning_config=None,
service_tier=None,
)
class TestRuntimeModelConfigPersistsEntryIdentity:
def test_persists_menu_key_instead_of_resolved_custom(self, monkeypatch):
monkeypatch.setattr(rp, "load_config", lambda: LEGACY_LIST_CONFIG)
from tui_gateway.server import _runtime_model_config
config = _runtime_model_config(_custom_agent())
assert config["provider"] == "custom:mimo-v2.5-pro"
assert config["base_url"] == MIMO_URL
# Credentials must keep coming from config/provider resolution,
# never from the session DB.
assert "api_key" not in config
def test_persists_menu_key_for_providers_dict_entry(self, monkeypatch):
monkeypatch.setattr(rp, "load_config", lambda: PROVIDERS_DICT_CONFIG)
from tui_gateway.server import _runtime_model_config
config = _runtime_model_config(_custom_agent())
assert config["provider"] == "custom:mimo-v2.5-pro"
def test_keeps_bare_custom_when_no_entry_matches(self, monkeypatch):
monkeypatch.setattr(rp, "load_config", lambda: {})
from tui_gateway.server import _runtime_model_config
config = _runtime_model_config(_custom_agent())
assert config["provider"] == "custom"
def test_non_custom_provider_untouched(self, monkeypatch):
def _boom():
raise AssertionError("identity lookup must not run for built-ins")
monkeypatch.setattr(rp, "load_config", _boom)
from tui_gateway.server import _runtime_model_config
agent = _custom_agent()
agent.provider = "anthropic"
agent.base_url = "https://api.anthropic.com"
assert _runtime_model_config(agent)["provider"] == "anthropic"
def _make_agent_with_override(override, monkeypatch, config):
"""Run _make_agent through the REAL resolve_runtime_provider against a
patched config, returning the kwargs AIAgent was constructed with."""
monkeypatch.setattr(rp, "load_config", lambda: config)
monkeypatch.setattr(rp, "_get_model_config", lambda: {})
# Keep credential-pool resolution off the developer's real HERMES home.
monkeypatch.setattr(rp, "_try_resolve_from_custom_pool", lambda *a, **k: None)
fake_cfg = {"agent": {"system_prompt": ""}, "model": {"default": "unused"}}
with (
patch("tui_gateway.server._load_cfg", return_value=fake_cfg),
patch("tui_gateway.server._get_db", return_value=MagicMock()),
patch("tui_gateway.server._load_reasoning_config", return_value=None),
patch("tui_gateway.server._load_service_tier", return_value=None),
patch("tui_gateway.server._load_enabled_toolsets", return_value=None),
patch("run_agent.AIAgent") as mock_agent,
):
from tui_gateway.server import _make_agent
_make_agent("sid-custom", "key-custom", model_override=override)
return mock_agent.call_args.kwargs
class TestResumeRoundTrip:
def test_round_trip_restores_entry_credentials(self, monkeypatch):
"""persist → stored-overrides → _make_agent resolves the entry's
api_key again (the exact path that raised "No LLM provider
configured" before the fix)."""
monkeypatch.setattr(rp, "load_config", lambda: LEGACY_LIST_CONFIG)
from tui_gateway.server import (
_runtime_model_config,
_stored_session_runtime_overrides,
)
model_config = _runtime_model_config(_custom_agent())
row = {
"model": "mimo-v2.5-pro",
"model_config": json.dumps(model_config),
}
overrides = _stored_session_runtime_overrides(row)
assert overrides["model_override"]["provider"] == "custom:mimo-v2.5-pro"
kwargs = _make_agent_with_override(
overrides["model_override"], monkeypatch, LEGACY_LIST_CONFIG
)
assert kwargs["provider"] == "custom"
assert kwargs["base_url"] == MIMO_URL
assert kwargs["api_key"] == MIMO_KEY
def test_legacy_row_with_bare_custom_heals_via_base_url(self, monkeypatch):
"""Rows persisted BEFORE the fix stored provider="custom"; the
rebuild must recover the entry identity from the stored base_url."""
override = {
"model": "mimo-v2.5-pro",
"provider": "custom",
"base_url": MIMO_URL,
"api_mode": "chat_completions",
}
kwargs = _make_agent_with_override(override, monkeypatch, LEGACY_LIST_CONFIG)
assert kwargs["base_url"] == MIMO_URL
assert kwargs["api_key"] == MIMO_KEY
def test_legacy_row_without_matching_entry_keeps_endpoint(self, monkeypatch):
"""No config entry owns the stored URL: the direct-alias branch must
still receive the base_url so resolution targets the session's
endpoint instead of raising auth_unavailable."""
monkeypatch.delenv("OPENAI_API_KEY", raising=False)
monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)
override = {
"model": "local-model",
"provider": "custom",
"base_url": "http://127.0.0.1:8000/v1",
"api_mode": "chat_completions",
}
kwargs = _make_agent_with_override(override, monkeypatch, {})
assert kwargs["provider"] == "custom"
assert kwargs["base_url"] == "http://127.0.0.1:8000/v1"
assert kwargs["api_key"] == "no-key-required"

View file

@ -1559,6 +1559,25 @@ def _runtime_model_config(agent, existing: dict | None = None) -> dict:
if model:
config["model"] = model
if provider:
if provider == "custom" and base_url:
# ``agent.provider`` is the RESOLVED provider, and for any named
# ``providers:`` / ``custom_providers:`` entry that is the literal
# string "custom" — persisting it loses the entry identity, so a
# later resume/rebuild cannot re-resolve the entry's credentials
# (the api_key is deliberately never persisted; see
# _stored_session_runtime_overrides). Recover the canonical
# ``custom:<name>`` menu key from the endpoint URL so
# resolve_runtime_provider() can find the entry again.
try:
from hermes_cli.runtime_provider import (
find_custom_provider_identity,
)
provider = find_custom_provider_identity(base_url) or provider
except Exception:
logger.debug(
"custom provider identity lookup failed", exc_info=True
)
config["provider"] = provider
if base_url:
config["base_url"] = base_url
@ -3310,9 +3329,30 @@ def _make_agent(
override_base_url = model_override.get("base_url")
override_api_key = model_override.get("api_key")
override_api_mode = model_override.get("api_mode")
resolve_kwargs = {}
if (
override_base_url
and str(requested_provider or "").strip().lower() == "custom"
):
# Session rows persisted before the custom-provider identity fix
# (see _runtime_model_config) stored the resolved provider
# "custom", which _get_named_custom_provider cannot match back to
# a named ``providers:`` / ``custom_providers:`` entry — the
# rebuild then either raised auth_unavailable or silently
# resolved placeholder credentials against the patched-back
# base_url. Recover the entry identity from the persisted
# base_url; failing that, hand the base_url to the direct-alias
# branch so pool/env credentials can still be resolved for it.
from hermes_cli.runtime_provider import find_custom_provider_identity
recovered = find_custom_provider_identity(override_base_url)
if recovered:
requested_provider = recovered
resolve_kwargs["explicit_base_url"] = override_base_url
runtime = resolve_runtime_provider(
requested=requested_provider,
target_model=model or None,
**resolve_kwargs,
)
# The switch already resolved concrete credentials/endpoint; honor them
# so a custom/named endpoint survives the rebuild even if global