diff --git a/hermes_cli/model_switch.py b/hermes_cli/model_switch.py index 252d933b1..5864ec5d8 100644 --- a/hermes_cli/model_switch.py +++ b/hermes_cli/model_switch.py @@ -771,7 +771,10 @@ def switch_model( if provider_changed or explicit_provider: try: - runtime = resolve_runtime_provider(requested=target_provider) + runtime = resolve_runtime_provider( + requested=target_provider, + target_model=new_model, + ) api_key = runtime.get("api_key", "") base_url = runtime.get("base_url", "") api_mode = runtime.get("api_mode", "") @@ -788,7 +791,10 @@ def switch_model( ) else: try: - runtime = resolve_runtime_provider(requested=current_provider) + runtime = resolve_runtime_provider( + requested=current_provider, + target_model=new_model, + ) api_key = runtime.get("api_key", "") base_url = runtime.get("base_url", "") api_mode = runtime.get("api_mode", "") diff --git a/hermes_cli/runtime_provider.py b/hermes_cli/runtime_provider.py index ebdbfe605..cbfcbdbd6 100644 --- a/hermes_cli/runtime_provider.py +++ b/hermes_cli/runtime_provider.py @@ -183,8 +183,16 @@ def _resolve_runtime_from_pool_entry( requested_provider: str, model_cfg: Optional[Dict[str, Any]] = None, pool: Optional[CredentialPool] = None, + target_model: Optional[str] = None, ) -> Dict[str, Any]: model_cfg = model_cfg or _get_model_config() + # When the caller is resolving for a specific target model (e.g. a /model + # mid-session switch), prefer that over the persisted model.default. This + # prevents api_mode being computed from a stale config default that no + # longer matches the model actually being used — the bug that caused + # opencode-zen /v1 to be stripped for chat_completions requests when + # config.default was still a Claude model. 
+ effective_model = (target_model or model_cfg.get("default") or "") base_url = (getattr(entry, "runtime_base_url", None) or getattr(entry, "base_url", None) or "").rstrip("/") api_key = getattr(entry, "runtime_api_key", None) or getattr(entry, "access_token", "") api_mode = "chat_completions" @@ -230,7 +238,7 @@ def _resolve_runtime_from_pool_entry( api_mode = configured_mode elif provider in ("opencode-zen", "opencode-go"): from hermes_cli.models import opencode_model_api_mode - api_mode = opencode_model_api_mode(provider, model_cfg.get("default", "")) + api_mode = opencode_model_api_mode(provider, effective_model) else: # Auto-detect Anthropic-compatible endpoints (/anthropic suffix, # Kimi /coding, api.openai.com → codex_responses, api.x.ai → @@ -724,8 +732,18 @@ def resolve_runtime_provider( requested: Optional[str] = None, explicit_api_key: Optional[str] = None, explicit_base_url: Optional[str] = None, + target_model: Optional[str] = None, ) -> Dict[str, Any]: - """Resolve runtime provider credentials for agent execution.""" + """Resolve runtime provider credentials for agent execution. + + target_model: Optional override for model_cfg.get("default") when + computing provider-specific api_mode (e.g. OpenCode Zen/Go where different + models route through different API surfaces). Callers performing an + explicit mid-session model switch should pass the new model here so + api_mode is derived from the model they are switching TO, not the stale + persisted default. Other callers can leave it None to preserve existing + behavior (api_mode derived from config). 
+ """ requested_provider = resolve_requested_provider(requested) custom_runtime = _resolve_named_custom_runtime( @@ -807,6 +825,7 @@ def resolve_runtime_provider( requested_provider=requested_provider, model_cfg=model_cfg, pool=pool, + target_model=target_model, ) if provider == "nous": @@ -1025,7 +1044,11 @@ def resolve_runtime_provider( api_mode = configured_mode elif provider in ("opencode-zen", "opencode-go"): from hermes_cli.models import opencode_model_api_mode - api_mode = opencode_model_api_mode(provider, model_cfg.get("default", "")) + # Prefer the target_model from the caller (explicit mid-session + # switch) over the stale model.default, coercing a missing or null + # default to "" exactly as _resolve_runtime_from_pool_entry does. + _effective = target_model or model_cfg.get("default") or "" + api_mode = opencode_model_api_mode(provider, _effective) else: # Auto-detect Anthropic-compatible endpoints by URL convention # (e.g. https://api.minimax.io/anthropic, https://dashscope.../anthropic) diff --git a/tests/hermes_cli/test_model_switch_custom_providers.py b/tests/hermes_cli/test_model_switch_custom_providers.py index 7fc92136a..2899172ed 100644 --- a/tests/hermes_cli/test_model_switch_custom_providers.py +++ b/tests/hermes_cli/test_model_switch_custom_providers.py @@ -69,7 +69,7 @@ def test_switch_model_accepts_explicit_named_custom_provider(monkeypatch): """Shared /model switch pipeline should accept --provider for custom_providers.""" monkeypatch.setattr( "hermes_cli.runtime_provider.resolve_runtime_provider", - lambda requested: { + lambda **kwargs: { "api_key": "no-key-required", "base_url": "http://127.0.0.1:4141/v1", "api_mode": "chat_completions", diff --git a/tests/hermes_cli/test_model_switch_opencode_anthropic.py b/tests/hermes_cli/test_model_switch_opencode_anthropic.py index ae56dce23..f5b564c23 100644 --- a/tests/hermes_cli/test_model_switch_opencode_anthropic.py +++ b/tests/hermes_cli/test_model_switch_opencode_anthropic.py @@ -250,3 +250,126 @@ class 
TestAgentSwitchModelDefenseInDepth: f"agent.switch_model did not strip /v1; passed {captured.get('base_url')} " "to build_anthropic_client" ) + + + +class TestStaleConfigDefaultDoesNotWedgeResolver: + """Regression for the real bug Quentin hit. + + When ``model.default`` in config.yaml is an OpenCode Anthropic-routed model + (e.g. ``claude-sonnet-4-6`` on opencode-zen) and the user does ``/model + kimi-k2.6 --provider opencode-zen`` session-only, the resolver must derive + api_mode from the model being requested, not the persisted default. The + earlier bug computed api_mode from ``model_cfg.get("default")``, flipped it + to ``anthropic_messages`` based on the stale Claude default, and stripped + ``/v1``. The chat_completions override in switch_model() fixed api_mode but + never re-added ``/v1``, so requests landed on ``https://opencode.ai/zen`` + and got OpenCode's website 404 HTML page. + + These tests use the REAL ``resolve_runtime_provider`` (not a mock) so a + regression in the target_model plumbing surfaces immediately. + """ + + def test_kimi_switch_keeps_v1_despite_claude_config_default(self, tmp_path, monkeypatch): + import yaml + import importlib + + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + monkeypatch.setenv("OPENCODE_ZEN_API_KEY", "test-key") + (tmp_path / "config.yaml").write_text(yaml.safe_dump({ + "model": {"provider": "opencode-zen", "default": "claude-sonnet-4-6"}, + })) + + # Re-import with the new HERMES_HOME so config cache is fresh. 
+ import hermes_cli.config as _cfg_mod + importlib.reload(_cfg_mod) + import hermes_cli.runtime_provider as _rp_mod + importlib.reload(_rp_mod) + import hermes_cli.model_switch as _ms_mod + importlib.reload(_ms_mod) + + result = _ms_mod.switch_model( + raw_input="kimi-k2.6", + current_provider="opencode-zen", + current_model="claude-sonnet-4-6", + current_base_url="https://opencode.ai/zen", # stripped from prior claude turn + current_api_key="test-key", + is_global=False, + explicit_provider="opencode-zen", + ) + + assert result.success, f"switch failed: {result.error_message}" + assert result.base_url == "https://opencode.ai/zen/v1", ( + f"base_url wedged at {result.base_url!r} - stale Claude config.default " + "caused api_mode to be computed as anthropic_messages, stripping /v1, " + "and chat_completions override never re-added it." + ) + assert result.api_mode == "chat_completions" + + def test_go_glm_switch_keeps_v1_despite_minimax_config_default(self, tmp_path, monkeypatch): + import yaml + import importlib + + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + monkeypatch.setenv("OPENCODE_GO_API_KEY", "test-key") + monkeypatch.delenv("OPENCODE_ZEN_API_KEY", raising=False) + (tmp_path / "config.yaml").write_text(yaml.safe_dump({ + "model": {"provider": "opencode-go", "default": "minimax-m2.7"}, + })) + + import hermes_cli.config as _cfg_mod + importlib.reload(_cfg_mod) + import hermes_cli.runtime_provider as _rp_mod + importlib.reload(_rp_mod) + import hermes_cli.model_switch as _ms_mod + importlib.reload(_ms_mod) + + result = _ms_mod.switch_model( + raw_input="glm-5.1", + current_provider="opencode-go", + current_model="minimax-m2.7", + current_base_url="https://opencode.ai/zen/go", # stripped from prior minimax turn + current_api_key="test-key", + is_global=False, + explicit_provider="opencode-go", + ) + + assert result.success, f"switch failed: {result.error_message}" + assert result.base_url == "https://opencode.ai/zen/go/v1" + assert result.api_mode == 
"chat_completions" + + def test_claude_switch_still_strips_v1_with_kimi_config_default(self, tmp_path, monkeypatch): + """Inverse case: config default is chat_completions, switch TO anthropic_messages. + + Guards that the target_model plumbing does not break the original + strip-for-anthropic behavior. + """ + import yaml + import importlib + + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + monkeypatch.setenv("OPENCODE_ZEN_API_KEY", "test-key") + (tmp_path / "config.yaml").write_text(yaml.safe_dump({ + "model": {"provider": "opencode-zen", "default": "kimi-k2.6"}, + })) + + import hermes_cli.config as _cfg_mod + importlib.reload(_cfg_mod) + import hermes_cli.runtime_provider as _rp_mod + importlib.reload(_rp_mod) + import hermes_cli.model_switch as _ms_mod + importlib.reload(_ms_mod) + + result = _ms_mod.switch_model( + raw_input="claude-sonnet-4-6", + current_provider="opencode-zen", + current_model="kimi-k2.6", + current_base_url="https://opencode.ai/zen/v1", + current_api_key="test-key", + is_global=False, + explicit_provider="opencode-zen", + ) + + assert result.success, f"switch failed: {result.error_message}" + assert result.base_url == "https://opencode.ai/zen" + assert result.api_mode == "anthropic_messages" diff --git a/tests/hermes_cli/test_ollama_cloud_auth.py b/tests/hermes_cli/test_ollama_cloud_auth.py index 7a5dbf6ae..760832523 100644 --- a/tests/hermes_cli/test_ollama_cloud_auth.py +++ b/tests/hermes_cli/test_ollama_cloud_auth.py @@ -518,7 +518,7 @@ class TestSwitchModelDirectAliasOverride: monkeypatch.setattr( "hermes_cli.runtime_provider.resolve_runtime_provider", - lambda requested: {"api_key": "", "base_url": "", "api_mode": "openai_compat", "provider": "custom"}, + lambda **kwargs: {"api_key": "", "base_url": "", "api_mode": "openai_compat", "provider": "custom"}, ) monkeypatch.setattr("hermes_cli.models.validate_requested_model", @@ -544,7 +544,7 @@ class TestSwitchModelDirectAliasOverride: lambda raw, prov: ("custom", "local-model", 
"local")) monkeypatch.setattr( "hermes_cli.runtime_provider.resolve_runtime_provider", - lambda requested: {"api_key": "", "base_url": "", "api_mode": "openai_compat", "provider": "custom"}, + lambda **kwargs: {"api_key": "", "base_url": "", "api_mode": "openai_compat", "provider": "custom"}, ) monkeypatch.setattr("hermes_cli.models.validate_requested_model", lambda *a, **kw: {"accepted": True, "persist": True, "recognized": True, "message": None})