diff --git a/agent/anthropic_adapter.py b/agent/anthropic_adapter.py index f4e8dcee65..3292f0c667 100644 --- a/agent/anthropic_adapter.py +++ b/agent/anthropic_adapter.py @@ -1224,9 +1224,9 @@ def build_anthropic_kwargs( # Map reasoning_config to Anthropic's thinking parameter. # Claude 4.6 models use adaptive thinking + output_config.effort. # Older models use manual thinking with budget_tokens. - # Haiku models do NOT support extended thinking at all — skip entirely. + # Haiku and MiniMax models do NOT support extended thinking — skip entirely. if reasoning_config and isinstance(reasoning_config, dict): - if reasoning_config.get("enabled") is not False and "haiku" not in model.lower(): + if reasoning_config.get("enabled") is not False and "haiku" not in model.lower() and "minimax" not in model.lower(): effort = str(reasoning_config.get("effort", "medium")).lower() budget = THINKING_BUDGET.get(effort, 8000) if _supports_adaptive_thinking(model): diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index c739183359..2b99ac0708 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -99,8 +99,8 @@ _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = { "gemini": "gemini-3-flash-preview", "zai": "glm-4.5-flash", "kimi-coding": "kimi-k2-turbo-preview", - "minimax": "MiniMax-M2.7-highspeed", - "minimax-cn": "MiniMax-M2.7-highspeed", + "minimax": "MiniMax-M2.7", + "minimax-cn": "MiniMax-M2.7", "anthropic": "claude-haiku-4-5-20251001", "ai-gateway": "google/gemini-3-flash", "opencode-zen": "gemini-3-flash", diff --git a/agent/model_metadata.py b/agent/model_metadata.py index a5fb11f503..0a22711865 100644 --- a/agent/model_metadata.py +++ b/agent/model_metadata.py @@ -113,8 +113,15 @@ DEFAULT_CONTEXT_LENGTHS = { "llama": 131072, # Qwen "qwen": 131072, - # MiniMax - "minimax": 204800, + # MiniMax (lowercase — lookup lowercases model names at line 973) + "minimax-m1-256k": 1000000, + "minimax-m1-128k": 1000000, + "minimax-m1-80k": 1000000, + 
"minimax-m1-40k": 1000000, + "minimax-m1": 1000000, + "minimax-m2.5": 1048576, + "minimax-m2.7": 1048576, + "minimax": 1048576, # GLM "glm": 202752, # Kimi @@ -127,7 +134,7 @@ DEFAULT_CONTEXT_LENGTHS = { "deepseek-ai/DeepSeek-V3.2": 65536, "moonshotai/Kimi-K2.5": 262144, "moonshotai/Kimi-K2-Thinking": 262144, - "MiniMaxAI/MiniMax-M2.5": 204800, + "minimaxai/minimax-m2.5": 1048576, "XiaomiMiMo/MiMo-V2-Flash": 32768, "mimo-v2-pro": 1048576, "mimo-v2-omni": 1048576, diff --git a/hermes_cli/models.py b/hermes_cli/models.py index 4b37bc9e73..aa68f877d9 100644 --- a/hermes_cli/models.py +++ b/hermes_cli/models.py @@ -144,18 +144,22 @@ _PROVIDER_MODELS: dict[str, list[str]] = { "kimi-k2-0905-preview", ], "minimax": [ - "MiniMax-M2.7", - "MiniMax-M2.7-highspeed", + "MiniMax-M1", + "MiniMax-M1-40k", + "MiniMax-M1-80k", + "MiniMax-M1-128k", + "MiniMax-M1-256k", "MiniMax-M2.5", - "MiniMax-M2.5-highspeed", - "MiniMax-M2.1", + "MiniMax-M2.7", ], "minimax-cn": [ - "MiniMax-M2.7", - "MiniMax-M2.7-highspeed", + "MiniMax-M1", + "MiniMax-M1-40k", + "MiniMax-M1-80k", + "MiniMax-M1-128k", + "MiniMax-M1-256k", "MiniMax-M2.5", - "MiniMax-M2.5-highspeed", - "MiniMax-M2.1", + "MiniMax-M2.7", ], "anthropic": [ "claude-opus-4-6", diff --git a/hermes_cli/runtime_provider.py b/hermes_cli/runtime_provider.py index 9c82ef62af..fa9d493980 100644 --- a/hermes_cli/runtime_provider.py +++ b/hermes_cli/runtime_provider.py @@ -163,6 +163,16 @@ def _resolve_runtime_from_pool_entry( api_mode = _copilot_runtime_api_mode(model_cfg, getattr(entry, "runtime_api_key", "")) else: configured_provider = str(model_cfg.get("provider") or "").strip().lower() + # Honour model.base_url from config.yaml when the configured provider + # matches this provider — same pattern as the Anthropic branch above. + # Only override when the pool entry has no explicit base_url (i.e. it + # fell back to the hardcoded default). Env var overrides win (#6039). 
+ pconfig = PROVIDER_REGISTRY.get(provider) + pool_url_is_default = pconfig and base_url.rstrip("/") == pconfig.inference_base_url.rstrip("/") + if configured_provider == provider and pool_url_is_default: + cfg_base_url = str(model_cfg.get("base_url") or "").strip().rstrip("/") + if cfg_base_url: + base_url = cfg_base_url configured_mode = _parse_api_mode(model_cfg.get("api_mode")) if configured_mode and _provider_supports_explicit_api_mode(provider, configured_provider): api_mode = configured_mode @@ -724,7 +734,15 @@ def resolve_runtime_provider( pconfig = PROVIDER_REGISTRY.get(provider) if pconfig and pconfig.auth_type == "api_key": creds = resolve_api_key_provider_credentials(provider) - base_url = creds.get("base_url", "").rstrip("/") + # Honour model.base_url from config.yaml when the configured provider + # matches this provider — mirrors the Anthropic path above. Without + # this, users who set model.base_url to e.g. api.minimaxi.com/anthropic + # (China endpoint) still get the hardcoded api.minimax.io default (#6039). 
+        cfg_provider = str(model_cfg.get("provider") or "").strip().lower()
+        cfg_base_url = str(model_cfg.get("base_url") or "").strip().rstrip("/") if cfg_provider == provider else ""
+        creds_base_url = creds.get("base_url", "").rstrip("/")
+        url_is_default = creds_base_url == pconfig.inference_base_url.rstrip("/")
+        base_url = cfg_base_url if (cfg_base_url and url_is_default) else creds_base_url
         api_mode = "chat_completions"
         if provider == "copilot":
             api_mode = _copilot_runtime_api_mode(model_cfg, creds.get("api_key", ""))
diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py
index 2407ca275d..43c3b086d9 100644
--- a/hermes_cli/setup.py
+++ b/hermes_cli/setup.py
@@ -105,8 +105,8 @@ _DEFAULT_PROVIDER_MODELS = {
     ],
     "zai": ["glm-5", "glm-4.7", "glm-4.5", "glm-4.5-flash"],
     "kimi-coding": ["kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview"],
-    "minimax": ["MiniMax-M2.7", "MiniMax-M2.7-highspeed", "MiniMax-M2.5", "MiniMax-M2.5-highspeed", "MiniMax-M2.1"],
-    "minimax-cn": ["MiniMax-M2.7", "MiniMax-M2.7-highspeed", "MiniMax-M2.5", "MiniMax-M2.5-highspeed", "MiniMax-M2.1"],
+    "minimax": ["MiniMax-M1", "MiniMax-M1-40k", "MiniMax-M1-80k", "MiniMax-M1-128k", "MiniMax-M1-256k", "MiniMax-M2.5", "MiniMax-M2.7"],
+    "minimax-cn": ["MiniMax-M1", "MiniMax-M1-40k", "MiniMax-M1-80k", "MiniMax-M1-128k", "MiniMax-M1-256k", "MiniMax-M2.5", "MiniMax-M2.7"],
     "ai-gateway": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5", "google/gemini-3-flash"],
     "kilocode": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5.4", "google/gemini-3-pro-preview", "google/gemini-3-flash-preview"],
     "opencode-zen": ["gpt-5.4", "gpt-5.3-codex", "claude-sonnet-4-6", "gemini-3-flash", "glm-5", "kimi-k2.5", "minimax-m2.7"],
diff --git a/tests/agent/test_minimax_provider.py b/tests/agent/test_minimax_provider.py
new file mode 100644
index 0000000000..c6819e877d
--- /dev/null
+++ b/tests/agent/test_minimax_provider.py
@@ -0,0 +1,105 @@
+"""Tests for MiniMax provider hardening — context lengths, thinking guard, catalog."""
+
+
+class TestMinimaxContextLengths:
+    """Verify per-model context length
entries for MiniMax models.""" + + def test_m1_variants_have_1m_context(self): + from agent.model_metadata import DEFAULT_CONTEXT_LENGTHS + # Keys are lowercase because the lookup lowercases model names + for model in ("minimax-m1", "minimax-m1-40k", "minimax-m1-80k", + "minimax-m1-128k", "minimax-m1-256k"): + assert model in DEFAULT_CONTEXT_LENGTHS, f"{model} missing from context lengths" + assert DEFAULT_CONTEXT_LENGTHS[model] == 1_000_000, f"{model} expected 1M" + + def test_m2_variants_have_1m_context(self): + from agent.model_metadata import DEFAULT_CONTEXT_LENGTHS + # Keys are lowercase because the lookup lowercases model names + for model in ("minimax-m2.5", "minimax-m2.7"): + assert model in DEFAULT_CONTEXT_LENGTHS, f"{model} missing from context lengths" + assert DEFAULT_CONTEXT_LENGTHS[model] == 1_048_576, f"{model} expected 1048576" + + def test_minimax_prefix_fallback(self): + from agent.model_metadata import DEFAULT_CONTEXT_LENGTHS + # The generic "minimax" prefix entry should be 1M for unknown models + assert DEFAULT_CONTEXT_LENGTHS["minimax"] == 1_048_576 + + + +class TestMinimaxThinkingGuard: + """Verify that build_anthropic_kwargs does NOT add thinking params for MiniMax models.""" + + def test_no_thinking_for_minimax_m27(self): + from agent.anthropic_adapter import build_anthropic_kwargs + kwargs = build_anthropic_kwargs( + model="MiniMax-M2.7", + messages=[{"role": "user", "content": "hello"}], + tools=None, + max_tokens=4096, + reasoning_config={"enabled": True, "effort": "medium"}, + ) + assert "thinking" not in kwargs + assert "output_config" not in kwargs + + def test_no_thinking_for_minimax_m1(self): + from agent.anthropic_adapter import build_anthropic_kwargs + kwargs = build_anthropic_kwargs( + model="MiniMax-M1-128k", + messages=[{"role": "user", "content": "hello"}], + tools=None, + max_tokens=4096, + reasoning_config={"enabled": True, "effort": "high"}, + ) + assert "thinking" not in kwargs + + def 
test_thinking_still_works_for_claude(self): + from agent.anthropic_adapter import build_anthropic_kwargs + kwargs = build_anthropic_kwargs( + model="claude-sonnet-4-20250514", + messages=[{"role": "user", "content": "hello"}], + tools=None, + max_tokens=4096, + reasoning_config={"enabled": True, "effort": "medium"}, + ) + assert "thinking" in kwargs + + +class TestMinimaxAuxModel: + """Verify auxiliary model is standard (not highspeed).""" + + def test_minimax_aux_is_standard(self): + from agent.auxiliary_client import _API_KEY_PROVIDER_AUX_MODELS + assert _API_KEY_PROVIDER_AUX_MODELS["minimax"] == "MiniMax-M2.7" + assert _API_KEY_PROVIDER_AUX_MODELS["minimax-cn"] == "MiniMax-M2.7" + + def test_minimax_aux_not_highspeed(self): + from agent.auxiliary_client import _API_KEY_PROVIDER_AUX_MODELS + assert "highspeed" not in _API_KEY_PROVIDER_AUX_MODELS["minimax"] + assert "highspeed" not in _API_KEY_PROVIDER_AUX_MODELS["minimax-cn"] + + +class TestMinimaxModelCatalog: + """Verify the model catalog includes M1 family and excludes deprecated models.""" + + def test_catalog_includes_m1_family(self): + from hermes_cli.models import _PROVIDER_MODELS + for provider in ("minimax", "minimax-cn"): + models = _PROVIDER_MODELS[provider] + assert "MiniMax-M1" in models + assert "MiniMax-M1-40k" in models + assert "MiniMax-M1-80k" in models + assert "MiniMax-M1-128k" in models + assert "MiniMax-M1-256k" in models + + def test_catalog_excludes_deprecated(self): + from hermes_cli.models import _PROVIDER_MODELS + for provider in ("minimax", "minimax-cn"): + models = _PROVIDER_MODELS[provider] + assert "MiniMax-M2.1" not in models + + def test_catalog_excludes_highspeed(self): + from hermes_cli.models import _PROVIDER_MODELS + for provider in ("minimax", "minimax-cn"): + models = _PROVIDER_MODELS[provider] + assert "MiniMax-M2.7-highspeed" not in models + assert "MiniMax-M2.5-highspeed" not in models diff --git a/tests/hermes_cli/test_runtime_provider_resolution.py 
b/tests/hermes_cli/test_runtime_provider_resolution.py index ded0c9202f..0abc8196f7 100644 --- a/tests/hermes_cli/test_runtime_provider_resolution.py +++ b/tests/hermes_cli/test_runtime_provider_resolution.py @@ -808,6 +808,55 @@ def test_minimax_explicit_api_mode_respected(monkeypatch): assert resolved["api_mode"] == "chat_completions" +def test_minimax_config_base_url_overrides_hardcoded_default(monkeypatch): + """model.base_url in config.yaml should override the hardcoded default (#6039).""" + monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "minimax") + monkeypatch.setattr(rp, "_get_model_config", lambda: { + "provider": "minimax", + "base_url": "https://api.minimaxi.com/anthropic", + }) + monkeypatch.setenv("MINIMAX_API_KEY", "test-minimax-key") + monkeypatch.delenv("MINIMAX_BASE_URL", raising=False) + + resolved = rp.resolve_runtime_provider(requested="minimax") + + assert resolved["provider"] == "minimax" + assert resolved["base_url"] == "https://api.minimaxi.com/anthropic" + assert resolved["api_mode"] == "anthropic_messages" + + +def test_minimax_env_base_url_still_wins_over_config(monkeypatch): + """MINIMAX_BASE_URL env var should take priority over config.yaml model.base_url.""" + monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "minimax") + monkeypatch.setattr(rp, "_get_model_config", lambda: { + "provider": "minimax", + "base_url": "https://api.minimaxi.com/anthropic", + }) + monkeypatch.setenv("MINIMAX_API_KEY", "test-minimax-key") + monkeypatch.setenv("MINIMAX_BASE_URL", "https://custom.example.com/v1") + + resolved = rp.resolve_runtime_provider(requested="minimax") + + # Env var wins because resolve_api_key_provider_credentials prefers it + assert resolved["base_url"] == "https://custom.example.com/v1" + + +def test_minimax_config_base_url_ignored_for_different_provider(monkeypatch): + """model.base_url should NOT be used when model.provider doesn't match.""" + monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: 
"minimax") + monkeypatch.setattr(rp, "_get_model_config", lambda: { + "provider": "openrouter", + "base_url": "https://some-other-endpoint.com/v1", + }) + monkeypatch.setenv("MINIMAX_API_KEY", "test-minimax-key") + monkeypatch.delenv("MINIMAX_BASE_URL", raising=False) + + resolved = rp.resolve_runtime_provider(requested="minimax") + + # Should use the default, NOT the config base_url from a different provider + assert resolved["base_url"] == "https://api.minimax.io/anthropic" + + def test_alibaba_default_coding_intl_endpoint_uses_chat_completions(monkeypatch): """Alibaba default coding-intl /v1 URL should use chat_completions mode.""" monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "alibaba") diff --git a/tests/hermes_cli/test_setup_model_selection.py b/tests/hermes_cli/test_setup_model_selection.py index 3cb7056cf2..b42365da9d 100644 --- a/tests/hermes_cli/test_setup_model_selection.py +++ b/tests/hermes_cli/test_setup_model_selection.py @@ -34,8 +34,8 @@ class TestSetupProviderModelSelection: @pytest.mark.parametrize("provider_id,expected_defaults", [ ("zai", ["glm-5", "glm-4.7", "glm-4.5", "glm-4.5-flash"]), ("kimi-coding", ["kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview"]), - ("minimax", ["MiniMax-M2.7", "MiniMax-M2.7-highspeed", "MiniMax-M2.5", "MiniMax-M2.5-highspeed", "MiniMax-M2.1"]), - ("minimax-cn", ["MiniMax-M2.7", "MiniMax-M2.7-highspeed", "MiniMax-M2.5", "MiniMax-M2.5-highspeed", "MiniMax-M2.1"]), + ("minimax", ["MiniMax-M1", "MiniMax-M1-40k", "MiniMax-M1-80k", "MiniMax-M1-128k", "MiniMax-M1-256k", "MiniMax-M2.5", "MiniMax-M2.7"]), + ("minimax-cn", ["MiniMax-M1", "MiniMax-M1-40k", "MiniMax-M1-80k", "MiniMax-M1-128k", "MiniMax-M1-256k", "MiniMax-M2.5", "MiniMax-M2.7"]), ("opencode-zen", ["gpt-5.4", "gpt-5.3-codex", "claude-sonnet-4-6", "gemini-3-flash"]), ("opencode-go", ["glm-5", "kimi-k2.5", "minimax-m2.5", "minimax-m2.7"]), ])