diff --git a/hermes_cli/main.py b/hermes_cli/main.py index ffcb9f53fd9..195fd53c4fd 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -2995,10 +2995,10 @@ def _model_flow_kimi(config, current_model=""): # Step 3: Model selection — show appropriate models for the endpoint if is_coding_plan: - # Coding Plan models (kimi-for-coding first) + # Coding Plan models (kimi-k2.5 first) model_list = [ - "kimi-for-coding", "kimi-k2.5", + "kimi-for-coding", "kimi-k2-thinking", "kimi-k2-thinking-turbo", ] diff --git a/hermes_cli/models.py b/hermes_cli/models.py index a292d3fcbad..fe2a0c433ae 100644 --- a/hermes_cli/models.py +++ b/hermes_cli/models.py @@ -26,7 +26,8 @@ COPILOT_REASONING_EFFORTS_O_SERIES = ["low", "medium", "high"] # Fallback OpenRouter snapshot used when the live catalog is unavailable. # (model_id, display description shown in menus) OPENROUTER_MODELS: list[tuple[str, str]] = [ - ("anthropic/claude-opus-4.7", "recommended"), + ("moonshotai/kimi-k2.5", "recommended"), + ("anthropic/claude-opus-4.7", ""), ("anthropic/claude-opus-4.6", ""), ("anthropic/claude-sonnet-4.6", ""), ("qwen/qwen3.6-plus", ""), @@ -49,7 +50,6 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [ ("z-ai/glm-5.1", ""), ("z-ai/glm-5v-turbo", ""), ("z-ai/glm-5-turbo", ""), - ("moonshotai/kimi-k2.5", ""), ("x-ai/grok-4.20", ""), ("nvidia/nemotron-3-super-120b-a12b", ""), ("nvidia/nemotron-3-super-120b-a12b:free", "free"), @@ -75,6 +75,7 @@ def _codex_curated_models() -> list[str]: _PROVIDER_MODELS: dict[str, list[str]] = { "nous": [ + "moonshotai/kimi-k2.5", "xiaomi/mimo-v2-pro", "anthropic/claude-opus-4.7", "anthropic/claude-opus-4.6", @@ -96,7 +97,6 @@ _PROVIDER_MODELS: dict[str, list[str]] = { "z-ai/glm-5.1", "z-ai/glm-5v-turbo", "z-ai/glm-5-turbo", - "moonshotai/kimi-k2.5", "x-ai/grok-4.20-beta", "nvidia/nemotron-3-super-120b-a12b", "nvidia/nemotron-3-super-120b-a12b:free", @@ -156,8 +156,8 @@ _PROVIDER_MODELS: dict[str, list[str]] = { "grok-4-1-fast-reasoning", ], "kimi-coding": [ - "kimi-for-coding", "kimi-k2.5", + "kimi-for-coding", "kimi-k2-thinking", "kimi-k2-thinking-turbo", "kimi-k2-turbo-preview", @@ -212,6 +212,7 @@ _PROVIDER_MODELS: dict[str, list[str]] = { "trinity-mini", ], "opencode-zen": [ + "kimi-k2.5", "gpt-5.4-pro", "gpt-5.4", "gpt-5.3-codex", @@ -243,16 +244,15 @@ _PROVIDER_MODELS: dict[str, list[str]] = { "glm-5", "glm-4.7", "glm-4.6", - "kimi-k2.5", "kimi-k2-thinking", "kimi-k2", "qwen3-coder", "big-pickle", ], "opencode-go": [ + "kimi-k2.5", "glm-5.1", "glm-5", - "kimi-k2.5", "mimo-v2-pro", "mimo-v2-omni", "minimax-m2.7", @@ -285,21 +285,21 @@ _PROVIDER_MODELS: dict[str, list[str]] = { # to https://dashscope-intl.aliyuncs.com/compatible-mode/v1 (OpenAI-compat) # or https://dashscope-intl.aliyuncs.com/apps/anthropic (Anthropic-compat). "alibaba": [ + "kimi-k2.5", "qwen3.5-plus", "qwen3-coder-plus", "qwen3-coder-next", # Third-party models available on coding-intl "glm-5", "glm-4.7", - "kimi-k2.5", "MiniMax-M2.5", ], # Curated HF model list — only agentic models that map to OpenRouter defaults. "huggingface": [ + "moonshotai/Kimi-K2.5", "Qwen/Qwen3.5-397B-A17B", "Qwen/Qwen3.5-35B-A3B", "deepseek-ai/DeepSeek-V3.2", - "moonshotai/Kimi-K2.5", "MiniMaxAI/MiniMax-M2.5", "zai-org/GLM-5", "XiaomiMiMo/MiMo-V2-Flash", diff --git a/tests/hermes_cli/test_opencode_go_in_model_list.py b/tests/hermes_cli/test_opencode_go_in_model_list.py index cb8b3b14267..a84701f09c7 100644 --- a/tests/hermes_cli/test_opencode_go_in_model_list.py +++ b/tests/hermes_cli/test_opencode_go_in_model_list.py @@ -15,7 +15,7 @@ def test_opencode_go_appears_when_api_key_set(): opencode_go = next((p for p in providers if p["slug"] == "opencode-go"), None) assert opencode_go is not None, "opencode-go should appear when OPENCODE_GO_API_KEY is set" - assert opencode_go["models"] == ["glm-5.1", "glm-5", "kimi-k2.5", "mimo-v2-pro", "mimo-v2-omni", "minimax-m2.7", "minimax-m2.5"] + assert opencode_go["models"] == ["kimi-k2.5", "glm-5.1", "glm-5", "mimo-v2-pro", "mimo-v2-omni", "minimax-m2.7", "minimax-m2.5"] # opencode-go can appear as "built-in" (from PROVIDER_TO_MODELS_DEV when # models.dev is reachable) or "hermes" (from HERMES_OVERLAYS fallback when # the API is unavailable, e.g. in CI). diff --git a/tests/run_agent/test_1630_context_overflow_loop.py b/tests/run_agent/test_1630_context_overflow_loop.py index c33aaa9670d..f69b01241bb 100644 --- a/tests/run_agent/test_1630_context_overflow_loop.py +++ b/tests/run_agent/test_1630_context_overflow_loop.py @@ -32,6 +32,7 @@ class TestGeneric400Heuristic: from run_agent import AIAgent a = AIAgent( api_key="test-key-12345", + base_url="https://openrouter.ai/api/v1", quiet_mode=True, skip_context_files=True, skip_memory=True, diff --git a/tests/run_agent/test_413_compression.py b/tests/run_agent/test_413_compression.py index 1d6f6cebb82..e8835c64124 100644 --- a/tests/run_agent/test_413_compression.py +++ b/tests/run_agent/test_413_compression.py @@ -69,6 +69,7 @@ def agent(): ): a = AIAgent( api_key="test-key-1234567890", + base_url="https://openrouter.ai/api/v1", quiet_mode=True, skip_context_files=True, skip_memory=True, diff --git a/tests/run_agent/test_860_dedup.py b/tests/run_agent/test_860_dedup.py index 350d2a21a2f..89f4c010b65 100644 --- a/tests/run_agent/test_860_dedup.py +++ b/tests/run_agent/test_860_dedup.py @@ -29,6 +29,8 @@ class TestFlushDeduplication: with patch.dict(os.environ, {"OPENROUTER_API_KEY": "test-key"}): from run_agent import AIAgent agent = AIAgent( + api_key="test-key", + base_url="https://openrouter.ai/api/v1", model="test/model", quiet_mode=True, session_db=session_db, @@ -271,6 +273,8 @@ class TestFlushIdxInit: with patch.dict(os.environ, {"OPENROUTER_API_KEY": "test-key"}): from run_agent import AIAgent agent = AIAgent( + api_key="test-key", + base_url="https://openrouter.ai/api/v1", model="test/model", quiet_mode=True, skip_context_files=True, @@ -283,6 +287,8 @@ class TestFlushIdxInit: with patch.dict(os.environ, {"OPENROUTER_API_KEY": "test-key"}): from run_agent import AIAgent agent = AIAgent( + api_key="test-key", + base_url="https://openrouter.ai/api/v1", model="test/model", quiet_mode=True, skip_context_files=True, diff --git a/tests/run_agent/test_compression_persistence.py b/tests/run_agent/test_compression_persistence.py index 272b39bfea6..46ab963d420 100644 --- a/tests/run_agent/test_compression_persistence.py +++ b/tests/run_agent/test_compression_persistence.py @@ -37,6 +37,8 @@ class TestFlushAfterCompression: with patch.dict(os.environ, {"OPENROUTER_API_KEY": "test-key"}): from run_agent import AIAgent agent = AIAgent( + api_key="test-key", + base_url="https://openrouter.ai/api/v1", model="test/model", quiet_mode=True, session_db=session_db, diff --git a/tests/run_agent/test_create_openai_client_kwargs_isolation.py b/tests/run_agent/test_create_openai_client_kwargs_isolation.py index 506e1486cee..98b7ff480c6 100644 --- a/tests/run_agent/test_create_openai_client_kwargs_isolation.py +++ b/tests/run_agent/test_create_openai_client_kwargs_isolation.py @@ -19,6 +19,8 @@ from run_agent import AIAgent def test_create_openai_client_does_not_mutate_input_kwargs(mock_openai): mock_openai.return_value = MagicMock() agent = AIAgent( + api_key="test-key", + base_url="https://openrouter.ai/api/v1", model="test/model", quiet_mode=True, skip_context_files=True, diff --git a/tests/run_agent/test_create_openai_client_reuse.py b/tests/run_agent/test_create_openai_client_reuse.py index 8183e7eead9..0eac567ae6c 100644 --- a/tests/run_agent/test_create_openai_client_reuse.py +++ b/tests/run_agent/test_create_openai_client_reuse.py @@ -23,6 +23,8 @@ from run_agent import AIAgent def _make_agent(): return AIAgent( + api_key="test-key", + base_url="https://openrouter.ai/api/v1", model="test/model", quiet_mode=True, skip_context_files=True, diff --git a/tests/run_agent/test_fallback_model.py b/tests/run_agent/test_fallback_model.py index ac693caf019..6491bd686da 100644 --- a/tests/run_agent/test_fallback_model.py +++ b/tests/run_agent/test_fallback_model.py @@ -36,6 +36,7 @@ def _make_agent(fallback_model=None): ): agent = AIAgent( api_key="test-key", + base_url="https://openrouter.ai/api/v1", quiet_mode=True, skip_context_files=True, skip_memory=True, diff --git a/tests/run_agent/test_plugin_context_engine_init.py b/tests/run_agent/test_plugin_context_engine_init.py index 7583d9e7535..60e89889088 100644 --- a/tests/run_agent/test_plugin_context_engine_init.py +++ b/tests/run_agent/test_plugin_context_engine_init.py @@ -45,6 +45,7 @@ def test_plugin_engine_gets_context_length_on_init(): agent = AIAgent( api_key="test-key-1234567890", + base_url="https://openrouter.ai/api/v1", quiet_mode=True, skip_context_files=True, skip_memory=True, @@ -75,6 +76,7 @@ def test_plugin_engine_update_model_args(): agent = AIAgent( model="openrouter/auto", api_key="test-key-1234567890", + base_url="https://openrouter.ai/api/v1", quiet_mode=True, skip_context_files=True, skip_memory=True, diff --git a/tests/run_agent/test_provider_fallback.py b/tests/run_agent/test_provider_fallback.py index 2bb2109552f..e441bfd33b4 100644 --- a/tests/run_agent/test_provider_fallback.py +++ b/tests/run_agent/test_provider_fallback.py @@ -19,6 +19,7 @@ def _make_agent(fallback_model=None): ): agent = AIAgent( api_key="test-key", + base_url="https://openrouter.ai/api/v1", quiet_mode=True, skip_context_files=True, skip_memory=True, diff --git a/tests/run_agent/test_provider_parity.py b/tests/run_agent/test_provider_parity.py index 1817e44a695..bdbe89f3ad1 100644 --- a/tests/run_agent/test_provider_parity.py +++ b/tests/run_agent/test_provider_parity.py @@ -60,6 +60,9 @@ def _make_agent(monkeypatch, provider, api_mode="chat_completions", base_url="ht ) if model: kwargs["model"] = model + base_url="https://openrouter.ai/api/v1", + api_key="test-key", + base_url="https://openrouter.ai/api/v1", return AIAgent(**kwargs) diff --git a/tests/run_agent/test_run_agent.py b/tests/run_agent/test_run_agent.py index 46eec2cf71d..86f95580f02 100644 --- a/tests/run_agent/test_run_agent.py +++ b/tests/run_agent/test_run_agent.py @@ -55,6 +55,7 @@ def agent(): ): a = AIAgent( api_key="test-key-1234567890", + base_url="https://openrouter.ai/api/v1", quiet_mode=True, skip_context_files=True, skip_memory=True, @@ -76,6 +77,7 @@ def agent_with_memory_tool(): ): a = AIAgent( api_key="test-k...7890", + base_url="https://openrouter.ai/api/v1", quiet_mode=True, skip_context_files=True, skip_memory=True, @@ -112,12 +114,14 @@ def test_aiagent_reuses_existing_errors_log_handler(): ): AIAgent( api_key="test-k...7890", + base_url="https://openrouter.ai/api/v1", quiet_mode=True, skip_context_files=True, skip_memory=True, ) AIAgent( api_key="test-k...7890", + base_url="https://openrouter.ai/api/v1", quiet_mode=True, skip_context_files=True, skip_memory=True, @@ -491,6 +495,7 @@ class TestInit: ): a = AIAgent( api_key="test-key-1234567890", + base_url="https://openrouter.ai/api/v1", model="openai/gpt-4o", quiet_mode=True, skip_context_files=True, @@ -542,6 +547,7 @@ class TestInit: ): a = AIAgent( api_key="test-key-1234567890", + base_url="https://openrouter.ai/api/v1", quiet_mode=True, skip_context_files=True, skip_memory=True, @@ -557,6 +563,7 @@ class TestInit: ): a = AIAgent( api_key="test-key-1234567890", + base_url="https://openrouter.ai/api/v1", quiet_mode=True, skip_context_files=True, skip_memory=True, @@ -694,6 +701,7 @@ class TestBuildSystemPrompt: ): agent = AIAgent( api_key="test-k...7890", + base_url="https://openrouter.ai/api/v1", quiet_mode=True, skip_context_files=True, skip_memory=True, @@ -726,6 +734,7 @@ class TestToolUseEnforcementConfig: a = AIAgent( model=model, api_key="test-key-1234567890", + base_url="https://openrouter.ai/api/v1", quiet_mode=True, skip_context_files=True, skip_memory=True, @@ -822,6 +831,7 @@ class TestToolUseEnforcementConfig: ): a = AIAgent( api_key="test-key-1234567890", + base_url="https://openrouter.ai/api/v1", quiet_mode=True, skip_context_files=True, skip_memory=True, @@ -3433,7 +3443,7 @@ class TestAnthropicBaseUrlPassthrough: ): mock_build.return_value = MagicMock() a = AIAgent( - api_key="sk-ant-api03-test1234567890", + api_key="sk-ant...7890", api_mode="anthropic_messages", quiet_mode=True, skip_context_files=True, @@ -3457,6 +3467,7 @@ class TestAnthropicCredentialRefresh: mock_build.side_effect = [old_client, new_client] agent = AIAgent( api_key="sk-ant-oat01-stale-token", + base_url="https://openrouter.ai/api/v1", api_mode="anthropic_messages", quiet_mode=True, skip_context_files=True, @@ -3487,6 +3498,7 @@ class TestAnthropicCredentialRefresh: ): agent = AIAgent( api_key="sk-ant-oat01-same-token", + base_url="https://openrouter.ai/api/v1", api_mode="anthropic_messages", quiet_mode=True, skip_context_files=True, @@ -3514,6 +3526,7 @@ class TestAnthropicCredentialRefresh: ): agent = AIAgent( api_key="sk-ant-oat01-current-token", + base_url="https://openrouter.ai/api/v1", api_mode="anthropic_messages", quiet_mode=True, skip_context_files=True, diff --git a/tests/run_agent/test_streaming.py b/tests/run_agent/test_streaming.py index 97dcffc67fa..73a9872020e 100644 --- a/tests/run_agent/test_streaming.py +++ b/tests/run_agent/test_streaming.py @@ -80,6 +80,8 @@ class TestStreamingAccumulator: mock_create.return_value = mock_client agent = AIAgent( + api_key="test-key", + base_url="https://openrouter.ai/api/v1", model="test/model", quiet_mode=True, skip_context_files=True, @@ -120,6 +122,8 @@ class TestStreamingAccumulator: mock_create.return_value = mock_client agent = AIAgent( + api_key="test-key", + base_url="https://openrouter.ai/api/v1", model="test/model", quiet_mode=True, skip_context_files=True, @@ -167,6 +171,8 @@ class TestStreamingAccumulator: mock_create.return_value = mock_client agent = AIAgent( + api_key="test-key", + base_url="https://openrouter.ai/api/v1", model="test/model", quiet_mode=True, skip_context_files=True, @@ -205,6 +211,8 @@ class TestStreamingAccumulator: mock_create.return_value = mock_client agent = AIAgent( + api_key="test-key", + base_url="https://openrouter.ai/api/v1", model="test/model", quiet_mode=True, skip_context_files=True, @@ -245,6 +253,8 @@ class TestStreamingCallbacks: mock_create.return_value = mock_client agent = AIAgent( + api_key="test-key", + base_url="https://openrouter.ai/api/v1", model="test/model", quiet_mode=True, skip_context_files=True, @@ -277,6 +287,8 @@ class TestStreamingCallbacks: mock_create.return_value = mock_client agent = AIAgent( + api_key="test-key", + base_url="https://openrouter.ai/api/v1", model="test/model", quiet_mode=True, skip_context_files=True, @@ -308,6 +320,8 @@ class TestStreamingCallbacks: mock_create.return_value = mock_client agent = AIAgent( + api_key="test-key", + base_url="https://openrouter.ai/api/v1", model="test/model", quiet_mode=True, skip_context_files=True, @@ -346,6 +360,8 @@ class TestStreamingCallbacks: mock_create.return_value = mock_client agent = AIAgent( + api_key="test-key", + base_url="https://openrouter.ai/api/v1", model="test/model", quiet_mode=True, skip_context_files=True, @@ -381,6 +397,8 @@ class TestStreamingCallbacks: mock_create.return_value = mock_client agent = AIAgent( + api_key="test-key", + base_url="https://openrouter.ai/api/v1", model="test/model", quiet_mode=True, skip_context_files=True, @@ -428,6 +446,8 @@ class TestStreamingFallback: mock_create.return_value = mock_client agent = AIAgent( + api_key="test-key", + base_url="https://openrouter.ai/api/v1", model="test/model", quiet_mode=True, skip_context_files=True, @@ -455,6 +475,8 @@ class TestStreamingFallback: mock_create.return_value = mock_client agent = AIAgent( + api_key="test-key", + base_url="https://openrouter.ai/api/v1", model="test/model", quiet_mode=True, skip_context_files=True, @@ -477,6 +499,8 @@ class TestStreamingFallback: mock_create.return_value = mock_client agent = AIAgent( + api_key="test-key", + base_url="https://openrouter.ai/api/v1", model="test/model", quiet_mode=True, skip_context_files=True, @@ -500,6 +524,8 @@ class TestStreamingFallback: mock_create.return_value = mock_client agent = AIAgent( + api_key="test-key", + base_url="https://openrouter.ai/api/v1", model="test/model", quiet_mode=True, skip_context_files=True, @@ -542,6 +568,8 @@ class TestStreamingFallback: mock_create.return_value = mock_client agent = AIAgent( + api_key="test-key", + base_url="https://openrouter.ai/api/v1", model="test/model", quiet_mode=True, skip_context_files=True, @@ -577,6 +605,8 @@ class TestStreamingFallback: mock_create.return_value = mock_client agent = AIAgent( + api_key="test-key", + base_url="https://openrouter.ai/api/v1", model="test/model", quiet_mode=True, skip_context_files=True, @@ -619,6 +649,8 @@ class TestReasoningStreaming: mock_create.return_value = mock_client agent = AIAgent( + api_key="test-key", + base_url="https://openrouter.ai/api/v1", model="test/model", quiet_mode=True, skip_context_files=True, @@ -646,6 +678,8 @@ class TestHasStreamConsumers: def test_no_consumers(self): from run_agent import AIAgent agent = AIAgent( + api_key="test-key", + base_url="https://openrouter.ai/api/v1", model="test/model", quiet_mode=True, skip_context_files=True, @@ -656,6 +690,8 @@ class TestHasStreamConsumers: def test_delta_callback_set(self): from run_agent import AIAgent agent = AIAgent( + api_key="test-key", + base_url="https://openrouter.ai/api/v1", model="test/model", quiet_mode=True, skip_context_files=True, @@ -667,6 +703,8 @@ class TestHasStreamConsumers: def test_stream_callback_set(self): from run_agent import AIAgent agent = AIAgent( + api_key="test-key", + base_url="https://openrouter.ai/api/v1", model="test/model", quiet_mode=True, skip_context_files=True, @@ -688,6 +726,8 @@ class TestCodexStreamCallbacks: deltas = [] agent = AIAgent( + api_key="test-key", + base_url="https://openrouter.ai/api/v1", model="test/model", quiet_mode=True, skip_context_files=True, @@ -729,6 +769,8 @@ class TestCodexStreamCallbacks: from run_agent import AIAgent agent = AIAgent( + api_key="test-key", + base_url="https://openrouter.ai/api/v1", model="test/model", quiet_mode=True, skip_context_files=True, @@ -792,6 +834,8 @@ class TestCodexStreamCallbacks: ) agent = AIAgent( + api_key="test-key", + base_url="https://openrouter.ai/api/v1", model="test/model", quiet_mode=True, skip_context_files=True, @@ -810,6 +854,8 @@ class TestCodexStreamCallbacks: from run_agent import AIAgent agent = AIAgent( + api_key="test-key", + base_url="https://openrouter.ai/api/v1", model="test/model", quiet_mode=True, skip_context_files=True, @@ -861,6 +907,8 @@ class TestAnthropicStreamCallbacks: from run_agent import AIAgent agent = AIAgent( + api_key="test-key", + base_url="https://openrouter.ai/api/v1", model="test/model", quiet_mode=True, skip_context_files=True, diff --git a/tests/run_agent/test_token_persistence_non_cli.py b/tests/run_agent/test_token_persistence_non_cli.py index d25cf07ab8e..044d8abb3b0 100644 --- a/tests/run_agent/test_token_persistence_non_cli.py +++ b/tests/run_agent/test_token_persistence_non_cli.py @@ -22,6 +22,7 @@ def _make_agent(session_db, *, platform: str): ): agent = AIAgent( api_key="test-key", + base_url="https://openrouter.ai/api/v1", quiet_mode=True, skip_context_files=True, skip_memory=True,