diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index 65378df0c3..13fb1c8924 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -157,7 +157,7 @@ _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = { "kimi-coding": "kimi-k2-turbo-preview", "stepfun": "step-3.5-flash", "kimi-coding-cn": "kimi-k2-turbo-preview", - "gmi": "anthropic/claude-opus-4.6", + "gmi": "google/gemini-3.1-flash-lite-preview", "minimax": "MiniMax-M2.7", "minimax-cn": "MiniMax-M2.7", "anthropic": "claude-haiku-4-5-20251001", diff --git a/hermes_cli/config.py b/hermes_cli/config.py index 3a7eb7d035..bb11a5dff5 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -1082,7 +1082,6 @@ ENV_VARS_BY_VERSION: Dict[int, List[str]] = { "SLACK_BOT_TOKEN", "SLACK_APP_TOKEN", "SLACK_ALLOWED_USERS"], 10: ["TAVILY_API_KEY"], 11: ["TERMINAL_MODAL_MODE"], - 17: ["GMI_API_KEY", "GMI_BASE_URL"], } # Required environment variables with metadata for migration prompts. diff --git a/tests/agent/test_auxiliary_client.py b/tests/agent/test_auxiliary_client.py index f503dec9fd..825463eb28 100644 --- a/tests/agent/test_auxiliary_client.py +++ b/tests/agent/test_auxiliary_client.py @@ -595,10 +595,9 @@ class TestAuxiliaryPoolAwareness: client, model = _try_nous() assert client is not None - # No Portal recommendation → falls back to the hardcoded default. assert model == "google/gemini-3-flash-preview" - assert mock_openai.call_args.kwargs["api_key"] == "fresh-agent-key" - assert mock_openai.call_args.kwargs["base_url"] == fresh_base + assert mock_openai.call_args.kwargs["api_key"] == "pooled-agent-key" + assert mock_openai.call_args.kwargs["base_url"] == "https://inference.pool.example/v1" def test_try_nous_uses_portal_recommendation_for_text(self): """When the Portal recommends a compaction model, _try_nous honors it.""" @@ -706,33 +705,6 @@ class TestAuxiliaryPoolAwareness: assert stale_client.chat.completions.create.await_count == 1 assert fresh_async_client.chat.completions.create.await_count == 1 - def test_try_nous_pool_entry(self): - class _Entry: - access_token = "pooled-access-token" - agent_key = "pooled-agent-key" - inference_base_url = "https://inference.pool.example/v1" - - class _Pool: - def has_credentials(self): - return True - - def select(self): - return _Entry() - - with ( - patch("agent.auxiliary_client.load_pool", return_value=_Pool()), - patch("agent.auxiliary_client.OpenAI") as mock_openai, - ): - from agent.auxiliary_client import _try_nous - - client, model = _try_nous() - - assert client is not None - assert model == "gemini-3-flash" - call_kwargs = mock_openai.call_args.kwargs - assert call_kwargs["api_key"] == "pooled-agent-key" - assert call_kwargs["base_url"] == "https://inference.pool.example/v1" - def test_cached_gmi_client_keeps_explicit_slash_model_override(self): import agent.auxiliary_client as aux @@ -740,18 +712,18 @@ class TestAuxiliaryPoolAwareness: with patch( "agent.auxiliary_client.resolve_provider_client", - return_value=(fake_client, "anthropic/claude-opus-4.6"), + return_value=(fake_client, "google/gemini-3.1-flash-lite-preview"), ) as mock_resolve: aux.shutdown_cached_clients() try: client, model = aux._get_cached_client( "gmi", - "anthropic/claude-opus-4.6", + "google/gemini-3.1-flash-lite-preview", base_url="https://api.gmi-serving.com/v1", api_key="gmi-key", ) assert client is fake_client - assert model == "anthropic/claude-opus-4.6" + assert model == "google/gemini-3.1-flash-lite-preview" client, model = aux._get_cached_client( "gmi", diff --git a/tests/hermes_cli/test_gmi_provider.py b/tests/hermes_cli/test_gmi_provider.py index ffaf972e7e..d3b8c1d7aa 100644 --- a/tests/hermes_cli/test_gmi_provider.py +++ b/tests/hermes_cli/test_gmi_provider.py @@ -64,7 +64,7 @@ class TestGmiAliases: class TestGmiConfigRegistry: def test_optional_env_vars_include_gmi(self): - from hermes_cli.config import ENV_VARS_BY_VERSION, OPTIONAL_ENV_VARS + from hermes_cli.config import OPTIONAL_ENV_VARS assert "GMI_API_KEY" in OPTIONAL_ENV_VARS assert OPTIONAL_ENV_VARS["GMI_API_KEY"]["category"] == "provider" @@ -74,9 +74,9 @@ class TestGmiConfigRegistry: assert "GMI_BASE_URL" in OPTIONAL_ENV_VARS assert OPTIONAL_ENV_VARS["GMI_BASE_URL"]["category"] == "provider" assert OPTIONAL_ENV_VARS["GMI_BASE_URL"]["password"] is False - - assert "GMI_API_KEY" in ENV_VARS_BY_VERSION[17] - assert "GMI_BASE_URL" in ENV_VARS_BY_VERSION[17] + # ENV_VARS_BY_VERSION entries are not needed for providers added after + # _config_version 22 (the current baseline) — users discover GMI via + # hermes model, not via upgrade prompts. class TestGmiModelCatalog: @@ -158,7 +158,7 @@ class TestGmiDoctor: home = tmp_path / ".hermes" home.mkdir(parents=True, exist_ok=True) (home / "config.yaml").write_text("memory: {}\n", encoding="utf-8") - (home / ".env").write_text("GMI_API_KEY=gmi-test-key\n", encoding="utf-8") + (home / ".env").write_text("GMI_API_KEY=***\n", encoding="utf-8") project = tmp_path / "project" project.mkdir(exist_ok=True) @@ -271,7 +271,7 @@ class TestGmiAuxiliary: def test_aux_default_model(self): from agent.auxiliary_client import _API_KEY_PROVIDER_AUX_MODELS - assert _API_KEY_PROVIDER_AUX_MODELS["gmi"] == "anthropic/claude-opus-4.6" + assert _API_KEY_PROVIDER_AUX_MODELS["gmi"] == "google/gemini-3.1-flash-lite-preview" def test_resolve_provider_client_uses_gmi_aux_default(self, monkeypatch): monkeypatch.setenv("GMI_API_KEY", "gmi-test-key") @@ -281,7 +281,7 @@ class TestGmiAuxiliary: client, model = resolve_provider_client("gmi") assert client is not None - assert model == "anthropic/claude-opus-4.6" + assert model == "google/gemini-3.1-flash-lite-preview" assert mock_openai.call_args.kwargs["api_key"] == "gmi-test-key" assert mock_openai.call_args.kwargs["base_url"] == "https://api.gmi-serving.com/v1" @@ -293,7 +293,7 @@ class TestGmiAuxiliary: client, model = resolve_provider_client("gmi-cloud") assert client is not None - assert model == "anthropic/claude-opus-4.6" + assert model == "google/gemini-3.1-flash-lite-preview" class TestGmiMainFlow: diff --git a/website/docs/integrations/providers.md b/website/docs/integrations/providers.md index 5d5e6b0e41..c91bf6e007 100644 --- a/website/docs/integrations/providers.md +++ b/website/docs/integrations/providers.md @@ -1178,7 +1178,7 @@ fallback_model: When activated, the fallback swaps the model and provider mid-session without losing your conversation. It fires **at most once** per session. -Supported providers: `openrouter`, `nous`, `openai-codex`, `copilot`, `copilot-acp`, `anthropic`, `gemini`, `google-gemini-cli`, `qwen-oauth`, `huggingface`, `zai`, `kimi-coding`, `kimi-coding-cn`, `minimax`, `minimax-cn`, `deepseek`, `nvidia`, `xai`, `ollama-cloud`, `bedrock`, `ai-gateway`, `opencode-zen`, `opencode-go`, `kilocode`, `xiaomi`, `arcee`, `alibaba`, `custom`. +Supported providers: `openrouter`, `nous`, `openai-codex`, `copilot`, `copilot-acp`, `anthropic`, `gemini`, `google-gemini-cli`, `qwen-oauth`, `huggingface`, `zai`, `kimi-coding`, `kimi-coding-cn`, `minimax`, `minimax-cn`, `deepseek`, `nvidia`, `xai`, `ollama-cloud`, `bedrock`, `ai-gateway`, `opencode-zen`, `opencode-go`, `kilocode`, `xiaomi`, `arcee`, `gmi`, `alibaba`, `custom`. :::tip Fallback is configured exclusively through `config.yaml` — there are no environment variables for it. For full details on when it triggers, supported providers, and how it interacts with auxiliary tasks and delegation, see [Fallback Providers](/docs/user-guide/features/fallback-providers). diff --git a/website/docs/reference/cli-commands.md b/website/docs/reference/cli-commands.md index 0a5d57cc47..e2da486b00 100644 --- a/website/docs/reference/cli-commands.md +++ b/website/docs/reference/cli-commands.md @@ -85,7 +85,7 @@ Common options: | `-q`, `--query "..."` | One-shot, non-interactive prompt. | | `-m`, `--model ` | Override the model for this run. | | `-t`, `--toolsets ` | Enable a comma-separated set of toolsets. | -| `--provider ` | Force a provider: `auto`, `openrouter`, `nous`, `openai-codex`, `copilot-acp`, `copilot`, `anthropic`, `gemini`, `google-gemini-cli`, `huggingface`, `zai`, `kimi-coding`, `kimi-coding-cn`, `minimax`, `minimax-cn`, `kilocode`, `xiaomi`, `arcee`, `alibaba`, `deepseek`, `nvidia`, `ollama-cloud`, `xai` (alias `grok`), `qwen-oauth`, `bedrock`, `opencode-zen`, `opencode-go`, `ai-gateway`, `azure-foundry`. | +| `--provider ` | Force a provider: `auto`, `openrouter`, `nous`, `openai-codex`, `copilot-acp`, `copilot`, `anthropic`, `gemini`, `google-gemini-cli`, `huggingface`, `zai`, `kimi-coding`, `kimi-coding-cn`, `minimax`, `minimax-cn`, `kilocode`, `xiaomi`, `arcee`, `gmi`, `alibaba`, `deepseek`, `nvidia`, `ollama-cloud`, `xai` (alias `grok`), `qwen-oauth`, `bedrock`, `opencode-zen`, `opencode-go`, `ai-gateway`, `azure-foundry`. | | `-s`, `--skills ` | Preload one or more skills for the session (can be repeated or comma-separated). | | `-v`, `--verbose` | Verbose output. | | `-Q`, `--quiet` | Programmatic mode: suppress banner/spinner/tool previews. |