diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py
index 65378df0c3..13fb1c8924 100644
--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@@ -157,7 +157,7 @@ _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
     "kimi-coding": "kimi-k2-turbo-preview",
     "stepfun": "step-3.5-flash",
     "kimi-coding-cn": "kimi-k2-turbo-preview",
-    "gmi": "anthropic/claude-opus-4.6",
+    "gmi": "google/gemini-3.1-flash-lite-preview",
     "minimax": "MiniMax-M2.7",
     "minimax-cn": "MiniMax-M2.7",
     "anthropic": "claude-haiku-4-5-20251001",
diff --git a/hermes_cli/config.py b/hermes_cli/config.py
index 3a7eb7d035..bb11a5dff5 100644
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@@ -1082,7 +1082,6 @@ ENV_VARS_BY_VERSION: Dict[int, List[str]] = {
         "SLACK_BOT_TOKEN", "SLACK_APP_TOKEN", "SLACK_ALLOWED_USERS"],
     10: ["TAVILY_API_KEY"],
     11: ["TERMINAL_MODAL_MODE"],
-    17: ["GMI_API_KEY", "GMI_BASE_URL"],
 }
 
 # Required environment variables with metadata for migration prompts.
diff --git a/tests/agent/test_auxiliary_client.py b/tests/agent/test_auxiliary_client.py
index f503dec9fd..825463eb28 100644
--- a/tests/agent/test_auxiliary_client.py
+++ b/tests/agent/test_auxiliary_client.py
@@ -595,10 +595,9 @@ class TestAuxiliaryPoolAwareness:
             client, model = _try_nous()
 
         assert client is not None
-        # No Portal recommendation → falls back to the hardcoded default.
         assert model == "google/gemini-3-flash-preview"
-        assert mock_openai.call_args.kwargs["api_key"] == "fresh-agent-key"
-        assert mock_openai.call_args.kwargs["base_url"] == fresh_base
+        assert mock_openai.call_args.kwargs["api_key"] == "pooled-agent-key"
+        assert mock_openai.call_args.kwargs["base_url"] == "https://inference.pool.example/v1"
 
     def test_try_nous_uses_portal_recommendation_for_text(self):
         """When the Portal recommends a compaction model, _try_nous honors it."""
@@ -706,33 +705,6 @@ class TestAuxiliaryPoolAwareness:
         assert stale_client.chat.completions.create.await_count == 1
         assert fresh_async_client.chat.completions.create.await_count == 1
 
-    def test_try_nous_pool_entry(self):
-        class _Entry:
-            access_token = "pooled-access-token"
-            agent_key = "pooled-agent-key"
-            inference_base_url = "https://inference.pool.example/v1"
-
-        class _Pool:
-            def has_credentials(self):
-                return True
-
-            def select(self):
-                return _Entry()
-
-        with (
-            patch("agent.auxiliary_client.load_pool", return_value=_Pool()),
-            patch("agent.auxiliary_client.OpenAI") as mock_openai,
-        ):
-            from agent.auxiliary_client import _try_nous
-
-            client, model = _try_nous()
-
-        assert client is not None
-        assert model == "gemini-3-flash"
-        call_kwargs = mock_openai.call_args.kwargs
-        assert call_kwargs["api_key"] == "pooled-agent-key"
-        assert call_kwargs["base_url"] == "https://inference.pool.example/v1"
-
     def test_cached_gmi_client_keeps_explicit_slash_model_override(self):
         import agent.auxiliary_client as aux
 
@@ -740,18 +712,18 @@ class TestAuxiliaryPoolAwareness:
 
         with patch(
             "agent.auxiliary_client.resolve_provider_client",
-            return_value=(fake_client, "anthropic/claude-opus-4.6"),
+            return_value=(fake_client, "google/gemini-3.1-flash-lite-preview"),
         ) as mock_resolve:
             aux.shutdown_cached_clients()
             try:
                 client, model = aux._get_cached_client(
                     "gmi",
-                    "anthropic/claude-opus-4.6",
+                    "google/gemini-3.1-flash-lite-preview",
                     base_url="https://api.gmi-serving.com/v1",
                     api_key="gmi-key",
                 )
                 assert client is fake_client
-                assert model == "anthropic/claude-opus-4.6"
+                assert model == "google/gemini-3.1-flash-lite-preview"
 
                 client, model = aux._get_cached_client(
                     "gmi",
diff --git a/tests/hermes_cli/test_gmi_provider.py b/tests/hermes_cli/test_gmi_provider.py
index ffaf972e7e..d3b8c1d7aa 100644
--- a/tests/hermes_cli/test_gmi_provider.py
+++ b/tests/hermes_cli/test_gmi_provider.py
@@ -64,7 +64,7 @@ class TestGmiAliases:
 
 class TestGmiConfigRegistry:
     def test_optional_env_vars_include_gmi(self):
-        from hermes_cli.config import ENV_VARS_BY_VERSION, OPTIONAL_ENV_VARS
+        from hermes_cli.config import OPTIONAL_ENV_VARS
 
         assert "GMI_API_KEY" in OPTIONAL_ENV_VARS
         assert OPTIONAL_ENV_VARS["GMI_API_KEY"]["category"] == "provider"
@@ -74,9 +74,9 @@ class TestGmiConfigRegistry:
         assert "GMI_BASE_URL" in OPTIONAL_ENV_VARS
         assert OPTIONAL_ENV_VARS["GMI_BASE_URL"]["category"] == "provider"
         assert OPTIONAL_ENV_VARS["GMI_BASE_URL"]["password"] is False
-
-        assert "GMI_API_KEY" in ENV_VARS_BY_VERSION[17]
-        assert "GMI_BASE_URL" in ENV_VARS_BY_VERSION[17]
+        # GMI is deliberately absent from ENV_VARS_BY_VERSION: providers added
+        # after the current baseline (_config_version 22) are discovered via
+        # `hermes model`, not via upgrade prompts.
 
 
 class TestGmiModelCatalog:
@@ -158,7 +158,7 @@ class TestGmiDoctor:
         home = tmp_path / ".hermes"
         home.mkdir(parents=True, exist_ok=True)
         (home / "config.yaml").write_text("memory: {}\n", encoding="utf-8")
-        (home / ".env").write_text("GMI_API_KEY=gmi-test-key\n", encoding="utf-8")
+        (home / ".env").write_text("GMI_API_KEY=***\n", encoding="utf-8")
         project = tmp_path / "project"
         project.mkdir(exist_ok=True)
 
@@ -271,7 +271,7 @@ class TestGmiAuxiliary:
     def test_aux_default_model(self):
         from agent.auxiliary_client import _API_KEY_PROVIDER_AUX_MODELS
 
-        assert _API_KEY_PROVIDER_AUX_MODELS["gmi"] == "anthropic/claude-opus-4.6"
+        assert _API_KEY_PROVIDER_AUX_MODELS["gmi"] == "google/gemini-3.1-flash-lite-preview"
 
     def test_resolve_provider_client_uses_gmi_aux_default(self, monkeypatch):
         monkeypatch.setenv("GMI_API_KEY", "gmi-test-key")
@@ -281,7 +281,7 @@ class TestGmiAuxiliary:
             client, model = resolve_provider_client("gmi")
 
         assert client is not None
-        assert model == "anthropic/claude-opus-4.6"
+        assert model == "google/gemini-3.1-flash-lite-preview"
         assert mock_openai.call_args.kwargs["api_key"] == "gmi-test-key"
         assert mock_openai.call_args.kwargs["base_url"] == "https://api.gmi-serving.com/v1"
 
@@ -293,7 +293,7 @@ class TestGmiAuxiliary:
             client, model = resolve_provider_client("gmi-cloud")
 
         assert client is not None
-        assert model == "anthropic/claude-opus-4.6"
+        assert model == "google/gemini-3.1-flash-lite-preview"
 
 
 class TestGmiMainFlow:
diff --git a/website/docs/integrations/providers.md b/website/docs/integrations/providers.md
index 5d5e6b0e41..c91bf6e007 100644
--- a/website/docs/integrations/providers.md
+++ b/website/docs/integrations/providers.md
@@ -1178,7 +1178,7 @@ fallback_model:
 
 When activated, the fallback swaps the model and provider mid-session without losing your conversation. It fires **at most once** per session.
 
-Supported providers: `openrouter`, `nous`, `openai-codex`, `copilot`, `copilot-acp`, `anthropic`, `gemini`, `google-gemini-cli`, `qwen-oauth`, `huggingface`, `zai`, `kimi-coding`, `kimi-coding-cn`, `minimax`, `minimax-cn`, `deepseek`, `nvidia`, `xai`, `ollama-cloud`, `bedrock`, `ai-gateway`, `opencode-zen`, `opencode-go`, `kilocode`, `xiaomi`, `arcee`, `alibaba`, `custom`.
+Supported providers: `openrouter`, `nous`, `openai-codex`, `copilot`, `copilot-acp`, `anthropic`, `gemini`, `google-gemini-cli`, `qwen-oauth`, `huggingface`, `zai`, `kimi-coding`, `kimi-coding-cn`, `minimax`, `minimax-cn`, `deepseek`, `nvidia`, `xai`, `ollama-cloud`, `bedrock`, `ai-gateway`, `opencode-zen`, `opencode-go`, `kilocode`, `xiaomi`, `arcee`, `gmi`, `alibaba`, `custom`.
 
 :::tip
 Fallback is configured exclusively through `config.yaml` — there are no environment variables for it. For full details on when it triggers, supported providers, and how it interacts with auxiliary tasks and delegation, see [Fallback Providers](/docs/user-guide/features/fallback-providers).
diff --git a/website/docs/reference/cli-commands.md b/website/docs/reference/cli-commands.md
index 0a5d57cc47..e2da486b00 100644
--- a/website/docs/reference/cli-commands.md
+++ b/website/docs/reference/cli-commands.md
@@ -85,7 +85,7 @@ Common options:
 | `-q`, `--query "..."` | One-shot, non-interactive prompt. |
 | `-m`, `--model <model>` | Override the model for this run. |
 | `-t`, `--toolsets <csv>` | Enable a comma-separated set of toolsets. |
-| `--provider <provider>` | Force a provider: `auto`, `openrouter`, `nous`, `openai-codex`, `copilot-acp`, `copilot`, `anthropic`, `gemini`, `google-gemini-cli`, `huggingface`, `zai`, `kimi-coding`, `kimi-coding-cn`, `minimax`, `minimax-cn`, `kilocode`, `xiaomi`, `arcee`, `alibaba`, `deepseek`, `nvidia`, `ollama-cloud`, `xai` (alias `grok`), `qwen-oauth`, `bedrock`, `opencode-zen`, `opencode-go`, `ai-gateway`, `azure-foundry`. |
+| `--provider <provider>` | Force a provider: `auto`, `openrouter`, `nous`, `openai-codex`, `copilot-acp`, `copilot`, `anthropic`, `gemini`, `google-gemini-cli`, `huggingface`, `zai`, `kimi-coding`, `kimi-coding-cn`, `minimax`, `minimax-cn`, `kilocode`, `xiaomi`, `arcee`, `gmi`, `alibaba`, `deepseek`, `nvidia`, `ollama-cloud`, `xai` (alias `grok`), `qwen-oauth`, `bedrock`, `opencode-zen`, `opencode-go`, `ai-gateway`, `azure-foundry`. |
 | `-s`, `--skills <name>` | Preload one or more skills for the session (can be repeated or comma-separated). |
 | `-v`, `--verbose` | Verbose output. |
 | `-Q`, `--quiet` | Programmatic mode: suppress banner/spinner/tool previews. |