From a77efada5f55436e6a17da45a30a3352ce24a780 Mon Sep 17 00:00:00 2001
From: teknium1 <127238744+teknium1@users.noreply.github.com>
Date: Mon, 8 Jun 2026 07:53:41 -0700
Subject: [PATCH] refactor(cli): extract 18 model-flow wizard functions into
model_setup_flows (god-file Phase 2)
Lift the 18 _model_flow_* provider-setup wizard functions out of hermes_cli/main.py
into hermes_cli/model_setup_flows.py. Behavior-neutral; main.py 14050 -> 11479 LOC.
select_provider_and_model (the dispatcher) STAYS in main.py and re-imports the
flows via an explicit 'from hermes_cli.model_setup_flows import (...)' block, so
both its bare-name calls and existing test monkeypatches targeting
hermes_cli.main._model_flow_* keep resolving against main's namespace unchanged.
Imports: 3 neutral deps (argparse, os, subprocess) at the module top; the 14
main.py-internal helpers the flows call (_prompt_api_key, _save_custom_provider,
the reasoning-effort/stepfun/qwen helpers, _run_anthropic_oauth_flow, ...) are
lazy-imported per-flow (from hermes_cli.main import ...) so the new module never
imports main at module scope -> no import cycle.
Repointed one source-inspection change-detector (test_setup_ollama_cloud_force_refresh)
to read the module the ollama-cloud branch moved to.
Validation: 6563/6563 hermes_cli tests pass; live flow-dispatch probe confirms the
lazy main-internal imports resolve at runtime.
---
hermes_cli/main.py | 2619 +---------------
hermes_cli/model_setup_flows.py | 2648 +++++++++++++++++
.../test_setup_ollama_cloud_force_refresh.py | 7 +-
3 files changed, 2677 insertions(+), 2597 deletions(-)
create mode 100644 hermes_cli/model_setup_flows.py
diff --git a/hermes_cli/main.py b/hermes_cli/main.py
index e9f02c72c90..2115764d5b5 100644
--- a/hermes_cli/main.py
+++ b/hermes_cli/main.py
@@ -494,6 +494,30 @@ import time as _time
from datetime import datetime
from hermes_cli import __version__, __release_date__
+
+# Provider model-selection wizard flows extracted to hermes_cli/model_setup_flows.py
+# (god-file decomposition Phase 2). Re-imported here so select_provider_and_model and
+# existing test monkeypatches (hermes_cli.main._model_flow_*) keep resolving unchanged.
+from hermes_cli.model_setup_flows import (
+ _model_flow_openrouter,
+ _model_flow_nous,
+ _model_flow_openai_codex,
+ _model_flow_xai_oauth,
+ _model_flow_qwen_oauth,
+ _model_flow_minimax_oauth,
+ _model_flow_google_gemini_cli,
+ _model_flow_custom,
+ _model_flow_azure_foundry,
+ _model_flow_named_custom,
+ _model_flow_copilot,
+ _model_flow_copilot_acp,
+ _model_flow_kimi,
+ _model_flow_stepfun,
+ _model_flow_bedrock_api_key,
+ _model_flow_bedrock,
+ _model_flow_api_key_provider,
+ _model_flow_anthropic,
+)
logger = logging.getLogger(__name__)
@@ -3341,467 +3365,12 @@ def _prompt_provider_choice(choices, *, default=0):
return None
-def _model_flow_openrouter(config, current_model=""):
- """OpenRouter provider: ensure API key, then pick model."""
- from hermes_constants import OPENROUTER_BASE_URL
- from hermes_cli.auth import (
- ProviderConfig,
- _prompt_model_selection,
- _save_model_choice,
- deactivate_provider,
- )
- from hermes_cli.config import get_env_value
-
- # Route through _prompt_api_key so users can replace a stale/broken key
- # in-flow (K/R/C) instead of having to edit ~/.hermes/.env by hand. The
- # previous bypass-when-key-exists branch left no way to recover from a
- # bad paste short of re-running `hermes setup` from scratch. OpenRouter
- # isn't in PROVIDER_REGISTRY so we synthesize a minimal pconfig.
- pconfig = ProviderConfig(
- id="openrouter",
- name="OpenRouter",
- auth_type="api_key",
- api_key_env_vars=("OPENROUTER_API_KEY",),
- )
- existing_key = get_env_value("OPENROUTER_API_KEY") or ""
- if not existing_key:
- print("Get one at: https://openrouter.ai/keys")
- print()
- _resolved, abort = _prompt_api_key(pconfig, existing_key, provider_id="openrouter")
- if abort:
- return
-
- from hermes_cli.models import model_ids, get_pricing_for_provider
-
- openrouter_models = model_ids(force_refresh=True)
-
- # Fetch live pricing (non-blocking — returns empty dict on failure)
- pricing = get_pricing_for_provider("openrouter", force_refresh=True)
-
- selected = _prompt_model_selection(
- openrouter_models, current_model=current_model, pricing=pricing
- )
- if selected:
- _save_model_choice(selected)
-
- # Update config provider and deactivate any OAuth provider
- from hermes_cli.config import load_config, save_config
-
- cfg = load_config()
- model = cfg.get("model")
- if not isinstance(model, dict):
- model = {"default": model} if model else {}
- cfg["model"] = model
- model["provider"] = "openrouter"
- model["base_url"] = OPENROUTER_BASE_URL
- model["api_mode"] = "chat_completions"
- save_config(cfg)
- deactivate_provider()
- print(f"Default model set to: {selected} (via OpenRouter)")
- else:
- print("No change.")
-def _model_flow_nous(config, current_model="", args=None):
- """Nous Portal provider: ensure logged in, then pick model."""
- from hermes_cli.auth import (
- get_provider_auth_state,
- _prompt_model_selection,
- _save_model_choice,
- _update_config_for_provider,
- resolve_nous_runtime_credentials,
- AuthError,
- format_auth_error,
- _login_nous,
- PROVIDER_REGISTRY,
- )
- from hermes_cli.config import (
- get_env_value,
- load_config,
- save_config,
- save_env_value,
- )
- from hermes_cli.nous_subscription import prompt_enable_tool_gateway
-
- state = get_provider_auth_state("nous")
- if not state or not state.get("access_token"):
- print("Not logged into Nous Portal. Starting login...")
- print()
- try:
- mock_args = argparse.Namespace(
- portal_url=getattr(args, "portal_url", None),
- inference_url=getattr(args, "inference_url", None),
- client_id=getattr(args, "client_id", None),
- scope=getattr(args, "scope", None),
- no_browser=bool(getattr(args, "no_browser", False)),
- timeout=getattr(args, "timeout", None) or 15.0,
- ca_bundle=getattr(args, "ca_bundle", None),
- insecure=bool(getattr(args, "insecure", False)),
- )
- _login_nous(mock_args, PROVIDER_REGISTRY["nous"])
- # Offer Tool Gateway enablement for paid subscribers
- try:
- _refreshed = load_config() or {}
- prompt_enable_tool_gateway(_refreshed)
- except Exception:
- pass
- except SystemExit:
- print("Login cancelled or failed.")
- return
- except Exception as exc:
- print(f"Login failed: {exc}")
- return
- # login_nous already handles model selection + config update
- return
-
- # Already logged in — use curated model list (same as OpenRouter defaults).
- # The live /models endpoint returns hundreds of models; the curated list
- # shows only agentic models users recognize from OpenRouter.
- from hermes_cli.models import (
- get_curated_nous_model_ids,
- get_pricing_for_provider,
- check_nous_free_tier,
- partition_nous_models_by_tier,
- union_with_portal_free_recommendations,
- union_with_portal_paid_recommendations,
- )
-
- model_ids = get_curated_nous_model_ids()
- if not model_ids:
- print("No curated models available for Nous Portal.")
- return
-
- # Verify credentials are still valid (catches expired sessions early)
- try:
- creds = resolve_nous_runtime_credentials()
- except Exception as exc:
- relogin = isinstance(exc, AuthError) and exc.relogin_required
- msg = format_auth_error(exc) if isinstance(exc, AuthError) else str(exc)
- if relogin:
- print(f"Session expired: {msg}")
- print("Re-authenticating with Nous Portal...\n")
- try:
- mock_args = argparse.Namespace(
- portal_url=None,
- inference_url=None,
- client_id=None,
- scope=None,
- no_browser=False,
- timeout=15.0,
- ca_bundle=None,
- insecure=False,
- )
- _login_nous(mock_args, PROVIDER_REGISTRY["nous"])
- except Exception as login_exc:
- print(f"Re-login failed: {login_exc}")
- return
- print(f"Could not verify credentials: {msg}")
- return
-
- # Fetch live pricing (non-blocking — returns empty dict on failure)
- pricing = get_pricing_for_provider("nous")
-
- # Force fresh account data for model selection so recent credit purchases
- # are reflected immediately.
- free_tier = check_nous_free_tier(force_fresh=True)
- if not free_tier:
- try:
- refreshed_creds = resolve_nous_runtime_credentials(
- force_refresh=True,
- )
- if refreshed_creds:
- creds = refreshed_creds
- except Exception:
- # Runtime inference has its own paid-entitlement recovery path; do
- # not block model selection if this opportunistic refresh fails.
- pass
-
- # Resolve portal URL early — needed both for upgrade links and for the
- # freeRecommendedModels endpoint below.
- _nous_portal_url = ""
- try:
- _nous_state = get_provider_auth_state("nous")
- if _nous_state:
- _nous_portal_url = _nous_state.get("portal_base_url", "")
- except Exception:
- pass
-
- # For free users: partition models into selectable/unavailable based on
- # whether they are free per the Portal-reported pricing. First augment
- # with the Portal's freeRecommendedModels list so newly-launched free
- # models show up even if this CLI build's hardcoded curated list and
- # docs-hosted manifest haven't caught up yet.
- #
- # For paid users: mirror the same idea with paidRecommendedModels so
- # newly-launched paid models surface in the picker too — independent
- # of CLI release cadence.
- unavailable_models: list[str] = []
- unavailable_message = ""
- if free_tier:
- try:
- from hermes_cli.nous_account import (
- format_nous_portal_entitlement_message,
- get_nous_portal_account_info,
- )
-
- _account_info = get_nous_portal_account_info(force_fresh=True)
- unavailable_message = (
- format_nous_portal_entitlement_message(
- _account_info,
- capability="paid Nous models",
- )
- or ""
- )
- except Exception:
- unavailable_message = ""
- model_ids, pricing = union_with_portal_free_recommendations(
- model_ids, pricing, _nous_portal_url,
- )
- model_ids, unavailable_models = partition_nous_models_by_tier(
- model_ids, pricing, free_tier=True
- )
- else:
- model_ids, pricing = union_with_portal_paid_recommendations(
- model_ids, pricing, _nous_portal_url,
- )
-
- if not model_ids and not unavailable_models:
- print("No models available for Nous Portal after filtering.")
- return
-
- if free_tier and not model_ids:
- print("No free models currently available.")
- if unavailable_models:
- from hermes_cli.auth import DEFAULT_NOUS_PORTAL_URL
-
- _url = (_nous_portal_url or DEFAULT_NOUS_PORTAL_URL).rstrip("/")
- print(unavailable_message or f"Upgrade at {_url} to access paid models.")
- return
-
- print(
- f'Showing {len(model_ids)} curated models — use "Enter custom model name" for others.'
- )
-
- selected = _prompt_model_selection(
- model_ids,
- current_model=current_model,
- pricing=pricing,
- unavailable_models=unavailable_models,
- portal_url=_nous_portal_url,
- unavailable_message=unavailable_message,
- )
- if selected:
- _save_model_choice(selected)
- # Reactivate Nous as the provider and update config
- inference_url = creds.get("base_url", "")
- _update_config_for_provider("nous", inference_url)
- current_model_cfg = config.get("model")
- if isinstance(current_model_cfg, dict):
- model_cfg = dict(current_model_cfg)
- elif isinstance(current_model_cfg, str) and current_model_cfg.strip():
- model_cfg = {"default": current_model_cfg.strip()}
- else:
- model_cfg = {}
- model_cfg["provider"] = "nous"
- model_cfg["default"] = selected
- if inference_url and inference_url.strip():
- model_cfg["base_url"] = inference_url.rstrip("/")
- else:
- model_cfg.pop("base_url", None)
- config["model"] = model_cfg
- # Clear any custom endpoint that might conflict
- if get_env_value("OPENAI_BASE_URL"):
- save_env_value("OPENAI_BASE_URL", "")
- save_env_value("OPENAI_API_KEY", "")
- save_config(config)
- print(f"Default model set to: {selected} (via Nous Portal)")
- # Offer Tool Gateway enablement for paid subscribers
- prompt_enable_tool_gateway(config)
- else:
- print("No change.")
-def _model_flow_openai_codex(config, current_model=""):
- """OpenAI Codex provider: ensure logged in, then pick model."""
- from hermes_cli.auth import (
- get_codex_auth_status,
- _prompt_model_selection,
- _save_model_choice,
- _update_config_for_provider,
- _login_openai_codex,
- PROVIDER_REGISTRY,
- DEFAULT_CODEX_BASE_URL,
- )
- from hermes_cli.codex_models import get_codex_model_ids
-
- status = get_codex_auth_status()
- if status.get("logged_in"):
- print(" OpenAI Codex credentials: ✓")
- print()
- print(" 1. Use existing credentials")
- print(" 2. Reauthenticate (new OAuth login)")
- print(" 3. Cancel")
- print()
- try:
- choice = input(" Choice [1/2/3]: ").strip()
- except (KeyboardInterrupt, EOFError):
- choice = "1"
-
- if choice == "2":
- print("Starting a fresh OpenAI Codex login...")
- print()
- try:
- mock_args = argparse.Namespace()
- _login_openai_codex(
- mock_args,
- PROVIDER_REGISTRY["openai-codex"],
- force_new_login=True,
- )
- except SystemExit:
- print("Login cancelled or failed.")
- return
- except Exception as exc:
- print(f"Login failed: {exc}")
- return
- status = get_codex_auth_status()
- if not status.get("logged_in"):
- print("Login failed.")
- return
- elif choice == "3":
- return
- else:
- print("Not logged into OpenAI Codex. Starting login...")
- print()
- try:
- mock_args = argparse.Namespace()
- _login_openai_codex(mock_args, PROVIDER_REGISTRY["openai-codex"])
- except SystemExit:
- print("Login cancelled or failed.")
- return
- except Exception as exc:
- print(f"Login failed: {exc}")
- return
-
- _codex_token = None
- # Prefer credential pool (where `hermes auth` stores device_code tokens),
- # fall back to legacy provider state.
- try:
- _codex_status = get_codex_auth_status()
- if _codex_status.get("logged_in"):
- _codex_token = _codex_status.get("api_key")
- except Exception:
- pass
- if not _codex_token:
- try:
- from hermes_cli.auth import resolve_codex_runtime_credentials
-
- _codex_creds = resolve_codex_runtime_credentials()
- _codex_token = _codex_creds.get("api_key")
- except Exception:
- pass
-
- codex_models = get_codex_model_ids(access_token=_codex_token)
-
- selected = _prompt_model_selection(codex_models, current_model=current_model)
- if selected:
- _save_model_choice(selected)
- _update_config_for_provider("openai-codex", DEFAULT_CODEX_BASE_URL)
- print(f"Default model set to: {selected} (via OpenAI Codex)")
- else:
- print("No change.")
-def _model_flow_xai_oauth(_config, current_model="", *, args=None):
- """xAI Grok OAuth (SuperGrok / Premium+) provider: ensure logged in, then pick model."""
- from hermes_cli.auth import (
- get_xai_oauth_auth_status,
- _prompt_model_selection,
- _save_model_choice,
- _update_config_for_provider,
- resolve_xai_oauth_runtime_credentials,
- _login_xai_oauth,
- DEFAULT_XAI_OAUTH_BASE_URL,
- PROVIDER_REGISTRY,
- )
- from hermes_cli.models import _PROVIDER_MODELS
-
- status = get_xai_oauth_auth_status()
- if status.get("logged_in"):
- print(" xAI Grok OAuth (SuperGrok / Premium+) credentials: ✓")
- print()
- print(" 1. Use existing credentials")
- print(" 2. Reauthenticate (new OAuth login)")
- print(" 3. Cancel")
- print()
- try:
- choice = input(" Choice [1/2/3]: ").strip()
- except (KeyboardInterrupt, EOFError):
- choice = "1"
-
- if choice == "2":
- print("Starting a fresh xAI OAuth login...")
- print()
- try:
- # Forward CLI flags from ``hermes model --manual-paste``
- # / ``--no-browser`` / ``--timeout`` into the loopback
- # login. Without this, browser-only remotes (#26923)
- # can't reach the manual-paste path via ``hermes model``.
- mock_args = argparse.Namespace(
- manual_paste=bool(getattr(args, "manual_paste", False)),
- no_browser=bool(getattr(args, "no_browser", False)),
- timeout=getattr(args, "timeout", None),
- )
- _login_xai_oauth(
- mock_args,
- PROVIDER_REGISTRY["xai-oauth"],
- force_new_login=True,
- )
- except SystemExit:
- print("Login cancelled or failed.")
- return
- except Exception as exc:
- print(f"Login failed: {exc}")
- return
- elif choice == "3":
- return
- else:
- print("Not logged into xAI Grok OAuth (SuperGrok / Premium+). Starting login...")
- print()
- try:
- mock_args = argparse.Namespace(
- manual_paste=bool(getattr(args, "manual_paste", False)),
- no_browser=bool(getattr(args, "no_browser", False)),
- timeout=getattr(args, "timeout", None),
- )
- _login_xai_oauth(mock_args, PROVIDER_REGISTRY["xai-oauth"])
- except SystemExit:
- print("Login cancelled or failed.")
- return
- except Exception as exc:
- print(f"Login failed: {exc}")
- return
-
- # Resolve a usable base URL. ``resolve_xai_oauth_runtime_credentials``
- # only reads from the auth.json singleton — but credentials may legitimately
- # live only in the pool (e.g. after ``hermes auth add xai-oauth``). Fall
- # back to the default base URL in that case so the model picker still
- # completes successfully instead of bailing out with
- # ``Could not resolve xAI OAuth credentials``.
- base_url = DEFAULT_XAI_OAUTH_BASE_URL
- try:
- creds = resolve_xai_oauth_runtime_credentials()
- base_url = (creds.get("base_url") or "").strip().rstrip("/") or base_url
- except Exception:
- pass
-
- models = list(_PROVIDER_MODELS.get("xai-oauth") or _PROVIDER_MODELS.get("xai") or [])
- selected = _prompt_model_selection(models, current_model=current_model or (models[0] if models else "grok-4.3"))
- if selected:
- _save_model_choice(selected)
- _update_config_for_provider("xai-oauth", base_url)
- print(f"Default model set to: {selected} (via xAI Grok OAuth — SuperGrok / Premium+)")
- else:
- print("No change.")
_DEFAULT_QWEN_PORTAL_MODELS = [
@@ -3810,390 +3379,12 @@ _DEFAULT_QWEN_PORTAL_MODELS = [
]
-def _model_flow_qwen_oauth(_config, current_model=""):
- """Qwen OAuth provider: reuse local Qwen CLI login, then pick model."""
- from hermes_cli.auth import (
- get_qwen_auth_status,
- resolve_qwen_runtime_credentials,
- _prompt_model_selection,
- _save_model_choice,
- _update_config_for_provider,
- DEFAULT_QWEN_BASE_URL,
- )
- from hermes_cli.models import fetch_api_models
-
- status = get_qwen_auth_status()
- if not status.get("logged_in"):
- print("Not logged into Qwen CLI OAuth.")
- print("Run: qwen auth qwen-oauth")
- auth_file = status.get("auth_file")
- if auth_file:
- print(f"Expected credentials file: {auth_file}")
- if status.get("error"):
- print(f"Error: {status.get('error')}")
- return
-
- # Try live model discovery, fall back to curated list.
- models = None
- try:
- creds = resolve_qwen_runtime_credentials(refresh_if_expiring=True)
- models = fetch_api_models(creds["api_key"], creds["base_url"])
- except Exception:
- pass
- if not models:
- models = list(_DEFAULT_QWEN_PORTAL_MODELS)
-
- default = current_model or (models[0] if models else "qwen3-coder-plus")
- selected = _prompt_model_selection(models, current_model=default)
- if selected:
- _save_model_choice(selected)
- _update_config_for_provider("qwen-oauth", DEFAULT_QWEN_BASE_URL)
- print(f"Default model set to: {selected} (via Qwen OAuth)")
- else:
- print("No change.")
-def _model_flow_minimax_oauth(config, current_model="", args=None):
- """MiniMax OAuth provider: ensure logged in, then pick model."""
- from hermes_cli.auth import (
- get_provider_auth_state,
- _prompt_model_selection,
- _save_model_choice,
- _update_config_for_provider,
- resolve_minimax_oauth_runtime_credentials,
- AuthError,
- format_auth_error,
- _login_minimax_oauth,
- PROVIDER_REGISTRY,
- )
-
- state = get_provider_auth_state("minimax-oauth")
- if not state or not state.get("access_token"):
- print("Not logged into MiniMax. Starting OAuth login...")
- print()
- try:
- mock_args = argparse.Namespace(
- region=getattr(args, "region", None) or "global",
- no_browser=bool(getattr(args, "no_browser", False)),
- timeout=getattr(args, "timeout", None) or 15.0,
- )
- _login_minimax_oauth(mock_args, PROVIDER_REGISTRY["minimax-oauth"])
- except SystemExit:
- print("Login cancelled or failed.")
- return
- except Exception as exc:
- print(f"Login failed: {exc}")
- return
-
- try:
- creds = resolve_minimax_oauth_runtime_credentials()
- except AuthError as exc:
- print(format_auth_error(exc))
- return
-
- from hermes_cli.models import _PROVIDER_MODELS
-
- model_ids = _PROVIDER_MODELS.get("minimax-oauth", [])
- selected = _prompt_model_selection(model_ids, current_model)
- if not selected:
- return
- _save_model_choice(selected)
- _update_config_for_provider("minimax-oauth", creds["base_url"])
- print(f"\u2713 Using MiniMax model: {selected}")
-def _model_flow_google_gemini_cli(_config, current_model=""):
- """Google Gemini OAuth (PKCE) via Cloud Code Assist — supports free AND paid tiers.
-
- Flow:
- 1. Show upfront warning about Google's ToS stance (per opencode-gemini-auth).
- 2. If creds missing, run PKCE browser OAuth via agent.google_oauth.
- 3. Resolve project context (env -> config -> auto-discover -> free tier).
- 4. Prompt user to pick a model.
- 5. Save to ~/.hermes/config.yaml.
- """
- from hermes_cli.auth import (
- DEFAULT_GEMINI_CLOUDCODE_BASE_URL,
- get_gemini_oauth_auth_status,
- resolve_gemini_oauth_runtime_credentials,
- _prompt_model_selection,
- _save_model_choice,
- _update_config_for_provider,
- )
- from hermes_cli.models import _PROVIDER_MODELS
-
- print()
- print("⚠ Google considers using the Gemini CLI OAuth client with third-party")
- print(" software a policy violation. Some users have reported account")
- print(" restrictions. You can use your own API key via 'gemini' provider")
- print(" for the lowest-risk experience.")
- print()
- try:
- proceed = input("Continue with OAuth login? [y/N]: ").strip().lower()
- except (EOFError, KeyboardInterrupt):
- print("Cancelled.")
- return
- if proceed not in {"y", "yes"}:
- print("Cancelled.")
- return
-
- status = get_gemini_oauth_auth_status()
- if not status.get("logged_in"):
- try:
- from agent.google_oauth import resolve_project_id_from_env, start_oauth_flow
-
- env_project = resolve_project_id_from_env()
- start_oauth_flow(force_relogin=True, project_id=env_project)
- except Exception as exc:
- print(f"OAuth login failed: {exc}")
- return
-
- # Verify creds resolve + trigger project discovery
- try:
- creds = resolve_gemini_oauth_runtime_credentials(force_refresh=False)
- project_id = creds.get("project_id", "")
- if project_id:
- print(f" Using GCP project: {project_id}")
- else:
- print(
- " No GCP project configured — free tier will be auto-provisioned on first request."
- )
- except Exception as exc:
- print(f"Failed to resolve Gemini credentials: {exc}")
- return
-
- models = list(_PROVIDER_MODELS.get("google-gemini-cli") or [])
- default = current_model or (models[0] if models else "gemini-3-flash-preview")
- selected = _prompt_model_selection(models, current_model=default)
- if selected:
- _save_model_choice(selected)
- _update_config_for_provider(
- "google-gemini-cli", DEFAULT_GEMINI_CLOUDCODE_BASE_URL
- )
- print(
- f"Default model set to: {selected} (via Google Gemini OAuth / Code Assist)"
- )
- else:
- print("No change.")
-def _model_flow_custom(config):
- """Custom endpoint: collect URL, API key, and model name.
-
- Automatically saves the endpoint to ``custom_providers`` in config.yaml
- so it appears in the provider menu on subsequent runs.
- """
- from hermes_cli.auth import _save_model_choice, deactivate_provider
- from hermes_cli.config import get_env_value, load_config, save_config
- from hermes_cli.secret_prompt import masked_secret_prompt
-
- current_url = get_env_value("OPENAI_BASE_URL") or ""
- current_key = get_env_value("OPENAI_API_KEY") or ""
-
- print("Custom OpenAI-compatible endpoint configuration:")
- if current_url:
- print(f" Current URL: {current_url}")
- if current_key:
- print(f" Current key: {current_key[:8]}...")
- print()
-
- try:
- base_url = input(
- f"API base URL [{current_url or 'e.g. https://api.example.com/v1'}]: "
- ).strip()
- api_key = masked_secret_prompt(
- f"API key [{current_key[:8] + '...' if current_key else 'optional'}]: "
- ).strip()
- except (KeyboardInterrupt, EOFError):
- print("\nCancelled.")
- return
-
- if not base_url and not current_url:
- print("No URL provided. Cancelled.")
- return
-
- # Validate URL format
- effective_url = base_url or current_url
- if not effective_url.startswith(("http://", "https://")):
- print(f"Invalid URL: {effective_url} (must start with http:// or https://)")
- return
-
- effective_key = api_key or current_key
-
- # Hint: most local model servers (Ollama, vLLM, llama.cpp) require /v1
- # in the base URL for OpenAI-compatible chat completions. Prompt the
- # user if the URL looks like a local server without /v1.
- _url_lower = effective_url.rstrip("/").lower()
- _looks_local = any(
- h in _url_lower
- for h in ("localhost", "127.0.0.1", "0.0.0.0", ":11434", ":8080", ":5000")
- )
- if _looks_local and not _url_lower.endswith("/v1"):
- print()
- print(f" Hint: Did you mean to add /v1 at the end?")
- print(f" Most local model servers (Ollama, vLLM, llama.cpp) require it.")
- print(f" e.g. {effective_url.rstrip('/')}/v1")
- try:
- _add_v1 = input(" Add /v1? [Y/n]: ").strip().lower()
- except (KeyboardInterrupt, EOFError):
- _add_v1 = "n"
- if _add_v1 in {"", "y", "yes"}:
- effective_url = effective_url.rstrip("/") + "/v1"
- if base_url:
- base_url = effective_url
- print(f" Updated URL: {effective_url}")
- print()
-
- from hermes_cli.models import probe_api_models
-
- probe = probe_api_models(effective_key, effective_url)
- if probe.get("used_fallback") and probe.get("resolved_base_url"):
- print(
- f"Warning: endpoint verification worked at {probe['resolved_base_url']}/models, "
- f"not the exact URL you entered. Saving the working base URL instead."
- )
- effective_url = probe["resolved_base_url"]
- if base_url:
- base_url = effective_url
- elif probe.get("models") is not None:
- print(
- f"Verified endpoint via {probe.get('probed_url')} "
- f"({len(probe.get('models') or [])} model(s) visible)"
- )
- else:
- print(
- f"Warning: could not verify this endpoint via {probe.get('probed_url')}. "
- f"Hermes will still save it."
- )
- if probe.get("suggested_base_url"):
- suggested = probe["suggested_base_url"]
- if suggested.endswith("/v1"):
- print(
- f" If this server expects /v1 in the path, try base URL: {suggested}"
- )
- else:
- print(f" If /v1 should not be in the base URL, try: {suggested}")
-
- # Prompt for API compatibility mode explicitly so codex-compatible custom
- # providers don't silently fall back to chat_completions.
- current_model_cfg = config.get("model")
- current_api_mode = ""
- if isinstance(current_model_cfg, dict):
- current_api_mode = str(current_model_cfg.get("api_mode") or "").strip()
- api_mode = _prompt_custom_api_mode_selection(
- effective_url,
- current_api_mode=current_api_mode,
- )
- if api_mode:
- print(f" API mode: {api_mode}")
- else:
- print(" API mode: auto-detect")
-
- # Select model — use probe results when available, fall back to manual input
- model_name = ""
- detected_models = probe.get("models") or []
- try:
- if len(detected_models) == 1:
- print(f" Detected model: {detected_models[0]}")
- confirm = input(" Use this model? [Y/n]: ").strip().lower()
- if confirm in {"", "y", "yes"}:
- model_name = detected_models[0]
- else:
- model_name = input("Model name (e.g. gpt-4, llama-3-70b): ").strip()
- elif len(detected_models) > 1:
- print(" Available models:")
- for i, m in enumerate(detected_models, 1):
- print(f" {i}. {m}")
- pick = input(
- f" Select model [1-{len(detected_models)}] or type name: "
- ).strip()
- if pick.isdigit() and 1 <= int(pick) <= len(detected_models):
- model_name = detected_models[int(pick) - 1]
- elif pick:
- model_name = pick
- else:
- model_name = input("Model name (e.g. gpt-4, llama-3-70b): ").strip()
-
- context_length_str = input(
- "Context length in tokens [leave blank for auto-detect]: "
- ).strip()
-
- # Prompt for a display name — shown in the provider menu on future runs
- default_name = _auto_provider_name(effective_url)
- display_name = input(f"Display name [{default_name}]: ").strip() or default_name
- except (KeyboardInterrupt, EOFError):
- print("\nCancelled.")
- return
-
- context_length = None
- if context_length_str:
- try:
- context_length = int(
- context_length_str.replace(",", "")
- .replace("k", "000")
- .replace("K", "000")
- )
- if context_length <= 0:
- context_length = None
- except ValueError:
- print(f"Invalid context length: {context_length_str} — will auto-detect.")
- context_length = None
-
- if model_name:
- _save_model_choice(model_name)
-
- # Update config and deactivate any OAuth provider
- cfg = load_config()
- model = cfg.get("model")
- if not isinstance(model, dict):
- model = {"default": model} if model else {}
- cfg["model"] = model
- model["provider"] = "custom"
- model["base_url"] = effective_url
- if effective_key:
- model["api_key"] = effective_key
- if api_mode:
- model["api_mode"] = api_mode
- else:
- model.pop("api_mode", None)
- save_config(cfg)
- deactivate_provider()
-
- # Sync the caller's config dict so the setup wizard's final
- # save_config(config) preserves our model settings. Without
- # this, the wizard overwrites model.provider/base_url with
- # the stale values from its own config dict (#4172).
- config["model"] = dict(model)
-
- print(f"Default model set to: {model_name} (via {effective_url})")
- else:
- if base_url or api_key:
- deactivate_provider()
- # Even without a model name, persist the custom endpoint on the
- # caller's config dict so the setup wizard doesn't lose it.
- _caller_model = config.get("model")
- if not isinstance(_caller_model, dict):
- _caller_model = {"default": _caller_model} if _caller_model else {}
- _caller_model["provider"] = "custom"
- _caller_model["base_url"] = effective_url
- if effective_key:
- _caller_model["api_key"] = effective_key
- if api_mode:
- _caller_model["api_mode"] = api_mode
- else:
- _caller_model.pop("api_mode", None)
- config["model"] = _caller_model
- print("Endpoint saved. Use `/model` in chat or `hermes model` to set a model.")
-
- # Auto-save to custom_providers so it appears in the menu next time
- _save_custom_provider(
- effective_url,
- effective_key,
- model_name or "",
- context_length=context_length,
- name=display_name,
- api_mode=api_mode,
- )
def _prompt_custom_api_mode_selection(base_url: str, current_api_mode: str = "") -> Optional[str]:
@@ -4372,372 +3563,6 @@ def _save_custom_provider(
print(f' 💾 Saved to custom providers as "{name}" (edit in config.yaml)')
-def _model_flow_azure_foundry(config, current_model=""):
- """Azure Foundry provider: configure endpoint, auth mode, API mode, and model.
-
- Azure Foundry supports both OpenAI-style (``/v1/chat/completions``) and
- Anthropic-style (``/v1/messages``) endpoints, and two authentication
- modes:
-
- * **API key** (default) — uses ``AZURE_FOUNDRY_API_KEY`` from .env.
- * **Microsoft Entra ID** — keyless, RBAC-based auth via the
- ``azure-identity`` SDK (Managed Identity / Workload Identity / az
- login / VS Code / azd / service principal env vars). Works on both
- OpenAI-style and Anthropic-style endpoints — Microsoft RBAC is
- per-resource and the same ``Azure AI User`` role grants
- both. For OpenAI-style the OpenAI SDK's native callable
- ``api_key=`` contract is used; for Anthropic-style an
- ``httpx.Client`` with a request event hook (built by
- :func:`agent.azure_identity_adapter.build_bearer_http_client`)
- mints a fresh JWT per request because the Anthropic SDK does not
- accept a callable ``auth_token`` natively.
-
- The wizard auto-detects the transport and available models when
- possible:
-
- * URLs ending in ``/anthropic`` → Anthropic Messages API.
- * Successful ``GET /models`` probe → OpenAI-style + populates
- a picker with the returned deployment / model IDs.
- * Anthropic Messages probe fallback when ``/models`` fails.
- * Manual entry when every probe fails (private endpoints, etc.).
-
- Context lengths for the chosen model are resolved via the standard
- :func:`agent.model_metadata.get_model_context_length` chain
- (models.dev, provider metadata, hardcoded family fallbacks).
- """
- from hermes_cli.auth import _save_model_choice, deactivate_provider # noqa: F401
- from hermes_cli.config import (
- get_env_value,
- save_env_value,
- load_config,
- save_config,
- )
- from hermes_cli import azure_detect
-
- # ── Load current Azure Foundry configuration ─────────────────────
- model_cfg = config.get("model", {})
- if isinstance(model_cfg, dict) and model_cfg.get("provider") == "azure-foundry":
- current_base_url = str(model_cfg.get("base_url", "") or "")
- current_api_mode = str(model_cfg.get("api_mode", "") or "")
- current_auth_mode = str(model_cfg.get("auth_mode") or "api_key").strip().lower() or "api_key"
- _cur_entra = model_cfg.get("entra") or {}
- current_entra = _cur_entra if isinstance(_cur_entra, dict) else {}
- else:
- current_base_url = ""
- current_api_mode = ""
- current_auth_mode = "api_key"
- current_entra = {}
-
- current_api_key = get_env_value("AZURE_FOUNDRY_API_KEY") or ""
-
- print()
- print("Azure Foundry Configuration")
- print("=" * 50)
- print()
- print("Azure Foundry can host models with either OpenAI-style or")
- print("Anthropic-style API endpoints. Hermes will probe your")
- print("endpoint to auto-detect the transport and the deployed")
- print("models when possible.")
- print()
-
- if current_base_url:
- print(f" Current endpoint: {current_base_url}")
- if current_api_mode:
- _lbl = (
- "OpenAI-style"
- if current_api_mode == "chat_completions"
- else "Anthropic-style"
- )
- print(f" Current API mode: {_lbl}")
- if current_auth_mode == "entra_id":
- print(f" Current auth mode: Microsoft Entra ID (keyless)")
- elif current_api_key:
- print(f" Current auth mode: API key ({current_api_key[:8]}...)")
- print()
-
- # ── Step 1: endpoint URL ─────────────────────────────────────────
- try:
- _placeholder = (
- current_base_url
- or "e.g. https://.openai.azure.com/openai/v1 "
- "or https://.services.ai.azure.com/anthropic"
- )
- base_url = input(
- f"API endpoint URL [{_placeholder}]: "
- ).strip()
- except (KeyboardInterrupt, EOFError):
- print("\nCancelled.")
- return
-
- effective_url = (base_url or current_base_url).rstrip("/")
- if not effective_url:
- print("No endpoint URL provided. Cancelled.")
- return
- if not effective_url.startswith(("http://", "https://")):
- print(f"Invalid URL: {effective_url} (must start with http:// or https://)")
- return
-
- # ── Step 2: authentication mode ──────────────────────────────────
- print()
- print("Authentication:")
- print(" 1. API key (AZURE_FOUNDRY_API_KEY in .env)")
- print(" 2. Microsoft Entra ID (managed identity / workload identity / az login)")
- print(" Recommended by Microsoft. Works for both OpenAI-style and Anthropic-style endpoints.")
- print(" Requires the 'Azure AI User' role on the Foundry resource.")
- try:
- _auth_default = "2" if current_auth_mode == "entra_id" else "1"
- auth_choice = (
- input(f"Authentication mode [1/2] ({_auth_default}): ").strip()
- or _auth_default
- )
- except (KeyboardInterrupt, EOFError):
- print("\nCancelled.")
- return
- use_entra = auth_choice == "2"
- auth_mode_label = "entra_id" if use_entra else "api_key"
-
- # ── Step 3: credentials (key OR Entra preflight) ─────────────────
- effective_key: str = ""
- entra_overrides: dict = {}
- token_provider = None # callable when entra
- entra_scope = ""
-
- if use_entra:
- try:
- from agent.azure_identity_adapter import (
- EntraIdentityConfig,
- SCOPE_AI_AZURE_DEFAULT,
- build_token_provider,
- describe_active_credential,
- has_azure_identity_installed,
- )
- except ImportError as exc:
- print()
- print(f"⚠ Could not import azure-identity adapter: {exc}")
- print(" Falling back to API key auth.")
- use_entra = False
- auth_mode_label = "api_key"
-
- if use_entra:
- print()
- if not has_azure_identity_installed():
- print("◐ The 'azure-identity' package is not installed yet.")
- print(
- " Hermes will install it now (the preflight below "
- "triggers the lazy-install). To skip lazy installs, "
- "run: pip install azure-identity"
- )
-
- # Preserve only the optional scope override. Identity selection
- # (tenant, user-assigned MI, workload identity, service principal)
- # stays in Azure SDK env vars such as AZURE_CLIENT_ID.
- _persisted_scope_override = str(current_entra.get("scope") or "").strip()
- entra_scope = _persisted_scope_override or SCOPE_AI_AZURE_DEFAULT
-
- entra_overrides = {}
- if _persisted_scope_override:
- entra_overrides["scope"] = _persisted_scope_override
-
- print()
- print("◐ Probing Microsoft Entra ID credential chain (up to 10s)...")
- _config = EntraIdentityConfig(
- scope=entra_scope,
- )
- info = describe_active_credential(config=_config, timeout_seconds=10.0)
- if info.get("ok"):
- env_sources = info.get("env_sources") or []
- tag = ", ".join(env_sources) if env_sources else "default chain"
- print(f"✓ Entra ID token acquired ({tag}, scope={entra_scope})")
- else:
- err = info.get("error") or "credential chain exhausted"
- hint = info.get("hint") or (
- "Run `az login`, attach a managed identity to this VM, or "
- "set AZURE_TENANT_ID/AZURE_CLIENT_ID/AZURE_CLIENT_SECRET."
- )
- print(f"⚠ {err}")
- print(f" Hint: {hint}")
- try:
- ans = input("Save Entra config anyway and validate later? [Y/n]: ").strip().lower()
- except (KeyboardInterrupt, EOFError):
- print("\nCancelled.")
- return
- if ans and ans not in ("y", "yes"):
- print("Cancelled.")
- return
-
- # Build the token provider for the detection probe (best-effort —
- # if the credential chain failed above, this will silently return
- # None inside azure_detect and the probe falls back to manual).
- try:
- token_provider = build_token_provider(config=_config)
- except Exception as exc:
- print(f"⚠ Could not build token provider for probing: {exc}")
- token_provider = None
- else:
- print()
- from hermes_cli.secret_prompt import masked_secret_prompt
-
- try:
- api_key = masked_secret_prompt(
- f"API key [{current_api_key[:8] + '...' if current_api_key else 'required'}]: "
- ).strip()
- except (KeyboardInterrupt, EOFError):
- print("\nCancelled.")
- return
-
- effective_key = api_key or current_api_key
- if not effective_key:
- print("No API key provided. Cancelled.")
- return
-
- # ── Step 4: auto-detect transport + models ───────────────────────
- print()
- print("◐ Probing endpoint to auto-detect transport and models...")
- detection = azure_detect.detect(
- effective_url,
- api_key=effective_key,
- token_provider=token_provider,
- )
-
- discovered_models: list[str] = list(detection.models)
- api_mode: str = detection.api_mode or ""
-
- if api_mode:
- mode_label = (
- "OpenAI-style" if api_mode == "chat_completions" else "Anthropic-style"
- )
- print(f"✓ Detected API transport: {mode_label}")
- if detection.reason:
- print(f" ({detection.reason})")
- if discovered_models:
- print(
- f"✓ Found {len(discovered_models)} deployed model(s) on this endpoint"
- )
- else:
- print(f"⚠ Auto-detection incomplete: {detection.reason}")
- print()
- print("Select the API format your Azure Foundry endpoint uses:")
- print(" 1. OpenAI-style (POST /v1/chat/completions)")
- print(" For: GPT models, Llama, Mistral, and most open models")
- print(" 2. Anthropic-style (POST /v1/messages)")
- print(" For: Claude models deployed via Anthropic API format")
- try:
- default_choice = "2" if current_api_mode == "anthropic_messages" else "1"
- mode_choice = (
- input(f"API format [1/2] ({default_choice}): ").strip()
- or default_choice
- )
- except (KeyboardInterrupt, EOFError):
- print("\nCancelled.")
- return
- api_mode = "anthropic_messages" if mode_choice == "2" else "chat_completions"
-
- # ── Step 5: model name ───────────────────────────────────────────
- print()
- effective_model = ""
- if discovered_models:
- print("Available models on this endpoint:")
- for i, mid in enumerate(discovered_models[:30], start=1):
- print(f" {i:>2}. {mid}")
- if len(discovered_models) > 30:
- print(
- f" ... and {len(discovered_models) - 30} more (type name manually if not shown)"
- )
- print()
- try:
- pick = input(
- f"Pick by number, or type a deployment name [{current_model or discovered_models[0]}]: "
- ).strip()
- except (KeyboardInterrupt, EOFError):
- print("\nCancelled.")
- return
- if not pick:
- effective_model = current_model or discovered_models[0]
- elif pick.isdigit() and 1 <= int(pick) <= min(len(discovered_models), 30):
- effective_model = discovered_models[int(pick) - 1]
- else:
- effective_model = pick
- else:
- try:
- model_name = input(
- f"Model / deployment name [{current_model or 'e.g. gpt-5.4, claude-sonnet-4-6'}]: "
- ).strip()
- except (KeyboardInterrupt, EOFError):
- print("\nCancelled.")
- return
- effective_model = model_name or current_model
-
- if not effective_model:
- print("No model name provided. Cancelled.")
- return
-
- # ── Step 6: context-length lookup ────────────────────────────────
- ctx_len = azure_detect.lookup_context_length(
- effective_model,
- effective_url,
- api_key=effective_key,
- token_provider=token_provider,
- )
-
- # ── Step 7: persist ──────────────────────────────────────────────
- if not use_entra:
- save_env_value("AZURE_FOUNDRY_API_KEY", effective_key)
-
- cfg = load_config()
- model = cfg.get("model")
- if not isinstance(model, dict):
- model = {"default": model} if model else {}
- cfg["model"] = model
-
- model["provider"] = "azure-foundry"
- model["base_url"] = effective_url
- model["api_mode"] = api_mode
- model["default"] = effective_model
- model["auth_mode"] = auth_mode_label
- if use_entra:
- # Persist only the non-default Entra scope so config.yaml stays tidy.
- # Azure identity selection stays in standard AZURE_* env vars.
- clean_entra: dict = {}
- for key in ("scope",):
- val = entra_overrides.get(key)
- if val:
- clean_entra[key] = val
- if clean_entra:
- model["entra"] = clean_entra
- elif "entra" in model:
- del model["entra"]
- else:
- if "entra" in model:
- del model["entra"]
- if ctx_len:
- model["context_length"] = ctx_len
-
- save_config(cfg)
- deactivate_provider()
- config["model"] = dict(model)
-
- # Clear any conflicting env vars so auxiliary clients don't poison
- # themselves with a stale OpenAI base URL / key.
- if get_env_value("OPENAI_BASE_URL"):
- save_env_value("OPENAI_BASE_URL", "")
- if get_env_value("OPENAI_API_KEY"):
- save_env_value("OPENAI_API_KEY", "")
-
- mode_label = "OpenAI-style" if api_mode == "chat_completions" else "Anthropic-style"
- auth_label = (
- "Microsoft Entra ID (keyless)" if use_entra else "API key"
- )
- print()
- print("✓ Azure Foundry configured:")
- print(f" Endpoint: {effective_url}")
- print(f" API mode: {mode_label}")
- print(f" Auth: {auth_label}")
- print(f" Model: {effective_model}")
- if ctx_len:
- print(f" Context length: {ctx_len:,} tokens")
- else:
- print(" Context length: not auto-detected (will fall back at runtime)")
- print()
def _remove_custom_provider(config):
@@ -4798,196 +3623,6 @@ def _remove_custom_provider(config):
print(f'✅ Removed "{removed_name}" from custom providers.')
-def _model_flow_named_custom(config, provider_info):
- """Handle a named custom provider from config.yaml custom_providers list.
-
- Always probes the endpoint's /models API to let the user pick a model.
- If a model was previously saved, it is pre-selected in the menu.
- Falls back to the saved model if probing fails.
- """
- from hermes_cli.auth import _save_model_choice, deactivate_provider
- from hermes_cli.config import load_config, save_config
- from hermes_cli.models import fetch_api_models
-
- name = provider_info["name"]
- base_url = provider_info["base_url"]
- api_mode = provider_info.get("api_mode", "")
- api_key = provider_info.get("api_key", "")
- key_env = provider_info.get("key_env", "")
- saved_model = provider_info.get("model", "")
- provider_key = (provider_info.get("provider_key") or "").strip()
-
- # Resolve key from env var if api_key not set directly
- if not api_key and key_env:
- api_key = os.environ.get(key_env, "")
- config_api_key = _custom_provider_api_key_config_value(provider_info, api_key)
-
- # Honor ``discover_models: false`` (default True) — when discovery is
- # disabled, use the configured ``models:`` list verbatim and skip the
- # live /models probe. This lets operators restrict the picker to the
- # subset their plan actually serves instead of the endpoint's full
- # catalog (#18726: Baidu Qianfan returns 100+ models for a 2-3 model
- # plan). Same semantics as the slash-command picker (model_switch.py
- # sections 3 & 4): default discovers, false keeps the explicit list.
- discover = provider_info.get("discover_models", True)
- if isinstance(discover, str):
- discover = discover.lower() not in {"false", "no", "0"}
- configured_models: list[str] = []
- cfg_models = provider_info.get("models", {})
- if isinstance(cfg_models, dict):
- configured_models = [str(m) for m in cfg_models if str(m).strip()]
- elif isinstance(cfg_models, list):
- configured_models = [
- str(m) for m in cfg_models if isinstance(m, str) and m.strip()
- ]
-
- print(f" Provider: {name}")
- print(f" URL: {base_url}")
- if saved_model:
- print(f" Current: {saved_model}")
- print()
-
- if not discover and configured_models:
- # Discovery disabled with an explicit list — use it verbatim, no probe.
- print(f"Using configured models (discover_models: false): {len(configured_models)}")
- models = configured_models
- else:
- print("Fetching available models...")
- fetch_kwargs = {"timeout": 8.0}
- if api_mode:
- fetch_kwargs["api_mode"] = api_mode
- models = fetch_api_models(api_key, base_url, **fetch_kwargs)
- # If the probe came back empty but the operator configured an explicit
- # list, fall back to it rather than forcing manual entry.
- if not models and configured_models:
- models = configured_models
-
- if models:
- default_idx = 0
- if saved_model and saved_model in models:
- default_idx = models.index(saved_model)
-
- print(f"Found {len(models)} model(s):\n")
- try:
- from hermes_cli.curses_ui import curses_radiolist
-
- menu_items = [
- f"{m} (current)" if m == saved_model else m for m in models
- ] + ["Cancel"]
- idx = curses_radiolist(
- f"Select model from {name}:",
- menu_items,
- selected=default_idx,
- cancel_returns=-1,
- searchable=True,
- )
- print()
- if idx < 0 or idx >= len(models):
- print("Cancelled.")
- return
- model_name = models[idx]
- except (ImportError, NotImplementedError, OSError, subprocess.SubprocessError):
- for i, m in enumerate(models, 1):
- suffix = " (current)" if m == saved_model else ""
- print(f" {i}. {m}{suffix}")
- print(f" {len(models) + 1}. Cancel")
- print()
- try:
- val = input(f"Choice [1-{len(models) + 1}]: ").strip()
- if not val:
- print("Cancelled.")
- return
- idx = int(val) - 1
- if idx < 0 or idx >= len(models):
- print("Cancelled.")
- return
- model_name = models[idx]
- except (ValueError, KeyboardInterrupt, EOFError):
- print("\nCancelled.")
- return
- elif saved_model:
- print("Could not fetch models from endpoint.")
- try:
- model_name = input(f"Model name [{saved_model}]: ").strip() or saved_model
- except (KeyboardInterrupt, EOFError):
- print("\nCancelled.")
- return
- else:
- print("Could not fetch models from endpoint. Enter model name manually.")
- try:
- model_name = input("Model name: ").strip()
- except (KeyboardInterrupt, EOFError):
- print("\nCancelled.")
- return
- if not model_name:
- print("No model specified. Cancelled.")
- return
-
- # Activate and save the model to the custom_providers entry
- _save_model_choice(model_name)
-
- cfg = load_config()
- model = cfg.get("model")
- if not isinstance(model, dict):
- model = {"default": model} if model else {}
- cfg["model"] = model
- if provider_key:
- model["provider"] = provider_key
- model.pop("base_url", None)
- model.pop("api_key", None)
- else:
- model["provider"] = "custom"
- model["base_url"] = _custom_provider_base_url_config_value(
- provider_info, base_url
- )
- if config_api_key:
- model["api_key"] = config_api_key
- # Apply api_mode from custom_providers entry, or clear stale value
- custom_api_mode = provider_info.get("api_mode", "")
- if custom_api_mode:
- model["api_mode"] = custom_api_mode
- else:
- model.pop("api_mode", None) # let runtime auto-detect from URL
- save_config(cfg)
- deactivate_provider()
-
- # Persist the selected model back to whichever schema owns this endpoint.
- if provider_key:
- cfg = load_config()
- providers_cfg = cfg.get("providers")
- if isinstance(providers_cfg, dict):
- provider_entry = providers_cfg.get(provider_key)
- if isinstance(provider_entry, dict):
- provider_entry["default_model"] = model_name
- # Only persist an inline api_key when the user originally had
- # one (either a literal secret or a ``${VAR}`` template). When
- # the entry relies on ``key_env``, do not synthesize a
- # ``${key_env}`` api_key — the runtime already resolves the
- # key from ``key_env`` directly, and writing the resolved
- # secret (or even a synthesized template) would silently
- # downgrade credential hygiene on entries that intentionally
- # keep plaintext out of ``config.yaml``. See issue #15803.
- original_api_key_ref = str(
- provider_info.get("api_key_ref", "") or ""
- ).strip()
- original_api_key = str(provider_info.get("api_key", "") or "").strip()
- had_inline_api_key = bool(original_api_key_ref or original_api_key)
- if (
- had_inline_api_key
- and config_api_key
- and not str(provider_entry.get("api_key", "") or "").strip()
- ):
- provider_entry["api_key"] = config_api_key
- if key_env and not str(provider_entry.get("key_env", "") or "").strip():
- provider_entry["key_env"] = key_env
- cfg["providers"] = providers_cfg
- save_config(cfg)
- else:
- # Save model name to the custom_providers entry for next time
- _save_custom_provider(base_url, config_api_key, model_name, api_mode=api_mode)
-
- print(f"\n✅ Model set to: {model_name}")
- print(f" Provider: {name} ({base_url})")
# Lazy-export the model catalog at module level. Tests and a handful of
@@ -5108,312 +3743,8 @@ def _prompt_reasoning_effort_selection(efforts, current_effort=""):
return None
-def _model_flow_copilot(config, current_model=""):
- """GitHub Copilot flow using env vars, gh CLI, or OAuth device code."""
- from hermes_cli.auth import (
- PROVIDER_REGISTRY,
- _prompt_model_selection,
- _save_model_choice,
- deactivate_provider,
- resolve_api_key_provider_credentials,
- )
- from hermes_cli.config import save_env_value, load_config, save_config
- from hermes_cli.models import (
- _PROVIDER_MODELS,
- fetch_api_models,
- fetch_github_model_catalog,
- github_model_reasoning_efforts,
- copilot_model_api_mode,
- normalize_copilot_model_id,
- )
-
- provider_id = "copilot"
- pconfig = PROVIDER_REGISTRY[provider_id]
-
- creds = resolve_api_key_provider_credentials(provider_id)
- api_key = creds.get("api_key", "")
- source = creds.get("source", "")
-
- if not api_key:
- print("No GitHub token configured for GitHub Copilot.")
- print()
- print(" Supported token types:")
- print(
- " → OAuth token (gho_*) via `copilot login` or device code flow"
- )
- print(" → Fine-grained PAT (github_pat_*) with Copilot Requests permission")
- print(" → GitHub App token (ghu_*) via environment variable")
- print(" ✗ Classic PAT (ghp_*) NOT supported by Copilot API")
- print()
- print(" Options:")
- print(" 1. Login with GitHub (OAuth device code flow)")
- print(" 2. Enter a token manually")
- print(" 3. Cancel")
- print()
- try:
- choice = input(" Choice [1-3]: ").strip()
- except (KeyboardInterrupt, EOFError):
- print()
- return
-
- if choice == "1":
- try:
- from hermes_cli.copilot_auth import copilot_device_code_login
-
- token = copilot_device_code_login()
- if token:
- save_env_value("COPILOT_GITHUB_TOKEN", token)
- print(" Copilot token saved.")
- print()
- else:
- print(" Login cancelled or failed.")
- return
- except Exception as exc:
- print(f" Login failed: {exc}")
- return
- elif choice == "2":
- from hermes_cli.secret_prompt import masked_secret_prompt
-
- try:
- new_key = masked_secret_prompt(" Token (COPILOT_GITHUB_TOKEN): ").strip()
- except (KeyboardInterrupt, EOFError):
- print()
- return
- if not new_key:
- print(" Cancelled.")
- return
- # Validate token type
- try:
- from hermes_cli.copilot_auth import validate_copilot_token
-
- valid, msg = validate_copilot_token(new_key)
- if not valid:
- print(f" ✗ {msg}")
- return
- except ImportError:
- pass
- save_env_value("COPILOT_GITHUB_TOKEN", new_key)
- print(" Token saved.")
- print()
- else:
- print(" Cancelled.")
- return
-
- creds = resolve_api_key_provider_credentials(provider_id)
- api_key = creds.get("api_key", "")
- source = creds.get("source", "")
- else:
- if source in {"GITHUB_TOKEN", "GH_TOKEN"}:
- from hermes_cli.env_loader import format_secret_source_suffix
- bw_suffix = format_secret_source_suffix(source)
- print(f" GitHub token: {api_key[:8]}... ✓ ({source}{bw_suffix})")
- elif source == "gh auth token":
- print(" GitHub token: ✓ (from `gh auth token`)")
- else:
- print(" GitHub token: ✓")
- print()
-
- effective_base = pconfig.inference_base_url
-
- catalog = fetch_github_model_catalog(api_key)
- live_models = (
- [item.get("id", "") for item in catalog if item.get("id")]
- if catalog
- else fetch_api_models(api_key, effective_base)
- )
- normalized_current_model = (
- normalize_copilot_model_id(
- current_model,
- catalog=catalog,
- api_key=api_key,
- )
- or current_model
- )
- if live_models:
- model_list = [model_id for model_id in live_models if model_id]
- print(f" Found {len(model_list)} model(s) from GitHub Copilot")
- else:
- model_list = _PROVIDER_MODELS.get(provider_id, [])
- if model_list:
- print(
- " ⚠ Could not auto-detect models from GitHub Copilot — showing defaults."
- )
- print(' Use "Enter custom model name" if you do not see your model.')
-
- if model_list:
- selected = _prompt_model_selection(
- model_list, current_model=normalized_current_model
- )
- else:
- try:
- selected = input("Model name: ").strip()
- except (KeyboardInterrupt, EOFError):
- selected = None
-
- if selected:
- selected = (
- normalize_copilot_model_id(
- selected,
- catalog=catalog,
- api_key=api_key,
- )
- or selected
- )
- initial_cfg = load_config()
- current_effort = _current_reasoning_effort(initial_cfg)
- reasoning_efforts = github_model_reasoning_efforts(
- selected,
- catalog=catalog,
- api_key=api_key,
- )
- selected_effort = None
- if reasoning_efforts:
- print(f" {selected} supports reasoning controls.")
- selected_effort = _prompt_reasoning_effort_selection(
- reasoning_efforts, current_effort=current_effort
- )
-
- _save_model_choice(selected)
-
- cfg = load_config()
- model = cfg.get("model")
- if not isinstance(model, dict):
- model = {"default": model} if model else {}
- cfg["model"] = model
- model["provider"] = provider_id
- model["base_url"] = effective_base
- model["api_mode"] = copilot_model_api_mode(
- selected,
- catalog=catalog,
- api_key=api_key,
- )
- if selected_effort is not None:
- _set_reasoning_effort(cfg, selected_effort)
- save_config(cfg)
- deactivate_provider()
-
- print(f"Default model set to: {selected} (via {pconfig.name})")
- if reasoning_efforts:
- if selected_effort == "none":
- print("Reasoning disabled for this model.")
- elif selected_effort:
- print(f"Reasoning effort set to: {selected_effort}")
- else:
- print("No change.")
-def _model_flow_copilot_acp(config, current_model=""):
- """GitHub Copilot ACP flow using the local Copilot CLI."""
- from hermes_cli.auth import (
- PROVIDER_REGISTRY,
- _prompt_model_selection,
- _save_model_choice,
- deactivate_provider,
- get_external_process_provider_status,
- resolve_api_key_provider_credentials,
- resolve_external_process_provider_credentials,
- )
- from hermes_cli.models import (
- _PROVIDER_MODELS,
- fetch_github_model_catalog,
- normalize_copilot_model_id,
- )
- from hermes_cli.config import load_config, save_config
-
- del config
-
- provider_id = "copilot-acp"
- pconfig = PROVIDER_REGISTRY[provider_id]
-
- status = get_external_process_provider_status(provider_id)
- resolved_command = (
- status.get("resolved_command") or status.get("command") or "copilot"
- )
- effective_base = status.get("base_url") or pconfig.inference_base_url
-
- print(" GitHub Copilot ACP delegates Hermes turns to `copilot --acp`.")
- print(" Hermes currently starts its own ACP subprocess for each request.")
- print(" Hermes uses your selected model as a hint for the Copilot ACP session.")
- print(f" Command: {resolved_command}")
- print(f" Backend marker: {effective_base}")
- print()
-
- try:
- creds = resolve_external_process_provider_credentials(provider_id)
- except Exception as exc:
- print(f" ⚠ {exc}")
- print(
- " Set HERMES_COPILOT_ACP_COMMAND or COPILOT_CLI_PATH if Copilot CLI is installed elsewhere."
- )
- return
-
- effective_base = creds.get("base_url") or effective_base
-
- catalog_api_key = ""
- try:
- catalog_creds = resolve_api_key_provider_credentials("copilot")
- catalog_api_key = catalog_creds.get("api_key", "")
- except Exception:
- pass
-
- catalog = fetch_github_model_catalog(catalog_api_key)
- normalized_current_model = (
- normalize_copilot_model_id(
- current_model,
- catalog=catalog,
- api_key=catalog_api_key,
- )
- or current_model
- )
-
- if catalog:
- model_list = [item.get("id", "") for item in catalog if item.get("id")]
- print(f" Found {len(model_list)} model(s) from GitHub Copilot")
- else:
- model_list = _PROVIDER_MODELS.get("copilot", [])
- if model_list:
- print(
- " ⚠ Could not auto-detect models from GitHub Copilot — showing defaults."
- )
- print(' Use "Enter custom model name" if you do not see your model.')
-
- if model_list:
- selected = _prompt_model_selection(
- model_list,
- current_model=normalized_current_model,
- )
- else:
- try:
- selected = input("Model name: ").strip()
- except (KeyboardInterrupt, EOFError):
- selected = None
-
- if not selected:
- print("No change.")
- return
-
- selected = (
- normalize_copilot_model_id(
- selected,
- catalog=catalog,
- api_key=catalog_api_key,
- )
- or selected
- )
- _save_model_choice(selected)
-
- cfg = load_config()
- model = cfg.get("model")
- if not isinstance(model, dict):
- model = {"default": model} if model else {}
- cfg["model"] = model
- model["provider"] = provider_id
- model["base_url"] = effective_base
- model["api_mode"] = "chat_completions"
- save_config(cfg)
- deactivate_provider()
-
- print(f"Default model set to: {selected} (via {pconfig.name})")
def _prompt_api_key(pconfig, existing_key: str, provider_id: str = "") -> tuple:
@@ -5499,101 +3830,6 @@ def _prompt_api_key(pconfig, existing_key: str, provider_id: str = "") -> tuple:
return existing_key, False
-def _model_flow_kimi(config, current_model=""):
- """Kimi / Moonshot model selection with automatic endpoint routing.
-
- - sk-kimi-* keys → api.kimi.com/coding/v1 (Kimi Coding Plan)
- - Other keys → api.moonshot.ai/v1 (legacy Moonshot)
-
- No manual base URL prompt — endpoint is determined by key prefix.
- """
- from hermes_cli.auth import (
- PROVIDER_REGISTRY,
- KIMI_CODE_BASE_URL,
- _prompt_model_selection,
- _save_model_choice,
- deactivate_provider,
- )
- from hermes_cli.config import (
- get_env_value,
- save_env_value,
- load_config,
- save_config,
- )
- from hermes_cli.models import _PROVIDER_MODELS
-
- provider_id = "kimi-coding"
- pconfig = PROVIDER_REGISTRY[provider_id]
- key_env = pconfig.api_key_env_vars[0] if pconfig.api_key_env_vars else ""
- base_url_env = pconfig.base_url_env_var or ""
-
- # Step 1: Check / prompt for API key
- existing_key = ""
- for ev in pconfig.api_key_env_vars:
- existing_key = get_env_value(ev) or os.getenv(ev, "")
- if existing_key:
- break
-
- existing_key, abort = _prompt_api_key(
- pconfig, existing_key, provider_id=provider_id
- )
- if abort:
- return
-
- # Step 2: Auto-detect endpoint from key prefix
- is_coding_plan = existing_key.startswith("sk-kimi-")
- if is_coding_plan:
- effective_base = KIMI_CODE_BASE_URL
- print(f" Detected Kimi Coding Plan key → {effective_base}")
- else:
- effective_base = pconfig.inference_base_url
- print(f" Using Moonshot endpoint → {effective_base}")
- # Clear any manual base URL override so auto-detection works at runtime
- if base_url_env and get_env_value(base_url_env):
- save_env_value(base_url_env, "")
- print()
-
- # Step 3: Model selection — show appropriate models for the endpoint
- if is_coding_plan:
- # Coding Plan models (kimi-k2.6 first)
- model_list = [
- "kimi-k2.6",
- "kimi-k2.5",
- "kimi-for-coding",
- "kimi-k2-thinking",
- "kimi-k2-thinking-turbo",
- ]
- else:
- # Legacy Moonshot models (excludes Coding Plan-only models)
- model_list = _PROVIDER_MODELS.get("moonshot", [])
-
- if model_list:
- selected = _prompt_model_selection(model_list, current_model=current_model)
- else:
- try:
- selected = input("Enter model name: ").strip()
- except (KeyboardInterrupt, EOFError):
- selected = None
-
- if selected:
- _save_model_choice(selected)
-
- # Update config with provider and base URL
- cfg = load_config()
- model = cfg.get("model")
- if not isinstance(model, dict):
- model = {"default": model} if model else {}
- cfg["model"] = model
- model["provider"] = provider_id
- model["base_url"] = effective_base
- model.pop("api_mode", None) # let runtime auto-detect from URL
- save_config(cfg)
- deactivate_provider()
-
- endpoint_label = "Kimi Coding" if is_coding_plan else "Moonshot"
- print(f"Default model set to: {selected} (via {endpoint_label})")
- else:
- print("No change.")
def _infer_stepfun_region(base_url: str) -> str:
@@ -5617,668 +3853,12 @@ def _stepfun_base_url_for_region(region: str) -> str:
)
-def _model_flow_stepfun(config, current_model=""):
- """StepFun Step Plan flow with region-specific endpoints."""
- from hermes_cli.auth import (
- PROVIDER_REGISTRY,
- _prompt_model_selection,
- _save_model_choice,
- deactivate_provider,
- )
- from hermes_cli.config import (
- get_env_value,
- save_env_value,
- load_config,
- save_config,
- )
- from hermes_cli.models import _PROVIDER_MODELS, fetch_api_models
- provider_id = "stepfun"
- pconfig = PROVIDER_REGISTRY[provider_id]
- key_env = pconfig.api_key_env_vars[0] if pconfig.api_key_env_vars else ""
- base_url_env = pconfig.base_url_env_var or ""
- existing_key = ""
- for ev in pconfig.api_key_env_vars:
- existing_key = get_env_value(ev) or os.getenv(ev, "")
- if existing_key:
- break
- existing_key, abort = _prompt_api_key(
- pconfig, existing_key, provider_id=provider_id
- )
- if abort:
- return
- current_base = ""
- if base_url_env:
- current_base = get_env_value(base_url_env) or os.getenv(base_url_env, "")
- if not current_base:
- model_cfg = config.get("model")
- if isinstance(model_cfg, dict):
- current_base = str(model_cfg.get("base_url") or "").strip()
- current_region = _infer_stepfun_region(current_base or pconfig.inference_base_url)
- region_choices = [
- (
- "international",
- f"International ({_stepfun_base_url_for_region('international')})",
- ),
- ("china", f"China ({_stepfun_base_url_for_region('china')})"),
- ]
- ordered_regions = []
- for region_key, label in region_choices:
- if region_key == current_region:
- ordered_regions.insert(0, (region_key, f"{label} ← currently active"))
- else:
- ordered_regions.append((region_key, label))
- ordered_regions.append(("cancel", "Cancel"))
- region_idx = _prompt_provider_choice([label for _, label in ordered_regions])
- if region_idx is None or ordered_regions[region_idx][0] == "cancel":
- print("No change.")
- return
-
- selected_region = ordered_regions[region_idx][0]
- effective_base = _stepfun_base_url_for_region(selected_region)
- if base_url_env:
- save_env_value(base_url_env, effective_base)
-
- live_models = fetch_api_models(existing_key, effective_base)
- if live_models:
- model_list = live_models
- print(f" Found {len(model_list)} model(s) from {pconfig.name} API")
- else:
- model_list = _PROVIDER_MODELS.get(provider_id, [])
- if model_list:
- print(
- f" Could not auto-detect models from {pconfig.name} API — "
- "showing Step Plan fallback catalog."
- )
-
- if model_list:
- selected = _prompt_model_selection(model_list, current_model=current_model)
- else:
- try:
- selected = input("Model name: ").strip()
- except (KeyboardInterrupt, EOFError):
- selected = None
-
- if selected:
- _save_model_choice(selected)
-
- cfg = load_config()
- model = cfg.get("model")
- if not isinstance(model, dict):
- model = {"default": model} if model else {}
- cfg["model"] = model
- model["provider"] = provider_id
- model["base_url"] = effective_base
- model.pop("api_mode", None)
- save_config(cfg)
- deactivate_provider()
-
- config["model"] = dict(model)
- print(f"Default model set to: {selected} (via {pconfig.name})")
- else:
- print("No change.")
-
-
-def _model_flow_bedrock_api_key(config, region, current_model=""):
- """Bedrock API Key mode — uses the OpenAI-compatible bedrock-mantle endpoint.
-
- For developers who don't have an AWS account but received a Bedrock API Key
- from their AWS admin. Works like any OpenAI-compatible endpoint.
- """
- from hermes_cli.auth import (
- _prompt_model_selection,
- _save_model_choice,
- deactivate_provider,
- )
- from hermes_cli.config import (
- load_config,
- save_config,
- get_env_value,
- save_env_value,
- )
- from hermes_cli.models import _PROVIDER_MODELS
-
- mantle_base_url = f"https://bedrock-mantle.{region}.api.aws/v1"
-
- # Prompt for API key
- existing_key = get_env_value("AWS_BEARER_TOKEN_BEDROCK") or ""
- if existing_key:
- from hermes_cli.env_loader import format_secret_source_suffix
- source_suffix = format_secret_source_suffix("AWS_BEARER_TOKEN_BEDROCK")
- print(f" Bedrock API Key: {existing_key[:12]}... ✓{source_suffix}")
- else:
- print(f" Endpoint: {mantle_base_url}")
- print()
- from hermes_cli.secret_prompt import masked_secret_prompt
-
- try:
- api_key = masked_secret_prompt(" Bedrock API Key: ").strip()
- except (KeyboardInterrupt, EOFError):
- print()
- return
- if not api_key:
- print(" Cancelled.")
- return
- save_env_value("AWS_BEARER_TOKEN_BEDROCK", api_key)
- existing_key = api_key
- print(" ✓ API key saved.")
- print()
-
- # Model selection — use static list (mantle doesn't need boto3 for discovery)
- model_list = _PROVIDER_MODELS.get("bedrock", [])
- print(f" Showing {len(model_list)} curated models")
-
- if model_list:
- selected = _prompt_model_selection(model_list, current_model=current_model)
- else:
- try:
- selected = input(" Model ID: ").strip()
- except (KeyboardInterrupt, EOFError):
- selected = None
-
- if selected:
- _save_model_choice(selected)
-
- # Save as custom provider pointing to bedrock-mantle
- cfg = load_config()
- model = cfg.get("model")
- if not isinstance(model, dict):
- model = {"default": model} if model else {}
- cfg["model"] = model
- model["provider"] = "custom"
- model["base_url"] = mantle_base_url
- model.pop("api_mode", None) # chat_completions is the default
-
- # Also save region in bedrock config for reference
- bedrock_cfg = cfg.get("bedrock", {})
- if not isinstance(bedrock_cfg, dict):
- bedrock_cfg = {}
- bedrock_cfg["region"] = region
- cfg["bedrock"] = bedrock_cfg
-
- # Save the API key env var name so hermes knows where to find it
- save_env_value("OPENAI_API_KEY", existing_key)
- save_env_value("OPENAI_BASE_URL", mantle_base_url)
-
- save_config(cfg)
- deactivate_provider()
-
- print(f" Default model set to: {selected} (via Bedrock API Key, {region})")
- print(f" Endpoint: {mantle_base_url}")
- else:
- print(" No change.")
-
-
-def _model_flow_bedrock(config, current_model=""):
- """AWS Bedrock provider: verify credentials, pick region, discover models.
-
- Uses the native Converse API via boto3 — not the OpenAI-compatible endpoint.
- Auth is handled by the AWS SDK default credential chain (env vars, profile,
- instance role), so no API key prompt is needed.
- """
- from hermes_cli.auth import (
- _prompt_model_selection,
- _save_model_choice,
- deactivate_provider,
- )
- from hermes_cli.config import load_config, save_config
- from hermes_cli.models import _PROVIDER_MODELS
-
- # 1. Check for AWS credentials
- try:
- from agent.bedrock_adapter import (
- has_aws_credentials,
- resolve_aws_auth_env_var,
- resolve_bedrock_region,
- discover_bedrock_models,
- )
- except ImportError:
- print(" ✗ boto3 is not installed. Install it with:")
- print(" pip install boto3")
- print()
- return
-
- if not has_aws_credentials():
- print(" ⚠ No AWS credentials detected via environment variables.")
- print(" Bedrock will use boto3's default credential chain (IMDS, SSO, etc.)")
- print()
-
- auth_var = resolve_aws_auth_env_var()
- if auth_var:
- print(f" AWS credentials: {auth_var} ✓")
- else:
- print(" AWS credentials: boto3 default chain (instance role / SSO)")
- print()
-
- # 2. Region selection
- current_region = resolve_bedrock_region()
- try:
- region_input = input(f" AWS Region [{current_region}]: ").strip()
- except (KeyboardInterrupt, EOFError):
- print()
- return
- region = region_input or current_region
-
- # 2b. Authentication mode
- print(" Choose authentication method:")
- print()
- print(" 1. IAM credential chain (recommended)")
- print(" Works with EC2 instance roles, SSO, env vars, aws configure")
- print(" 2. Bedrock API Key")
- print(" Enter your Bedrock API Key directly — also supports")
- print(" team scenarios where an admin distributes keys")
- print()
- try:
- auth_choice = input(" Choice [1]: ").strip()
- except (KeyboardInterrupt, EOFError):
- print()
- return
-
- if auth_choice == "2":
- _model_flow_bedrock_api_key(config, region, current_model)
- return
-
- # 3. Model discovery — try live API first, fall back to static list
- print(f" Discovering models in {region}...")
- live_models = discover_bedrock_models(region)
-
- if live_models:
- _EXCLUDE_PREFIXES = (
- "stability.",
- "cohere.embed",
- "twelvelabs.",
- "us.stability.",
- "us.cohere.embed",
- "us.twelvelabs.",
- "global.cohere.embed",
- "global.twelvelabs.",
- )
- _EXCLUDE_SUBSTRINGS = ("safeguard", "voxtral", "palmyra-vision")
- filtered = []
- for m in live_models:
- mid = m["id"]
- if any(mid.startswith(p) for p in _EXCLUDE_PREFIXES):
- continue
- if any(s in mid.lower() for s in _EXCLUDE_SUBSTRINGS):
- continue
- filtered.append(m)
-
- # Deduplicate: prefer inference profiles (us.*, global.*) over bare
- # foundation model IDs.
- profile_base_ids = set()
- for m in filtered:
- mid = m["id"]
- if mid.startswith(("us.", "global.")):
- base = mid.split(".", 1)[1] if "." in mid[3:] else mid
- profile_base_ids.add(base)
-
- deduped = []
- for m in filtered:
- mid = m["id"]
- if not mid.startswith(("us.", "global.")) and mid in profile_base_ids:
- continue
- deduped.append(m)
-
- _RECOMMENDED = [
- "us.anthropic.claude-sonnet-4-6",
- "us.anthropic.claude-opus-4-6",
- "us.anthropic.claude-haiku-4-5",
- "us.amazon.nova-pro",
- "us.amazon.nova-lite",
- "us.amazon.nova-micro",
- "deepseek.v3",
- "us.meta.llama4-maverick",
- "us.meta.llama4-scout",
- ]
-
- def _sort_key(m):
- mid = m["id"]
- for i, rec in enumerate(_RECOMMENDED):
- if mid.startswith(rec):
- return (0, i, mid)
- if mid.startswith("global."):
- return (1, 0, mid)
- return (2, 0, mid)
-
- deduped.sort(key=_sort_key)
- model_list = [m["id"] for m in deduped]
- print(
- f" Found {len(model_list)} text model(s) (filtered from {len(live_models)} total)"
- )
- else:
- model_list = _PROVIDER_MODELS.get("bedrock", [])
- if model_list:
- print(
- f" Using {len(model_list)} curated models (live discovery unavailable)"
- )
- else:
- print(
- " No models found. Check IAM permissions for bedrock:ListFoundationModels."
- )
- return
-
- # 4. Model selection
- if model_list:
- selected = _prompt_model_selection(model_list, current_model=current_model)
- else:
- try:
- selected = input(" Model ID: ").strip()
- except (KeyboardInterrupt, EOFError):
- selected = None
-
- if selected:
- _save_model_choice(selected)
-
- cfg = load_config()
- model = cfg.get("model")
- if not isinstance(model, dict):
- model = {"default": model} if model else {}
- cfg["model"] = model
- model["provider"] = "bedrock"
- model["base_url"] = f"https://bedrock-runtime.{region}.amazonaws.com"
- model.pop("api_mode", None) # bedrock_converse is auto-detected
-
- bedrock_cfg = cfg.get("bedrock", {})
- if not isinstance(bedrock_cfg, dict):
- bedrock_cfg = {}
- bedrock_cfg["region"] = region
- cfg["bedrock"] = bedrock_cfg
-
- save_config(cfg)
- deactivate_provider()
-
- print(f" Default model set to: {selected} (via AWS Bedrock, {region})")
- else:
- print(" No change.")
-
-
-def _model_flow_api_key_provider(config, provider_id, current_model=""):
- """Generic flow for API-key providers (z.ai, MiniMax, OpenCode, etc.)."""
- from hermes_cli.auth import (
- PROVIDER_REGISTRY,
- _prompt_model_selection,
- _save_model_choice,
- deactivate_provider,
- )
- from hermes_cli.config import (
- get_env_value,
- save_env_value,
- load_config,
- save_config,
- )
- from hermes_cli.models import (
- _PROVIDER_MODELS,
- fetch_api_models,
- opencode_model_api_mode,
- normalize_opencode_model_id,
- )
-
- pconfig = PROVIDER_REGISTRY[provider_id]
- key_env = pconfig.api_key_env_vars[0] if pconfig.api_key_env_vars else ""
- base_url_env = pconfig.base_url_env_var or ""
-
- # Check / prompt for API key
- existing_key = ""
- for ev in pconfig.api_key_env_vars:
- existing_key = get_env_value(ev) or os.getenv(ev, "")
- if existing_key:
- break
-
- existing_key, abort = _prompt_api_key(
- pconfig, existing_key, provider_id=provider_id
- )
- if abort:
- return
-
- # Gemini free-tier gate: free-tier daily quotas (<= 250 RPD for Flash)
- # are exhausted in a handful of agent turns, so refuse to wire up the
- # provider with a free-tier key. Probe is best-effort; network or auth
- # errors fall through without blocking.
- if provider_id == "gemini" and existing_key:
- try:
- from agent.gemini_native_adapter import probe_gemini_tier
- except Exception:
- probe_gemini_tier = None
- if probe_gemini_tier is not None:
- print(" Checking Gemini API tier...")
- probe_base = (
- (get_env_value(base_url_env) if base_url_env else "")
- or os.getenv(base_url_env or "", "")
- or pconfig.inference_base_url
- )
- tier = probe_gemini_tier(existing_key, probe_base)
- if tier == "free":
- print()
- print(
- "❌ This Google API key is on the free tier "
- "(<= 250 requests/day for gemini-2.5-flash)."
- )
- print(
- " Hermes typically makes 3-10 API calls per user turn "
- "(tool iterations + auxiliary tasks),"
- )
- print(
- " so the free tier is exhausted after a handful of "
- "messages and cannot sustain"
- )
- print(" an agent session.")
- print()
- print(
- " To use Gemini with Hermes, enable billing on your "
- "Google Cloud project and regenerate"
- )
- print(
- " the key in a billing-enabled project: "
- "https://aistudio.google.com/apikey"
- )
- print()
- print(
- " Alternatives with workable free usage: DeepSeek, "
- "OpenRouter (free models), Groq, Nous."
- )
- print()
- print("Not saving Gemini as the default provider.")
- return
- if tier == "paid":
- print(" Tier check: paid ✓")
- else:
- # "unknown" -- network issue, auth problem, unexpected response.
- # Don't block; the runtime 429 handler will surface free-tier
- # guidance if the key turns out to be free tier.
- print(" Tier check: could not verify (proceeding anyway).")
- print()
-
- # Optional base URL override.
- # Precedence: env var → config.yaml model.base_url → registry default.
- # Reading config.yaml prevents silently overwriting a saved remote URL
- # (e.g. a remote LM Studio endpoint) with localhost when the user just
- # presses Enter at the prompt below.
- current_base = ""
- if base_url_env:
- current_base = get_env_value(base_url_env) or os.getenv(base_url_env, "")
- if not current_base:
- try:
- _m = load_config().get("model") or {}
- if str(_m.get("provider") or "").strip().lower() == provider_id:
- current_base = str(_m.get("base_url") or "").strip()
- except Exception:
- pass
- effective_base = current_base or pconfig.inference_base_url
-
- try:
- override = input(f"Base URL [{effective_base}]: ").strip()
- except (KeyboardInterrupt, EOFError):
- print()
- override = ""
- if override and base_url_env:
- if not override.startswith(("http://", "https://")):
- print(
- " Invalid URL — must start with http:// or https://. Keeping current value."
- )
- else:
- save_env_value(base_url_env, override)
- effective_base = override
-
- # Model selection — resolution order:
- # 1. models.dev registry (cached, filtered for agentic/tool-capable models)
- # 2. Curated static fallback list (offline insurance)
- # 3. Live /models endpoint probe (small providers without models.dev data)
- #
- # LM Studio: live /api/v1/models probe (no models.dev catalog).
- # Ollama Cloud: merged discovery (live API + models.dev + disk cache).
- if provider_id == "lmstudio":
- from hermes_cli.auth import AuthError
- from hermes_cli.models import fetch_lmstudio_models
-
- api_key_for_probe = existing_key or (get_env_value(key_env) if key_env else "")
- try:
- model_list = fetch_lmstudio_models(
- api_key=api_key_for_probe, base_url=effective_base
- )
- except AuthError as exc:
- print(f" LM Studio rejected the request: {exc}")
- print(" Set LM_API_KEY (or update it) to match the server's bearer token.")
- model_list = []
- if model_list:
- print(f" Found {len(model_list)} model(s) from LM Studio")
- elif provider_id == "ollama-cloud":
- from hermes_cli.models import fetch_ollama_cloud_models
-
- api_key_for_probe = existing_key or (get_env_value(key_env) if key_env else "")
- # During setup, force a live refresh so the picker reflects newly
- # released models (e.g. deepseek v4 flash, kimi k2.6) the moment
- # the user enters their key — not an hour later when the disk
- # cache TTL expires.
- model_list = fetch_ollama_cloud_models(
- api_key=api_key_for_probe,
- base_url=effective_base,
- force_refresh=True,
- )
- if model_list:
- print(f" Found {len(model_list)} model(s) from Ollama Cloud")
- elif provider_id == "novita":
- from hermes_cli.models import fetch_api_models
-
- api_key_for_probe = existing_key or (get_env_value(key_env) if key_env else "")
- curated = _PROVIDER_MODELS.get(provider_id, [])
- live_models = fetch_api_models(api_key_for_probe, effective_base)
- if live_models:
- model_list = live_models
- print(f" Found {len(model_list)} model(s) from {pconfig.name} API")
- else:
- mdev_models: list = []
- try:
- from agent.models_dev import list_agentic_models
-
- mdev_models = list_agentic_models(provider_id)
- except Exception:
- pass
- if mdev_models:
- seen = {m.lower() for m in mdev_models}
- model_list = list(mdev_models)
- for m in curated:
- if m.lower() not in seen:
- model_list.append(m)
- seen.add(m.lower())
- print(f" Found {len(model_list)} model(s) from models.dev registry")
- else:
- model_list = curated
- if model_list:
- print(
- f' Showing {len(model_list)} curated models — use "Enter custom model name" for others.'
- )
- else:
- curated = _PROVIDER_MODELS.get(provider_id, [])
-
- # Try models.dev first — returns tool-capable models, filtered for noise
- mdev_models: list = []
- try:
- from agent.models_dev import list_agentic_models
-
- mdev_models = list_agentic_models(provider_id)
- except Exception:
- pass
-
- if mdev_models:
- # Merge models.dev with curated list so newly added models
- # (not yet in models.dev) still appear in the picker.
- if curated:
- seen = {m.lower() for m in mdev_models}
- merged = list(mdev_models)
- for m in curated:
- if m.lower() not in seen:
- merged.append(m)
- seen.add(m.lower())
- model_list = merged
- else:
- model_list = mdev_models
- print(f" Found {len(model_list)} model(s) from models.dev registry")
- elif curated and len(curated) >= 8:
- # Curated list is substantial — use it directly, skip live probe
- model_list = curated
- print(
- f' Showing {len(model_list)} curated models — use "Enter custom model name" for others.'
- )
- else:
- api_key_for_probe = existing_key or (
- get_env_value(key_env) if key_env else ""
- )
- live_models = fetch_api_models(api_key_for_probe, effective_base)
- if live_models and len(live_models) >= len(curated):
- model_list = live_models
- print(f" Found {len(model_list)} model(s) from {pconfig.name} API")
- else:
- model_list = curated
- if model_list:
- print(
- f' Showing {len(model_list)} curated models — use "Enter custom model name" for others.'
- )
- # else: no defaults either, will fall through to raw input
-
- if provider_id in {"opencode-zen", "opencode-go"}:
- model_list = [
- normalize_opencode_model_id(provider_id, mid) for mid in model_list
- ]
- current_model = normalize_opencode_model_id(provider_id, current_model)
- model_list = list(dict.fromkeys(mid for mid in model_list if mid))
-
- if model_list:
- selected = _prompt_model_selection(model_list, current_model=current_model)
- else:
- try:
- selected = input("Model name: ").strip()
- except (KeyboardInterrupt, EOFError):
- selected = None
-
- if selected:
- if provider_id in {"opencode-zen", "opencode-go"}:
- selected = normalize_opencode_model_id(provider_id, selected)
-
- _save_model_choice(selected)
-
- # Update config with provider, base URL, and provider-specific API mode
- cfg = load_config()
- model = cfg.get("model")
- if not isinstance(model, dict):
- model = {"default": model} if model else {}
- cfg["model"] = model
- model["provider"] = provider_id
- model["base_url"] = effective_base
- if provider_id in {"opencode-zen", "opencode-go"}:
- model["api_mode"] = opencode_model_api_mode(provider_id, selected)
- else:
- model.pop("api_mode", None)
- save_config(cfg)
- deactivate_provider()
-
- print(f"Default model set to: {selected} (via {pconfig.name})")
- else:
- print("No change.")
def _run_anthropic_oauth_flow(save_env_value):
@@ -6374,157 +3954,6 @@ def _run_anthropic_oauth_flow(save_env_value):
return False
-def _model_flow_anthropic(config, current_model=""):
- """Flow for Anthropic provider — OAuth subscription, API key, or Claude Code creds."""
- from hermes_cli.auth import (
- _prompt_model_selection,
- _save_model_choice,
- deactivate_provider,
- )
- from hermes_cli.config import (
- save_env_value,
- load_config,
- save_config,
- save_anthropic_api_key,
- )
- from hermes_cli.models import _PROVIDER_MODELS
-
- # Check ALL credential sources
- from hermes_cli.auth import get_anthropic_key
-
- existing_key = get_anthropic_key()
- cc_available = False
- try:
- from agent.anthropic_adapter import (
- read_claude_code_credentials,
- is_claude_code_token_valid,
- _is_oauth_token,
- )
-
- cc_creds = read_claude_code_credentials()
- if cc_creds and is_claude_code_token_valid(cc_creds):
- cc_available = True
- except Exception:
- pass
-
- # Stale-OAuth guard: if the only existing cred is an expired OAuth token
- # (no valid cc_creds to fall back on), treat it as missing so the re-auth
- # path is offered instead of silently accepting a broken token.
- existing_is_stale_oauth = False
- if existing_key and _is_oauth_token(existing_key) and not cc_available:
- existing_is_stale_oauth = True
-
- has_creds = (bool(existing_key) and not existing_is_stale_oauth) or cc_available
- needs_auth = not has_creds
-
- if has_creds:
- # Show what we found
- if existing_key:
- from hermes_cli.env_loader import format_secret_source_suffix
- from hermes_cli.auth import PROVIDER_REGISTRY
-
- # Surface which env var supplied the key so users with
- # Bitwarden see "(from Bitwarden)" — without this, a detected
- # BSM key looks identical to a key in .env and users assume
- # nothing is wired up.
- source_suffix = ""
- for var in PROVIDER_REGISTRY["anthropic"].api_key_env_vars:
- if os.getenv(var, "").strip() == existing_key:
- source_suffix = format_secret_source_suffix(var)
- if source_suffix:
- break
- print(
- f" Anthropic credentials: {existing_key[:12]}... ✓{source_suffix}"
- )
- elif cc_available:
- print(" Claude Code credentials: ✓ (auto-detected)")
- print()
- print(" 1. Use existing credentials")
- print(" 2. Reauthenticate (new OAuth login)")
- print(" 3. Cancel")
- print()
- try:
- choice = input(" Choice [1/2/3]: ").strip()
- except (KeyboardInterrupt, EOFError):
- choice = "1"
-
- if choice == "2":
- needs_auth = True
- elif choice == "3":
- return
- # choice == "1" or default: use existing, proceed to model selection
-
- if needs_auth:
- # Show auth method choice
- print()
- print(" Choose authentication method:")
- print()
- print(" 1. Claude Pro/Max subscription (OAuth login)")
- print(" 2. Anthropic API key (pay-per-token)")
- print(" 3. Cancel")
- print()
- try:
- choice = input(" Choice [1/2/3]: ").strip()
- except (KeyboardInterrupt, EOFError):
- print()
- return
-
- if choice == "1":
- if not _run_anthropic_oauth_flow(save_env_value):
- return
-
- elif choice == "2":
- print()
- print(" Get an API key at: https://platform.claude.com/settings/keys")
- print()
- from hermes_cli.secret_prompt import masked_secret_prompt
-
- try:
- api_key = masked_secret_prompt(" API key (sk-ant-...): ").strip()
- except (KeyboardInterrupt, EOFError):
- print()
- return
- if not api_key:
- print(" Cancelled.")
- return
- save_anthropic_api_key(api_key, save_fn=save_env_value)
- print(" ✓ API key saved.")
-
- else:
- print(" No change.")
- return
- print()
-
- # Model selection
- model_list = _PROVIDER_MODELS.get("anthropic", [])
- if model_list:
- selected = _prompt_model_selection(model_list, current_model=current_model)
- else:
- try:
- selected = input("Model name (e.g., claude-sonnet-4-20250514): ").strip()
- except (KeyboardInterrupt, EOFError):
- selected = None
-
- if selected:
- _save_model_choice(selected)
-
- # Update config with provider — clear base_url since
- # resolve_runtime_provider() always hardcodes Anthropic's URL.
- # Leaving a stale base_url in config can contaminate other
- # providers if the user switches without running 'hermes model'.
- cfg = load_config()
- model = cfg.get("model")
- if not isinstance(model, dict):
- model = {"default": model} if model else {}
- cfg["model"] = model
- model["provider"] = "anthropic"
- model.pop("base_url", None)
- save_config(cfg)
- deactivate_provider()
-
- print(f"Default model set to: {selected} (via Anthropic)")
- else:
- print("No change.")
def cmd_login(args):
diff --git a/hermes_cli/model_setup_flows.py b/hermes_cli/model_setup_flows.py
new file mode 100644
index 00000000000..f4d8e43cff9
--- /dev/null
+++ b/hermes_cli/model_setup_flows.py
@@ -0,0 +1,2648 @@
+"""Per-provider model-selection wizard flows for ``hermes setup`` / ``hermes model``.
+
+Extracted from ``hermes_cli/main.py`` as part of the god-file decomposition
+campaign (``~/.hermes/plans/god-file-decomposition.md``, Phase 2 — splitting
+main.py handler/flow bodies out of the module). These 18 ``_model_flow_*``
+functions are the interactive provider-setup branches dispatched by
+``select_provider_and_model`` (which stays in main.py).
+
+Behavior-neutral: each function is lifted verbatim. ``select_provider_and_model``
+in main.py re-imports them (``from hermes_cli.model_setup_flows import *``-style
+explicit import) so existing call sites — and test monkeypatches that target
+``hermes_cli.main._model_flow_*`` — keep resolving against main.py's namespace.
+
+main.py-internal helpers the flows call (``_prompt_api_key``, ``_save_custom_provider``,
+the reasoning-effort/stepfun/qwen helpers, ``_run_anthropic_oauth_flow``, …) are
+imported lazily inside the flows (``from hermes_cli.main import ...`` resolves at
+call time, when main.py is fully loaded) so this module never imports
+``hermes_cli.main`` at import time -> no import cycle.
+"""
+
+from __future__ import annotations
+
+import argparse
+import os
+import subprocess
+
+
+def _model_flow_openrouter(config, current_model=""):
+ """OpenRouter provider: ensure API key, then pick model."""
+ from hermes_cli.main import _prompt_api_key
+ from hermes_constants import OPENROUTER_BASE_URL
+ from hermes_cli.auth import (
+ ProviderConfig,
+ _prompt_model_selection,
+ _save_model_choice,
+ deactivate_provider,
+ )
+ from hermes_cli.config import get_env_value
+
+ # Route through _prompt_api_key so users can replace a stale/broken key
+ # in-flow (K/R/C) instead of having to edit ~/.hermes/.env by hand. The
+ # previous bypass-when-key-exists branch left no way to recover from a
+ # bad paste short of re-running `hermes setup` from scratch. OpenRouter
+ # isn't in PROVIDER_REGISTRY so we synthesize a minimal pconfig.
+ pconfig = ProviderConfig(
+ id="openrouter",
+ name="OpenRouter",
+ auth_type="api_key",
+ api_key_env_vars=("OPENROUTER_API_KEY",),
+ )
+ existing_key = get_env_value("OPENROUTER_API_KEY") or ""
+ if not existing_key:
+ print("Get one at: https://openrouter.ai/keys")
+ print()
+ _resolved, abort = _prompt_api_key(pconfig, existing_key, provider_id="openrouter")
+ if abort:
+ return
+
+ from hermes_cli.models import model_ids, get_pricing_for_provider
+
+ openrouter_models = model_ids(force_refresh=True)
+
+ # Fetch live pricing (non-blocking — returns empty dict on failure)
+ pricing = get_pricing_for_provider("openrouter", force_refresh=True)
+
+ selected = _prompt_model_selection(
+ openrouter_models, current_model=current_model, pricing=pricing
+ )
+ if selected:
+ _save_model_choice(selected)
+
+ # Update config provider and deactivate any OAuth provider
+ from hermes_cli.config import load_config, save_config
+
+ cfg = load_config()
+ model = cfg.get("model")
+ if not isinstance(model, dict):
+ model = {"default": model} if model else {}
+ cfg["model"] = model
+ model["provider"] = "openrouter"
+ model["base_url"] = OPENROUTER_BASE_URL
+ model["api_mode"] = "chat_completions"
+ save_config(cfg)
+ deactivate_provider()
+ print(f"Default model set to: {selected} (via OpenRouter)")
+ else:
+ print("No change.")
+
+def _model_flow_nous(config, current_model="", args=None):
+ """Nous Portal provider: ensure logged in, then pick model."""
+ from hermes_cli.auth import (
+ get_provider_auth_state,
+ _prompt_model_selection,
+ _save_model_choice,
+ _update_config_for_provider,
+ resolve_nous_runtime_credentials,
+ AuthError,
+ format_auth_error,
+ _login_nous,
+ PROVIDER_REGISTRY,
+ )
+ from hermes_cli.config import (
+ get_env_value,
+ load_config,
+ save_config,
+ save_env_value,
+ )
+ from hermes_cli.nous_subscription import prompt_enable_tool_gateway
+
+ state = get_provider_auth_state("nous")
+ if not state or not state.get("access_token"):
+ print("Not logged into Nous Portal. Starting login...")
+ print()
+ try:
+ mock_args = argparse.Namespace(
+ portal_url=getattr(args, "portal_url", None),
+ inference_url=getattr(args, "inference_url", None),
+ client_id=getattr(args, "client_id", None),
+ scope=getattr(args, "scope", None),
+ no_browser=bool(getattr(args, "no_browser", False)),
+ timeout=getattr(args, "timeout", None) or 15.0,
+ ca_bundle=getattr(args, "ca_bundle", None),
+ insecure=bool(getattr(args, "insecure", False)),
+ )
+ _login_nous(mock_args, PROVIDER_REGISTRY["nous"])
+ # Offer Tool Gateway enablement for paid subscribers
+ try:
+ _refreshed = load_config() or {}
+ prompt_enable_tool_gateway(_refreshed)
+ except Exception:
+ pass
+ except SystemExit:
+ print("Login cancelled or failed.")
+ return
+ except Exception as exc:
+ print(f"Login failed: {exc}")
+ return
+ # login_nous already handles model selection + config update
+ return
+
+ # Already logged in — use curated model list (same as OpenRouter defaults).
+ # The live /models endpoint returns hundreds of models; the curated list
+ # shows only agentic models users recognize from OpenRouter.
+ from hermes_cli.models import (
+ get_curated_nous_model_ids,
+ get_pricing_for_provider,
+ check_nous_free_tier,
+ partition_nous_models_by_tier,
+ union_with_portal_free_recommendations,
+ union_with_portal_paid_recommendations,
+ )
+
+ model_ids = get_curated_nous_model_ids()
+ if not model_ids:
+ print("No curated models available for Nous Portal.")
+ return
+
+ # Verify credentials are still valid (catches expired sessions early)
+ try:
+ creds = resolve_nous_runtime_credentials()
+ except Exception as exc:
+ relogin = isinstance(exc, AuthError) and exc.relogin_required
+ msg = format_auth_error(exc) if isinstance(exc, AuthError) else str(exc)
+ if relogin:
+ print(f"Session expired: {msg}")
+ print("Re-authenticating with Nous Portal...\n")
+ try:
+ mock_args = argparse.Namespace(
+ portal_url=None,
+ inference_url=None,
+ client_id=None,
+ scope=None,
+ no_browser=False,
+ timeout=15.0,
+ ca_bundle=None,
+ insecure=False,
+ )
+ _login_nous(mock_args, PROVIDER_REGISTRY["nous"])
+ except Exception as login_exc:
+ print(f"Re-login failed: {login_exc}")
+ return
+ print(f"Could not verify credentials: {msg}")
+ return
+
+ # Fetch live pricing (non-blocking — returns empty dict on failure)
+ pricing = get_pricing_for_provider("nous")
+
+ # Force fresh account data for model selection so recent credit purchases
+ # are reflected immediately.
+ free_tier = check_nous_free_tier(force_fresh=True)
+ if not free_tier:
+ try:
+ refreshed_creds = resolve_nous_runtime_credentials(
+ force_refresh=True,
+ )
+ if refreshed_creds:
+ creds = refreshed_creds
+ except Exception:
+ # Runtime inference has its own paid-entitlement recovery path; do
+ # not block model selection if this opportunistic refresh fails.
+ pass
+
+ # Resolve portal URL early — needed both for upgrade links and for the
+ # freeRecommendedModels endpoint below.
+ _nous_portal_url = ""
+ try:
+ _nous_state = get_provider_auth_state("nous")
+ if _nous_state:
+ _nous_portal_url = _nous_state.get("portal_base_url", "")
+ except Exception:
+ pass
+
+ # For free users: partition models into selectable/unavailable based on
+ # whether they are free per the Portal-reported pricing. First augment
+ # with the Portal's freeRecommendedModels list so newly-launched free
+ # models show up even if this CLI build's hardcoded curated list and
+ # docs-hosted manifest haven't caught up yet.
+ #
+ # For paid users: mirror the same idea with paidRecommendedModels so
+ # newly-launched paid models surface in the picker too — independent
+ # of CLI release cadence.
+ unavailable_models: list[str] = []
+ unavailable_message = ""
+ if free_tier:
+ try:
+ from hermes_cli.nous_account import (
+ format_nous_portal_entitlement_message,
+ get_nous_portal_account_info,
+ )
+
+ _account_info = get_nous_portal_account_info(force_fresh=True)
+ unavailable_message = (
+ format_nous_portal_entitlement_message(
+ _account_info,
+ capability="paid Nous models",
+ )
+ or ""
+ )
+ except Exception:
+ unavailable_message = ""
+ model_ids, pricing = union_with_portal_free_recommendations(
+ model_ids, pricing, _nous_portal_url,
+ )
+ model_ids, unavailable_models = partition_nous_models_by_tier(
+ model_ids, pricing, free_tier=True
+ )
+ else:
+ model_ids, pricing = union_with_portal_paid_recommendations(
+ model_ids, pricing, _nous_portal_url,
+ )
+
+ if not model_ids and not unavailable_models:
+ print("No models available for Nous Portal after filtering.")
+ return
+
+ if free_tier and not model_ids:
+ print("No free models currently available.")
+ if unavailable_models:
+ from hermes_cli.auth import DEFAULT_NOUS_PORTAL_URL
+
+ _url = (_nous_portal_url or DEFAULT_NOUS_PORTAL_URL).rstrip("/")
+ print(unavailable_message or f"Upgrade at {_url} to access paid models.")
+ return
+
+ print(
+ f'Showing {len(model_ids)} curated models — use "Enter custom model name" for others.'
+ )
+
+ selected = _prompt_model_selection(
+ model_ids,
+ current_model=current_model,
+ pricing=pricing,
+ unavailable_models=unavailable_models,
+ portal_url=_nous_portal_url,
+ unavailable_message=unavailable_message,
+ )
+ if selected:
+ _save_model_choice(selected)
+ # Reactivate Nous as the provider and update config
+ inference_url = creds.get("base_url", "")
+ _update_config_for_provider("nous", inference_url)
+ current_model_cfg = config.get("model")
+ if isinstance(current_model_cfg, dict):
+ model_cfg = dict(current_model_cfg)
+ elif isinstance(current_model_cfg, str) and current_model_cfg.strip():
+ model_cfg = {"default": current_model_cfg.strip()}
+ else:
+ model_cfg = {}
+ model_cfg["provider"] = "nous"
+ model_cfg["default"] = selected
+ if inference_url and inference_url.strip():
+ model_cfg["base_url"] = inference_url.rstrip("/")
+ else:
+ model_cfg.pop("base_url", None)
+ config["model"] = model_cfg
+ # Clear any custom endpoint that might conflict
+ if get_env_value("OPENAI_BASE_URL"):
+ save_env_value("OPENAI_BASE_URL", "")
+ save_env_value("OPENAI_API_KEY", "")
+ save_config(config)
+ print(f"Default model set to: {selected} (via Nous Portal)")
+ # Offer Tool Gateway enablement for paid subscribers
+ prompt_enable_tool_gateway(config)
+ else:
+ print("No change.")
+
+def _model_flow_openai_codex(config, current_model=""):
+ """OpenAI Codex provider: ensure logged in, then pick model."""
+ from hermes_cli.auth import (
+ get_codex_auth_status,
+ _prompt_model_selection,
+ _save_model_choice,
+ _update_config_for_provider,
+ _login_openai_codex,
+ PROVIDER_REGISTRY,
+ DEFAULT_CODEX_BASE_URL,
+ )
+ from hermes_cli.codex_models import get_codex_model_ids
+
+ status = get_codex_auth_status()
+ if status.get("logged_in"):
+ print(" OpenAI Codex credentials: ✓")
+ print()
+ print(" 1. Use existing credentials")
+ print(" 2. Reauthenticate (new OAuth login)")
+ print(" 3. Cancel")
+ print()
+ try:
+ choice = input(" Choice [1/2/3]: ").strip()
+ except (KeyboardInterrupt, EOFError):
+ choice = "1"
+
+ if choice == "2":
+ print("Starting a fresh OpenAI Codex login...")
+ print()
+ try:
+ mock_args = argparse.Namespace()
+ _login_openai_codex(
+ mock_args,
+ PROVIDER_REGISTRY["openai-codex"],
+ force_new_login=True,
+ )
+ except SystemExit:
+ print("Login cancelled or failed.")
+ return
+ except Exception as exc:
+ print(f"Login failed: {exc}")
+ return
+ status = get_codex_auth_status()
+ if not status.get("logged_in"):
+ print("Login failed.")
+ return
+ elif choice == "3":
+ return
+ else:
+ print("Not logged into OpenAI Codex. Starting login...")
+ print()
+ try:
+ mock_args = argparse.Namespace()
+ _login_openai_codex(mock_args, PROVIDER_REGISTRY["openai-codex"])
+ except SystemExit:
+ print("Login cancelled or failed.")
+ return
+ except Exception as exc:
+ print(f"Login failed: {exc}")
+ return
+
+ _codex_token = None
+ # Prefer credential pool (where `hermes auth` stores device_code tokens),
+ # fall back to legacy provider state.
+ try:
+ _codex_status = get_codex_auth_status()
+ if _codex_status.get("logged_in"):
+ _codex_token = _codex_status.get("api_key")
+ except Exception:
+ pass
+ if not _codex_token:
+ try:
+ from hermes_cli.auth import resolve_codex_runtime_credentials
+
+ _codex_creds = resolve_codex_runtime_credentials()
+ _codex_token = _codex_creds.get("api_key")
+ except Exception:
+ pass
+
+ codex_models = get_codex_model_ids(access_token=_codex_token)
+
+ selected = _prompt_model_selection(codex_models, current_model=current_model)
+ if selected:
+ _save_model_choice(selected)
+ _update_config_for_provider("openai-codex", DEFAULT_CODEX_BASE_URL)
+ print(f"Default model set to: {selected} (via OpenAI Codex)")
+ else:
+ print("No change.")
+
+def _model_flow_xai_oauth(_config, current_model="", *, args=None):
+ """xAI Grok OAuth (SuperGrok / Premium+) provider: ensure logged in, then pick model."""
+ from hermes_cli.auth import (
+ get_xai_oauth_auth_status,
+ _prompt_model_selection,
+ _save_model_choice,
+ _update_config_for_provider,
+ resolve_xai_oauth_runtime_credentials,
+ _login_xai_oauth,
+ DEFAULT_XAI_OAUTH_BASE_URL,
+ PROVIDER_REGISTRY,
+ )
+ from hermes_cli.models import _PROVIDER_MODELS
+
+ status = get_xai_oauth_auth_status()
+ if status.get("logged_in"):
+ print(" xAI Grok OAuth (SuperGrok / Premium+) credentials: ✓")
+ print()
+ print(" 1. Use existing credentials")
+ print(" 2. Reauthenticate (new OAuth login)")
+ print(" 3. Cancel")
+ print()
+ try:
+ choice = input(" Choice [1/2/3]: ").strip()
+ except (KeyboardInterrupt, EOFError):
+ choice = "1"
+
+ if choice == "2":
+ print("Starting a fresh xAI OAuth login...")
+ print()
+ try:
+ # Forward CLI flags from ``hermes model --manual-paste``
+ # / ``--no-browser`` / ``--timeout`` into the loopback
+ # login. Without this, browser-only remotes (#26923)
+ # can't reach the manual-paste path via ``hermes model``.
+ mock_args = argparse.Namespace(
+ manual_paste=bool(getattr(args, "manual_paste", False)),
+ no_browser=bool(getattr(args, "no_browser", False)),
+ timeout=getattr(args, "timeout", None),
+ )
+ _login_xai_oauth(
+ mock_args,
+ PROVIDER_REGISTRY["xai-oauth"],
+ force_new_login=True,
+ )
+ except SystemExit:
+ print("Login cancelled or failed.")
+ return
+ except Exception as exc:
+ print(f"Login failed: {exc}")
+ return
+ elif choice == "3":
+ return
+ else:
+ print("Not logged into xAI Grok OAuth (SuperGrok / Premium+). Starting login...")
+ print()
+ try:
+ mock_args = argparse.Namespace(
+ manual_paste=bool(getattr(args, "manual_paste", False)),
+ no_browser=bool(getattr(args, "no_browser", False)),
+ timeout=getattr(args, "timeout", None),
+ )
+ _login_xai_oauth(mock_args, PROVIDER_REGISTRY["xai-oauth"])
+ except SystemExit:
+ print("Login cancelled or failed.")
+ return
+ except Exception as exc:
+ print(f"Login failed: {exc}")
+ return
+
+ # Resolve a usable base URL. ``resolve_xai_oauth_runtime_credentials``
+ # only reads from the auth.json singleton — but credentials may legitimately
+ # live only in the pool (e.g. after ``hermes auth add xai-oauth``). Fall
+ # back to the default base URL in that case so the model picker still
+ # completes successfully instead of bailing out with
+ # ``Could not resolve xAI OAuth credentials``.
+ base_url = DEFAULT_XAI_OAUTH_BASE_URL
+ try:
+ creds = resolve_xai_oauth_runtime_credentials()
+ base_url = (creds.get("base_url") or "").strip().rstrip("/") or base_url
+ except Exception:
+ pass
+
+ models = list(_PROVIDER_MODELS.get("xai-oauth") or _PROVIDER_MODELS.get("xai") or [])
+ selected = _prompt_model_selection(models, current_model=current_model or (models[0] if models else "grok-4.3"))
+ if selected:
+ _save_model_choice(selected)
+ _update_config_for_provider("xai-oauth", base_url)
+ print(f"Default model set to: {selected} (via xAI Grok OAuth — SuperGrok / Premium+)")
+ else:
+ print("No change.")
+
+def _model_flow_qwen_oauth(_config, current_model=""):
+ """Qwen OAuth provider: reuse local Qwen CLI login, then pick model."""
+ from hermes_cli.main import _DEFAULT_QWEN_PORTAL_MODELS
+ from hermes_cli.auth import (
+ get_qwen_auth_status,
+ resolve_qwen_runtime_credentials,
+ _prompt_model_selection,
+ _save_model_choice,
+ _update_config_for_provider,
+ DEFAULT_QWEN_BASE_URL,
+ )
+ from hermes_cli.models import fetch_api_models
+
+ status = get_qwen_auth_status()
+ if not status.get("logged_in"):
+ print("Not logged into Qwen CLI OAuth.")
+ print("Run: qwen auth qwen-oauth")
+ auth_file = status.get("auth_file")
+ if auth_file:
+ print(f"Expected credentials file: {auth_file}")
+ if status.get("error"):
+ print(f"Error: {status.get('error')}")
+ return
+
+ # Try live model discovery, fall back to curated list.
+ models = None
+ try:
+ creds = resolve_qwen_runtime_credentials(refresh_if_expiring=True)
+ models = fetch_api_models(creds["api_key"], creds["base_url"])
+ except Exception:
+ pass
+ if not models:
+ models = list(_DEFAULT_QWEN_PORTAL_MODELS)
+
+ default = current_model or (models[0] if models else "qwen3-coder-plus")
+ selected = _prompt_model_selection(models, current_model=default)
+ if selected:
+ _save_model_choice(selected)
+ _update_config_for_provider("qwen-oauth", DEFAULT_QWEN_BASE_URL)
+ print(f"Default model set to: {selected} (via Qwen OAuth)")
+ else:
+ print("No change.")
+
+def _model_flow_minimax_oauth(config, current_model="", args=None):
+ """MiniMax OAuth provider: ensure logged in, then pick model."""
+ from hermes_cli.auth import (
+ get_provider_auth_state,
+ _prompt_model_selection,
+ _save_model_choice,
+ _update_config_for_provider,
+ resolve_minimax_oauth_runtime_credentials,
+ AuthError,
+ format_auth_error,
+ _login_minimax_oauth,
+ PROVIDER_REGISTRY,
+ )
+
+ state = get_provider_auth_state("minimax-oauth")
+ if not state or not state.get("access_token"):
+ print("Not logged into MiniMax. Starting OAuth login...")
+ print()
+ try:
+ mock_args = argparse.Namespace(
+ region=getattr(args, "region", None) or "global",
+ no_browser=bool(getattr(args, "no_browser", False)),
+ timeout=getattr(args, "timeout", None) or 15.0,
+ )
+ _login_minimax_oauth(mock_args, PROVIDER_REGISTRY["minimax-oauth"])
+ except SystemExit:
+ print("Login cancelled or failed.")
+ return
+ except Exception as exc:
+ print(f"Login failed: {exc}")
+ return
+
+ try:
+ creds = resolve_minimax_oauth_runtime_credentials()
+ except AuthError as exc:
+ print(format_auth_error(exc))
+ return
+
+ from hermes_cli.models import _PROVIDER_MODELS
+
+ model_ids = _PROVIDER_MODELS.get("minimax-oauth", [])
+ selected = _prompt_model_selection(model_ids, current_model)
+ if not selected:
+ return
+ _save_model_choice(selected)
+ _update_config_for_provider("minimax-oauth", creds["base_url"])
+ print(f"\u2713 Using MiniMax model: {selected}")
+
+def _model_flow_google_gemini_cli(_config, current_model=""):
+ """Google Gemini OAuth (PKCE) via Cloud Code Assist — supports free AND paid tiers.
+
+ Flow:
+ 1. Show upfront warning about Google's ToS stance (per opencode-gemini-auth).
+ 2. If creds missing, run PKCE browser OAuth via agent.google_oauth.
+ 3. Resolve project context (env -> config -> auto-discover -> free tier).
+ 4. Prompt user to pick a model.
+ 5. Save to ~/.hermes/config.yaml.
+ """
+ from hermes_cli.auth import (
+ DEFAULT_GEMINI_CLOUDCODE_BASE_URL,
+ get_gemini_oauth_auth_status,
+ resolve_gemini_oauth_runtime_credentials,
+ _prompt_model_selection,
+ _save_model_choice,
+ _update_config_for_provider,
+ )
+ from hermes_cli.models import _PROVIDER_MODELS
+
+ print()
+ print("⚠ Google considers using the Gemini CLI OAuth client with third-party")
+ print(" software a policy violation. Some users have reported account")
+ print(" restrictions. You can use your own API key via 'gemini' provider")
+ print(" for the lowest-risk experience.")
+ print()
+ try:
+ proceed = input("Continue with OAuth login? [y/N]: ").strip().lower()
+ except (EOFError, KeyboardInterrupt):
+ print("Cancelled.")
+ return
+ if proceed not in {"y", "yes"}:
+ print("Cancelled.")
+ return
+
+ status = get_gemini_oauth_auth_status()
+ if not status.get("logged_in"):
+ try:
+ from agent.google_oauth import resolve_project_id_from_env, start_oauth_flow
+
+ env_project = resolve_project_id_from_env()
+ start_oauth_flow(force_relogin=True, project_id=env_project)
+ except Exception as exc:
+ print(f"OAuth login failed: {exc}")
+ return
+
+ # Verify creds resolve + trigger project discovery
+ try:
+ creds = resolve_gemini_oauth_runtime_credentials(force_refresh=False)
+ project_id = creds.get("project_id", "")
+ if project_id:
+ print(f" Using GCP project: {project_id}")
+ else:
+ print(
+ " No GCP project configured — free tier will be auto-provisioned on first request."
+ )
+ except Exception as exc:
+ print(f"Failed to resolve Gemini credentials: {exc}")
+ return
+
+ models = list(_PROVIDER_MODELS.get("google-gemini-cli") or [])
+ default = current_model or (models[0] if models else "gemini-3-flash-preview")
+ selected = _prompt_model_selection(models, current_model=default)
+ if selected:
+ _save_model_choice(selected)
+ _update_config_for_provider(
+ "google-gemini-cli", DEFAULT_GEMINI_CLOUDCODE_BASE_URL
+ )
+ print(
+ f"Default model set to: {selected} (via Google Gemini OAuth / Code Assist)"
+ )
+ else:
+ print("No change.")
+
+def _model_flow_custom(config):
+ """Custom endpoint: collect URL, API key, and model name.
+
+ Automatically saves the endpoint to ``custom_providers`` in config.yaml
+ so it appears in the provider menu on subsequent runs.
+ """
+ from hermes_cli.main import _auto_provider_name, _prompt_custom_api_mode_selection, _save_custom_provider
+ from hermes_cli.auth import _save_model_choice, deactivate_provider
+ from hermes_cli.config import get_env_value, load_config, save_config
+ from hermes_cli.secret_prompt import masked_secret_prompt
+
+ current_url = get_env_value("OPENAI_BASE_URL") or ""
+ current_key = get_env_value("OPENAI_API_KEY") or ""
+
+ print("Custom OpenAI-compatible endpoint configuration:")
+ if current_url:
+ print(f" Current URL: {current_url}")
+ if current_key:
+ print(f" Current key: {current_key[:8]}...")
+ print()
+
+ try:
+ base_url = input(
+ f"API base URL [{current_url or 'e.g. https://api.example.com/v1'}]: "
+ ).strip()
+ api_key = masked_secret_prompt(
+ f"API key [{current_key[:8] + '...' if current_key else 'optional'}]: "
+ ).strip()
+ except (KeyboardInterrupt, EOFError):
+ print("\nCancelled.")
+ return
+
+ if not base_url and not current_url:
+ print("No URL provided. Cancelled.")
+ return
+
+ # Validate URL format
+ effective_url = base_url or current_url
+ if not effective_url.startswith(("http://", "https://")):
+ print(f"Invalid URL: {effective_url} (must start with http:// or https://)")
+ return
+
+ effective_key = api_key or current_key
+
+ # Hint: most local model servers (Ollama, vLLM, llama.cpp) require /v1
+ # in the base URL for OpenAI-compatible chat completions. Prompt the
+ # user if the URL looks like a local server without /v1.
+ _url_lower = effective_url.rstrip("/").lower()
+ _looks_local = any(
+ h in _url_lower
+ for h in ("localhost", "127.0.0.1", "0.0.0.0", ":11434", ":8080", ":5000")
+ )
+ if _looks_local and not _url_lower.endswith("/v1"):
+ print()
+ print(f" Hint: Did you mean to add /v1 at the end?")
+ print(f" Most local model servers (Ollama, vLLM, llama.cpp) require it.")
+ print(f" e.g. {effective_url.rstrip('/')}/v1")
+ try:
+ _add_v1 = input(" Add /v1? [Y/n]: ").strip().lower()
+ except (KeyboardInterrupt, EOFError):
+ _add_v1 = "n"
+ if _add_v1 in {"", "y", "yes"}:
+ effective_url = effective_url.rstrip("/") + "/v1"
+ if base_url:
+ base_url = effective_url
+ print(f" Updated URL: {effective_url}")
+ print()
+
+ from hermes_cli.models import probe_api_models
+
+ probe = probe_api_models(effective_key, effective_url)
+ if probe.get("used_fallback") and probe.get("resolved_base_url"):
+ print(
+ f"Warning: endpoint verification worked at {probe['resolved_base_url']}/models, "
+ f"not the exact URL you entered. Saving the working base URL instead."
+ )
+ effective_url = probe["resolved_base_url"]
+ if base_url:
+ base_url = effective_url
+ elif probe.get("models") is not None:
+ print(
+ f"Verified endpoint via {probe.get('probed_url')} "
+ f"({len(probe.get('models') or [])} model(s) visible)"
+ )
+ else:
+ print(
+ f"Warning: could not verify this endpoint via {probe.get('probed_url')}. "
+ f"Hermes will still save it."
+ )
+ if probe.get("suggested_base_url"):
+ suggested = probe["suggested_base_url"]
+ if suggested.endswith("/v1"):
+ print(
+ f" If this server expects /v1 in the path, try base URL: {suggested}"
+ )
+ else:
+ print(f" If /v1 should not be in the base URL, try: {suggested}")
+
+ # Prompt for API compatibility mode explicitly so codex-compatible custom
+ # providers don't silently fall back to chat_completions.
+ current_model_cfg = config.get("model")
+ current_api_mode = ""
+ if isinstance(current_model_cfg, dict):
+ current_api_mode = str(current_model_cfg.get("api_mode") or "").strip()
+ api_mode = _prompt_custom_api_mode_selection(
+ effective_url,
+ current_api_mode=current_api_mode,
+ )
+ if api_mode:
+ print(f" API mode: {api_mode}")
+ else:
+ print(" API mode: auto-detect")
+
+ # Select model — use probe results when available, fall back to manual input
+ model_name = ""
+ detected_models = probe.get("models") or []
+ try:
+ if len(detected_models) == 1:
+ print(f" Detected model: {detected_models[0]}")
+ confirm = input(" Use this model? [Y/n]: ").strip().lower()
+ if confirm in {"", "y", "yes"}:
+ model_name = detected_models[0]
+ else:
+ model_name = input("Model name (e.g. gpt-4, llama-3-70b): ").strip()
+ elif len(detected_models) > 1:
+ print(" Available models:")
+ for i, m in enumerate(detected_models, 1):
+ print(f" {i}. {m}")
+ pick = input(
+ f" Select model [1-{len(detected_models)}] or type name: "
+ ).strip()
+ if pick.isdigit() and 1 <= int(pick) <= len(detected_models):
+ model_name = detected_models[int(pick) - 1]
+ elif pick:
+ model_name = pick
+ else:
+ model_name = input("Model name (e.g. gpt-4, llama-3-70b): ").strip()
+
+ context_length_str = input(
+ "Context length in tokens [leave blank for auto-detect]: "
+ ).strip()
+
+ # Prompt for a display name — shown in the provider menu on future runs
+ default_name = _auto_provider_name(effective_url)
+ display_name = input(f"Display name [{default_name}]: ").strip() or default_name
+ except (KeyboardInterrupt, EOFError):
+ print("\nCancelled.")
+ return
+
+ context_length = None
+ if context_length_str:
+ try:
+ context_length = int(
+ context_length_str.replace(",", "")
+ .replace("k", "000")
+ .replace("K", "000")
+ )
+ if context_length <= 0:
+ context_length = None
+ except ValueError:
+ print(f"Invalid context length: {context_length_str} — will auto-detect.")
+ context_length = None
+
+ if model_name:
+ _save_model_choice(model_name)
+
+ # Update config and deactivate any OAuth provider
+ cfg = load_config()
+ model = cfg.get("model")
+ if not isinstance(model, dict):
+ model = {"default": model} if model else {}
+ cfg["model"] = model
+ model["provider"] = "custom"
+ model["base_url"] = effective_url
+ if effective_key:
+ model["api_key"] = effective_key
+ if api_mode:
+ model["api_mode"] = api_mode
+ else:
+ model.pop("api_mode", None)
+ save_config(cfg)
+ deactivate_provider()
+
+ # Sync the caller's config dict so the setup wizard's final
+ # save_config(config) preserves our model settings. Without
+ # this, the wizard overwrites model.provider/base_url with
+ # the stale values from its own config dict (#4172).
+ config["model"] = dict(model)
+
+ print(f"Default model set to: {model_name} (via {effective_url})")
+ else:
+ if base_url or api_key:
+ deactivate_provider()
+ # Even without a model name, persist the custom endpoint on the
+ # caller's config dict so the setup wizard doesn't lose it.
+ _caller_model = config.get("model")
+ if not isinstance(_caller_model, dict):
+ _caller_model = {"default": _caller_model} if _caller_model else {}
+ _caller_model["provider"] = "custom"
+ _caller_model["base_url"] = effective_url
+ if effective_key:
+ _caller_model["api_key"] = effective_key
+ if api_mode:
+ _caller_model["api_mode"] = api_mode
+ else:
+ _caller_model.pop("api_mode", None)
+ config["model"] = _caller_model
+ print("Endpoint saved. Use `/model` in chat or `hermes model` to set a model.")
+
+ # Auto-save to custom_providers so it appears in the menu next time
+ _save_custom_provider(
+ effective_url,
+ effective_key,
+ model_name or "",
+ context_length=context_length,
+ name=display_name,
+ api_mode=api_mode,
+ )
+
+def _model_flow_azure_foundry(config, current_model=""):
+ """Azure Foundry provider: configure endpoint, auth mode, API mode, and model.
+
+ Azure Foundry supports both OpenAI-style (``/v1/chat/completions``) and
+ Anthropic-style (``/v1/messages``) endpoints, and two authentication
+ modes:
+
+ * **API key** (default) — uses ``AZURE_FOUNDRY_API_KEY`` from .env.
+ * **Microsoft Entra ID** — keyless, RBAC-based auth via the
+ ``azure-identity`` SDK (Managed Identity / Workload Identity / az
+ login / VS Code / azd / service principal env vars). Works on both
+ OpenAI-style and Anthropic-style endpoints — Microsoft RBAC is
+ per-resource and the same ``Azure AI User`` role grants
+ both. For OpenAI-style the OpenAI SDK's native callable
+ ``api_key=`` contract is used; for Anthropic-style an
+ ``httpx.Client`` with a request event hook (built by
+ :func:`agent.azure_identity_adapter.build_bearer_http_client`)
+ mints a fresh JWT per request because the Anthropic SDK does not
+ accept a callable ``auth_token`` natively.
+
+ The wizard auto-detects the transport and available models when
+ possible:
+
+ * URLs ending in ``/anthropic`` → Anthropic Messages API.
+ * Successful ``GET /models`` probe → OpenAI-style + populates
+ a picker with the returned deployment / model IDs.
+ * Anthropic Messages probe fallback when ``/models`` fails.
+ * Manual entry when every probe fails (private endpoints, etc.).
+
+ Context lengths for the chosen model are resolved via the standard
+ :func:`agent.model_metadata.get_model_context_length` chain
+ (models.dev, provider metadata, hardcoded family fallbacks).
+ """
+ from hermes_cli.auth import _save_model_choice, deactivate_provider # noqa: F401
+ from hermes_cli.config import (
+ get_env_value,
+ save_env_value,
+ load_config,
+ save_config,
+ )
+ from hermes_cli import azure_detect
+
+ # ── Load current Azure Foundry configuration ─────────────────────
+ model_cfg = config.get("model", {})
+ if isinstance(model_cfg, dict) and model_cfg.get("provider") == "azure-foundry":
+ current_base_url = str(model_cfg.get("base_url", "") or "")
+ current_api_mode = str(model_cfg.get("api_mode", "") or "")
+ current_auth_mode = str(model_cfg.get("auth_mode") or "api_key").strip().lower() or "api_key"
+ _cur_entra = model_cfg.get("entra") or {}
+ current_entra = _cur_entra if isinstance(_cur_entra, dict) else {}
+ else:
+ current_base_url = ""
+ current_api_mode = ""
+ current_auth_mode = "api_key"
+ current_entra = {}
+
+ current_api_key = get_env_value("AZURE_FOUNDRY_API_KEY") or ""
+
+ print()
+ print("Azure Foundry Configuration")
+ print("=" * 50)
+ print()
+ print("Azure Foundry can host models with either OpenAI-style or")
+ print("Anthropic-style API endpoints. Hermes will probe your")
+ print("endpoint to auto-detect the transport and the deployed")
+ print("models when possible.")
+ print()
+
+ if current_base_url:
+ print(f" Current endpoint: {current_base_url}")
+ if current_api_mode:
+ _lbl = (
+ "OpenAI-style"
+ if current_api_mode == "chat_completions"
+ else "Anthropic-style"
+ )
+ print(f" Current API mode: {_lbl}")
+ if current_auth_mode == "entra_id":
+ print(f" Current auth mode: Microsoft Entra ID (keyless)")
+ elif current_api_key:
+ print(f" Current auth mode: API key ({current_api_key[:8]}...)")
+ print()
+
+ # ── Step 1: endpoint URL ─────────────────────────────────────────
+ try:
+ _placeholder = (
+ current_base_url
+ or "e.g. https://.openai.azure.com/openai/v1 "
+ "or https://.services.ai.azure.com/anthropic"
+ )
+ base_url = input(
+ f"API endpoint URL [{_placeholder}]: "
+ ).strip()
+ except (KeyboardInterrupt, EOFError):
+ print("\nCancelled.")
+ return
+
+ effective_url = (base_url or current_base_url).rstrip("/")
+ if not effective_url:
+ print("No endpoint URL provided. Cancelled.")
+ return
+ if not effective_url.startswith(("http://", "https://")):
+ print(f"Invalid URL: {effective_url} (must start with http:// or https://)")
+ return
+
+ # ── Step 2: authentication mode ──────────────────────────────────
+ print()
+ print("Authentication:")
+ print(" 1. API key (AZURE_FOUNDRY_API_KEY in .env)")
+ print(" 2. Microsoft Entra ID (managed identity / workload identity / az login)")
+ print(" Recommended by Microsoft. Works for both OpenAI-style and Anthropic-style endpoints.")
+ print(" Requires the 'Azure AI User' role on the Foundry resource.")
+ try:
+ _auth_default = "2" if current_auth_mode == "entra_id" else "1"
+ auth_choice = (
+ input(f"Authentication mode [1/2] ({_auth_default}): ").strip()
+ or _auth_default
+ )
+ except (KeyboardInterrupt, EOFError):
+ print("\nCancelled.")
+ return
+ use_entra = auth_choice == "2"
+ auth_mode_label = "entra_id" if use_entra else "api_key"
+
+ # ── Step 3: credentials (key OR Entra preflight) ─────────────────
+ effective_key: str = ""
+ entra_overrides: dict = {}
+ token_provider = None # callable when entra
+ entra_scope = ""
+
+ if use_entra:
+ try:
+ from agent.azure_identity_adapter import (
+ EntraIdentityConfig,
+ SCOPE_AI_AZURE_DEFAULT,
+ build_token_provider,
+ describe_active_credential,
+ has_azure_identity_installed,
+ )
+ except ImportError as exc:
+ print()
+ print(f"⚠ Could not import azure-identity adapter: {exc}")
+ print(" Falling back to API key auth.")
+ use_entra = False
+ auth_mode_label = "api_key"
+
+ if use_entra:
+ print()
+ if not has_azure_identity_installed():
+ print("◐ The 'azure-identity' package is not installed yet.")
+ print(
+ " Hermes will install it now (the preflight below "
+ "triggers the lazy-install). To skip lazy installs, "
+ "run: pip install azure-identity"
+ )
+
+ # Preserve only the optional scope override. Identity selection
+ # (tenant, user-assigned MI, workload identity, service principal)
+ # stays in Azure SDK env vars such as AZURE_CLIENT_ID.
+ _persisted_scope_override = str(current_entra.get("scope") or "").strip()
+ entra_scope = _persisted_scope_override or SCOPE_AI_AZURE_DEFAULT
+
+ entra_overrides = {}
+ if _persisted_scope_override:
+ entra_overrides["scope"] = _persisted_scope_override
+
+ print()
+ print("◐ Probing Microsoft Entra ID credential chain (up to 10s)...")
+ _config = EntraIdentityConfig(
+ scope=entra_scope,
+ )
+ info = describe_active_credential(config=_config, timeout_seconds=10.0)
+ if info.get("ok"):
+ env_sources = info.get("env_sources") or []
+ tag = ", ".join(env_sources) if env_sources else "default chain"
+ print(f"✓ Entra ID token acquired ({tag}, scope={entra_scope})")
+ else:
+ err = info.get("error") or "credential chain exhausted"
+ hint = info.get("hint") or (
+ "Run `az login`, attach a managed identity to this VM, or "
+ "set AZURE_TENANT_ID/AZURE_CLIENT_ID/AZURE_CLIENT_SECRET."
+ )
+ print(f"⚠ {err}")
+ print(f" Hint: {hint}")
+ try:
+ ans = input("Save Entra config anyway and validate later? [Y/n]: ").strip().lower()
+ except (KeyboardInterrupt, EOFError):
+ print("\nCancelled.")
+ return
+ if ans and ans not in ("y", "yes"):
+ print("Cancelled.")
+ return
+
+ # Build the token provider for the detection probe (best-effort —
+ # if the credential chain failed above, this will silently return
+ # None inside azure_detect and the probe falls back to manual).
+ try:
+ token_provider = build_token_provider(config=_config)
+ except Exception as exc:
+ print(f"⚠ Could not build token provider for probing: {exc}")
+ token_provider = None
+ else:
+ print()
+ from hermes_cli.secret_prompt import masked_secret_prompt
+
+ try:
+ api_key = masked_secret_prompt(
+ f"API key [{current_api_key[:8] + '...' if current_api_key else 'required'}]: "
+ ).strip()
+ except (KeyboardInterrupt, EOFError):
+ print("\nCancelled.")
+ return
+
+ effective_key = api_key or current_api_key
+ if not effective_key:
+ print("No API key provided. Cancelled.")
+ return
+
+ # ── Step 4: auto-detect transport + models ───────────────────────
+ print()
+ print("◐ Probing endpoint to auto-detect transport and models...")
+ detection = azure_detect.detect(
+ effective_url,
+ api_key=effective_key,
+ token_provider=token_provider,
+ )
+
+ discovered_models: list[str] = list(detection.models)
+ api_mode: str = detection.api_mode or ""
+
+ if api_mode:
+ mode_label = (
+ "OpenAI-style" if api_mode == "chat_completions" else "Anthropic-style"
+ )
+ print(f"✓ Detected API transport: {mode_label}")
+ if detection.reason:
+ print(f" ({detection.reason})")
+ if discovered_models:
+ print(
+ f"✓ Found {len(discovered_models)} deployed model(s) on this endpoint"
+ )
+ else:
+ print(f"⚠ Auto-detection incomplete: {detection.reason}")
+ print()
+ print("Select the API format your Azure Foundry endpoint uses:")
+ print(" 1. OpenAI-style (POST /v1/chat/completions)")
+ print(" For: GPT models, Llama, Mistral, and most open models")
+ print(" 2. Anthropic-style (POST /v1/messages)")
+ print(" For: Claude models deployed via Anthropic API format")
+ try:
+ default_choice = "2" if current_api_mode == "anthropic_messages" else "1"
+ mode_choice = (
+ input(f"API format [1/2] ({default_choice}): ").strip()
+ or default_choice
+ )
+ except (KeyboardInterrupt, EOFError):
+ print("\nCancelled.")
+ return
+ api_mode = "anthropic_messages" if mode_choice == "2" else "chat_completions"
+
+ # ── Step 5: model name ───────────────────────────────────────────
+ print()
+ effective_model = ""
+ if discovered_models:
+ print("Available models on this endpoint:")
+ for i, mid in enumerate(discovered_models[:30], start=1):
+ print(f" {i:>2}. {mid}")
+ if len(discovered_models) > 30:
+ print(
+ f" ... and {len(discovered_models) - 30} more (type name manually if not shown)"
+ )
+ print()
+ try:
+ pick = input(
+ f"Pick by number, or type a deployment name [{current_model or discovered_models[0]}]: "
+ ).strip()
+ except (KeyboardInterrupt, EOFError):
+ print("\nCancelled.")
+ return
+ if not pick:
+ effective_model = current_model or discovered_models[0]
+ elif pick.isdigit() and 1 <= int(pick) <= min(len(discovered_models), 30):
+ effective_model = discovered_models[int(pick) - 1]
+ else:
+ effective_model = pick
+ else:
+ try:
+ model_name = input(
+ f"Model / deployment name [{current_model or 'e.g. gpt-5.4, claude-sonnet-4-6'}]: "
+ ).strip()
+ except (KeyboardInterrupt, EOFError):
+ print("\nCancelled.")
+ return
+ effective_model = model_name or current_model
+
+ if not effective_model:
+ print("No model name provided. Cancelled.")
+ return
+
+ # ── Step 6: context-length lookup ────────────────────────────────
+ ctx_len = azure_detect.lookup_context_length(
+ effective_model,
+ effective_url,
+ api_key=effective_key,
+ token_provider=token_provider,
+ )
+
+ # ── Step 7: persist ──────────────────────────────────────────────
+ if not use_entra:
+ save_env_value("AZURE_FOUNDRY_API_KEY", effective_key)
+
+ cfg = load_config()
+ model = cfg.get("model")
+ if not isinstance(model, dict):
+ model = {"default": model} if model else {}
+ cfg["model"] = model
+
+ model["provider"] = "azure-foundry"
+ model["base_url"] = effective_url
+ model["api_mode"] = api_mode
+ model["default"] = effective_model
+ model["auth_mode"] = auth_mode_label
+ if use_entra:
+ # Persist only the non-default Entra scope so config.yaml stays tidy.
+ # Azure identity selection stays in standard AZURE_* env vars.
+ clean_entra: dict = {}
+ for key in ("scope",):
+ val = entra_overrides.get(key)
+ if val:
+ clean_entra[key] = val
+ if clean_entra:
+ model["entra"] = clean_entra
+ elif "entra" in model:
+ del model["entra"]
+ else:
+ if "entra" in model:
+ del model["entra"]
+ if ctx_len:
+ model["context_length"] = ctx_len
+
+ save_config(cfg)
+ deactivate_provider()
+ config["model"] = dict(model)
+
+ # Clear any conflicting env vars so auxiliary clients don't poison
+ # themselves with a stale OpenAI base URL / key.
+ if get_env_value("OPENAI_BASE_URL"):
+ save_env_value("OPENAI_BASE_URL", "")
+ if get_env_value("OPENAI_API_KEY"):
+ save_env_value("OPENAI_API_KEY", "")
+
+ mode_label = "OpenAI-style" if api_mode == "chat_completions" else "Anthropic-style"
+ auth_label = (
+ "Microsoft Entra ID (keyless)" if use_entra else "API key"
+ )
+ print()
+ print("✓ Azure Foundry configured:")
+ print(f" Endpoint: {effective_url}")
+ print(f" API mode: {mode_label}")
+ print(f" Auth: {auth_label}")
+ print(f" Model: {effective_model}")
+ if ctx_len:
+ print(f" Context length: {ctx_len:,} tokens")
+ else:
+ print(" Context length: not auto-detected (will fall back at runtime)")
+ print()
+
+def _model_flow_named_custom(config, provider_info):
+ """Handle a named custom provider from config.yaml custom_providers list.
+
+ Always probes the endpoint's /models API to let the user pick a model.
+ If a model was previously saved, it is pre-selected in the menu.
+ Falls back to the saved model if probing fails.
+ """
+ from hermes_cli.main import _custom_provider_api_key_config_value, _custom_provider_base_url_config_value, _save_custom_provider
+ from hermes_cli.auth import _save_model_choice, deactivate_provider
+ from hermes_cli.config import load_config, save_config
+ from hermes_cli.models import fetch_api_models
+
+ name = provider_info["name"]
+ base_url = provider_info["base_url"]
+ api_mode = provider_info.get("api_mode", "")
+ api_key = provider_info.get("api_key", "")
+ key_env = provider_info.get("key_env", "")
+ saved_model = provider_info.get("model", "")
+ provider_key = (provider_info.get("provider_key") or "").strip()
+
+ # Resolve key from env var if api_key not set directly
+ if not api_key and key_env:
+ api_key = os.environ.get(key_env, "")
+ config_api_key = _custom_provider_api_key_config_value(provider_info, api_key)
+
+ # Honor ``discover_models: false`` (default True) — when discovery is
+ # disabled, use the configured ``models:`` list verbatim and skip the
+ # live /models probe. This lets operators restrict the picker to the
+ # subset their plan actually serves instead of the endpoint's full
+ # catalog (#18726: Baidu Qianfan returns 100+ models for a 2-3 model
+ # plan). Same semantics as the slash-command picker (model_switch.py
+ # sections 3 & 4): default discovers, false keeps the explicit list.
+ discover = provider_info.get("discover_models", True)
+ if isinstance(discover, str):
+ discover = discover.lower() not in {"false", "no", "0"}
+ configured_models: list[str] = []
+ cfg_models = provider_info.get("models", {})
+ if isinstance(cfg_models, dict):
+ configured_models = [str(m) for m in cfg_models if str(m).strip()]
+ elif isinstance(cfg_models, list):
+ configured_models = [
+ str(m) for m in cfg_models if isinstance(m, str) and m.strip()
+ ]
+
+ print(f" Provider: {name}")
+ print(f" URL: {base_url}")
+ if saved_model:
+ print(f" Current: {saved_model}")
+ print()
+
+ if not discover and configured_models:
+ # Discovery disabled with an explicit list — use it verbatim, no probe.
+ print(f"Using configured models (discover_models: false): {len(configured_models)}")
+ models = configured_models
+ else:
+ print("Fetching available models...")
+ fetch_kwargs = {"timeout": 8.0}
+ if api_mode:
+ fetch_kwargs["api_mode"] = api_mode
+ models = fetch_api_models(api_key, base_url, **fetch_kwargs)
+ # If the probe came back empty but the operator configured an explicit
+ # list, fall back to it rather than forcing manual entry.
+ if not models and configured_models:
+ models = configured_models
+
+ if models:
+ default_idx = 0
+ if saved_model and saved_model in models:
+ default_idx = models.index(saved_model)
+
+ print(f"Found {len(models)} model(s):\n")
+ try:
+ from hermes_cli.curses_ui import curses_radiolist
+
+ menu_items = [
+ f"{m} (current)" if m == saved_model else m for m in models
+ ] + ["Cancel"]
+ idx = curses_radiolist(
+ f"Select model from {name}:",
+ menu_items,
+ selected=default_idx,
+ cancel_returns=-1,
+ searchable=True,
+ )
+ print()
+ if idx < 0 or idx >= len(models):
+ print("Cancelled.")
+ return
+ model_name = models[idx]
+ except (ImportError, NotImplementedError, OSError, subprocess.SubprocessError):
+ for i, m in enumerate(models, 1):
+ suffix = " (current)" if m == saved_model else ""
+ print(f" {i}. {m}{suffix}")
+ print(f" {len(models) + 1}. Cancel")
+ print()
+ try:
+ val = input(f"Choice [1-{len(models) + 1}]: ").strip()
+ if not val:
+ print("Cancelled.")
+ return
+ idx = int(val) - 1
+ if idx < 0 or idx >= len(models):
+ print("Cancelled.")
+ return
+ model_name = models[idx]
+ except (ValueError, KeyboardInterrupt, EOFError):
+ print("\nCancelled.")
+ return
+ elif saved_model:
+ print("Could not fetch models from endpoint.")
+ try:
+ model_name = input(f"Model name [{saved_model}]: ").strip() or saved_model
+ except (KeyboardInterrupt, EOFError):
+ print("\nCancelled.")
+ return
+ else:
+ print("Could not fetch models from endpoint. Enter model name manually.")
+ try:
+ model_name = input("Model name: ").strip()
+ except (KeyboardInterrupt, EOFError):
+ print("\nCancelled.")
+ return
+ if not model_name:
+ print("No model specified. Cancelled.")
+ return
+
+ # Activate and save the model to the custom_providers entry
+ _save_model_choice(model_name)
+
+ cfg = load_config()
+ model = cfg.get("model")
+ if not isinstance(model, dict):
+ model = {"default": model} if model else {}
+ cfg["model"] = model
+ if provider_key:
+ model["provider"] = provider_key
+ model.pop("base_url", None)
+ model.pop("api_key", None)
+ else:
+ model["provider"] = "custom"
+ model["base_url"] = _custom_provider_base_url_config_value(
+ provider_info, base_url
+ )
+ if config_api_key:
+ model["api_key"] = config_api_key
+ # Apply api_mode from custom_providers entry, or clear stale value
+ custom_api_mode = provider_info.get("api_mode", "")
+ if custom_api_mode:
+ model["api_mode"] = custom_api_mode
+ else:
+ model.pop("api_mode", None) # let runtime auto-detect from URL
+ save_config(cfg)
+ deactivate_provider()
+
+ # Persist the selected model back to whichever schema owns this endpoint.
+ if provider_key:
+ cfg = load_config()
+ providers_cfg = cfg.get("providers")
+ if isinstance(providers_cfg, dict):
+ provider_entry = providers_cfg.get(provider_key)
+ if isinstance(provider_entry, dict):
+ provider_entry["default_model"] = model_name
+ # Only persist an inline api_key when the user originally had
+ # one (either a literal secret or a ``${VAR}`` template). When
+ # the entry relies on ``key_env``, do not synthesize a
+ # ``${key_env}`` api_key — the runtime already resolves the
+ # key from ``key_env`` directly, and writing the resolved
+ # secret (or even a synthesized template) would silently
+ # downgrade credential hygiene on entries that intentionally
+ # keep plaintext out of ``config.yaml``. See issue #15803.
+ original_api_key_ref = str(
+ provider_info.get("api_key_ref", "") or ""
+ ).strip()
+ original_api_key = str(provider_info.get("api_key", "") or "").strip()
+ had_inline_api_key = bool(original_api_key_ref or original_api_key)
+ if (
+ had_inline_api_key
+ and config_api_key
+ and not str(provider_entry.get("api_key", "") or "").strip()
+ ):
+ provider_entry["api_key"] = config_api_key
+ if key_env and not str(provider_entry.get("key_env", "") or "").strip():
+ provider_entry["key_env"] = key_env
+ cfg["providers"] = providers_cfg
+ save_config(cfg)
+ else:
+ # Save model name to the custom_providers entry for next time
+ _save_custom_provider(base_url, config_api_key, model_name, api_mode=api_mode)
+
+ print(f"\n✅ Model set to: {model_name}")
+ print(f" Provider: {name} ({base_url})")
+
+def _model_flow_copilot(config, current_model=""):
+ """GitHub Copilot flow using env vars, gh CLI, or OAuth device code."""
+ from hermes_cli.main import _current_reasoning_effort, _prompt_reasoning_effort_selection, _set_reasoning_effort
+ from hermes_cli.auth import (
+ PROVIDER_REGISTRY,
+ _prompt_model_selection,
+ _save_model_choice,
+ deactivate_provider,
+ resolve_api_key_provider_credentials,
+ )
+ from hermes_cli.config import save_env_value, load_config, save_config
+ from hermes_cli.models import (
+ _PROVIDER_MODELS,
+ fetch_api_models,
+ fetch_github_model_catalog,
+ github_model_reasoning_efforts,
+ copilot_model_api_mode,
+ normalize_copilot_model_id,
+ )
+
+ provider_id = "copilot"
+ pconfig = PROVIDER_REGISTRY[provider_id]
+
+ creds = resolve_api_key_provider_credentials(provider_id)
+ api_key = creds.get("api_key", "")
+ source = creds.get("source", "")
+
+ if not api_key:
+ print("No GitHub token configured for GitHub Copilot.")
+ print()
+ print(" Supported token types:")
+ print(
+ " → OAuth token (gho_*) via `copilot login` or device code flow"
+ )
+ print(" → Fine-grained PAT (github_pat_*) with Copilot Requests permission")
+ print(" → GitHub App token (ghu_*) via environment variable")
+ print(" ✗ Classic PAT (ghp_*) NOT supported by Copilot API")
+ print()
+ print(" Options:")
+ print(" 1. Login with GitHub (OAuth device code flow)")
+ print(" 2. Enter a token manually")
+ print(" 3. Cancel")
+ print()
+ try:
+ choice = input(" Choice [1-3]: ").strip()
+ except (KeyboardInterrupt, EOFError):
+ print()
+ return
+
+ if choice == "1":
+ try:
+ from hermes_cli.copilot_auth import copilot_device_code_login
+
+ token = copilot_device_code_login()
+ if token:
+ save_env_value("COPILOT_GITHUB_TOKEN", token)
+ print(" Copilot token saved.")
+ print()
+ else:
+ print(" Login cancelled or failed.")
+ return
+ except Exception as exc:
+ print(f" Login failed: {exc}")
+ return
+ elif choice == "2":
+ from hermes_cli.secret_prompt import masked_secret_prompt
+
+ try:
+ new_key = masked_secret_prompt(" Token (COPILOT_GITHUB_TOKEN): ").strip()
+ except (KeyboardInterrupt, EOFError):
+ print()
+ return
+ if not new_key:
+ print(" Cancelled.")
+ return
+ # Validate token type
+ try:
+ from hermes_cli.copilot_auth import validate_copilot_token
+
+ valid, msg = validate_copilot_token(new_key)
+ if not valid:
+ print(f" ✗ {msg}")
+ return
+ except ImportError:
+ pass
+ save_env_value("COPILOT_GITHUB_TOKEN", new_key)
+ print(" Token saved.")
+ print()
+ else:
+ print(" Cancelled.")
+ return
+
+ creds = resolve_api_key_provider_credentials(provider_id)
+ api_key = creds.get("api_key", "")
+ source = creds.get("source", "")
+ else:
+ if source in {"GITHUB_TOKEN", "GH_TOKEN"}:
+ from hermes_cli.env_loader import format_secret_source_suffix
+ bw_suffix = format_secret_source_suffix(source)
+ print(f" GitHub token: {api_key[:8]}... ✓ ({source}{bw_suffix})")
+ elif source == "gh auth token":
+ print(" GitHub token: ✓ (from `gh auth token`)")
+ else:
+ print(" GitHub token: ✓")
+ print()
+
+ effective_base = pconfig.inference_base_url
+
+ catalog = fetch_github_model_catalog(api_key)
+ live_models = (
+ [item.get("id", "") for item in catalog if item.get("id")]
+ if catalog
+ else fetch_api_models(api_key, effective_base)
+ )
+ normalized_current_model = (
+ normalize_copilot_model_id(
+ current_model,
+ catalog=catalog,
+ api_key=api_key,
+ )
+ or current_model
+ )
+ if live_models:
+ model_list = [model_id for model_id in live_models if model_id]
+ print(f" Found {len(model_list)} model(s) from GitHub Copilot")
+ else:
+ model_list = _PROVIDER_MODELS.get(provider_id, [])
+ if model_list:
+ print(
+ " ⚠ Could not auto-detect models from GitHub Copilot — showing defaults."
+ )
+ print(' Use "Enter custom model name" if you do not see your model.')
+
+ if model_list:
+ selected = _prompt_model_selection(
+ model_list, current_model=normalized_current_model
+ )
+ else:
+ try:
+ selected = input("Model name: ").strip()
+ except (KeyboardInterrupt, EOFError):
+ selected = None
+
+ if selected:
+ selected = (
+ normalize_copilot_model_id(
+ selected,
+ catalog=catalog,
+ api_key=api_key,
+ )
+ or selected
+ )
+ initial_cfg = load_config()
+ current_effort = _current_reasoning_effort(initial_cfg)
+ reasoning_efforts = github_model_reasoning_efforts(
+ selected,
+ catalog=catalog,
+ api_key=api_key,
+ )
+ selected_effort = None
+ if reasoning_efforts:
+ print(f" {selected} supports reasoning controls.")
+ selected_effort = _prompt_reasoning_effort_selection(
+ reasoning_efforts, current_effort=current_effort
+ )
+
+ _save_model_choice(selected)
+
+ cfg = load_config()
+ model = cfg.get("model")
+ if not isinstance(model, dict):
+ model = {"default": model} if model else {}
+ cfg["model"] = model
+ model["provider"] = provider_id
+ model["base_url"] = effective_base
+ model["api_mode"] = copilot_model_api_mode(
+ selected,
+ catalog=catalog,
+ api_key=api_key,
+ )
+ if selected_effort is not None:
+ _set_reasoning_effort(cfg, selected_effort)
+ save_config(cfg)
+ deactivate_provider()
+
+ print(f"Default model set to: {selected} (via {pconfig.name})")
+ if reasoning_efforts:
+ if selected_effort == "none":
+ print("Reasoning disabled for this model.")
+ elif selected_effort:
+ print(f"Reasoning effort set to: {selected_effort}")
+ else:
+ print("No change.")
+
+def _model_flow_copilot_acp(config, current_model=""):
+ """GitHub Copilot ACP flow using the local Copilot CLI."""
+ from hermes_cli.auth import (
+ PROVIDER_REGISTRY,
+ _prompt_model_selection,
+ _save_model_choice,
+ deactivate_provider,
+ get_external_process_provider_status,
+ resolve_api_key_provider_credentials,
+ resolve_external_process_provider_credentials,
+ )
+ from hermes_cli.models import (
+ _PROVIDER_MODELS,
+ fetch_github_model_catalog,
+ normalize_copilot_model_id,
+ )
+ from hermes_cli.config import load_config, save_config
+
+ del config
+
+ provider_id = "copilot-acp"
+ pconfig = PROVIDER_REGISTRY[provider_id]
+
+ status = get_external_process_provider_status(provider_id)
+ resolved_command = (
+ status.get("resolved_command") or status.get("command") or "copilot"
+ )
+ effective_base = status.get("base_url") or pconfig.inference_base_url
+
+ print(" GitHub Copilot ACP delegates Hermes turns to `copilot --acp`.")
+ print(" Hermes currently starts its own ACP subprocess for each request.")
+ print(" Hermes uses your selected model as a hint for the Copilot ACP session.")
+ print(f" Command: {resolved_command}")
+ print(f" Backend marker: {effective_base}")
+ print()
+
+ try:
+ creds = resolve_external_process_provider_credentials(provider_id)
+ except Exception as exc:
+ print(f" ⚠ {exc}")
+ print(
+ " Set HERMES_COPILOT_ACP_COMMAND or COPILOT_CLI_PATH if Copilot CLI is installed elsewhere."
+ )
+ return
+
+ effective_base = creds.get("base_url") or effective_base
+
+ catalog_api_key = ""
+ try:
+ catalog_creds = resolve_api_key_provider_credentials("copilot")
+ catalog_api_key = catalog_creds.get("api_key", "")
+ except Exception:
+ pass
+
+ catalog = fetch_github_model_catalog(catalog_api_key)
+ normalized_current_model = (
+ normalize_copilot_model_id(
+ current_model,
+ catalog=catalog,
+ api_key=catalog_api_key,
+ )
+ or current_model
+ )
+
+ if catalog:
+ model_list = [item.get("id", "") for item in catalog if item.get("id")]
+ print(f" Found {len(model_list)} model(s) from GitHub Copilot")
+ else:
+ model_list = _PROVIDER_MODELS.get("copilot", [])
+ if model_list:
+ print(
+ " ⚠ Could not auto-detect models from GitHub Copilot — showing defaults."
+ )
+ print(' Use "Enter custom model name" if you do not see your model.')
+
+ if model_list:
+ selected = _prompt_model_selection(
+ model_list,
+ current_model=normalized_current_model,
+ )
+ else:
+ try:
+ selected = input("Model name: ").strip()
+ except (KeyboardInterrupt, EOFError):
+ selected = None
+
+ if not selected:
+ print("No change.")
+ return
+
+ selected = (
+ normalize_copilot_model_id(
+ selected,
+ catalog=catalog,
+ api_key=catalog_api_key,
+ )
+ or selected
+ )
+ _save_model_choice(selected)
+
+ cfg = load_config()
+ model = cfg.get("model")
+ if not isinstance(model, dict):
+ model = {"default": model} if model else {}
+ cfg["model"] = model
+ model["provider"] = provider_id
+ model["base_url"] = effective_base
+ model["api_mode"] = "chat_completions"
+ save_config(cfg)
+ deactivate_provider()
+
+ print(f"Default model set to: {selected} (via {pconfig.name})")
+
+def _model_flow_kimi(config, current_model=""):
+ """Kimi / Moonshot model selection with automatic endpoint routing.
+
+ - sk-kimi-* keys → api.kimi.com/coding/v1 (Kimi Coding Plan)
+ - Other keys → api.moonshot.ai/v1 (legacy Moonshot)
+
+ No manual base URL prompt — endpoint is determined by key prefix.
+ """
+ from hermes_cli.main import _prompt_api_key
+ from hermes_cli.auth import (
+ PROVIDER_REGISTRY,
+ KIMI_CODE_BASE_URL,
+ _prompt_model_selection,
+ _save_model_choice,
+ deactivate_provider,
+ )
+ from hermes_cli.config import (
+ get_env_value,
+ save_env_value,
+ load_config,
+ save_config,
+ )
+ from hermes_cli.models import _PROVIDER_MODELS
+
+ provider_id = "kimi-coding"
+ pconfig = PROVIDER_REGISTRY[provider_id]
+ key_env = pconfig.api_key_env_vars[0] if pconfig.api_key_env_vars else ""
+ base_url_env = pconfig.base_url_env_var or ""
+
+ # Step 1: Check / prompt for API key
+ existing_key = ""
+ for ev in pconfig.api_key_env_vars:
+ existing_key = get_env_value(ev) or os.getenv(ev, "")
+ if existing_key:
+ break
+
+ existing_key, abort = _prompt_api_key(
+ pconfig, existing_key, provider_id=provider_id
+ )
+ if abort:
+ return
+
+ # Step 2: Auto-detect endpoint from key prefix
+ is_coding_plan = existing_key.startswith("sk-kimi-")
+ if is_coding_plan:
+ effective_base = KIMI_CODE_BASE_URL
+ print(f" Detected Kimi Coding Plan key → {effective_base}")
+ else:
+ effective_base = pconfig.inference_base_url
+ print(f" Using Moonshot endpoint → {effective_base}")
+ # Clear any manual base URL override so auto-detection works at runtime
+ if base_url_env and get_env_value(base_url_env):
+ save_env_value(base_url_env, "")
+ print()
+
+ # Step 3: Model selection — show appropriate models for the endpoint
+ if is_coding_plan:
+ # Coding Plan models (kimi-k2.6 first)
+ model_list = [
+ "kimi-k2.6",
+ "kimi-k2.5",
+ "kimi-for-coding",
+ "kimi-k2-thinking",
+ "kimi-k2-thinking-turbo",
+ ]
+ else:
+ # Legacy Moonshot models (excludes Coding Plan-only models)
+ model_list = _PROVIDER_MODELS.get("moonshot", [])
+
+ if model_list:
+ selected = _prompt_model_selection(model_list, current_model=current_model)
+ else:
+ try:
+ selected = input("Enter model name: ").strip()
+ except (KeyboardInterrupt, EOFError):
+ selected = None
+
+ if selected:
+ _save_model_choice(selected)
+
+ # Update config with provider and base URL
+ cfg = load_config()
+ model = cfg.get("model")
+ if not isinstance(model, dict):
+ model = {"default": model} if model else {}
+ cfg["model"] = model
+ model["provider"] = provider_id
+ model["base_url"] = effective_base
+ model.pop("api_mode", None) # let runtime auto-detect from URL
+ save_config(cfg)
+ deactivate_provider()
+
+ endpoint_label = "Kimi Coding" if is_coding_plan else "Moonshot"
+ print(f"Default model set to: {selected} (via {endpoint_label})")
+ else:
+ print("No change.")
+
+def _model_flow_stepfun(config, current_model=""):
+ """StepFun Step Plan flow with region-specific endpoints."""
+ from hermes_cli.main import _infer_stepfun_region, _prompt_api_key, _prompt_provider_choice, _stepfun_base_url_for_region
+ from hermes_cli.auth import (
+ PROVIDER_REGISTRY,
+ _prompt_model_selection,
+ _save_model_choice,
+ deactivate_provider,
+ )
+ from hermes_cli.config import (
+ get_env_value,
+ save_env_value,
+ load_config,
+ save_config,
+ )
+ from hermes_cli.models import _PROVIDER_MODELS, fetch_api_models
+
+ provider_id = "stepfun"
+ pconfig = PROVIDER_REGISTRY[provider_id]
+ key_env = pconfig.api_key_env_vars[0] if pconfig.api_key_env_vars else ""
+ base_url_env = pconfig.base_url_env_var or ""
+
+ existing_key = ""
+ for ev in pconfig.api_key_env_vars:
+ existing_key = get_env_value(ev) or os.getenv(ev, "")
+ if existing_key:
+ break
+
+ existing_key, abort = _prompt_api_key(
+ pconfig, existing_key, provider_id=provider_id
+ )
+ if abort:
+ return
+
+ current_base = ""
+ if base_url_env:
+ current_base = get_env_value(base_url_env) or os.getenv(base_url_env, "")
+ if not current_base:
+ model_cfg = config.get("model")
+ if isinstance(model_cfg, dict):
+ current_base = str(model_cfg.get("base_url") or "").strip()
+ current_region = _infer_stepfun_region(current_base or pconfig.inference_base_url)
+
+ region_choices = [
+ (
+ "international",
+ f"International ({_stepfun_base_url_for_region('international')})",
+ ),
+ ("china", f"China ({_stepfun_base_url_for_region('china')})"),
+ ]
+ ordered_regions = []
+ for region_key, label in region_choices:
+ if region_key == current_region:
+ ordered_regions.insert(0, (region_key, f"{label} ← currently active"))
+ else:
+ ordered_regions.append((region_key, label))
+ ordered_regions.append(("cancel", "Cancel"))
+
+ region_idx = _prompt_provider_choice([label for _, label in ordered_regions])
+ if region_idx is None or ordered_regions[region_idx][0] == "cancel":
+ print("No change.")
+ return
+
+ selected_region = ordered_regions[region_idx][0]
+ effective_base = _stepfun_base_url_for_region(selected_region)
+ if base_url_env:
+ save_env_value(base_url_env, effective_base)
+
+ live_models = fetch_api_models(existing_key, effective_base)
+ if live_models:
+ model_list = live_models
+ print(f" Found {len(model_list)} model(s) from {pconfig.name} API")
+ else:
+ model_list = _PROVIDER_MODELS.get(provider_id, [])
+ if model_list:
+ print(
+ f" Could not auto-detect models from {pconfig.name} API — "
+ "showing Step Plan fallback catalog."
+ )
+
+ if model_list:
+ selected = _prompt_model_selection(model_list, current_model=current_model)
+ else:
+ try:
+ selected = input("Model name: ").strip()
+ except (KeyboardInterrupt, EOFError):
+ selected = None
+
+ if selected:
+ _save_model_choice(selected)
+
+ cfg = load_config()
+ model = cfg.get("model")
+ if not isinstance(model, dict):
+ model = {"default": model} if model else {}
+ cfg["model"] = model
+ model["provider"] = provider_id
+ model["base_url"] = effective_base
+ model.pop("api_mode", None)
+ save_config(cfg)
+ deactivate_provider()
+
+ config["model"] = dict(model)
+ print(f"Default model set to: {selected} (via {pconfig.name})")
+ else:
+ print("No change.")
+
+def _model_flow_bedrock_api_key(config, region, current_model=""):
+ """Bedrock API Key mode — uses the OpenAI-compatible bedrock-mantle endpoint.
+
+ For developers who don't have an AWS account but received a Bedrock API Key
+ from their AWS admin. Works like any OpenAI-compatible endpoint.
+ """
+ from hermes_cli.auth import (
+ _prompt_model_selection,
+ _save_model_choice,
+ deactivate_provider,
+ )
+ from hermes_cli.config import (
+ load_config,
+ save_config,
+ get_env_value,
+ save_env_value,
+ )
+ from hermes_cli.models import _PROVIDER_MODELS
+
+ mantle_base_url = f"https://bedrock-mantle.{region}.api.aws/v1"
+
+ # Prompt for API key
+ existing_key = get_env_value("AWS_BEARER_TOKEN_BEDROCK") or ""
+ if existing_key:
+ from hermes_cli.env_loader import format_secret_source_suffix
+ source_suffix = format_secret_source_suffix("AWS_BEARER_TOKEN_BEDROCK")
+ print(f" Bedrock API Key: {existing_key[:12]}... ✓{source_suffix}")
+ else:
+ print(f" Endpoint: {mantle_base_url}")
+ print()
+ from hermes_cli.secret_prompt import masked_secret_prompt
+
+ try:
+ api_key = masked_secret_prompt(" Bedrock API Key: ").strip()
+ except (KeyboardInterrupt, EOFError):
+ print()
+ return
+ if not api_key:
+ print(" Cancelled.")
+ return
+ save_env_value("AWS_BEARER_TOKEN_BEDROCK", api_key)
+ existing_key = api_key
+ print(" ✓ API key saved.")
+ print()
+
+ # Model selection — use static list (mantle doesn't need boto3 for discovery)
+ model_list = _PROVIDER_MODELS.get("bedrock", [])
+ print(f" Showing {len(model_list)} curated models")
+
+ if model_list:
+ selected = _prompt_model_selection(model_list, current_model=current_model)
+ else:
+ try:
+ selected = input(" Model ID: ").strip()
+ except (KeyboardInterrupt, EOFError):
+ selected = None
+
+ if selected:
+ _save_model_choice(selected)
+
+ # Save as custom provider pointing to bedrock-mantle
+ cfg = load_config()
+ model = cfg.get("model")
+ if not isinstance(model, dict):
+ model = {"default": model} if model else {}
+ cfg["model"] = model
+ model["provider"] = "custom"
+ model["base_url"] = mantle_base_url
+ model.pop("api_mode", None) # chat_completions is the default
+
+ # Also save region in bedrock config for reference
+ bedrock_cfg = cfg.get("bedrock", {})
+ if not isinstance(bedrock_cfg, dict):
+ bedrock_cfg = {}
+ bedrock_cfg["region"] = region
+ cfg["bedrock"] = bedrock_cfg
+
+ # Save the API key env var name so hermes knows where to find it
+ save_env_value("OPENAI_API_KEY", existing_key)
+ save_env_value("OPENAI_BASE_URL", mantle_base_url)
+
+ save_config(cfg)
+ deactivate_provider()
+
+ print(f" Default model set to: {selected} (via Bedrock API Key, {region})")
+ print(f" Endpoint: {mantle_base_url}")
+ else:
+ print(" No change.")
+
+def _model_flow_bedrock(config, current_model=""):
+ """AWS Bedrock provider: verify credentials, pick region, discover models.
+
+ Uses the native Converse API via boto3 — not the OpenAI-compatible endpoint.
+ Auth is handled by the AWS SDK default credential chain (env vars, profile,
+ instance role), so no API key prompt is needed.
+ """
+ from hermes_cli.auth import (
+ _prompt_model_selection,
+ _save_model_choice,
+ deactivate_provider,
+ )
+ from hermes_cli.config import load_config, save_config
+ from hermes_cli.models import _PROVIDER_MODELS
+
+ # 1. Check for AWS credentials
+ try:
+ from agent.bedrock_adapter import (
+ has_aws_credentials,
+ resolve_aws_auth_env_var,
+ resolve_bedrock_region,
+ discover_bedrock_models,
+ )
+ except ImportError:
+ print(" ✗ boto3 is not installed. Install it with:")
+ print(" pip install boto3")
+ print()
+ return
+
+ if not has_aws_credentials():
+ print(" ⚠ No AWS credentials detected via environment variables.")
+ print(" Bedrock will use boto3's default credential chain (IMDS, SSO, etc.)")
+ print()
+
+ auth_var = resolve_aws_auth_env_var()
+ if auth_var:
+ print(f" AWS credentials: {auth_var} ✓")
+ else:
+ print(" AWS credentials: boto3 default chain (instance role / SSO)")
+ print()
+
+ # 2. Region selection
+ current_region = resolve_bedrock_region()
+ try:
+ region_input = input(f" AWS Region [{current_region}]: ").strip()
+ except (KeyboardInterrupt, EOFError):
+ print()
+ return
+ region = region_input or current_region
+
+ # 2b. Authentication mode
+ print(" Choose authentication method:")
+ print()
+ print(" 1. IAM credential chain (recommended)")
+ print(" Works with EC2 instance roles, SSO, env vars, aws configure")
+ print(" 2. Bedrock API Key")
+ print(" Enter your Bedrock API Key directly — also supports")
+ print(" team scenarios where an admin distributes keys")
+ print()
+ try:
+ auth_choice = input(" Choice [1]: ").strip()
+ except (KeyboardInterrupt, EOFError):
+ print()
+ return
+
+ if auth_choice == "2":
+ _model_flow_bedrock_api_key(config, region, current_model)
+ return
+
+ # 3. Model discovery — try live API first, fall back to static list
+ print(f" Discovering models in {region}...")
+ live_models = discover_bedrock_models(region)
+
+ if live_models:
+ _EXCLUDE_PREFIXES = (
+ "stability.",
+ "cohere.embed",
+ "twelvelabs.",
+ "us.stability.",
+ "us.cohere.embed",
+ "us.twelvelabs.",
+ "global.cohere.embed",
+ "global.twelvelabs.",
+ )
+ _EXCLUDE_SUBSTRINGS = ("safeguard", "voxtral", "palmyra-vision")
+ filtered = []
+ for m in live_models:
+ mid = m["id"]
+ if any(mid.startswith(p) for p in _EXCLUDE_PREFIXES):
+ continue
+ if any(s in mid.lower() for s in _EXCLUDE_SUBSTRINGS):
+ continue
+ filtered.append(m)
+
+ # Deduplicate: prefer inference profiles (us.*, global.*) over bare
+ # foundation model IDs.
+ profile_base_ids = set()
+ for m in filtered:
+ mid = m["id"]
+ if mid.startswith(("us.", "global.")):
+ base = mid.split(".", 1)[1] if "." in mid[3:] else mid
+ profile_base_ids.add(base)
+
+ deduped = []
+ for m in filtered:
+ mid = m["id"]
+ if not mid.startswith(("us.", "global.")) and mid in profile_base_ids:
+ continue
+ deduped.append(m)
+
+ _RECOMMENDED = [
+ "us.anthropic.claude-sonnet-4-6",
+ "us.anthropic.claude-opus-4-6",
+ "us.anthropic.claude-haiku-4-5",
+ "us.amazon.nova-pro",
+ "us.amazon.nova-lite",
+ "us.amazon.nova-micro",
+ "deepseek.v3",
+ "us.meta.llama4-maverick",
+ "us.meta.llama4-scout",
+ ]
+
+ def _sort_key(m):
+ mid = m["id"]
+ for i, rec in enumerate(_RECOMMENDED):
+ if mid.startswith(rec):
+ return (0, i, mid)
+ if mid.startswith("global."):
+ return (1, 0, mid)
+ return (2, 0, mid)
+
+ deduped.sort(key=_sort_key)
+ model_list = [m["id"] for m in deduped]
+ print(
+ f" Found {len(model_list)} text model(s) (filtered from {len(live_models)} total)"
+ )
+ else:
+ model_list = _PROVIDER_MODELS.get("bedrock", [])
+ if model_list:
+ print(
+ f" Using {len(model_list)} curated models (live discovery unavailable)"
+ )
+ else:
+ print(
+ " No models found. Check IAM permissions for bedrock:ListFoundationModels."
+ )
+ return
+
+ # 4. Model selection
+ if model_list:
+ selected = _prompt_model_selection(model_list, current_model=current_model)
+ else:
+ try:
+ selected = input(" Model ID: ").strip()
+ except (KeyboardInterrupt, EOFError):
+ selected = None
+
+ if selected:
+ _save_model_choice(selected)
+
+ cfg = load_config()
+ model = cfg.get("model")
+ if not isinstance(model, dict):
+ model = {"default": model} if model else {}
+ cfg["model"] = model
+ model["provider"] = "bedrock"
+ model["base_url"] = f"https://bedrock-runtime.{region}.amazonaws.com"
+ model.pop("api_mode", None) # bedrock_converse is auto-detected
+
+ bedrock_cfg = cfg.get("bedrock", {})
+ if not isinstance(bedrock_cfg, dict):
+ bedrock_cfg = {}
+ bedrock_cfg["region"] = region
+ cfg["bedrock"] = bedrock_cfg
+
+ save_config(cfg)
+ deactivate_provider()
+
+ print(f" Default model set to: {selected} (via AWS Bedrock, {region})")
+ else:
+ print(" No change.")
+
+def _model_flow_api_key_provider(config, provider_id, current_model=""):
+ """Generic flow for API-key providers (z.ai, MiniMax, OpenCode, etc.)."""
+ from hermes_cli.main import _prompt_api_key
+ from hermes_cli.auth import (
+ PROVIDER_REGISTRY,
+ _prompt_model_selection,
+ _save_model_choice,
+ deactivate_provider,
+ )
+ from hermes_cli.config import (
+ get_env_value,
+ save_env_value,
+ load_config,
+ save_config,
+ )
+ from hermes_cli.models import (
+ _PROVIDER_MODELS,
+ fetch_api_models,
+ opencode_model_api_mode,
+ normalize_opencode_model_id,
+ )
+
+ pconfig = PROVIDER_REGISTRY[provider_id]
+ key_env = pconfig.api_key_env_vars[0] if pconfig.api_key_env_vars else ""
+ base_url_env = pconfig.base_url_env_var or ""
+
+ # Check / prompt for API key
+ existing_key = ""
+ for ev in pconfig.api_key_env_vars:
+ existing_key = get_env_value(ev) or os.getenv(ev, "")
+ if existing_key:
+ break
+
+ existing_key, abort = _prompt_api_key(
+ pconfig, existing_key, provider_id=provider_id
+ )
+ if abort:
+ return
+
+ # Gemini free-tier gate: free-tier daily quotas (<= 250 RPD for Flash)
+ # are exhausted in a handful of agent turns, so refuse to wire up the
+ # provider with a free-tier key. Probe is best-effort; network or auth
+ # errors fall through without blocking.
+ if provider_id == "gemini" and existing_key:
+ try:
+ from agent.gemini_native_adapter import probe_gemini_tier
+ except Exception:
+ probe_gemini_tier = None
+ if probe_gemini_tier is not None:
+ print(" Checking Gemini API tier...")
+ probe_base = (
+ (get_env_value(base_url_env) if base_url_env else "")
+ or os.getenv(base_url_env or "", "")
+ or pconfig.inference_base_url
+ )
+ tier = probe_gemini_tier(existing_key, probe_base)
+ if tier == "free":
+ print()
+ print(
+ "❌ This Google API key is on the free tier "
+ "(<= 250 requests/day for gemini-2.5-flash)."
+ )
+ print(
+ " Hermes typically makes 3-10 API calls per user turn "
+ "(tool iterations + auxiliary tasks),"
+ )
+ print(
+ " so the free tier is exhausted after a handful of "
+ "messages and cannot sustain"
+ )
+ print(" an agent session.")
+ print()
+ print(
+ " To use Gemini with Hermes, enable billing on your "
+ "Google Cloud project and regenerate"
+ )
+ print(
+ " the key in a billing-enabled project: "
+ "https://aistudio.google.com/apikey"
+ )
+ print()
+ print(
+ " Alternatives with workable free usage: DeepSeek, "
+ "OpenRouter (free models), Groq, Nous."
+ )
+ print()
+ print("Not saving Gemini as the default provider.")
+ return
+ if tier == "paid":
+ print(" Tier check: paid ✓")
+ else:
+ # "unknown" -- network issue, auth problem, unexpected response.
+ # Don't block; the runtime 429 handler will surface free-tier
+ # guidance if the key turns out to be free tier.
+ print(" Tier check: could not verify (proceeding anyway).")
+ print()
+
+ # Optional base URL override.
+ # Precedence: env var → config.yaml model.base_url → registry default.
+ # Reading config.yaml prevents silently overwriting a saved remote URL
+ # (e.g. a remote LM Studio endpoint) with localhost when the user just
+ # presses Enter at the prompt below.
+ current_base = ""
+ if base_url_env:
+ current_base = get_env_value(base_url_env) or os.getenv(base_url_env, "")
+ if not current_base:
+ try:
+ _m = load_config().get("model") or {}
+ if str(_m.get("provider") or "").strip().lower() == provider_id:
+ current_base = str(_m.get("base_url") or "").strip()
+ except Exception:
+ pass
+ effective_base = current_base or pconfig.inference_base_url
+
+ try:
+ override = input(f"Base URL [{effective_base}]: ").strip()
+ except (KeyboardInterrupt, EOFError):
+ print()
+ override = ""
+ if override and base_url_env:
+ if not override.startswith(("http://", "https://")):
+ print(
+ " Invalid URL — must start with http:// or https://. Keeping current value."
+ )
+ else:
+ save_env_value(base_url_env, override)
+ effective_base = override
+
+ # Model selection — resolution order:
+ # 1. models.dev registry (cached, filtered for agentic/tool-capable models)
+ # 2. Curated static fallback list (offline insurance)
+ # 3. Live /models endpoint probe (small providers without models.dev data)
+ #
+ # LM Studio: live /api/v1/models probe (no models.dev catalog).
+ # Ollama Cloud: merged discovery (live API + models.dev + disk cache).
+ if provider_id == "lmstudio":
+ from hermes_cli.auth import AuthError
+ from hermes_cli.models import fetch_lmstudio_models
+
+ api_key_for_probe = existing_key or (get_env_value(key_env) if key_env else "")
+ try:
+ model_list = fetch_lmstudio_models(
+ api_key=api_key_for_probe, base_url=effective_base
+ )
+ except AuthError as exc:
+ print(f" LM Studio rejected the request: {exc}")
+ print(" Set LM_API_KEY (or update it) to match the server's bearer token.")
+ model_list = []
+ if model_list:
+ print(f" Found {len(model_list)} model(s) from LM Studio")
+ elif provider_id == "ollama-cloud":
+ from hermes_cli.models import fetch_ollama_cloud_models
+
+ api_key_for_probe = existing_key or (get_env_value(key_env) if key_env else "")
+ # During setup, force a live refresh so the picker reflects newly
+ # released models (e.g. deepseek v4 flash, kimi k2.6) the moment
+ # the user enters their key — not an hour later when the disk
+ # cache TTL expires.
+ model_list = fetch_ollama_cloud_models(
+ api_key=api_key_for_probe,
+ base_url=effective_base,
+ force_refresh=True,
+ )
+ if model_list:
+ print(f" Found {len(model_list)} model(s) from Ollama Cloud")
+ elif provider_id == "novita":
+ from hermes_cli.models import fetch_api_models
+
+ api_key_for_probe = existing_key or (get_env_value(key_env) if key_env else "")
+ curated = _PROVIDER_MODELS.get(provider_id, [])
+ live_models = fetch_api_models(api_key_for_probe, effective_base)
+ if live_models:
+ model_list = live_models
+ print(f" Found {len(model_list)} model(s) from {pconfig.name} API")
+ else:
+ mdev_models: list = []
+ try:
+ from agent.models_dev import list_agentic_models
+
+ mdev_models = list_agentic_models(provider_id)
+ except Exception:
+ pass
+ if mdev_models:
+ seen = {m.lower() for m in mdev_models}
+ model_list = list(mdev_models)
+ for m in curated:
+ if m.lower() not in seen:
+ model_list.append(m)
+ seen.add(m.lower())
+ print(f" Found {len(model_list)} model(s) from models.dev registry")
+ else:
+ model_list = curated
+ if model_list:
+ print(
+ f' Showing {len(model_list)} curated models — use "Enter custom model name" for others.'
+ )
+ else:
+ curated = _PROVIDER_MODELS.get(provider_id, [])
+
+ # Try models.dev first — returns tool-capable models, filtered for noise
+ mdev_models: list = []
+ try:
+ from agent.models_dev import list_agentic_models
+
+ mdev_models = list_agentic_models(provider_id)
+ except Exception:
+ pass
+
+ if mdev_models:
+ # Merge models.dev with curated list so newly added models
+ # (not yet in models.dev) still appear in the picker.
+ if curated:
+ seen = {m.lower() for m in mdev_models}
+ merged = list(mdev_models)
+ for m in curated:
+ if m.lower() not in seen:
+ merged.append(m)
+ seen.add(m.lower())
+ model_list = merged
+ else:
+ model_list = mdev_models
+ print(f" Found {len(model_list)} model(s) from models.dev registry")
+ elif curated and len(curated) >= 8:
+ # Curated list is substantial — use it directly, skip live probe
+ model_list = curated
+ print(
+ f' Showing {len(model_list)} curated models — use "Enter custom model name" for others.'
+ )
+ else:
+ api_key_for_probe = existing_key or (
+ get_env_value(key_env) if key_env else ""
+ )
+ live_models = fetch_api_models(api_key_for_probe, effective_base)
+ if live_models and len(live_models) >= len(curated):
+ model_list = live_models
+ print(f" Found {len(model_list)} model(s) from {pconfig.name} API")
+ else:
+ model_list = curated
+ if model_list:
+ print(
+ f' Showing {len(model_list)} curated models — use "Enter custom model name" for others.'
+ )
+ # else: no defaults either, will fall through to raw input
+
+ if provider_id in {"opencode-zen", "opencode-go"}:
+ model_list = [
+ normalize_opencode_model_id(provider_id, mid) for mid in model_list
+ ]
+ current_model = normalize_opencode_model_id(provider_id, current_model)
+ model_list = list(dict.fromkeys(mid for mid in model_list if mid))
+
+ if model_list:
+ selected = _prompt_model_selection(model_list, current_model=current_model)
+ else:
+ try:
+ selected = input("Model name: ").strip()
+ except (KeyboardInterrupt, EOFError):
+ selected = None
+
+ if selected:
+ if provider_id in {"opencode-zen", "opencode-go"}:
+ selected = normalize_opencode_model_id(provider_id, selected)
+
+ _save_model_choice(selected)
+
+ # Update config with provider, base URL, and provider-specific API mode
+ cfg = load_config()
+ model = cfg.get("model")
+ if not isinstance(model, dict):
+ model = {"default": model} if model else {}
+ cfg["model"] = model
+ model["provider"] = provider_id
+ model["base_url"] = effective_base
+ if provider_id in {"opencode-zen", "opencode-go"}:
+ model["api_mode"] = opencode_model_api_mode(provider_id, selected)
+ else:
+ model.pop("api_mode", None)
+ save_config(cfg)
+ deactivate_provider()
+
+ print(f"Default model set to: {selected} (via {pconfig.name})")
+ else:
+ print("No change.")
+
+def _model_flow_anthropic(config, current_model=""):
+ """Flow for Anthropic provider — OAuth subscription, API key, or Claude Code creds."""
+ from hermes_cli.main import _run_anthropic_oauth_flow
+ from hermes_cli.auth import (
+ _prompt_model_selection,
+ _save_model_choice,
+ deactivate_provider,
+ )
+ from hermes_cli.config import (
+ save_env_value,
+ load_config,
+ save_config,
+ save_anthropic_api_key,
+ )
+ from hermes_cli.models import _PROVIDER_MODELS
+
+ # Check ALL credential sources
+ from hermes_cli.auth import get_anthropic_key
+
+ existing_key = get_anthropic_key()
+ cc_available = False
+ try:
+ from agent.anthropic_adapter import (
+ read_claude_code_credentials,
+ is_claude_code_token_valid,
+ _is_oauth_token,
+ )
+
+ cc_creds = read_claude_code_credentials()
+ if cc_creds and is_claude_code_token_valid(cc_creds):
+ cc_available = True
+ except Exception:
+ pass
+
+ # Stale-OAuth guard: if the only existing cred is an expired OAuth token
+ # (no valid cc_creds to fall back on), treat it as missing so the re-auth
+ # path is offered instead of silently accepting a broken token.
+ existing_is_stale_oauth = False
+ if existing_key and _is_oauth_token(existing_key) and not cc_available:
+ existing_is_stale_oauth = True
+
+ has_creds = (bool(existing_key) and not existing_is_stale_oauth) or cc_available
+ needs_auth = not has_creds
+
+ if has_creds:
+ # Show what we found
+ if existing_key:
+ from hermes_cli.env_loader import format_secret_source_suffix
+ from hermes_cli.auth import PROVIDER_REGISTRY
+
+ # Surface which env var supplied the key so users with
+ # Bitwarden see "(from Bitwarden)" — without this, a detected
+ # BSM key looks identical to a key in .env and users assume
+ # nothing is wired up.
+ source_suffix = ""
+ for var in PROVIDER_REGISTRY["anthropic"].api_key_env_vars:
+ if os.getenv(var, "").strip() == existing_key:
+ source_suffix = format_secret_source_suffix(var)
+ if source_suffix:
+ break
+ print(
+ f" Anthropic credentials: {existing_key[:12]}... ✓{source_suffix}"
+ )
+ elif cc_available:
+ print(" Claude Code credentials: ✓ (auto-detected)")
+ print()
+ print(" 1. Use existing credentials")
+ print(" 2. Reauthenticate (new OAuth login)")
+ print(" 3. Cancel")
+ print()
+ try:
+ choice = input(" Choice [1/2/3]: ").strip()
+ except (KeyboardInterrupt, EOFError):
+ choice = "1"
+
+ if choice == "2":
+ needs_auth = True
+ elif choice == "3":
+ return
+ # choice == "1" or default: use existing, proceed to model selection
+
+ if needs_auth:
+ # Show auth method choice
+ print()
+ print(" Choose authentication method:")
+ print()
+ print(" 1. Claude Pro/Max subscription (OAuth login)")
+ print(" 2. Anthropic API key (pay-per-token)")
+ print(" 3. Cancel")
+ print()
+ try:
+ choice = input(" Choice [1/2/3]: ").strip()
+ except (KeyboardInterrupt, EOFError):
+ print()
+ return
+
+ if choice == "1":
+ if not _run_anthropic_oauth_flow(save_env_value):
+ return
+
+ elif choice == "2":
+ print()
+ print(" Get an API key at: https://platform.claude.com/settings/keys")
+ print()
+ from hermes_cli.secret_prompt import masked_secret_prompt
+
+ try:
+ api_key = masked_secret_prompt(" API key (sk-ant-...): ").strip()
+ except (KeyboardInterrupt, EOFError):
+ print()
+ return
+ if not api_key:
+ print(" Cancelled.")
+ return
+ save_anthropic_api_key(api_key, save_fn=save_env_value)
+ print(" ✓ API key saved.")
+
+ else:
+ print(" No change.")
+ return
+ print()
+
+ # Model selection
+ model_list = _PROVIDER_MODELS.get("anthropic", [])
+ if model_list:
+ selected = _prompt_model_selection(model_list, current_model=current_model)
+ else:
+ try:
+ selected = input("Model name (e.g., claude-sonnet-4-20250514): ").strip()
+ except (KeyboardInterrupt, EOFError):
+ selected = None
+
+ if selected:
+ _save_model_choice(selected)
+
+ # Update config with provider — clear base_url since
+ # resolve_runtime_provider() always hardcodes Anthropic's URL.
+ # Leaving a stale base_url in config can contaminate other
+ # providers if the user switches without running 'hermes model'.
+ cfg = load_config()
+ model = cfg.get("model")
+ if not isinstance(model, dict):
+ model = {"default": model} if model else {}
+ cfg["model"] = model
+ model["provider"] = "anthropic"
+ model.pop("base_url", None)
+ save_config(cfg)
+ deactivate_provider()
+
+ print(f"Default model set to: {selected} (via Anthropic)")
+ else:
+ print("No change.")
diff --git a/tests/hermes_cli/test_setup_ollama_cloud_force_refresh.py b/tests/hermes_cli/test_setup_ollama_cloud_force_refresh.py
index 60f6ea99341..68870bf700d 100644
--- a/tests/hermes_cli/test_setup_ollama_cloud_force_refresh.py
+++ b/tests/hermes_cli/test_setup_ollama_cloud_force_refresh.py
@@ -9,10 +9,13 @@ from __future__ import annotations
def test_setup_ollama_cloud_passes_force_refresh(monkeypatch):
"""The provider-setup model-fetch for ollama-cloud must pass ``force_refresh=True``."""
- import hermes_cli.main as main_mod
+ # The ollama-cloud branch lives in ``_model_flow_api_key_provider``, which was
+ # extracted from main.py into hermes_cli/model_setup_flows.py (god-file
+ # decomposition Phase 2). Inspect the module the code now lives in.
+ import hermes_cli.model_setup_flows as flows_mod
import inspect
- src = inspect.getsource(main_mod)
+ src = inspect.getsource(flows_mod)
# Locate the ollama-cloud branch in the provider setup flow.
marker = 'provider_id == "ollama-cloud"'