From a77efada5f55436e6a17da45a30a3352ce24a780 Mon Sep 17 00:00:00 2001 From: teknium1 <127238744+teknium1@users.noreply.github.com> Date: Mon, 8 Jun 2026 07:53:41 -0700 Subject: [PATCH] refactor(cli): extract 18 model-flow wizard functions into model_setup_flows (god-file Phase 2) Lift the 18 _model_flow_* provider-setup wizard functions out of hermes_cli/main.py into hermes_cli/model_setup_flows.py. Behavior-neutral; main.py 14050 -> 11479 LOC. select_provider_and_model (the dispatcher) STAYS in main.py and re-imports the flows via an explicit 'from hermes_cli.model_setup_flows import (...)' block, so both its bare-name calls and existing test monkeypatches targeting hermes_cli.main._model_flow_* keep resolving against main's namespace unchanged. Imports: 3 neutral deps (argparse, os, subprocess) at the module top; the 14 main.py-internal helpers the flows call (_prompt_api_key, _save_custom_provider, the reasoning-effort/stepfun/qwen helpers, _run_anthropic_oauth_flow, ...) are lazy-imported per-flow (from hermes_cli.main import ...) so the new module never imports main at module scope -> no import cycle. Repointed one source-inspection change-detector (test_setup_ollama_cloud_force_refresh) to read the module the ollama-cloud branch moved to. Validation: 6563/6563 hermes_cli tests pass; live flow-dispatch probe confirms the lazy main-internal imports resolve at runtime. 
--- hermes_cli/main.py | 2619 +--------------- hermes_cli/model_setup_flows.py | 2648 +++++++++++++++++ .../test_setup_ollama_cloud_force_refresh.py | 7 +- 3 files changed, 2677 insertions(+), 2597 deletions(-) create mode 100644 hermes_cli/model_setup_flows.py diff --git a/hermes_cli/main.py b/hermes_cli/main.py index e9f02c72c90..2115764d5b5 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -494,6 +494,30 @@ import time as _time from datetime import datetime from hermes_cli import __version__, __release_date__ + +# Provider model-selection wizard flows extracted to hermes_cli/model_setup_flows.py +# (god-file decomposition Phase 2). Re-imported here so select_provider_and_model and +# existing test monkeypatches (hermes_cli.main._model_flow_*) keep resolving unchanged. +from hermes_cli.model_setup_flows import ( + _model_flow_openrouter, + _model_flow_nous, + _model_flow_openai_codex, + _model_flow_xai_oauth, + _model_flow_qwen_oauth, + _model_flow_minimax_oauth, + _model_flow_google_gemini_cli, + _model_flow_custom, + _model_flow_azure_foundry, + _model_flow_named_custom, + _model_flow_copilot, + _model_flow_copilot_acp, + _model_flow_kimi, + _model_flow_stepfun, + _model_flow_bedrock_api_key, + _model_flow_bedrock, + _model_flow_api_key_provider, + _model_flow_anthropic, +) logger = logging.getLogger(__name__) @@ -3341,467 +3365,12 @@ def _prompt_provider_choice(choices, *, default=0): return None -def _model_flow_openrouter(config, current_model=""): - """OpenRouter provider: ensure API key, then pick model.""" - from hermes_constants import OPENROUTER_BASE_URL - from hermes_cli.auth import ( - ProviderConfig, - _prompt_model_selection, - _save_model_choice, - deactivate_provider, - ) - from hermes_cli.config import get_env_value - - # Route through _prompt_api_key so users can replace a stale/broken key - # in-flow (K/R/C) instead of having to edit ~/.hermes/.env by hand. 
The - # previous bypass-when-key-exists branch left no way to recover from a - # bad paste short of re-running `hermes setup` from scratch. OpenRouter - # isn't in PROVIDER_REGISTRY so we synthesize a minimal pconfig. - pconfig = ProviderConfig( - id="openrouter", - name="OpenRouter", - auth_type="api_key", - api_key_env_vars=("OPENROUTER_API_KEY",), - ) - existing_key = get_env_value("OPENROUTER_API_KEY") or "" - if not existing_key: - print("Get one at: https://openrouter.ai/keys") - print() - _resolved, abort = _prompt_api_key(pconfig, existing_key, provider_id="openrouter") - if abort: - return - - from hermes_cli.models import model_ids, get_pricing_for_provider - - openrouter_models = model_ids(force_refresh=True) - - # Fetch live pricing (non-blocking — returns empty dict on failure) - pricing = get_pricing_for_provider("openrouter", force_refresh=True) - - selected = _prompt_model_selection( - openrouter_models, current_model=current_model, pricing=pricing - ) - if selected: - _save_model_choice(selected) - - # Update config provider and deactivate any OAuth provider - from hermes_cli.config import load_config, save_config - - cfg = load_config() - model = cfg.get("model") - if not isinstance(model, dict): - model = {"default": model} if model else {} - cfg["model"] = model - model["provider"] = "openrouter" - model["base_url"] = OPENROUTER_BASE_URL - model["api_mode"] = "chat_completions" - save_config(cfg) - deactivate_provider() - print(f"Default model set to: {selected} (via OpenRouter)") - else: - print("No change.") -def _model_flow_nous(config, current_model="", args=None): - """Nous Portal provider: ensure logged in, then pick model.""" - from hermes_cli.auth import ( - get_provider_auth_state, - _prompt_model_selection, - _save_model_choice, - _update_config_for_provider, - resolve_nous_runtime_credentials, - AuthError, - format_auth_error, - _login_nous, - PROVIDER_REGISTRY, - ) - from hermes_cli.config import ( - get_env_value, - load_config, - 
save_config, - save_env_value, - ) - from hermes_cli.nous_subscription import prompt_enable_tool_gateway - - state = get_provider_auth_state("nous") - if not state or not state.get("access_token"): - print("Not logged into Nous Portal. Starting login...") - print() - try: - mock_args = argparse.Namespace( - portal_url=getattr(args, "portal_url", None), - inference_url=getattr(args, "inference_url", None), - client_id=getattr(args, "client_id", None), - scope=getattr(args, "scope", None), - no_browser=bool(getattr(args, "no_browser", False)), - timeout=getattr(args, "timeout", None) or 15.0, - ca_bundle=getattr(args, "ca_bundle", None), - insecure=bool(getattr(args, "insecure", False)), - ) - _login_nous(mock_args, PROVIDER_REGISTRY["nous"]) - # Offer Tool Gateway enablement for paid subscribers - try: - _refreshed = load_config() or {} - prompt_enable_tool_gateway(_refreshed) - except Exception: - pass - except SystemExit: - print("Login cancelled or failed.") - return - except Exception as exc: - print(f"Login failed: {exc}") - return - # login_nous already handles model selection + config update - return - - # Already logged in — use curated model list (same as OpenRouter defaults). - # The live /models endpoint returns hundreds of models; the curated list - # shows only agentic models users recognize from OpenRouter. 
- from hermes_cli.models import ( - get_curated_nous_model_ids, - get_pricing_for_provider, - check_nous_free_tier, - partition_nous_models_by_tier, - union_with_portal_free_recommendations, - union_with_portal_paid_recommendations, - ) - - model_ids = get_curated_nous_model_ids() - if not model_ids: - print("No curated models available for Nous Portal.") - return - - # Verify credentials are still valid (catches expired sessions early) - try: - creds = resolve_nous_runtime_credentials() - except Exception as exc: - relogin = isinstance(exc, AuthError) and exc.relogin_required - msg = format_auth_error(exc) if isinstance(exc, AuthError) else str(exc) - if relogin: - print(f"Session expired: {msg}") - print("Re-authenticating with Nous Portal...\n") - try: - mock_args = argparse.Namespace( - portal_url=None, - inference_url=None, - client_id=None, - scope=None, - no_browser=False, - timeout=15.0, - ca_bundle=None, - insecure=False, - ) - _login_nous(mock_args, PROVIDER_REGISTRY["nous"]) - except Exception as login_exc: - print(f"Re-login failed: {login_exc}") - return - print(f"Could not verify credentials: {msg}") - return - - # Fetch live pricing (non-blocking — returns empty dict on failure) - pricing = get_pricing_for_provider("nous") - - # Force fresh account data for model selection so recent credit purchases - # are reflected immediately. - free_tier = check_nous_free_tier(force_fresh=True) - if not free_tier: - try: - refreshed_creds = resolve_nous_runtime_credentials( - force_refresh=True, - ) - if refreshed_creds: - creds = refreshed_creds - except Exception: - # Runtime inference has its own paid-entitlement recovery path; do - # not block model selection if this opportunistic refresh fails. - pass - - # Resolve portal URL early — needed both for upgrade links and for the - # freeRecommendedModels endpoint below. 
- _nous_portal_url = "" - try: - _nous_state = get_provider_auth_state("nous") - if _nous_state: - _nous_portal_url = _nous_state.get("portal_base_url", "") - except Exception: - pass - - # For free users: partition models into selectable/unavailable based on - # whether they are free per the Portal-reported pricing. First augment - # with the Portal's freeRecommendedModels list so newly-launched free - # models show up even if this CLI build's hardcoded curated list and - # docs-hosted manifest haven't caught up yet. - # - # For paid users: mirror the same idea with paidRecommendedModels so - # newly-launched paid models surface in the picker too — independent - # of CLI release cadence. - unavailable_models: list[str] = [] - unavailable_message = "" - if free_tier: - try: - from hermes_cli.nous_account import ( - format_nous_portal_entitlement_message, - get_nous_portal_account_info, - ) - - _account_info = get_nous_portal_account_info(force_fresh=True) - unavailable_message = ( - format_nous_portal_entitlement_message( - _account_info, - capability="paid Nous models", - ) - or "" - ) - except Exception: - unavailable_message = "" - model_ids, pricing = union_with_portal_free_recommendations( - model_ids, pricing, _nous_portal_url, - ) - model_ids, unavailable_models = partition_nous_models_by_tier( - model_ids, pricing, free_tier=True - ) - else: - model_ids, pricing = union_with_portal_paid_recommendations( - model_ids, pricing, _nous_portal_url, - ) - - if not model_ids and not unavailable_models: - print("No models available for Nous Portal after filtering.") - return - - if free_tier and not model_ids: - print("No free models currently available.") - if unavailable_models: - from hermes_cli.auth import DEFAULT_NOUS_PORTAL_URL - - _url = (_nous_portal_url or DEFAULT_NOUS_PORTAL_URL).rstrip("/") - print(unavailable_message or f"Upgrade at {_url} to access paid models.") - return - - print( - f'Showing {len(model_ids)} curated models — use "Enter custom model 
name" for others.' - ) - - selected = _prompt_model_selection( - model_ids, - current_model=current_model, - pricing=pricing, - unavailable_models=unavailable_models, - portal_url=_nous_portal_url, - unavailable_message=unavailable_message, - ) - if selected: - _save_model_choice(selected) - # Reactivate Nous as the provider and update config - inference_url = creds.get("base_url", "") - _update_config_for_provider("nous", inference_url) - current_model_cfg = config.get("model") - if isinstance(current_model_cfg, dict): - model_cfg = dict(current_model_cfg) - elif isinstance(current_model_cfg, str) and current_model_cfg.strip(): - model_cfg = {"default": current_model_cfg.strip()} - else: - model_cfg = {} - model_cfg["provider"] = "nous" - model_cfg["default"] = selected - if inference_url and inference_url.strip(): - model_cfg["base_url"] = inference_url.rstrip("/") - else: - model_cfg.pop("base_url", None) - config["model"] = model_cfg - # Clear any custom endpoint that might conflict - if get_env_value("OPENAI_BASE_URL"): - save_env_value("OPENAI_BASE_URL", "") - save_env_value("OPENAI_API_KEY", "") - save_config(config) - print(f"Default model set to: {selected} (via Nous Portal)") - # Offer Tool Gateway enablement for paid subscribers - prompt_enable_tool_gateway(config) - else: - print("No change.") -def _model_flow_openai_codex(config, current_model=""): - """OpenAI Codex provider: ensure logged in, then pick model.""" - from hermes_cli.auth import ( - get_codex_auth_status, - _prompt_model_selection, - _save_model_choice, - _update_config_for_provider, - _login_openai_codex, - PROVIDER_REGISTRY, - DEFAULT_CODEX_BASE_URL, - ) - from hermes_cli.codex_models import get_codex_model_ids - - status = get_codex_auth_status() - if status.get("logged_in"): - print(" OpenAI Codex credentials: ✓") - print() - print(" 1. Use existing credentials") - print(" 2. Reauthenticate (new OAuth login)") - print(" 3. 
Cancel") - print() - try: - choice = input(" Choice [1/2/3]: ").strip() - except (KeyboardInterrupt, EOFError): - choice = "1" - - if choice == "2": - print("Starting a fresh OpenAI Codex login...") - print() - try: - mock_args = argparse.Namespace() - _login_openai_codex( - mock_args, - PROVIDER_REGISTRY["openai-codex"], - force_new_login=True, - ) - except SystemExit: - print("Login cancelled or failed.") - return - except Exception as exc: - print(f"Login failed: {exc}") - return - status = get_codex_auth_status() - if not status.get("logged_in"): - print("Login failed.") - return - elif choice == "3": - return - else: - print("Not logged into OpenAI Codex. Starting login...") - print() - try: - mock_args = argparse.Namespace() - _login_openai_codex(mock_args, PROVIDER_REGISTRY["openai-codex"]) - except SystemExit: - print("Login cancelled or failed.") - return - except Exception as exc: - print(f"Login failed: {exc}") - return - - _codex_token = None - # Prefer credential pool (where `hermes auth` stores device_code tokens), - # fall back to legacy provider state. 
- try: - _codex_status = get_codex_auth_status() - if _codex_status.get("logged_in"): - _codex_token = _codex_status.get("api_key") - except Exception: - pass - if not _codex_token: - try: - from hermes_cli.auth import resolve_codex_runtime_credentials - - _codex_creds = resolve_codex_runtime_credentials() - _codex_token = _codex_creds.get("api_key") - except Exception: - pass - - codex_models = get_codex_model_ids(access_token=_codex_token) - - selected = _prompt_model_selection(codex_models, current_model=current_model) - if selected: - _save_model_choice(selected) - _update_config_for_provider("openai-codex", DEFAULT_CODEX_BASE_URL) - print(f"Default model set to: {selected} (via OpenAI Codex)") - else: - print("No change.") -def _model_flow_xai_oauth(_config, current_model="", *, args=None): - """xAI Grok OAuth (SuperGrok / Premium+) provider: ensure logged in, then pick model.""" - from hermes_cli.auth import ( - get_xai_oauth_auth_status, - _prompt_model_selection, - _save_model_choice, - _update_config_for_provider, - resolve_xai_oauth_runtime_credentials, - _login_xai_oauth, - DEFAULT_XAI_OAUTH_BASE_URL, - PROVIDER_REGISTRY, - ) - from hermes_cli.models import _PROVIDER_MODELS - - status = get_xai_oauth_auth_status() - if status.get("logged_in"): - print(" xAI Grok OAuth (SuperGrok / Premium+) credentials: ✓") - print() - print(" 1. Use existing credentials") - print(" 2. Reauthenticate (new OAuth login)") - print(" 3. Cancel") - print() - try: - choice = input(" Choice [1/2/3]: ").strip() - except (KeyboardInterrupt, EOFError): - choice = "1" - - if choice == "2": - print("Starting a fresh xAI OAuth login...") - print() - try: - # Forward CLI flags from ``hermes model --manual-paste`` - # / ``--no-browser`` / ``--timeout`` into the loopback - # login. Without this, browser-only remotes (#26923) - # can't reach the manual-paste path via ``hermes model``. 
- mock_args = argparse.Namespace( - manual_paste=bool(getattr(args, "manual_paste", False)), - no_browser=bool(getattr(args, "no_browser", False)), - timeout=getattr(args, "timeout", None), - ) - _login_xai_oauth( - mock_args, - PROVIDER_REGISTRY["xai-oauth"], - force_new_login=True, - ) - except SystemExit: - print("Login cancelled or failed.") - return - except Exception as exc: - print(f"Login failed: {exc}") - return - elif choice == "3": - return - else: - print("Not logged into xAI Grok OAuth (SuperGrok / Premium+). Starting login...") - print() - try: - mock_args = argparse.Namespace( - manual_paste=bool(getattr(args, "manual_paste", False)), - no_browser=bool(getattr(args, "no_browser", False)), - timeout=getattr(args, "timeout", None), - ) - _login_xai_oauth(mock_args, PROVIDER_REGISTRY["xai-oauth"]) - except SystemExit: - print("Login cancelled or failed.") - return - except Exception as exc: - print(f"Login failed: {exc}") - return - - # Resolve a usable base URL. ``resolve_xai_oauth_runtime_credentials`` - # only reads from the auth.json singleton — but credentials may legitimately - # live only in the pool (e.g. after ``hermes auth add xai-oauth``). Fall - # back to the default base URL in that case so the model picker still - # completes successfully instead of bailing out with - # ``Could not resolve xAI OAuth credentials``. 
- base_url = DEFAULT_XAI_OAUTH_BASE_URL - try: - creds = resolve_xai_oauth_runtime_credentials() - base_url = (creds.get("base_url") or "").strip().rstrip("/") or base_url - except Exception: - pass - - models = list(_PROVIDER_MODELS.get("xai-oauth") or _PROVIDER_MODELS.get("xai") or []) - selected = _prompt_model_selection(models, current_model=current_model or (models[0] if models else "grok-4.3")) - if selected: - _save_model_choice(selected) - _update_config_for_provider("xai-oauth", base_url) - print(f"Default model set to: {selected} (via xAI Grok OAuth — SuperGrok / Premium+)") - else: - print("No change.") _DEFAULT_QWEN_PORTAL_MODELS = [ @@ -3810,390 +3379,12 @@ _DEFAULT_QWEN_PORTAL_MODELS = [ ] -def _model_flow_qwen_oauth(_config, current_model=""): - """Qwen OAuth provider: reuse local Qwen CLI login, then pick model.""" - from hermes_cli.auth import ( - get_qwen_auth_status, - resolve_qwen_runtime_credentials, - _prompt_model_selection, - _save_model_choice, - _update_config_for_provider, - DEFAULT_QWEN_BASE_URL, - ) - from hermes_cli.models import fetch_api_models - - status = get_qwen_auth_status() - if not status.get("logged_in"): - print("Not logged into Qwen CLI OAuth.") - print("Run: qwen auth qwen-oauth") - auth_file = status.get("auth_file") - if auth_file: - print(f"Expected credentials file: {auth_file}") - if status.get("error"): - print(f"Error: {status.get('error')}") - return - - # Try live model discovery, fall back to curated list. 
- models = None - try: - creds = resolve_qwen_runtime_credentials(refresh_if_expiring=True) - models = fetch_api_models(creds["api_key"], creds["base_url"]) - except Exception: - pass - if not models: - models = list(_DEFAULT_QWEN_PORTAL_MODELS) - - default = current_model or (models[0] if models else "qwen3-coder-plus") - selected = _prompt_model_selection(models, current_model=default) - if selected: - _save_model_choice(selected) - _update_config_for_provider("qwen-oauth", DEFAULT_QWEN_BASE_URL) - print(f"Default model set to: {selected} (via Qwen OAuth)") - else: - print("No change.") -def _model_flow_minimax_oauth(config, current_model="", args=None): - """MiniMax OAuth provider: ensure logged in, then pick model.""" - from hermes_cli.auth import ( - get_provider_auth_state, - _prompt_model_selection, - _save_model_choice, - _update_config_for_provider, - resolve_minimax_oauth_runtime_credentials, - AuthError, - format_auth_error, - _login_minimax_oauth, - PROVIDER_REGISTRY, - ) - - state = get_provider_auth_state("minimax-oauth") - if not state or not state.get("access_token"): - print("Not logged into MiniMax. 
Starting OAuth login...") - print() - try: - mock_args = argparse.Namespace( - region=getattr(args, "region", None) or "global", - no_browser=bool(getattr(args, "no_browser", False)), - timeout=getattr(args, "timeout", None) or 15.0, - ) - _login_minimax_oauth(mock_args, PROVIDER_REGISTRY["minimax-oauth"]) - except SystemExit: - print("Login cancelled or failed.") - return - except Exception as exc: - print(f"Login failed: {exc}") - return - - try: - creds = resolve_minimax_oauth_runtime_credentials() - except AuthError as exc: - print(format_auth_error(exc)) - return - - from hermes_cli.models import _PROVIDER_MODELS - - model_ids = _PROVIDER_MODELS.get("minimax-oauth", []) - selected = _prompt_model_selection(model_ids, current_model) - if not selected: - return - _save_model_choice(selected) - _update_config_for_provider("minimax-oauth", creds["base_url"]) - print(f"\u2713 Using MiniMax model: {selected}") -def _model_flow_google_gemini_cli(_config, current_model=""): - """Google Gemini OAuth (PKCE) via Cloud Code Assist — supports free AND paid tiers. - - Flow: - 1. Show upfront warning about Google's ToS stance (per opencode-gemini-auth). - 2. If creds missing, run PKCE browser OAuth via agent.google_oauth. - 3. Resolve project context (env -> config -> auto-discover -> free tier). - 4. Prompt user to pick a model. - 5. Save to ~/.hermes/config.yaml. - """ - from hermes_cli.auth import ( - DEFAULT_GEMINI_CLOUDCODE_BASE_URL, - get_gemini_oauth_auth_status, - resolve_gemini_oauth_runtime_credentials, - _prompt_model_selection, - _save_model_choice, - _update_config_for_provider, - ) - from hermes_cli.models import _PROVIDER_MODELS - - print() - print("⚠ Google considers using the Gemini CLI OAuth client with third-party") - print(" software a policy violation. Some users have reported account") - print(" restrictions. 
You can use your own API key via 'gemini' provider") - print(" for the lowest-risk experience.") - print() - try: - proceed = input("Continue with OAuth login? [y/N]: ").strip().lower() - except (EOFError, KeyboardInterrupt): - print("Cancelled.") - return - if proceed not in {"y", "yes"}: - print("Cancelled.") - return - - status = get_gemini_oauth_auth_status() - if not status.get("logged_in"): - try: - from agent.google_oauth import resolve_project_id_from_env, start_oauth_flow - - env_project = resolve_project_id_from_env() - start_oauth_flow(force_relogin=True, project_id=env_project) - except Exception as exc: - print(f"OAuth login failed: {exc}") - return - - # Verify creds resolve + trigger project discovery - try: - creds = resolve_gemini_oauth_runtime_credentials(force_refresh=False) - project_id = creds.get("project_id", "") - if project_id: - print(f" Using GCP project: {project_id}") - else: - print( - " No GCP project configured — free tier will be auto-provisioned on first request." - ) - except Exception as exc: - print(f"Failed to resolve Gemini credentials: {exc}") - return - - models = list(_PROVIDER_MODELS.get("google-gemini-cli") or []) - default = current_model or (models[0] if models else "gemini-3-flash-preview") - selected = _prompt_model_selection(models, current_model=default) - if selected: - _save_model_choice(selected) - _update_config_for_provider( - "google-gemini-cli", DEFAULT_GEMINI_CLOUDCODE_BASE_URL - ) - print( - f"Default model set to: {selected} (via Google Gemini OAuth / Code Assist)" - ) - else: - print("No change.") -def _model_flow_custom(config): - """Custom endpoint: collect URL, API key, and model name. - - Automatically saves the endpoint to ``custom_providers`` in config.yaml - so it appears in the provider menu on subsequent runs. 
- """ - from hermes_cli.auth import _save_model_choice, deactivate_provider - from hermes_cli.config import get_env_value, load_config, save_config - from hermes_cli.secret_prompt import masked_secret_prompt - - current_url = get_env_value("OPENAI_BASE_URL") or "" - current_key = get_env_value("OPENAI_API_KEY") or "" - - print("Custom OpenAI-compatible endpoint configuration:") - if current_url: - print(f" Current URL: {current_url}") - if current_key: - print(f" Current key: {current_key[:8]}...") - print() - - try: - base_url = input( - f"API base URL [{current_url or 'e.g. https://api.example.com/v1'}]: " - ).strip() - api_key = masked_secret_prompt( - f"API key [{current_key[:8] + '...' if current_key else 'optional'}]: " - ).strip() - except (KeyboardInterrupt, EOFError): - print("\nCancelled.") - return - - if not base_url and not current_url: - print("No URL provided. Cancelled.") - return - - # Validate URL format - effective_url = base_url or current_url - if not effective_url.startswith(("http://", "https://")): - print(f"Invalid URL: {effective_url} (must start with http:// or https://)") - return - - effective_key = api_key or current_key - - # Hint: most local model servers (Ollama, vLLM, llama.cpp) require /v1 - # in the base URL for OpenAI-compatible chat completions. Prompt the - # user if the URL looks like a local server without /v1. - _url_lower = effective_url.rstrip("/").lower() - _looks_local = any( - h in _url_lower - for h in ("localhost", "127.0.0.1", "0.0.0.0", ":11434", ":8080", ":5000") - ) - if _looks_local and not _url_lower.endswith("/v1"): - print() - print(f" Hint: Did you mean to add /v1 at the end?") - print(f" Most local model servers (Ollama, vLLM, llama.cpp) require it.") - print(f" e.g. {effective_url.rstrip('/')}/v1") - try: - _add_v1 = input(" Add /v1? 
[Y/n]: ").strip().lower() - except (KeyboardInterrupt, EOFError): - _add_v1 = "n" - if _add_v1 in {"", "y", "yes"}: - effective_url = effective_url.rstrip("/") + "/v1" - if base_url: - base_url = effective_url - print(f" Updated URL: {effective_url}") - print() - - from hermes_cli.models import probe_api_models - - probe = probe_api_models(effective_key, effective_url) - if probe.get("used_fallback") and probe.get("resolved_base_url"): - print( - f"Warning: endpoint verification worked at {probe['resolved_base_url']}/models, " - f"not the exact URL you entered. Saving the working base URL instead." - ) - effective_url = probe["resolved_base_url"] - if base_url: - base_url = effective_url - elif probe.get("models") is not None: - print( - f"Verified endpoint via {probe.get('probed_url')} " - f"({len(probe.get('models') or [])} model(s) visible)" - ) - else: - print( - f"Warning: could not verify this endpoint via {probe.get('probed_url')}. " - f"Hermes will still save it." - ) - if probe.get("suggested_base_url"): - suggested = probe["suggested_base_url"] - if suggested.endswith("/v1"): - print( - f" If this server expects /v1 in the path, try base URL: {suggested}" - ) - else: - print(f" If /v1 should not be in the base URL, try: {suggested}") - - # Prompt for API compatibility mode explicitly so codex-compatible custom - # providers don't silently fall back to chat_completions. 
- current_model_cfg = config.get("model") - current_api_mode = "" - if isinstance(current_model_cfg, dict): - current_api_mode = str(current_model_cfg.get("api_mode") or "").strip() - api_mode = _prompt_custom_api_mode_selection( - effective_url, - current_api_mode=current_api_mode, - ) - if api_mode: - print(f" API mode: {api_mode}") - else: - print(" API mode: auto-detect") - - # Select model — use probe results when available, fall back to manual input - model_name = "" - detected_models = probe.get("models") or [] - try: - if len(detected_models) == 1: - print(f" Detected model: {detected_models[0]}") - confirm = input(" Use this model? [Y/n]: ").strip().lower() - if confirm in {"", "y", "yes"}: - model_name = detected_models[0] - else: - model_name = input("Model name (e.g. gpt-4, llama-3-70b): ").strip() - elif len(detected_models) > 1: - print(" Available models:") - for i, m in enumerate(detected_models, 1): - print(f" {i}. {m}") - pick = input( - f" Select model [1-{len(detected_models)}] or type name: " - ).strip() - if pick.isdigit() and 1 <= int(pick) <= len(detected_models): - model_name = detected_models[int(pick) - 1] - elif pick: - model_name = pick - else: - model_name = input("Model name (e.g. 
gpt-4, llama-3-70b): ").strip() - - context_length_str = input( - "Context length in tokens [leave blank for auto-detect]: " - ).strip() - - # Prompt for a display name — shown in the provider menu on future runs - default_name = _auto_provider_name(effective_url) - display_name = input(f"Display name [{default_name}]: ").strip() or default_name - except (KeyboardInterrupt, EOFError): - print("\nCancelled.") - return - - context_length = None - if context_length_str: - try: - context_length = int( - context_length_str.replace(",", "") - .replace("k", "000") - .replace("K", "000") - ) - if context_length <= 0: - context_length = None - except ValueError: - print(f"Invalid context length: {context_length_str} — will auto-detect.") - context_length = None - - if model_name: - _save_model_choice(model_name) - - # Update config and deactivate any OAuth provider - cfg = load_config() - model = cfg.get("model") - if not isinstance(model, dict): - model = {"default": model} if model else {} - cfg["model"] = model - model["provider"] = "custom" - model["base_url"] = effective_url - if effective_key: - model["api_key"] = effective_key - if api_mode: - model["api_mode"] = api_mode - else: - model.pop("api_mode", None) - save_config(cfg) - deactivate_provider() - - # Sync the caller's config dict so the setup wizard's final - # save_config(config) preserves our model settings. Without - # this, the wizard overwrites model.provider/base_url with - # the stale values from its own config dict (#4172). - config["model"] = dict(model) - - print(f"Default model set to: {model_name} (via {effective_url})") - else: - if base_url or api_key: - deactivate_provider() - # Even without a model name, persist the custom endpoint on the - # caller's config dict so the setup wizard doesn't lose it. 
- _caller_model = config.get("model") - if not isinstance(_caller_model, dict): - _caller_model = {"default": _caller_model} if _caller_model else {} - _caller_model["provider"] = "custom" - _caller_model["base_url"] = effective_url - if effective_key: - _caller_model["api_key"] = effective_key - if api_mode: - _caller_model["api_mode"] = api_mode - else: - _caller_model.pop("api_mode", None) - config["model"] = _caller_model - print("Endpoint saved. Use `/model` in chat or `hermes model` to set a model.") - - # Auto-save to custom_providers so it appears in the menu next time - _save_custom_provider( - effective_url, - effective_key, - model_name or "", - context_length=context_length, - name=display_name, - api_mode=api_mode, - ) def _prompt_custom_api_mode_selection(base_url: str, current_api_mode: str = "") -> Optional[str]: @@ -4372,372 +3563,6 @@ def _save_custom_provider( print(f' 💾 Saved to custom providers as "{name}" (edit in config.yaml)') -def _model_flow_azure_foundry(config, current_model=""): - """Azure Foundry provider: configure endpoint, auth mode, API mode, and model. - - Azure Foundry supports both OpenAI-style (``/v1/chat/completions``) and - Anthropic-style (``/v1/messages``) endpoints, and two authentication - modes: - - * **API key** (default) — uses ``AZURE_FOUNDRY_API_KEY`` from .env. - * **Microsoft Entra ID** — keyless, RBAC-based auth via the - ``azure-identity`` SDK (Managed Identity / Workload Identity / az - login / VS Code / azd / service principal env vars). Works on both - OpenAI-style and Anthropic-style endpoints — Microsoft RBAC is - per-resource and the same ``Azure AI User`` role grants - both. For OpenAI-style the OpenAI SDK's native callable - ``api_key=`` contract is used; for Anthropic-style an - ``httpx.Client`` with a request event hook (built by - :func:`agent.azure_identity_adapter.build_bearer_http_client`) - mints a fresh JWT per request because the Anthropic SDK does not - accept a callable ``auth_token`` natively. 
- - The wizard auto-detects the transport and available models when - possible: - - * URLs ending in ``/anthropic`` → Anthropic Messages API. - * Successful ``GET /models`` probe → OpenAI-style + populates - a picker with the returned deployment / model IDs. - * Anthropic Messages probe fallback when ``/models`` fails. - * Manual entry when every probe fails (private endpoints, etc.). - - Context lengths for the chosen model are resolved via the standard - :func:`agent.model_metadata.get_model_context_length` chain - (models.dev, provider metadata, hardcoded family fallbacks). - """ - from hermes_cli.auth import _save_model_choice, deactivate_provider # noqa: F401 - from hermes_cli.config import ( - get_env_value, - save_env_value, - load_config, - save_config, - ) - from hermes_cli import azure_detect - - # ── Load current Azure Foundry configuration ───────────────────── - model_cfg = config.get("model", {}) - if isinstance(model_cfg, dict) and model_cfg.get("provider") == "azure-foundry": - current_base_url = str(model_cfg.get("base_url", "") or "") - current_api_mode = str(model_cfg.get("api_mode", "") or "") - current_auth_mode = str(model_cfg.get("auth_mode") or "api_key").strip().lower() or "api_key" - _cur_entra = model_cfg.get("entra") or {} - current_entra = _cur_entra if isinstance(_cur_entra, dict) else {} - else: - current_base_url = "" - current_api_mode = "" - current_auth_mode = "api_key" - current_entra = {} - - current_api_key = get_env_value("AZURE_FOUNDRY_API_KEY") or "" - - print() - print("Azure Foundry Configuration") - print("=" * 50) - print() - print("Azure Foundry can host models with either OpenAI-style or") - print("Anthropic-style API endpoints. 
Hermes will probe your") - print("endpoint to auto-detect the transport and the deployed") - print("models when possible.") - print() - - if current_base_url: - print(f" Current endpoint: {current_base_url}") - if current_api_mode: - _lbl = ( - "OpenAI-style" - if current_api_mode == "chat_completions" - else "Anthropic-style" - ) - print(f" Current API mode: {_lbl}") - if current_auth_mode == "entra_id": - print(f" Current auth mode: Microsoft Entra ID (keyless)") - elif current_api_key: - print(f" Current auth mode: API key ({current_api_key[:8]}...)") - print() - - # ── Step 1: endpoint URL ───────────────────────────────────────── - try: - _placeholder = ( - current_base_url - or "e.g. https://.openai.azure.com/openai/v1 " - "or https://.services.ai.azure.com/anthropic" - ) - base_url = input( - f"API endpoint URL [{_placeholder}]: " - ).strip() - except (KeyboardInterrupt, EOFError): - print("\nCancelled.") - return - - effective_url = (base_url or current_base_url).rstrip("/") - if not effective_url: - print("No endpoint URL provided. Cancelled.") - return - if not effective_url.startswith(("http://", "https://")): - print(f"Invalid URL: {effective_url} (must start with http:// or https://)") - return - - # ── Step 2: authentication mode ────────────────────────────────── - print() - print("Authentication:") - print(" 1. API key (AZURE_FOUNDRY_API_KEY in .env)") - print(" 2. Microsoft Entra ID (managed identity / workload identity / az login)") - print(" Recommended by Microsoft. 
Works for both OpenAI-style and Anthropic-style endpoints.") - print(" Requires the 'Azure AI User' role on the Foundry resource.") - try: - _auth_default = "2" if current_auth_mode == "entra_id" else "1" - auth_choice = ( - input(f"Authentication mode [1/2] ({_auth_default}): ").strip() - or _auth_default - ) - except (KeyboardInterrupt, EOFError): - print("\nCancelled.") - return - use_entra = auth_choice == "2" - auth_mode_label = "entra_id" if use_entra else "api_key" - - # ── Step 3: credentials (key OR Entra preflight) ───────────────── - effective_key: str = "" - entra_overrides: dict = {} - token_provider = None # callable when entra - entra_scope = "" - - if use_entra: - try: - from agent.azure_identity_adapter import ( - EntraIdentityConfig, - SCOPE_AI_AZURE_DEFAULT, - build_token_provider, - describe_active_credential, - has_azure_identity_installed, - ) - except ImportError as exc: - print() - print(f"⚠ Could not import azure-identity adapter: {exc}") - print(" Falling back to API key auth.") - use_entra = False - auth_mode_label = "api_key" - - if use_entra: - print() - if not has_azure_identity_installed(): - print("◐ The 'azure-identity' package is not installed yet.") - print( - " Hermes will install it now (the preflight below " - "triggers the lazy-install). To skip lazy installs, " - "run: pip install azure-identity" - ) - - # Preserve only the optional scope override. Identity selection - # (tenant, user-assigned MI, workload identity, service principal) - # stays in Azure SDK env vars such as AZURE_CLIENT_ID. 
- _persisted_scope_override = str(current_entra.get("scope") or "").strip() - entra_scope = _persisted_scope_override or SCOPE_AI_AZURE_DEFAULT - - entra_overrides = {} - if _persisted_scope_override: - entra_overrides["scope"] = _persisted_scope_override - - print() - print("◐ Probing Microsoft Entra ID credential chain (up to 10s)...") - _config = EntraIdentityConfig( - scope=entra_scope, - ) - info = describe_active_credential(config=_config, timeout_seconds=10.0) - if info.get("ok"): - env_sources = info.get("env_sources") or [] - tag = ", ".join(env_sources) if env_sources else "default chain" - print(f"✓ Entra ID token acquired ({tag}, scope={entra_scope})") - else: - err = info.get("error") or "credential chain exhausted" - hint = info.get("hint") or ( - "Run `az login`, attach a managed identity to this VM, or " - "set AZURE_TENANT_ID/AZURE_CLIENT_ID/AZURE_CLIENT_SECRET." - ) - print(f"⚠ {err}") - print(f" Hint: {hint}") - try: - ans = input("Save Entra config anyway and validate later? [Y/n]: ").strip().lower() - except (KeyboardInterrupt, EOFError): - print("\nCancelled.") - return - if ans and ans not in ("y", "yes"): - print("Cancelled.") - return - - # Build the token provider for the detection probe (best-effort — - # if the credential chain failed above, this will silently return - # None inside azure_detect and the probe falls back to manual). - try: - token_provider = build_token_provider(config=_config) - except Exception as exc: - print(f"⚠ Could not build token provider for probing: {exc}") - token_provider = None - else: - print() - from hermes_cli.secret_prompt import masked_secret_prompt - - try: - api_key = masked_secret_prompt( - f"API key [{current_api_key[:8] + '...' if current_api_key else 'required'}]: " - ).strip() - except (KeyboardInterrupt, EOFError): - print("\nCancelled.") - return - - effective_key = api_key or current_api_key - if not effective_key: - print("No API key provided. 
Cancelled.") - return - - # ── Step 4: auto-detect transport + models ─────────────────────── - print() - print("◐ Probing endpoint to auto-detect transport and models...") - detection = azure_detect.detect( - effective_url, - api_key=effective_key, - token_provider=token_provider, - ) - - discovered_models: list[str] = list(detection.models) - api_mode: str = detection.api_mode or "" - - if api_mode: - mode_label = ( - "OpenAI-style" if api_mode == "chat_completions" else "Anthropic-style" - ) - print(f"✓ Detected API transport: {mode_label}") - if detection.reason: - print(f" ({detection.reason})") - if discovered_models: - print( - f"✓ Found {len(discovered_models)} deployed model(s) on this endpoint" - ) - else: - print(f"⚠ Auto-detection incomplete: {detection.reason}") - print() - print("Select the API format your Azure Foundry endpoint uses:") - print(" 1. OpenAI-style (POST /v1/chat/completions)") - print(" For: GPT models, Llama, Mistral, and most open models") - print(" 2. Anthropic-style (POST /v1/messages)") - print(" For: Claude models deployed via Anthropic API format") - try: - default_choice = "2" if current_api_mode == "anthropic_messages" else "1" - mode_choice = ( - input(f"API format [1/2] ({default_choice}): ").strip() - or default_choice - ) - except (KeyboardInterrupt, EOFError): - print("\nCancelled.") - return - api_mode = "anthropic_messages" if mode_choice == "2" else "chat_completions" - - # ── Step 5: model name ─────────────────────────────────────────── - print() - effective_model = "" - if discovered_models: - print("Available models on this endpoint:") - for i, mid in enumerate(discovered_models[:30], start=1): - print(f" {i:>2}. {mid}") - if len(discovered_models) > 30: - print( - f" ... 
and {len(discovered_models) - 30} more (type name manually if not shown)" - ) - print() - try: - pick = input( - f"Pick by number, or type a deployment name [{current_model or discovered_models[0]}]: " - ).strip() - except (KeyboardInterrupt, EOFError): - print("\nCancelled.") - return - if not pick: - effective_model = current_model or discovered_models[0] - elif pick.isdigit() and 1 <= int(pick) <= min(len(discovered_models), 30): - effective_model = discovered_models[int(pick) - 1] - else: - effective_model = pick - else: - try: - model_name = input( - f"Model / deployment name [{current_model or 'e.g. gpt-5.4, claude-sonnet-4-6'}]: " - ).strip() - except (KeyboardInterrupt, EOFError): - print("\nCancelled.") - return - effective_model = model_name or current_model - - if not effective_model: - print("No model name provided. Cancelled.") - return - - # ── Step 6: context-length lookup ──────────────────────────────── - ctx_len = azure_detect.lookup_context_length( - effective_model, - effective_url, - api_key=effective_key, - token_provider=token_provider, - ) - - # ── Step 7: persist ────────────────────────────────────────────── - if not use_entra: - save_env_value("AZURE_FOUNDRY_API_KEY", effective_key) - - cfg = load_config() - model = cfg.get("model") - if not isinstance(model, dict): - model = {"default": model} if model else {} - cfg["model"] = model - - model["provider"] = "azure-foundry" - model["base_url"] = effective_url - model["api_mode"] = api_mode - model["default"] = effective_model - model["auth_mode"] = auth_mode_label - if use_entra: - # Persist only the non-default Entra scope so config.yaml stays tidy. - # Azure identity selection stays in standard AZURE_* env vars. 
- clean_entra: dict = {} - for key in ("scope",): - val = entra_overrides.get(key) - if val: - clean_entra[key] = val - if clean_entra: - model["entra"] = clean_entra - elif "entra" in model: - del model["entra"] - else: - if "entra" in model: - del model["entra"] - if ctx_len: - model["context_length"] = ctx_len - - save_config(cfg) - deactivate_provider() - config["model"] = dict(model) - - # Clear any conflicting env vars so auxiliary clients don't poison - # themselves with a stale OpenAI base URL / key. - if get_env_value("OPENAI_BASE_URL"): - save_env_value("OPENAI_BASE_URL", "") - if get_env_value("OPENAI_API_KEY"): - save_env_value("OPENAI_API_KEY", "") - - mode_label = "OpenAI-style" if api_mode == "chat_completions" else "Anthropic-style" - auth_label = ( - "Microsoft Entra ID (keyless)" if use_entra else "API key" - ) - print() - print("✓ Azure Foundry configured:") - print(f" Endpoint: {effective_url}") - print(f" API mode: {mode_label}") - print(f" Auth: {auth_label}") - print(f" Model: {effective_model}") - if ctx_len: - print(f" Context length: {ctx_len:,} tokens") - else: - print(" Context length: not auto-detected (will fall back at runtime)") - print() def _remove_custom_provider(config): @@ -4798,196 +3623,6 @@ def _remove_custom_provider(config): print(f'✅ Removed "{removed_name}" from custom providers.') -def _model_flow_named_custom(config, provider_info): - """Handle a named custom provider from config.yaml custom_providers list. - - Always probes the endpoint's /models API to let the user pick a model. - If a model was previously saved, it is pre-selected in the menu. - Falls back to the saved model if probing fails. 
- """ - from hermes_cli.auth import _save_model_choice, deactivate_provider - from hermes_cli.config import load_config, save_config - from hermes_cli.models import fetch_api_models - - name = provider_info["name"] - base_url = provider_info["base_url"] - api_mode = provider_info.get("api_mode", "") - api_key = provider_info.get("api_key", "") - key_env = provider_info.get("key_env", "") - saved_model = provider_info.get("model", "") - provider_key = (provider_info.get("provider_key") or "").strip() - - # Resolve key from env var if api_key not set directly - if not api_key and key_env: - api_key = os.environ.get(key_env, "") - config_api_key = _custom_provider_api_key_config_value(provider_info, api_key) - - # Honor ``discover_models: false`` (default True) — when discovery is - # disabled, use the configured ``models:`` list verbatim and skip the - # live /models probe. This lets operators restrict the picker to the - # subset their plan actually serves instead of the endpoint's full - # catalog (#18726: Baidu Qianfan returns 100+ models for a 2-3 model - # plan). Same semantics as the slash-command picker (model_switch.py - # sections 3 & 4): default discovers, false keeps the explicit list. - discover = provider_info.get("discover_models", True) - if isinstance(discover, str): - discover = discover.lower() not in {"false", "no", "0"} - configured_models: list[str] = [] - cfg_models = provider_info.get("models", {}) - if isinstance(cfg_models, dict): - configured_models = [str(m) for m in cfg_models if str(m).strip()] - elif isinstance(cfg_models, list): - configured_models = [ - str(m) for m in cfg_models if isinstance(m, str) and m.strip() - ] - - print(f" Provider: {name}") - print(f" URL: {base_url}") - if saved_model: - print(f" Current: {saved_model}") - print() - - if not discover and configured_models: - # Discovery disabled with an explicit list — use it verbatim, no probe. 
- print(f"Using configured models (discover_models: false): {len(configured_models)}") - models = configured_models - else: - print("Fetching available models...") - fetch_kwargs = {"timeout": 8.0} - if api_mode: - fetch_kwargs["api_mode"] = api_mode - models = fetch_api_models(api_key, base_url, **fetch_kwargs) - # If the probe came back empty but the operator configured an explicit - # list, fall back to it rather than forcing manual entry. - if not models and configured_models: - models = configured_models - - if models: - default_idx = 0 - if saved_model and saved_model in models: - default_idx = models.index(saved_model) - - print(f"Found {len(models)} model(s):\n") - try: - from hermes_cli.curses_ui import curses_radiolist - - menu_items = [ - f"{m} (current)" if m == saved_model else m for m in models - ] + ["Cancel"] - idx = curses_radiolist( - f"Select model from {name}:", - menu_items, - selected=default_idx, - cancel_returns=-1, - searchable=True, - ) - print() - if idx < 0 or idx >= len(models): - print("Cancelled.") - return - model_name = models[idx] - except (ImportError, NotImplementedError, OSError, subprocess.SubprocessError): - for i, m in enumerate(models, 1): - suffix = " (current)" if m == saved_model else "" - print(f" {i}. {m}{suffix}") - print(f" {len(models) + 1}. Cancel") - print() - try: - val = input(f"Choice [1-{len(models) + 1}]: ").strip() - if not val: - print("Cancelled.") - return - idx = int(val) - 1 - if idx < 0 or idx >= len(models): - print("Cancelled.") - return - model_name = models[idx] - except (ValueError, KeyboardInterrupt, EOFError): - print("\nCancelled.") - return - elif saved_model: - print("Could not fetch models from endpoint.") - try: - model_name = input(f"Model name [{saved_model}]: ").strip() or saved_model - except (KeyboardInterrupt, EOFError): - print("\nCancelled.") - return - else: - print("Could not fetch models from endpoint. 
Enter model name manually.") - try: - model_name = input("Model name: ").strip() - except (KeyboardInterrupt, EOFError): - print("\nCancelled.") - return - if not model_name: - print("No model specified. Cancelled.") - return - - # Activate and save the model to the custom_providers entry - _save_model_choice(model_name) - - cfg = load_config() - model = cfg.get("model") - if not isinstance(model, dict): - model = {"default": model} if model else {} - cfg["model"] = model - if provider_key: - model["provider"] = provider_key - model.pop("base_url", None) - model.pop("api_key", None) - else: - model["provider"] = "custom" - model["base_url"] = _custom_provider_base_url_config_value( - provider_info, base_url - ) - if config_api_key: - model["api_key"] = config_api_key - # Apply api_mode from custom_providers entry, or clear stale value - custom_api_mode = provider_info.get("api_mode", "") - if custom_api_mode: - model["api_mode"] = custom_api_mode - else: - model.pop("api_mode", None) # let runtime auto-detect from URL - save_config(cfg) - deactivate_provider() - - # Persist the selected model back to whichever schema owns this endpoint. - if provider_key: - cfg = load_config() - providers_cfg = cfg.get("providers") - if isinstance(providers_cfg, dict): - provider_entry = providers_cfg.get(provider_key) - if isinstance(provider_entry, dict): - provider_entry["default_model"] = model_name - # Only persist an inline api_key when the user originally had - # one (either a literal secret or a ``${VAR}`` template). When - # the entry relies on ``key_env``, do not synthesize a - # ``${key_env}`` api_key — the runtime already resolves the - # key from ``key_env`` directly, and writing the resolved - # secret (or even a synthesized template) would silently - # downgrade credential hygiene on entries that intentionally - # keep plaintext out of ``config.yaml``. See issue #15803. 
- original_api_key_ref = str( - provider_info.get("api_key_ref", "") or "" - ).strip() - original_api_key = str(provider_info.get("api_key", "") or "").strip() - had_inline_api_key = bool(original_api_key_ref or original_api_key) - if ( - had_inline_api_key - and config_api_key - and not str(provider_entry.get("api_key", "") or "").strip() - ): - provider_entry["api_key"] = config_api_key - if key_env and not str(provider_entry.get("key_env", "") or "").strip(): - provider_entry["key_env"] = key_env - cfg["providers"] = providers_cfg - save_config(cfg) - else: - # Save model name to the custom_providers entry for next time - _save_custom_provider(base_url, config_api_key, model_name, api_mode=api_mode) - - print(f"\n✅ Model set to: {model_name}") - print(f" Provider: {name} ({base_url})") # Lazy-export the model catalog at module level. Tests and a handful of @@ -5108,312 +3743,8 @@ def _prompt_reasoning_effort_selection(efforts, current_effort=""): return None -def _model_flow_copilot(config, current_model=""): - """GitHub Copilot flow using env vars, gh CLI, or OAuth device code.""" - from hermes_cli.auth import ( - PROVIDER_REGISTRY, - _prompt_model_selection, - _save_model_choice, - deactivate_provider, - resolve_api_key_provider_credentials, - ) - from hermes_cli.config import save_env_value, load_config, save_config - from hermes_cli.models import ( - _PROVIDER_MODELS, - fetch_api_models, - fetch_github_model_catalog, - github_model_reasoning_efforts, - copilot_model_api_mode, - normalize_copilot_model_id, - ) - - provider_id = "copilot" - pconfig = PROVIDER_REGISTRY[provider_id] - - creds = resolve_api_key_provider_credentials(provider_id) - api_key = creds.get("api_key", "") - source = creds.get("source", "") - - if not api_key: - print("No GitHub token configured for GitHub Copilot.") - print() - print(" Supported token types:") - print( - " → OAuth token (gho_*) via `copilot login` or device code flow" - ) - print(" → Fine-grained PAT (github_pat_*) with 
Copilot Requests permission") - print(" → GitHub App token (ghu_*) via environment variable") - print(" ✗ Classic PAT (ghp_*) NOT supported by Copilot API") - print() - print(" Options:") - print(" 1. Login with GitHub (OAuth device code flow)") - print(" 2. Enter a token manually") - print(" 3. Cancel") - print() - try: - choice = input(" Choice [1-3]: ").strip() - except (KeyboardInterrupt, EOFError): - print() - return - - if choice == "1": - try: - from hermes_cli.copilot_auth import copilot_device_code_login - - token = copilot_device_code_login() - if token: - save_env_value("COPILOT_GITHUB_TOKEN", token) - print(" Copilot token saved.") - print() - else: - print(" Login cancelled or failed.") - return - except Exception as exc: - print(f" Login failed: {exc}") - return - elif choice == "2": - from hermes_cli.secret_prompt import masked_secret_prompt - - try: - new_key = masked_secret_prompt(" Token (COPILOT_GITHUB_TOKEN): ").strip() - except (KeyboardInterrupt, EOFError): - print() - return - if not new_key: - print(" Cancelled.") - return - # Validate token type - try: - from hermes_cli.copilot_auth import validate_copilot_token - - valid, msg = validate_copilot_token(new_key) - if not valid: - print(f" ✗ {msg}") - return - except ImportError: - pass - save_env_value("COPILOT_GITHUB_TOKEN", new_key) - print(" Token saved.") - print() - else: - print(" Cancelled.") - return - - creds = resolve_api_key_provider_credentials(provider_id) - api_key = creds.get("api_key", "") - source = creds.get("source", "") - else: - if source in {"GITHUB_TOKEN", "GH_TOKEN"}: - from hermes_cli.env_loader import format_secret_source_suffix - bw_suffix = format_secret_source_suffix(source) - print(f" GitHub token: {api_key[:8]}... 
✓ ({source}{bw_suffix})") - elif source == "gh auth token": - print(" GitHub token: ✓ (from `gh auth token`)") - else: - print(" GitHub token: ✓") - print() - - effective_base = pconfig.inference_base_url - - catalog = fetch_github_model_catalog(api_key) - live_models = ( - [item.get("id", "") for item in catalog if item.get("id")] - if catalog - else fetch_api_models(api_key, effective_base) - ) - normalized_current_model = ( - normalize_copilot_model_id( - current_model, - catalog=catalog, - api_key=api_key, - ) - or current_model - ) - if live_models: - model_list = [model_id for model_id in live_models if model_id] - print(f" Found {len(model_list)} model(s) from GitHub Copilot") - else: - model_list = _PROVIDER_MODELS.get(provider_id, []) - if model_list: - print( - " ⚠ Could not auto-detect models from GitHub Copilot — showing defaults." - ) - print(' Use "Enter custom model name" if you do not see your model.') - - if model_list: - selected = _prompt_model_selection( - model_list, current_model=normalized_current_model - ) - else: - try: - selected = input("Model name: ").strip() - except (KeyboardInterrupt, EOFError): - selected = None - - if selected: - selected = ( - normalize_copilot_model_id( - selected, - catalog=catalog, - api_key=api_key, - ) - or selected - ) - initial_cfg = load_config() - current_effort = _current_reasoning_effort(initial_cfg) - reasoning_efforts = github_model_reasoning_efforts( - selected, - catalog=catalog, - api_key=api_key, - ) - selected_effort = None - if reasoning_efforts: - print(f" {selected} supports reasoning controls.") - selected_effort = _prompt_reasoning_effort_selection( - reasoning_efforts, current_effort=current_effort - ) - - _save_model_choice(selected) - - cfg = load_config() - model = cfg.get("model") - if not isinstance(model, dict): - model = {"default": model} if model else {} - cfg["model"] = model - model["provider"] = provider_id - model["base_url"] = effective_base - model["api_mode"] = 
copilot_model_api_mode( - selected, - catalog=catalog, - api_key=api_key, - ) - if selected_effort is not None: - _set_reasoning_effort(cfg, selected_effort) - save_config(cfg) - deactivate_provider() - - print(f"Default model set to: {selected} (via {pconfig.name})") - if reasoning_efforts: - if selected_effort == "none": - print("Reasoning disabled for this model.") - elif selected_effort: - print(f"Reasoning effort set to: {selected_effort}") - else: - print("No change.") -def _model_flow_copilot_acp(config, current_model=""): - """GitHub Copilot ACP flow using the local Copilot CLI.""" - from hermes_cli.auth import ( - PROVIDER_REGISTRY, - _prompt_model_selection, - _save_model_choice, - deactivate_provider, - get_external_process_provider_status, - resolve_api_key_provider_credentials, - resolve_external_process_provider_credentials, - ) - from hermes_cli.models import ( - _PROVIDER_MODELS, - fetch_github_model_catalog, - normalize_copilot_model_id, - ) - from hermes_cli.config import load_config, save_config - - del config - - provider_id = "copilot-acp" - pconfig = PROVIDER_REGISTRY[provider_id] - - status = get_external_process_provider_status(provider_id) - resolved_command = ( - status.get("resolved_command") or status.get("command") or "copilot" - ) - effective_base = status.get("base_url") or pconfig.inference_base_url - - print(" GitHub Copilot ACP delegates Hermes turns to `copilot --acp`.") - print(" Hermes currently starts its own ACP subprocess for each request.") - print(" Hermes uses your selected model as a hint for the Copilot ACP session.") - print(f" Command: {resolved_command}") - print(f" Backend marker: {effective_base}") - print() - - try: - creds = resolve_external_process_provider_credentials(provider_id) - except Exception as exc: - print(f" ⚠ {exc}") - print( - " Set HERMES_COPILOT_ACP_COMMAND or COPILOT_CLI_PATH if Copilot CLI is installed elsewhere." 
- ) - return - - effective_base = creds.get("base_url") or effective_base - - catalog_api_key = "" - try: - catalog_creds = resolve_api_key_provider_credentials("copilot") - catalog_api_key = catalog_creds.get("api_key", "") - except Exception: - pass - - catalog = fetch_github_model_catalog(catalog_api_key) - normalized_current_model = ( - normalize_copilot_model_id( - current_model, - catalog=catalog, - api_key=catalog_api_key, - ) - or current_model - ) - - if catalog: - model_list = [item.get("id", "") for item in catalog if item.get("id")] - print(f" Found {len(model_list)} model(s) from GitHub Copilot") - else: - model_list = _PROVIDER_MODELS.get("copilot", []) - if model_list: - print( - " ⚠ Could not auto-detect models from GitHub Copilot — showing defaults." - ) - print(' Use "Enter custom model name" if you do not see your model.') - - if model_list: - selected = _prompt_model_selection( - model_list, - current_model=normalized_current_model, - ) - else: - try: - selected = input("Model name: ").strip() - except (KeyboardInterrupt, EOFError): - selected = None - - if not selected: - print("No change.") - return - - selected = ( - normalize_copilot_model_id( - selected, - catalog=catalog, - api_key=catalog_api_key, - ) - or selected - ) - _save_model_choice(selected) - - cfg = load_config() - model = cfg.get("model") - if not isinstance(model, dict): - model = {"default": model} if model else {} - cfg["model"] = model - model["provider"] = provider_id - model["base_url"] = effective_base - model["api_mode"] = "chat_completions" - save_config(cfg) - deactivate_provider() - - print(f"Default model set to: {selected} (via {pconfig.name})") def _prompt_api_key(pconfig, existing_key: str, provider_id: str = "") -> tuple: @@ -5499,101 +3830,6 @@ def _prompt_api_key(pconfig, existing_key: str, provider_id: str = "") -> tuple: return existing_key, False -def _model_flow_kimi(config, current_model=""): - """Kimi / Moonshot model selection with automatic endpoint 
routing. - - - sk-kimi-* keys → api.kimi.com/coding/v1 (Kimi Coding Plan) - - Other keys → api.moonshot.ai/v1 (legacy Moonshot) - - No manual base URL prompt — endpoint is determined by key prefix. - """ - from hermes_cli.auth import ( - PROVIDER_REGISTRY, - KIMI_CODE_BASE_URL, - _prompt_model_selection, - _save_model_choice, - deactivate_provider, - ) - from hermes_cli.config import ( - get_env_value, - save_env_value, - load_config, - save_config, - ) - from hermes_cli.models import _PROVIDER_MODELS - - provider_id = "kimi-coding" - pconfig = PROVIDER_REGISTRY[provider_id] - key_env = pconfig.api_key_env_vars[0] if pconfig.api_key_env_vars else "" - base_url_env = pconfig.base_url_env_var or "" - - # Step 1: Check / prompt for API key - existing_key = "" - for ev in pconfig.api_key_env_vars: - existing_key = get_env_value(ev) or os.getenv(ev, "") - if existing_key: - break - - existing_key, abort = _prompt_api_key( - pconfig, existing_key, provider_id=provider_id - ) - if abort: - return - - # Step 2: Auto-detect endpoint from key prefix - is_coding_plan = existing_key.startswith("sk-kimi-") - if is_coding_plan: - effective_base = KIMI_CODE_BASE_URL - print(f" Detected Kimi Coding Plan key → {effective_base}") - else: - effective_base = pconfig.inference_base_url - print(f" Using Moonshot endpoint → {effective_base}") - # Clear any manual base URL override so auto-detection works at runtime - if base_url_env and get_env_value(base_url_env): - save_env_value(base_url_env, "") - print() - - # Step 3: Model selection — show appropriate models for the endpoint - if is_coding_plan: - # Coding Plan models (kimi-k2.6 first) - model_list = [ - "kimi-k2.6", - "kimi-k2.5", - "kimi-for-coding", - "kimi-k2-thinking", - "kimi-k2-thinking-turbo", - ] - else: - # Legacy Moonshot models (excludes Coding Plan-only models) - model_list = _PROVIDER_MODELS.get("moonshot", []) - - if model_list: - selected = _prompt_model_selection(model_list, current_model=current_model) - else: - 
try: - selected = input("Enter model name: ").strip() - except (KeyboardInterrupt, EOFError): - selected = None - - if selected: - _save_model_choice(selected) - - # Update config with provider and base URL - cfg = load_config() - model = cfg.get("model") - if not isinstance(model, dict): - model = {"default": model} if model else {} - cfg["model"] = model - model["provider"] = provider_id - model["base_url"] = effective_base - model.pop("api_mode", None) # let runtime auto-detect from URL - save_config(cfg) - deactivate_provider() - - endpoint_label = "Kimi Coding" if is_coding_plan else "Moonshot" - print(f"Default model set to: {selected} (via {endpoint_label})") - else: - print("No change.") def _infer_stepfun_region(base_url: str) -> str: @@ -5617,668 +3853,12 @@ def _stepfun_base_url_for_region(region: str) -> str: ) -def _model_flow_stepfun(config, current_model=""): - """StepFun Step Plan flow with region-specific endpoints.""" - from hermes_cli.auth import ( - PROVIDER_REGISTRY, - _prompt_model_selection, - _save_model_choice, - deactivate_provider, - ) - from hermes_cli.config import ( - get_env_value, - save_env_value, - load_config, - save_config, - ) - from hermes_cli.models import _PROVIDER_MODELS, fetch_api_models - provider_id = "stepfun" - pconfig = PROVIDER_REGISTRY[provider_id] - key_env = pconfig.api_key_env_vars[0] if pconfig.api_key_env_vars else "" - base_url_env = pconfig.base_url_env_var or "" - existing_key = "" - for ev in pconfig.api_key_env_vars: - existing_key = get_env_value(ev) or os.getenv(ev, "") - if existing_key: - break - existing_key, abort = _prompt_api_key( - pconfig, existing_key, provider_id=provider_id - ) - if abort: - return - current_base = "" - if base_url_env: - current_base = get_env_value(base_url_env) or os.getenv(base_url_env, "") - if not current_base: - model_cfg = config.get("model") - if isinstance(model_cfg, dict): - current_base = str(model_cfg.get("base_url") or "").strip() - current_region = 
_infer_stepfun_region(current_base or pconfig.inference_base_url) - region_choices = [ - ( - "international", - f"International ({_stepfun_base_url_for_region('international')})", - ), - ("china", f"China ({_stepfun_base_url_for_region('china')})"), - ] - ordered_regions = [] - for region_key, label in region_choices: - if region_key == current_region: - ordered_regions.insert(0, (region_key, f"{label} ← currently active")) - else: - ordered_regions.append((region_key, label)) - ordered_regions.append(("cancel", "Cancel")) - region_idx = _prompt_provider_choice([label for _, label in ordered_regions]) - if region_idx is None or ordered_regions[region_idx][0] == "cancel": - print("No change.") - return - - selected_region = ordered_regions[region_idx][0] - effective_base = _stepfun_base_url_for_region(selected_region) - if base_url_env: - save_env_value(base_url_env, effective_base) - - live_models = fetch_api_models(existing_key, effective_base) - if live_models: - model_list = live_models - print(f" Found {len(model_list)} model(s) from {pconfig.name} API") - else: - model_list = _PROVIDER_MODELS.get(provider_id, []) - if model_list: - print( - f" Could not auto-detect models from {pconfig.name} API — " - "showing Step Plan fallback catalog." 
- ) - - if model_list: - selected = _prompt_model_selection(model_list, current_model=current_model) - else: - try: - selected = input("Model name: ").strip() - except (KeyboardInterrupt, EOFError): - selected = None - - if selected: - _save_model_choice(selected) - - cfg = load_config() - model = cfg.get("model") - if not isinstance(model, dict): - model = {"default": model} if model else {} - cfg["model"] = model - model["provider"] = provider_id - model["base_url"] = effective_base - model.pop("api_mode", None) - save_config(cfg) - deactivate_provider() - - config["model"] = dict(model) - print(f"Default model set to: {selected} (via {pconfig.name})") - else: - print("No change.") - - -def _model_flow_bedrock_api_key(config, region, current_model=""): - """Bedrock API Key mode — uses the OpenAI-compatible bedrock-mantle endpoint. - - For developers who don't have an AWS account but received a Bedrock API Key - from their AWS admin. Works like any OpenAI-compatible endpoint. - """ - from hermes_cli.auth import ( - _prompt_model_selection, - _save_model_choice, - deactivate_provider, - ) - from hermes_cli.config import ( - load_config, - save_config, - get_env_value, - save_env_value, - ) - from hermes_cli.models import _PROVIDER_MODELS - - mantle_base_url = f"https://bedrock-mantle.{region}.api.aws/v1" - - # Prompt for API key - existing_key = get_env_value("AWS_BEARER_TOKEN_BEDROCK") or "" - if existing_key: - from hermes_cli.env_loader import format_secret_source_suffix - source_suffix = format_secret_source_suffix("AWS_BEARER_TOKEN_BEDROCK") - print(f" Bedrock API Key: {existing_key[:12]}... 
✓{source_suffix}") - else: - print(f" Endpoint: {mantle_base_url}") - print() - from hermes_cli.secret_prompt import masked_secret_prompt - - try: - api_key = masked_secret_prompt(" Bedrock API Key: ").strip() - except (KeyboardInterrupt, EOFError): - print() - return - if not api_key: - print(" Cancelled.") - return - save_env_value("AWS_BEARER_TOKEN_BEDROCK", api_key) - existing_key = api_key - print(" ✓ API key saved.") - print() - - # Model selection — use static list (mantle doesn't need boto3 for discovery) - model_list = _PROVIDER_MODELS.get("bedrock", []) - print(f" Showing {len(model_list)} curated models") - - if model_list: - selected = _prompt_model_selection(model_list, current_model=current_model) - else: - try: - selected = input(" Model ID: ").strip() - except (KeyboardInterrupt, EOFError): - selected = None - - if selected: - _save_model_choice(selected) - - # Save as custom provider pointing to bedrock-mantle - cfg = load_config() - model = cfg.get("model") - if not isinstance(model, dict): - model = {"default": model} if model else {} - cfg["model"] = model - model["provider"] = "custom" - model["base_url"] = mantle_base_url - model.pop("api_mode", None) # chat_completions is the default - - # Also save region in bedrock config for reference - bedrock_cfg = cfg.get("bedrock", {}) - if not isinstance(bedrock_cfg, dict): - bedrock_cfg = {} - bedrock_cfg["region"] = region - cfg["bedrock"] = bedrock_cfg - - # Save the API key env var name so hermes knows where to find it - save_env_value("OPENAI_API_KEY", existing_key) - save_env_value("OPENAI_BASE_URL", mantle_base_url) - - save_config(cfg) - deactivate_provider() - - print(f" Default model set to: {selected} (via Bedrock API Key, {region})") - print(f" Endpoint: {mantle_base_url}") - else: - print(" No change.") - - -def _model_flow_bedrock(config, current_model=""): - """AWS Bedrock provider: verify credentials, pick region, discover models. 
- - Uses the native Converse API via boto3 — not the OpenAI-compatible endpoint. - Auth is handled by the AWS SDK default credential chain (env vars, profile, - instance role), so no API key prompt is needed. - """ - from hermes_cli.auth import ( - _prompt_model_selection, - _save_model_choice, - deactivate_provider, - ) - from hermes_cli.config import load_config, save_config - from hermes_cli.models import _PROVIDER_MODELS - - # 1. Check for AWS credentials - try: - from agent.bedrock_adapter import ( - has_aws_credentials, - resolve_aws_auth_env_var, - resolve_bedrock_region, - discover_bedrock_models, - ) - except ImportError: - print(" ✗ boto3 is not installed. Install it with:") - print(" pip install boto3") - print() - return - - if not has_aws_credentials(): - print(" ⚠ No AWS credentials detected via environment variables.") - print(" Bedrock will use boto3's default credential chain (IMDS, SSO, etc.)") - print() - - auth_var = resolve_aws_auth_env_var() - if auth_var: - print(f" AWS credentials: {auth_var} ✓") - else: - print(" AWS credentials: boto3 default chain (instance role / SSO)") - print() - - # 2. Region selection - current_region = resolve_bedrock_region() - try: - region_input = input(f" AWS Region [{current_region}]: ").strip() - except (KeyboardInterrupt, EOFError): - print() - return - region = region_input or current_region - - # 2b. Authentication mode - print(" Choose authentication method:") - print() - print(" 1. IAM credential chain (recommended)") - print(" Works with EC2 instance roles, SSO, env vars, aws configure") - print(" 2. Bedrock API Key") - print(" Enter your Bedrock API Key directly — also supports") - print(" team scenarios where an admin distributes keys") - print() - try: - auth_choice = input(" Choice [1]: ").strip() - except (KeyboardInterrupt, EOFError): - print() - return - - if auth_choice == "2": - _model_flow_bedrock_api_key(config, region, current_model) - return - - # 3. 
Model discovery — try live API first, fall back to static list - print(f" Discovering models in {region}...") - live_models = discover_bedrock_models(region) - - if live_models: - _EXCLUDE_PREFIXES = ( - "stability.", - "cohere.embed", - "twelvelabs.", - "us.stability.", - "us.cohere.embed", - "us.twelvelabs.", - "global.cohere.embed", - "global.twelvelabs.", - ) - _EXCLUDE_SUBSTRINGS = ("safeguard", "voxtral", "palmyra-vision") - filtered = [] - for m in live_models: - mid = m["id"] - if any(mid.startswith(p) for p in _EXCLUDE_PREFIXES): - continue - if any(s in mid.lower() for s in _EXCLUDE_SUBSTRINGS): - continue - filtered.append(m) - - # Deduplicate: prefer inference profiles (us.*, global.*) over bare - # foundation model IDs. - profile_base_ids = set() - for m in filtered: - mid = m["id"] - if mid.startswith(("us.", "global.")): - base = mid.split(".", 1)[1] if "." in mid[3:] else mid - profile_base_ids.add(base) - - deduped = [] - for m in filtered: - mid = m["id"] - if not mid.startswith(("us.", "global.")) and mid in profile_base_ids: - continue - deduped.append(m) - - _RECOMMENDED = [ - "us.anthropic.claude-sonnet-4-6", - "us.anthropic.claude-opus-4-6", - "us.anthropic.claude-haiku-4-5", - "us.amazon.nova-pro", - "us.amazon.nova-lite", - "us.amazon.nova-micro", - "deepseek.v3", - "us.meta.llama4-maverick", - "us.meta.llama4-scout", - ] - - def _sort_key(m): - mid = m["id"] - for i, rec in enumerate(_RECOMMENDED): - if mid.startswith(rec): - return (0, i, mid) - if mid.startswith("global."): - return (1, 0, mid) - return (2, 0, mid) - - deduped.sort(key=_sort_key) - model_list = [m["id"] for m in deduped] - print( - f" Found {len(model_list)} text model(s) (filtered from {len(live_models)} total)" - ) - else: - model_list = _PROVIDER_MODELS.get("bedrock", []) - if model_list: - print( - f" Using {len(model_list)} curated models (live discovery unavailable)" - ) - else: - print( - " No models found. Check IAM permissions for bedrock:ListFoundationModels." 
- ) - return - - # 4. Model selection - if model_list: - selected = _prompt_model_selection(model_list, current_model=current_model) - else: - try: - selected = input(" Model ID: ").strip() - except (KeyboardInterrupt, EOFError): - selected = None - - if selected: - _save_model_choice(selected) - - cfg = load_config() - model = cfg.get("model") - if not isinstance(model, dict): - model = {"default": model} if model else {} - cfg["model"] = model - model["provider"] = "bedrock" - model["base_url"] = f"https://bedrock-runtime.{region}.amazonaws.com" - model.pop("api_mode", None) # bedrock_converse is auto-detected - - bedrock_cfg = cfg.get("bedrock", {}) - if not isinstance(bedrock_cfg, dict): - bedrock_cfg = {} - bedrock_cfg["region"] = region - cfg["bedrock"] = bedrock_cfg - - save_config(cfg) - deactivate_provider() - - print(f" Default model set to: {selected} (via AWS Bedrock, {region})") - else: - print(" No change.") - - -def _model_flow_api_key_provider(config, provider_id, current_model=""): - """Generic flow for API-key providers (z.ai, MiniMax, OpenCode, etc.).""" - from hermes_cli.auth import ( - PROVIDER_REGISTRY, - _prompt_model_selection, - _save_model_choice, - deactivate_provider, - ) - from hermes_cli.config import ( - get_env_value, - save_env_value, - load_config, - save_config, - ) - from hermes_cli.models import ( - _PROVIDER_MODELS, - fetch_api_models, - opencode_model_api_mode, - normalize_opencode_model_id, - ) - - pconfig = PROVIDER_REGISTRY[provider_id] - key_env = pconfig.api_key_env_vars[0] if pconfig.api_key_env_vars else "" - base_url_env = pconfig.base_url_env_var or "" - - # Check / prompt for API key - existing_key = "" - for ev in pconfig.api_key_env_vars: - existing_key = get_env_value(ev) or os.getenv(ev, "") - if existing_key: - break - - existing_key, abort = _prompt_api_key( - pconfig, existing_key, provider_id=provider_id - ) - if abort: - return - - # Gemini free-tier gate: free-tier daily quotas (<= 250 RPD for Flash) - # 
are exhausted in a handful of agent turns, so refuse to wire up the - # provider with a free-tier key. Probe is best-effort; network or auth - # errors fall through without blocking. - if provider_id == "gemini" and existing_key: - try: - from agent.gemini_native_adapter import probe_gemini_tier - except Exception: - probe_gemini_tier = None - if probe_gemini_tier is not None: - print(" Checking Gemini API tier...") - probe_base = ( - (get_env_value(base_url_env) if base_url_env else "") - or os.getenv(base_url_env or "", "") - or pconfig.inference_base_url - ) - tier = probe_gemini_tier(existing_key, probe_base) - if tier == "free": - print() - print( - "❌ This Google API key is on the free tier " - "(<= 250 requests/day for gemini-2.5-flash)." - ) - print( - " Hermes typically makes 3-10 API calls per user turn " - "(tool iterations + auxiliary tasks)," - ) - print( - " so the free tier is exhausted after a handful of " - "messages and cannot sustain" - ) - print(" an agent session.") - print() - print( - " To use Gemini with Hermes, enable billing on your " - "Google Cloud project and regenerate" - ) - print( - " the key in a billing-enabled project: " - "https://aistudio.google.com/apikey" - ) - print() - print( - " Alternatives with workable free usage: DeepSeek, " - "OpenRouter (free models), Groq, Nous." - ) - print() - print("Not saving Gemini as the default provider.") - return - if tier == "paid": - print(" Tier check: paid ✓") - else: - # "unknown" -- network issue, auth problem, unexpected response. - # Don't block; the runtime 429 handler will surface free-tier - # guidance if the key turns out to be free tier. - print(" Tier check: could not verify (proceeding anyway).") - print() - - # Optional base URL override. - # Precedence: env var → config.yaml model.base_url → registry default. - # Reading config.yaml prevents silently overwriting a saved remote URL - # (e.g. 
a remote LM Studio endpoint) with localhost when the user just - # presses Enter at the prompt below. - current_base = "" - if base_url_env: - current_base = get_env_value(base_url_env) or os.getenv(base_url_env, "") - if not current_base: - try: - _m = load_config().get("model") or {} - if str(_m.get("provider") or "").strip().lower() == provider_id: - current_base = str(_m.get("base_url") or "").strip() - except Exception: - pass - effective_base = current_base or pconfig.inference_base_url - - try: - override = input(f"Base URL [{effective_base}]: ").strip() - except (KeyboardInterrupt, EOFError): - print() - override = "" - if override and base_url_env: - if not override.startswith(("http://", "https://")): - print( - " Invalid URL — must start with http:// or https://. Keeping current value." - ) - else: - save_env_value(base_url_env, override) - effective_base = override - - # Model selection — resolution order: - # 1. models.dev registry (cached, filtered for agentic/tool-capable models) - # 2. Curated static fallback list (offline insurance) - # 3. Live /models endpoint probe (small providers without models.dev data) - # - # LM Studio: live /api/v1/models probe (no models.dev catalog). - # Ollama Cloud: merged discovery (live API + models.dev + disk cache). 
- if provider_id == "lmstudio": - from hermes_cli.auth import AuthError - from hermes_cli.models import fetch_lmstudio_models - - api_key_for_probe = existing_key or (get_env_value(key_env) if key_env else "") - try: - model_list = fetch_lmstudio_models( - api_key=api_key_for_probe, base_url=effective_base - ) - except AuthError as exc: - print(f" LM Studio rejected the request: {exc}") - print(" Set LM_API_KEY (or update it) to match the server's bearer token.") - model_list = [] - if model_list: - print(f" Found {len(model_list)} model(s) from LM Studio") - elif provider_id == "ollama-cloud": - from hermes_cli.models import fetch_ollama_cloud_models - - api_key_for_probe = existing_key or (get_env_value(key_env) if key_env else "") - # During setup, force a live refresh so the picker reflects newly - # released models (e.g. deepseek v4 flash, kimi k2.6) the moment - # the user enters their key — not an hour later when the disk - # cache TTL expires. - model_list = fetch_ollama_cloud_models( - api_key=api_key_for_probe, - base_url=effective_base, - force_refresh=True, - ) - if model_list: - print(f" Found {len(model_list)} model(s) from Ollama Cloud") - elif provider_id == "novita": - from hermes_cli.models import fetch_api_models - - api_key_for_probe = existing_key or (get_env_value(key_env) if key_env else "") - curated = _PROVIDER_MODELS.get(provider_id, []) - live_models = fetch_api_models(api_key_for_probe, effective_base) - if live_models: - model_list = live_models - print(f" Found {len(model_list)} model(s) from {pconfig.name} API") - else: - mdev_models: list = [] - try: - from agent.models_dev import list_agentic_models - - mdev_models = list_agentic_models(provider_id) - except Exception: - pass - if mdev_models: - seen = {m.lower() for m in mdev_models} - model_list = list(mdev_models) - for m in curated: - if m.lower() not in seen: - model_list.append(m) - seen.add(m.lower()) - print(f" Found {len(model_list)} model(s) from models.dev registry") - 
else: - model_list = curated - if model_list: - print( - f' Showing {len(model_list)} curated models — use "Enter custom model name" for others.' - ) - else: - curated = _PROVIDER_MODELS.get(provider_id, []) - - # Try models.dev first — returns tool-capable models, filtered for noise - mdev_models: list = [] - try: - from agent.models_dev import list_agentic_models - - mdev_models = list_agentic_models(provider_id) - except Exception: - pass - - if mdev_models: - # Merge models.dev with curated list so newly added models - # (not yet in models.dev) still appear in the picker. - if curated: - seen = {m.lower() for m in mdev_models} - merged = list(mdev_models) - for m in curated: - if m.lower() not in seen: - merged.append(m) - seen.add(m.lower()) - model_list = merged - else: - model_list = mdev_models - print(f" Found {len(model_list)} model(s) from models.dev registry") - elif curated and len(curated) >= 8: - # Curated list is substantial — use it directly, skip live probe - model_list = curated - print( - f' Showing {len(model_list)} curated models — use "Enter custom model name" for others.' - ) - else: - api_key_for_probe = existing_key or ( - get_env_value(key_env) if key_env else "" - ) - live_models = fetch_api_models(api_key_for_probe, effective_base) - if live_models and len(live_models) >= len(curated): - model_list = live_models - print(f" Found {len(model_list)} model(s) from {pconfig.name} API") - else: - model_list = curated - if model_list: - print( - f' Showing {len(model_list)} curated models — use "Enter custom model name" for others.' 
- ) - # else: no defaults either, will fall through to raw input - - if provider_id in {"opencode-zen", "opencode-go"}: - model_list = [ - normalize_opencode_model_id(provider_id, mid) for mid in model_list - ] - current_model = normalize_opencode_model_id(provider_id, current_model) - model_list = list(dict.fromkeys(mid for mid in model_list if mid)) - - if model_list: - selected = _prompt_model_selection(model_list, current_model=current_model) - else: - try: - selected = input("Model name: ").strip() - except (KeyboardInterrupt, EOFError): - selected = None - - if selected: - if provider_id in {"opencode-zen", "opencode-go"}: - selected = normalize_opencode_model_id(provider_id, selected) - - _save_model_choice(selected) - - # Update config with provider, base URL, and provider-specific API mode - cfg = load_config() - model = cfg.get("model") - if not isinstance(model, dict): - model = {"default": model} if model else {} - cfg["model"] = model - model["provider"] = provider_id - model["base_url"] = effective_base - if provider_id in {"opencode-zen", "opencode-go"}: - model["api_mode"] = opencode_model_api_mode(provider_id, selected) - else: - model.pop("api_mode", None) - save_config(cfg) - deactivate_provider() - - print(f"Default model set to: {selected} (via {pconfig.name})") - else: - print("No change.") def _run_anthropic_oauth_flow(save_env_value): @@ -6374,157 +3954,6 @@ def _run_anthropic_oauth_flow(save_env_value): return False -def _model_flow_anthropic(config, current_model=""): - """Flow for Anthropic provider — OAuth subscription, API key, or Claude Code creds.""" - from hermes_cli.auth import ( - _prompt_model_selection, - _save_model_choice, - deactivate_provider, - ) - from hermes_cli.config import ( - save_env_value, - load_config, - save_config, - save_anthropic_api_key, - ) - from hermes_cli.models import _PROVIDER_MODELS - - # Check ALL credential sources - from hermes_cli.auth import get_anthropic_key - - existing_key = get_anthropic_key() 
- cc_available = False - try: - from agent.anthropic_adapter import ( - read_claude_code_credentials, - is_claude_code_token_valid, - _is_oauth_token, - ) - - cc_creds = read_claude_code_credentials() - if cc_creds and is_claude_code_token_valid(cc_creds): - cc_available = True - except Exception: - pass - - # Stale-OAuth guard: if the only existing cred is an expired OAuth token - # (no valid cc_creds to fall back on), treat it as missing so the re-auth - # path is offered instead of silently accepting a broken token. - existing_is_stale_oauth = False - if existing_key and _is_oauth_token(existing_key) and not cc_available: - existing_is_stale_oauth = True - - has_creds = (bool(existing_key) and not existing_is_stale_oauth) or cc_available - needs_auth = not has_creds - - if has_creds: - # Show what we found - if existing_key: - from hermes_cli.env_loader import format_secret_source_suffix - from hermes_cli.auth import PROVIDER_REGISTRY - - # Surface which env var supplied the key so users with - # Bitwarden see "(from Bitwarden)" — without this, a detected - # BSM key looks identical to a key in .env and users assume - # nothing is wired up. - source_suffix = "" - for var in PROVIDER_REGISTRY["anthropic"].api_key_env_vars: - if os.getenv(var, "").strip() == existing_key: - source_suffix = format_secret_source_suffix(var) - if source_suffix: - break - print( - f" Anthropic credentials: {existing_key[:12]}... ✓{source_suffix}" - ) - elif cc_available: - print(" Claude Code credentials: ✓ (auto-detected)") - print() - print(" 1. Use existing credentials") - print(" 2. Reauthenticate (new OAuth login)") - print(" 3. 
Cancel") - print() - try: - choice = input(" Choice [1/2/3]: ").strip() - except (KeyboardInterrupt, EOFError): - choice = "1" - - if choice == "2": - needs_auth = True - elif choice == "3": - return - # choice == "1" or default: use existing, proceed to model selection - - if needs_auth: - # Show auth method choice - print() - print(" Choose authentication method:") - print() - print(" 1. Claude Pro/Max subscription (OAuth login)") - print(" 2. Anthropic API key (pay-per-token)") - print(" 3. Cancel") - print() - try: - choice = input(" Choice [1/2/3]: ").strip() - except (KeyboardInterrupt, EOFError): - print() - return - - if choice == "1": - if not _run_anthropic_oauth_flow(save_env_value): - return - - elif choice == "2": - print() - print(" Get an API key at: https://platform.claude.com/settings/keys") - print() - from hermes_cli.secret_prompt import masked_secret_prompt - - try: - api_key = masked_secret_prompt(" API key (sk-ant-...): ").strip() - except (KeyboardInterrupt, EOFError): - print() - return - if not api_key: - print(" Cancelled.") - return - save_anthropic_api_key(api_key, save_fn=save_env_value) - print(" ✓ API key saved.") - - else: - print(" No change.") - return - print() - - # Model selection - model_list = _PROVIDER_MODELS.get("anthropic", []) - if model_list: - selected = _prompt_model_selection(model_list, current_model=current_model) - else: - try: - selected = input("Model name (e.g., claude-sonnet-4-20250514): ").strip() - except (KeyboardInterrupt, EOFError): - selected = None - - if selected: - _save_model_choice(selected) - - # Update config with provider — clear base_url since - # resolve_runtime_provider() always hardcodes Anthropic's URL. - # Leaving a stale base_url in config can contaminate other - # providers if the user switches without running 'hermes model'. 
- cfg = load_config() - model = cfg.get("model") - if not isinstance(model, dict): - model = {"default": model} if model else {} - cfg["model"] = model - model["provider"] = "anthropic" - model.pop("base_url", None) - save_config(cfg) - deactivate_provider() - - print(f"Default model set to: {selected} (via Anthropic)") - else: - print("No change.") def cmd_login(args): diff --git a/hermes_cli/model_setup_flows.py b/hermes_cli/model_setup_flows.py new file mode 100644 index 00000000000..f4d8e43cff9 --- /dev/null +++ b/hermes_cli/model_setup_flows.py @@ -0,0 +1,2648 @@ +"""Per-provider model-selection wizard flows for ``hermes setup`` / ``hermes model``. + +Extracted from ``hermes_cli/main.py`` as part of the god-file decomposition +campaign (``~/.hermes/plans/god-file-decomposition.md``, Phase 2 — splitting +main.py handler/flow bodies out of the module). These 18 ``_model_flow_*`` +functions are the interactive provider-setup branches dispatched by +``select_provider_and_model`` (which stays in main.py). + +Behavior-neutral: each function is lifted verbatim. ``select_provider_and_model`` +in main.py re-imports them (``from hermes_cli.model_setup_flows import *``-style +explicit import) so existing call sites — and test monkeypatches that target +``hermes_cli.main._model_flow_*`` — keep resolving against main.py's namespace. + +main.py-internal helpers the flows call (``_prompt_api_key``, ``_save_custom_provider``, +the reasoning-effort/stepfun/qwen helpers, ``_run_anthropic_oauth_flow``, …) are +imported lazily inside the flows (``from hermes_cli.main import ...`` resolves at +call time, when main.py is fully loaded) so this module never imports +``hermes_cli.main`` at import time -> no import cycle. 
+""" + +from __future__ import annotations + +import argparse +import os +import subprocess + + +def _model_flow_openrouter(config, current_model=""): + """OpenRouter provider: ensure API key, then pick model.""" + from hermes_cli.main import _prompt_api_key + from hermes_constants import OPENROUTER_BASE_URL + from hermes_cli.auth import ( + ProviderConfig, + _prompt_model_selection, + _save_model_choice, + deactivate_provider, + ) + from hermes_cli.config import get_env_value + + # Route through _prompt_api_key so users can replace a stale/broken key + # in-flow (K/R/C) instead of having to edit ~/.hermes/.env by hand. The + # previous bypass-when-key-exists branch left no way to recover from a + # bad paste short of re-running `hermes setup` from scratch. OpenRouter + # isn't in PROVIDER_REGISTRY so we synthesize a minimal pconfig. + pconfig = ProviderConfig( + id="openrouter", + name="OpenRouter", + auth_type="api_key", + api_key_env_vars=("OPENROUTER_API_KEY",), + ) + existing_key = get_env_value("OPENROUTER_API_KEY") or "" + if not existing_key: + print("Get one at: https://openrouter.ai/keys") + print() + _resolved, abort = _prompt_api_key(pconfig, existing_key, provider_id="openrouter") + if abort: + return + + from hermes_cli.models import model_ids, get_pricing_for_provider + + openrouter_models = model_ids(force_refresh=True) + + # Fetch live pricing (non-blocking — returns empty dict on failure) + pricing = get_pricing_for_provider("openrouter", force_refresh=True) + + selected = _prompt_model_selection( + openrouter_models, current_model=current_model, pricing=pricing + ) + if selected: + _save_model_choice(selected) + + # Update config provider and deactivate any OAuth provider + from hermes_cli.config import load_config, save_config + + cfg = load_config() + model = cfg.get("model") + if not isinstance(model, dict): + model = {"default": model} if model else {} + cfg["model"] = model + model["provider"] = "openrouter" + model["base_url"] = 
OPENROUTER_BASE_URL + model["api_mode"] = "chat_completions" + save_config(cfg) + deactivate_provider() + print(f"Default model set to: {selected} (via OpenRouter)") + else: + print("No change.") + +def _model_flow_nous(config, current_model="", args=None): + """Nous Portal provider: ensure logged in, then pick model.""" + from hermes_cli.auth import ( + get_provider_auth_state, + _prompt_model_selection, + _save_model_choice, + _update_config_for_provider, + resolve_nous_runtime_credentials, + AuthError, + format_auth_error, + _login_nous, + PROVIDER_REGISTRY, + ) + from hermes_cli.config import ( + get_env_value, + load_config, + save_config, + save_env_value, + ) + from hermes_cli.nous_subscription import prompt_enable_tool_gateway + + state = get_provider_auth_state("nous") + if not state or not state.get("access_token"): + print("Not logged into Nous Portal. Starting login...") + print() + try: + mock_args = argparse.Namespace( + portal_url=getattr(args, "portal_url", None), + inference_url=getattr(args, "inference_url", None), + client_id=getattr(args, "client_id", None), + scope=getattr(args, "scope", None), + no_browser=bool(getattr(args, "no_browser", False)), + timeout=getattr(args, "timeout", None) or 15.0, + ca_bundle=getattr(args, "ca_bundle", None), + insecure=bool(getattr(args, "insecure", False)), + ) + _login_nous(mock_args, PROVIDER_REGISTRY["nous"]) + # Offer Tool Gateway enablement for paid subscribers + try: + _refreshed = load_config() or {} + prompt_enable_tool_gateway(_refreshed) + except Exception: + pass + except SystemExit: + print("Login cancelled or failed.") + return + except Exception as exc: + print(f"Login failed: {exc}") + return + # login_nous already handles model selection + config update + return + + # Already logged in — use curated model list (same as OpenRouter defaults). + # The live /models endpoint returns hundreds of models; the curated list + # shows only agentic models users recognize from OpenRouter. 
+ from hermes_cli.models import ( + get_curated_nous_model_ids, + get_pricing_for_provider, + check_nous_free_tier, + partition_nous_models_by_tier, + union_with_portal_free_recommendations, + union_with_portal_paid_recommendations, + ) + + model_ids = get_curated_nous_model_ids() + if not model_ids: + print("No curated models available for Nous Portal.") + return + + # Verify credentials are still valid (catches expired sessions early) + try: + creds = resolve_nous_runtime_credentials() + except Exception as exc: + relogin = isinstance(exc, AuthError) and exc.relogin_required + msg = format_auth_error(exc) if isinstance(exc, AuthError) else str(exc) + if relogin: + print(f"Session expired: {msg}") + print("Re-authenticating with Nous Portal...\n") + try: + mock_args = argparse.Namespace( + portal_url=None, + inference_url=None, + client_id=None, + scope=None, + no_browser=False, + timeout=15.0, + ca_bundle=None, + insecure=False, + ) + _login_nous(mock_args, PROVIDER_REGISTRY["nous"]) + except Exception as login_exc: + print(f"Re-login failed: {login_exc}") + return + print(f"Could not verify credentials: {msg}") + return + + # Fetch live pricing (non-blocking — returns empty dict on failure) + pricing = get_pricing_for_provider("nous") + + # Force fresh account data for model selection so recent credit purchases + # are reflected immediately. + free_tier = check_nous_free_tier(force_fresh=True) + if not free_tier: + try: + refreshed_creds = resolve_nous_runtime_credentials( + force_refresh=True, + ) + if refreshed_creds: + creds = refreshed_creds + except Exception: + # Runtime inference has its own paid-entitlement recovery path; do + # not block model selection if this opportunistic refresh fails. + pass + + # Resolve portal URL early — needed both for upgrade links and for the + # freeRecommendedModels endpoint below. 
+ _nous_portal_url = "" + try: + _nous_state = get_provider_auth_state("nous") + if _nous_state: + _nous_portal_url = _nous_state.get("portal_base_url", "") + except Exception: + pass + + # For free users: partition models into selectable/unavailable based on + # whether they are free per the Portal-reported pricing. First augment + # with the Portal's freeRecommendedModels list so newly-launched free + # models show up even if this CLI build's hardcoded curated list and + # docs-hosted manifest haven't caught up yet. + # + # For paid users: mirror the same idea with paidRecommendedModels so + # newly-launched paid models surface in the picker too — independent + # of CLI release cadence. + unavailable_models: list[str] = [] + unavailable_message = "" + if free_tier: + try: + from hermes_cli.nous_account import ( + format_nous_portal_entitlement_message, + get_nous_portal_account_info, + ) + + _account_info = get_nous_portal_account_info(force_fresh=True) + unavailable_message = ( + format_nous_portal_entitlement_message( + _account_info, + capability="paid Nous models", + ) + or "" + ) + except Exception: + unavailable_message = "" + model_ids, pricing = union_with_portal_free_recommendations( + model_ids, pricing, _nous_portal_url, + ) + model_ids, unavailable_models = partition_nous_models_by_tier( + model_ids, pricing, free_tier=True + ) + else: + model_ids, pricing = union_with_portal_paid_recommendations( + model_ids, pricing, _nous_portal_url, + ) + + if not model_ids and not unavailable_models: + print("No models available for Nous Portal after filtering.") + return + + if free_tier and not model_ids: + print("No free models currently available.") + if unavailable_models: + from hermes_cli.auth import DEFAULT_NOUS_PORTAL_URL + + _url = (_nous_portal_url or DEFAULT_NOUS_PORTAL_URL).rstrip("/") + print(unavailable_message or f"Upgrade at {_url} to access paid models.") + return + + print( + f'Showing {len(model_ids)} curated models — use "Enter custom model 
name" for others.' + ) + + selected = _prompt_model_selection( + model_ids, + current_model=current_model, + pricing=pricing, + unavailable_models=unavailable_models, + portal_url=_nous_portal_url, + unavailable_message=unavailable_message, + ) + if selected: + _save_model_choice(selected) + # Reactivate Nous as the provider and update config + inference_url = creds.get("base_url", "") + _update_config_for_provider("nous", inference_url) + current_model_cfg = config.get("model") + if isinstance(current_model_cfg, dict): + model_cfg = dict(current_model_cfg) + elif isinstance(current_model_cfg, str) and current_model_cfg.strip(): + model_cfg = {"default": current_model_cfg.strip()} + else: + model_cfg = {} + model_cfg["provider"] = "nous" + model_cfg["default"] = selected + if inference_url and inference_url.strip(): + model_cfg["base_url"] = inference_url.rstrip("/") + else: + model_cfg.pop("base_url", None) + config["model"] = model_cfg + # Clear any custom endpoint that might conflict + if get_env_value("OPENAI_BASE_URL"): + save_env_value("OPENAI_BASE_URL", "") + save_env_value("OPENAI_API_KEY", "") + save_config(config) + print(f"Default model set to: {selected} (via Nous Portal)") + # Offer Tool Gateway enablement for paid subscribers + prompt_enable_tool_gateway(config) + else: + print("No change.") + +def _model_flow_openai_codex(config, current_model=""): + """OpenAI Codex provider: ensure logged in, then pick model.""" + from hermes_cli.auth import ( + get_codex_auth_status, + _prompt_model_selection, + _save_model_choice, + _update_config_for_provider, + _login_openai_codex, + PROVIDER_REGISTRY, + DEFAULT_CODEX_BASE_URL, + ) + from hermes_cli.codex_models import get_codex_model_ids + + status = get_codex_auth_status() + if status.get("logged_in"): + print(" OpenAI Codex credentials: ✓") + print() + print(" 1. Use existing credentials") + print(" 2. Reauthenticate (new OAuth login)") + print(" 3. 
Cancel") + print() + try: + choice = input(" Choice [1/2/3]: ").strip() + except (KeyboardInterrupt, EOFError): + choice = "1" + + if choice == "2": + print("Starting a fresh OpenAI Codex login...") + print() + try: + mock_args = argparse.Namespace() + _login_openai_codex( + mock_args, + PROVIDER_REGISTRY["openai-codex"], + force_new_login=True, + ) + except SystemExit: + print("Login cancelled or failed.") + return + except Exception as exc: + print(f"Login failed: {exc}") + return + status = get_codex_auth_status() + if not status.get("logged_in"): + print("Login failed.") + return + elif choice == "3": + return + else: + print("Not logged into OpenAI Codex. Starting login...") + print() + try: + mock_args = argparse.Namespace() + _login_openai_codex(mock_args, PROVIDER_REGISTRY["openai-codex"]) + except SystemExit: + print("Login cancelled or failed.") + return + except Exception as exc: + print(f"Login failed: {exc}") + return + + _codex_token = None + # Prefer credential pool (where `hermes auth` stores device_code tokens), + # fall back to legacy provider state. 
+ try: + _codex_status = get_codex_auth_status() + if _codex_status.get("logged_in"): + _codex_token = _codex_status.get("api_key") + except Exception: + pass + if not _codex_token: + try: + from hermes_cli.auth import resolve_codex_runtime_credentials + + _codex_creds = resolve_codex_runtime_credentials() + _codex_token = _codex_creds.get("api_key") + except Exception: + pass + + codex_models = get_codex_model_ids(access_token=_codex_token) + + selected = _prompt_model_selection(codex_models, current_model=current_model) + if selected: + _save_model_choice(selected) + _update_config_for_provider("openai-codex", DEFAULT_CODEX_BASE_URL) + print(f"Default model set to: {selected} (via OpenAI Codex)") + else: + print("No change.") + +def _model_flow_xai_oauth(_config, current_model="", *, args=None): + """xAI Grok OAuth (SuperGrok / Premium+) provider: ensure logged in, then pick model.""" + from hermes_cli.auth import ( + get_xai_oauth_auth_status, + _prompt_model_selection, + _save_model_choice, + _update_config_for_provider, + resolve_xai_oauth_runtime_credentials, + _login_xai_oauth, + DEFAULT_XAI_OAUTH_BASE_URL, + PROVIDER_REGISTRY, + ) + from hermes_cli.models import _PROVIDER_MODELS + + status = get_xai_oauth_auth_status() + if status.get("logged_in"): + print(" xAI Grok OAuth (SuperGrok / Premium+) credentials: ✓") + print() + print(" 1. Use existing credentials") + print(" 2. Reauthenticate (new OAuth login)") + print(" 3. Cancel") + print() + try: + choice = input(" Choice [1/2/3]: ").strip() + except (KeyboardInterrupt, EOFError): + choice = "1" + + if choice == "2": + print("Starting a fresh xAI OAuth login...") + print() + try: + # Forward CLI flags from ``hermes model --manual-paste`` + # / ``--no-browser`` / ``--timeout`` into the loopback + # login. Without this, browser-only remotes (#26923) + # can't reach the manual-paste path via ``hermes model``. 
+ mock_args = argparse.Namespace( + manual_paste=bool(getattr(args, "manual_paste", False)), + no_browser=bool(getattr(args, "no_browser", False)), + timeout=getattr(args, "timeout", None), + ) + _login_xai_oauth( + mock_args, + PROVIDER_REGISTRY["xai-oauth"], + force_new_login=True, + ) + except SystemExit: + print("Login cancelled or failed.") + return + except Exception as exc: + print(f"Login failed: {exc}") + return + elif choice == "3": + return + else: + print("Not logged into xAI Grok OAuth (SuperGrok / Premium+). Starting login...") + print() + try: + mock_args = argparse.Namespace( + manual_paste=bool(getattr(args, "manual_paste", False)), + no_browser=bool(getattr(args, "no_browser", False)), + timeout=getattr(args, "timeout", None), + ) + _login_xai_oauth(mock_args, PROVIDER_REGISTRY["xai-oauth"]) + except SystemExit: + print("Login cancelled or failed.") + return + except Exception as exc: + print(f"Login failed: {exc}") + return + + # Resolve a usable base URL. ``resolve_xai_oauth_runtime_credentials`` + # only reads from the auth.json singleton — but credentials may legitimately + # live only in the pool (e.g. after ``hermes auth add xai-oauth``). Fall + # back to the default base URL in that case so the model picker still + # completes successfully instead of bailing out with + # ``Could not resolve xAI OAuth credentials``. 
+ base_url = DEFAULT_XAI_OAUTH_BASE_URL + try: + creds = resolve_xai_oauth_runtime_credentials() + base_url = (creds.get("base_url") or "").strip().rstrip("/") or base_url + except Exception: + pass + + models = list(_PROVIDER_MODELS.get("xai-oauth") or _PROVIDER_MODELS.get("xai") or []) + selected = _prompt_model_selection(models, current_model=current_model or (models[0] if models else "grok-4.3")) + if selected: + _save_model_choice(selected) + _update_config_for_provider("xai-oauth", base_url) + print(f"Default model set to: {selected} (via xAI Grok OAuth — SuperGrok / Premium+)") + else: + print("No change.") + +def _model_flow_qwen_oauth(_config, current_model=""): + """Qwen OAuth provider: reuse local Qwen CLI login, then pick model.""" + from hermes_cli.main import _DEFAULT_QWEN_PORTAL_MODELS + from hermes_cli.auth import ( + get_qwen_auth_status, + resolve_qwen_runtime_credentials, + _prompt_model_selection, + _save_model_choice, + _update_config_for_provider, + DEFAULT_QWEN_BASE_URL, + ) + from hermes_cli.models import fetch_api_models + + status = get_qwen_auth_status() + if not status.get("logged_in"): + print("Not logged into Qwen CLI OAuth.") + print("Run: qwen auth qwen-oauth") + auth_file = status.get("auth_file") + if auth_file: + print(f"Expected credentials file: {auth_file}") + if status.get("error"): + print(f"Error: {status.get('error')}") + return + + # Try live model discovery, fall back to curated list. 
+ models = None + try: + creds = resolve_qwen_runtime_credentials(refresh_if_expiring=True) + models = fetch_api_models(creds["api_key"], creds["base_url"]) + except Exception: + pass + if not models: + models = list(_DEFAULT_QWEN_PORTAL_MODELS) + + default = current_model or (models[0] if models else "qwen3-coder-plus") + selected = _prompt_model_selection(models, current_model=default) + if selected: + _save_model_choice(selected) + _update_config_for_provider("qwen-oauth", DEFAULT_QWEN_BASE_URL) + print(f"Default model set to: {selected} (via Qwen OAuth)") + else: + print("No change.") + +def _model_flow_minimax_oauth(config, current_model="", args=None): + """MiniMax OAuth provider: ensure logged in, then pick model.""" + from hermes_cli.auth import ( + get_provider_auth_state, + _prompt_model_selection, + _save_model_choice, + _update_config_for_provider, + resolve_minimax_oauth_runtime_credentials, + AuthError, + format_auth_error, + _login_minimax_oauth, + PROVIDER_REGISTRY, + ) + + state = get_provider_auth_state("minimax-oauth") + if not state or not state.get("access_token"): + print("Not logged into MiniMax. 
Starting OAuth login...") + print() + try: + mock_args = argparse.Namespace( + region=getattr(args, "region", None) or "global", + no_browser=bool(getattr(args, "no_browser", False)), + timeout=getattr(args, "timeout", None) or 15.0, + ) + _login_minimax_oauth(mock_args, PROVIDER_REGISTRY["minimax-oauth"]) + except SystemExit: + print("Login cancelled or failed.") + return + except Exception as exc: + print(f"Login failed: {exc}") + return + + try: + creds = resolve_minimax_oauth_runtime_credentials() + except AuthError as exc: + print(format_auth_error(exc)) + return + + from hermes_cli.models import _PROVIDER_MODELS + + model_ids = _PROVIDER_MODELS.get("minimax-oauth", []) + selected = _prompt_model_selection(model_ids, current_model) + if not selected: + return + _save_model_choice(selected) + _update_config_for_provider("minimax-oauth", creds["base_url"]) + print(f"\u2713 Using MiniMax model: {selected}") + +def _model_flow_google_gemini_cli(_config, current_model=""): + """Google Gemini OAuth (PKCE) via Cloud Code Assist — supports free AND paid tiers. + + Flow: + 1. Show upfront warning about Google's ToS stance (per opencode-gemini-auth). + 2. If creds missing, run PKCE browser OAuth via agent.google_oauth. + 3. Resolve project context (env -> config -> auto-discover -> free tier). + 4. Prompt user to pick a model. + 5. Save to ~/.hermes/config.yaml. + """ + from hermes_cli.auth import ( + DEFAULT_GEMINI_CLOUDCODE_BASE_URL, + get_gemini_oauth_auth_status, + resolve_gemini_oauth_runtime_credentials, + _prompt_model_selection, + _save_model_choice, + _update_config_for_provider, + ) + from hermes_cli.models import _PROVIDER_MODELS + + print() + print("⚠ Google considers using the Gemini CLI OAuth client with third-party") + print(" software a policy violation. Some users have reported account") + print(" restrictions. 
You can use your own API key via 'gemini' provider") + print(" for the lowest-risk experience.") + print() + try: + proceed = input("Continue with OAuth login? [y/N]: ").strip().lower() + except (EOFError, KeyboardInterrupt): + print("Cancelled.") + return + if proceed not in {"y", "yes"}: + print("Cancelled.") + return + + status = get_gemini_oauth_auth_status() + if not status.get("logged_in"): + try: + from agent.google_oauth import resolve_project_id_from_env, start_oauth_flow + + env_project = resolve_project_id_from_env() + start_oauth_flow(force_relogin=True, project_id=env_project) + except Exception as exc: + print(f"OAuth login failed: {exc}") + return + + # Verify creds resolve + trigger project discovery + try: + creds = resolve_gemini_oauth_runtime_credentials(force_refresh=False) + project_id = creds.get("project_id", "") + if project_id: + print(f" Using GCP project: {project_id}") + else: + print( + " No GCP project configured — free tier will be auto-provisioned on first request." + ) + except Exception as exc: + print(f"Failed to resolve Gemini credentials: {exc}") + return + + models = list(_PROVIDER_MODELS.get("google-gemini-cli") or []) + default = current_model or (models[0] if models else "gemini-3-flash-preview") + selected = _prompt_model_selection(models, current_model=default) + if selected: + _save_model_choice(selected) + _update_config_for_provider( + "google-gemini-cli", DEFAULT_GEMINI_CLOUDCODE_BASE_URL + ) + print( + f"Default model set to: {selected} (via Google Gemini OAuth / Code Assist)" + ) + else: + print("No change.") + +def _model_flow_custom(config): + """Custom endpoint: collect URL, API key, and model name. + + Automatically saves the endpoint to ``custom_providers`` in config.yaml + so it appears in the provider menu on subsequent runs. 
+ """ + from hermes_cli.main import _auto_provider_name, _prompt_custom_api_mode_selection, _save_custom_provider + from hermes_cli.auth import _save_model_choice, deactivate_provider + from hermes_cli.config import get_env_value, load_config, save_config + from hermes_cli.secret_prompt import masked_secret_prompt + + current_url = get_env_value("OPENAI_BASE_URL") or "" + current_key = get_env_value("OPENAI_API_KEY") or "" + + print("Custom OpenAI-compatible endpoint configuration:") + if current_url: + print(f" Current URL: {current_url}") + if current_key: + print(f" Current key: {current_key[:8]}...") + print() + + try: + base_url = input( + f"API base URL [{current_url or 'e.g. https://api.example.com/v1'}]: " + ).strip() + api_key = masked_secret_prompt( + f"API key [{current_key[:8] + '...' if current_key else 'optional'}]: " + ).strip() + except (KeyboardInterrupt, EOFError): + print("\nCancelled.") + return + + if not base_url and not current_url: + print("No URL provided. Cancelled.") + return + + # Validate URL format + effective_url = base_url or current_url + if not effective_url.startswith(("http://", "https://")): + print(f"Invalid URL: {effective_url} (must start with http:// or https://)") + return + + effective_key = api_key or current_key + + # Hint: most local model servers (Ollama, vLLM, llama.cpp) require /v1 + # in the base URL for OpenAI-compatible chat completions. Prompt the + # user if the URL looks like a local server without /v1. + _url_lower = effective_url.rstrip("/").lower() + _looks_local = any( + h in _url_lower + for h in ("localhost", "127.0.0.1", "0.0.0.0", ":11434", ":8080", ":5000") + ) + if _looks_local and not _url_lower.endswith("/v1"): + print() + print(f" Hint: Did you mean to add /v1 at the end?") + print(f" Most local model servers (Ollama, vLLM, llama.cpp) require it.") + print(f" e.g. {effective_url.rstrip('/')}/v1") + try: + _add_v1 = input(" Add /v1? 
[Y/n]: ").strip().lower() + except (KeyboardInterrupt, EOFError): + _add_v1 = "n" + if _add_v1 in {"", "y", "yes"}: + effective_url = effective_url.rstrip("/") + "/v1" + if base_url: + base_url = effective_url + print(f" Updated URL: {effective_url}") + print() + + from hermes_cli.models import probe_api_models + + probe = probe_api_models(effective_key, effective_url) + if probe.get("used_fallback") and probe.get("resolved_base_url"): + print( + f"Warning: endpoint verification worked at {probe['resolved_base_url']}/models, " + f"not the exact URL you entered. Saving the working base URL instead." + ) + effective_url = probe["resolved_base_url"] + if base_url: + base_url = effective_url + elif probe.get("models") is not None: + print( + f"Verified endpoint via {probe.get('probed_url')} " + f"({len(probe.get('models') or [])} model(s) visible)" + ) + else: + print( + f"Warning: could not verify this endpoint via {probe.get('probed_url')}. " + f"Hermes will still save it." + ) + if probe.get("suggested_base_url"): + suggested = probe["suggested_base_url"] + if suggested.endswith("/v1"): + print( + f" If this server expects /v1 in the path, try base URL: {suggested}" + ) + else: + print(f" If /v1 should not be in the base URL, try: {suggested}") + + # Prompt for API compatibility mode explicitly so codex-compatible custom + # providers don't silently fall back to chat_completions. 
+ current_model_cfg = config.get("model") + current_api_mode = "" + if isinstance(current_model_cfg, dict): + current_api_mode = str(current_model_cfg.get("api_mode") or "").strip() + api_mode = _prompt_custom_api_mode_selection( + effective_url, + current_api_mode=current_api_mode, + ) + if api_mode: + print(f" API mode: {api_mode}") + else: + print(" API mode: auto-detect") + + # Select model — use probe results when available, fall back to manual input + model_name = "" + detected_models = probe.get("models") or [] + try: + if len(detected_models) == 1: + print(f" Detected model: {detected_models[0]}") + confirm = input(" Use this model? [Y/n]: ").strip().lower() + if confirm in {"", "y", "yes"}: + model_name = detected_models[0] + else: + model_name = input("Model name (e.g. gpt-4, llama-3-70b): ").strip() + elif len(detected_models) > 1: + print(" Available models:") + for i, m in enumerate(detected_models, 1): + print(f" {i}. {m}") + pick = input( + f" Select model [1-{len(detected_models)}] or type name: " + ).strip() + if pick.isdigit() and 1 <= int(pick) <= len(detected_models): + model_name = detected_models[int(pick) - 1] + elif pick: + model_name = pick + else: + model_name = input("Model name (e.g. 
gpt-4, llama-3-70b): ").strip() + + context_length_str = input( + "Context length in tokens [leave blank for auto-detect]: " + ).strip() + + # Prompt for a display name — shown in the provider menu on future runs + default_name = _auto_provider_name(effective_url) + display_name = input(f"Display name [{default_name}]: ").strip() or default_name + except (KeyboardInterrupt, EOFError): + print("\nCancelled.") + return + + context_length = None + if context_length_str: + try: + context_length = int( + context_length_str.replace(",", "") + .replace("k", "000") + .replace("K", "000") + ) + if context_length <= 0: + context_length = None + except ValueError: + print(f"Invalid context length: {context_length_str} — will auto-detect.") + context_length = None + + if model_name: + _save_model_choice(model_name) + + # Update config and deactivate any OAuth provider + cfg = load_config() + model = cfg.get("model") + if not isinstance(model, dict): + model = {"default": model} if model else {} + cfg["model"] = model + model["provider"] = "custom" + model["base_url"] = effective_url + if effective_key: + model["api_key"] = effective_key + if api_mode: + model["api_mode"] = api_mode + else: + model.pop("api_mode", None) + save_config(cfg) + deactivate_provider() + + # Sync the caller's config dict so the setup wizard's final + # save_config(config) preserves our model settings. Without + # this, the wizard overwrites model.provider/base_url with + # the stale values from its own config dict (#4172). + config["model"] = dict(model) + + print(f"Default model set to: {model_name} (via {effective_url})") + else: + if base_url or api_key: + deactivate_provider() + # Even without a model name, persist the custom endpoint on the + # caller's config dict so the setup wizard doesn't lose it. 
+ _caller_model = config.get("model") + if not isinstance(_caller_model, dict): + _caller_model = {"default": _caller_model} if _caller_model else {} + _caller_model["provider"] = "custom" + _caller_model["base_url"] = effective_url + if effective_key: + _caller_model["api_key"] = effective_key + if api_mode: + _caller_model["api_mode"] = api_mode + else: + _caller_model.pop("api_mode", None) + config["model"] = _caller_model + print("Endpoint saved. Use `/model` in chat or `hermes model` to set a model.") + + # Auto-save to custom_providers so it appears in the menu next time + _save_custom_provider( + effective_url, + effective_key, + model_name or "", + context_length=context_length, + name=display_name, + api_mode=api_mode, + ) + +def _model_flow_azure_foundry(config, current_model=""): + """Azure Foundry provider: configure endpoint, auth mode, API mode, and model. + + Azure Foundry supports both OpenAI-style (``/v1/chat/completions``) and + Anthropic-style (``/v1/messages``) endpoints, and two authentication + modes: + + * **API key** (default) — uses ``AZURE_FOUNDRY_API_KEY`` from .env. + * **Microsoft Entra ID** — keyless, RBAC-based auth via the + ``azure-identity`` SDK (Managed Identity / Workload Identity / az + login / VS Code / azd / service principal env vars). Works on both + OpenAI-style and Anthropic-style endpoints — Microsoft RBAC is + per-resource and the same ``Azure AI User`` role grants + both. For OpenAI-style the OpenAI SDK's native callable + ``api_key=`` contract is used; for Anthropic-style an + ``httpx.Client`` with a request event hook (built by + :func:`agent.azure_identity_adapter.build_bearer_http_client`) + mints a fresh JWT per request because the Anthropic SDK does not + accept a callable ``auth_token`` natively. + + The wizard auto-detects the transport and available models when + possible: + + * URLs ending in ``/anthropic`` → Anthropic Messages API. 
+ * Successful ``GET /models`` probe → OpenAI-style + populates + a picker with the returned deployment / model IDs. + * Anthropic Messages probe fallback when ``/models`` fails. + * Manual entry when every probe fails (private endpoints, etc.). + + Context lengths for the chosen model are resolved via the standard + :func:`agent.model_metadata.get_model_context_length` chain + (models.dev, provider metadata, hardcoded family fallbacks). + """ + from hermes_cli.auth import _save_model_choice, deactivate_provider # noqa: F401 + from hermes_cli.config import ( + get_env_value, + save_env_value, + load_config, + save_config, + ) + from hermes_cli import azure_detect + + # ── Load current Azure Foundry configuration ───────────────────── + model_cfg = config.get("model", {}) + if isinstance(model_cfg, dict) and model_cfg.get("provider") == "azure-foundry": + current_base_url = str(model_cfg.get("base_url", "") or "") + current_api_mode = str(model_cfg.get("api_mode", "") or "") + current_auth_mode = str(model_cfg.get("auth_mode") or "api_key").strip().lower() or "api_key" + _cur_entra = model_cfg.get("entra") or {} + current_entra = _cur_entra if isinstance(_cur_entra, dict) else {} + else: + current_base_url = "" + current_api_mode = "" + current_auth_mode = "api_key" + current_entra = {} + + current_api_key = get_env_value("AZURE_FOUNDRY_API_KEY") or "" + + print() + print("Azure Foundry Configuration") + print("=" * 50) + print() + print("Azure Foundry can host models with either OpenAI-style or") + print("Anthropic-style API endpoints. 
Hermes will probe your") + print("endpoint to auto-detect the transport and the deployed") + print("models when possible.") + print() + + if current_base_url: + print(f" Current endpoint: {current_base_url}") + if current_api_mode: + _lbl = ( + "OpenAI-style" + if current_api_mode == "chat_completions" + else "Anthropic-style" + ) + print(f" Current API mode: {_lbl}") + if current_auth_mode == "entra_id": + print(f" Current auth mode: Microsoft Entra ID (keyless)") + elif current_api_key: + print(f" Current auth mode: API key ({current_api_key[:8]}...)") + print() + + # ── Step 1: endpoint URL ───────────────────────────────────────── + try: + _placeholder = ( + current_base_url + or "e.g. https://.openai.azure.com/openai/v1 " + "or https://.services.ai.azure.com/anthropic" + ) + base_url = input( + f"API endpoint URL [{_placeholder}]: " + ).strip() + except (KeyboardInterrupt, EOFError): + print("\nCancelled.") + return + + effective_url = (base_url or current_base_url).rstrip("/") + if not effective_url: + print("No endpoint URL provided. Cancelled.") + return + if not effective_url.startswith(("http://", "https://")): + print(f"Invalid URL: {effective_url} (must start with http:// or https://)") + return + + # ── Step 2: authentication mode ────────────────────────────────── + print() + print("Authentication:") + print(" 1. API key (AZURE_FOUNDRY_API_KEY in .env)") + print(" 2. Microsoft Entra ID (managed identity / workload identity / az login)") + print(" Recommended by Microsoft. 
Works for both OpenAI-style and Anthropic-style endpoints.") + print(" Requires the 'Azure AI User' role on the Foundry resource.") + try: + _auth_default = "2" if current_auth_mode == "entra_id" else "1" + auth_choice = ( + input(f"Authentication mode [1/2] ({_auth_default}): ").strip() + or _auth_default + ) + except (KeyboardInterrupt, EOFError): + print("\nCancelled.") + return + use_entra = auth_choice == "2" + auth_mode_label = "entra_id" if use_entra else "api_key" + + # ── Step 3: credentials (key OR Entra preflight) ───────────────── + effective_key: str = "" + entra_overrides: dict = {} + token_provider = None # callable when entra + entra_scope = "" + + if use_entra: + try: + from agent.azure_identity_adapter import ( + EntraIdentityConfig, + SCOPE_AI_AZURE_DEFAULT, + build_token_provider, + describe_active_credential, + has_azure_identity_installed, + ) + except ImportError as exc: + print() + print(f"⚠ Could not import azure-identity adapter: {exc}") + print(" Falling back to API key auth.") + use_entra = False + auth_mode_label = "api_key" + + if use_entra: + print() + if not has_azure_identity_installed(): + print("◐ The 'azure-identity' package is not installed yet.") + print( + " Hermes will install it now (the preflight below " + "triggers the lazy-install). To skip lazy installs, " + "run: pip install azure-identity" + ) + + # Preserve only the optional scope override. Identity selection + # (tenant, user-assigned MI, workload identity, service principal) + # stays in Azure SDK env vars such as AZURE_CLIENT_ID. 
+ _persisted_scope_override = str(current_entra.get("scope") or "").strip() + entra_scope = _persisted_scope_override or SCOPE_AI_AZURE_DEFAULT + + entra_overrides = {} + if _persisted_scope_override: + entra_overrides["scope"] = _persisted_scope_override + + print() + print("◐ Probing Microsoft Entra ID credential chain (up to 10s)...") + _config = EntraIdentityConfig( + scope=entra_scope, + ) + info = describe_active_credential(config=_config, timeout_seconds=10.0) + if info.get("ok"): + env_sources = info.get("env_sources") or [] + tag = ", ".join(env_sources) if env_sources else "default chain" + print(f"✓ Entra ID token acquired ({tag}, scope={entra_scope})") + else: + err = info.get("error") or "credential chain exhausted" + hint = info.get("hint") or ( + "Run `az login`, attach a managed identity to this VM, or " + "set AZURE_TENANT_ID/AZURE_CLIENT_ID/AZURE_CLIENT_SECRET." + ) + print(f"⚠ {err}") + print(f" Hint: {hint}") + try: + ans = input("Save Entra config anyway and validate later? [Y/n]: ").strip().lower() + except (KeyboardInterrupt, EOFError): + print("\nCancelled.") + return + if ans and ans not in ("y", "yes"): + print("Cancelled.") + return + + # Build the token provider for the detection probe (best-effort — + # if the credential chain failed above, this will silently return + # None inside azure_detect and the probe falls back to manual). + try: + token_provider = build_token_provider(config=_config) + except Exception as exc: + print(f"⚠ Could not build token provider for probing: {exc}") + token_provider = None + else: + print() + from hermes_cli.secret_prompt import masked_secret_prompt + + try: + api_key = masked_secret_prompt( + f"API key [{current_api_key[:8] + '...' if current_api_key else 'required'}]: " + ).strip() + except (KeyboardInterrupt, EOFError): + print("\nCancelled.") + return + + effective_key = api_key or current_api_key + if not effective_key: + print("No API key provided. 
Cancelled.") + return + + # ── Step 4: auto-detect transport + models ─────────────────────── + print() + print("◐ Probing endpoint to auto-detect transport and models...") + detection = azure_detect.detect( + effective_url, + api_key=effective_key, + token_provider=token_provider, + ) + + discovered_models: list[str] = list(detection.models) + api_mode: str = detection.api_mode or "" + + if api_mode: + mode_label = ( + "OpenAI-style" if api_mode == "chat_completions" else "Anthropic-style" + ) + print(f"✓ Detected API transport: {mode_label}") + if detection.reason: + print(f" ({detection.reason})") + if discovered_models: + print( + f"✓ Found {len(discovered_models)} deployed model(s) on this endpoint" + ) + else: + print(f"⚠ Auto-detection incomplete: {detection.reason}") + print() + print("Select the API format your Azure Foundry endpoint uses:") + print(" 1. OpenAI-style (POST /v1/chat/completions)") + print(" For: GPT models, Llama, Mistral, and most open models") + print(" 2. Anthropic-style (POST /v1/messages)") + print(" For: Claude models deployed via Anthropic API format") + try: + default_choice = "2" if current_api_mode == "anthropic_messages" else "1" + mode_choice = ( + input(f"API format [1/2] ({default_choice}): ").strip() + or default_choice + ) + except (KeyboardInterrupt, EOFError): + print("\nCancelled.") + return + api_mode = "anthropic_messages" if mode_choice == "2" else "chat_completions" + + # ── Step 5: model name ─────────────────────────────────────────── + print() + effective_model = "" + if discovered_models: + print("Available models on this endpoint:") + for i, mid in enumerate(discovered_models[:30], start=1): + print(f" {i:>2}. {mid}") + if len(discovered_models) > 30: + print( + f" ... 
and {len(discovered_models) - 30} more (type name manually if not shown)" + ) + print() + try: + pick = input( + f"Pick by number, or type a deployment name [{current_model or discovered_models[0]}]: " + ).strip() + except (KeyboardInterrupt, EOFError): + print("\nCancelled.") + return + if not pick: + effective_model = current_model or discovered_models[0] + elif pick.isdigit() and 1 <= int(pick) <= min(len(discovered_models), 30): + effective_model = discovered_models[int(pick) - 1] + else: + effective_model = pick + else: + try: + model_name = input( + f"Model / deployment name [{current_model or 'e.g. gpt-5.4, claude-sonnet-4-6'}]: " + ).strip() + except (KeyboardInterrupt, EOFError): + print("\nCancelled.") + return + effective_model = model_name or current_model + + if not effective_model: + print("No model name provided. Cancelled.") + return + + # ── Step 6: context-length lookup ──────────────────────────────── + ctx_len = azure_detect.lookup_context_length( + effective_model, + effective_url, + api_key=effective_key, + token_provider=token_provider, + ) + + # ── Step 7: persist ────────────────────────────────────────────── + if not use_entra: + save_env_value("AZURE_FOUNDRY_API_KEY", effective_key) + + cfg = load_config() + model = cfg.get("model") + if not isinstance(model, dict): + model = {"default": model} if model else {} + cfg["model"] = model + + model["provider"] = "azure-foundry" + model["base_url"] = effective_url + model["api_mode"] = api_mode + model["default"] = effective_model + model["auth_mode"] = auth_mode_label + if use_entra: + # Persist only the non-default Entra scope so config.yaml stays tidy. + # Azure identity selection stays in standard AZURE_* env vars. 
+ clean_entra: dict = {} + for key in ("scope",): + val = entra_overrides.get(key) + if val: + clean_entra[key] = val + if clean_entra: + model["entra"] = clean_entra + elif "entra" in model: + del model["entra"] + else: + if "entra" in model: + del model["entra"] + if ctx_len: + model["context_length"] = ctx_len + + save_config(cfg) + deactivate_provider() + config["model"] = dict(model) + + # Clear any conflicting env vars so auxiliary clients don't poison + # themselves with a stale OpenAI base URL / key. + if get_env_value("OPENAI_BASE_URL"): + save_env_value("OPENAI_BASE_URL", "") + if get_env_value("OPENAI_API_KEY"): + save_env_value("OPENAI_API_KEY", "") + + mode_label = "OpenAI-style" if api_mode == "chat_completions" else "Anthropic-style" + auth_label = ( + "Microsoft Entra ID (keyless)" if use_entra else "API key" + ) + print() + print("✓ Azure Foundry configured:") + print(f" Endpoint: {effective_url}") + print(f" API mode: {mode_label}") + print(f" Auth: {auth_label}") + print(f" Model: {effective_model}") + if ctx_len: + print(f" Context length: {ctx_len:,} tokens") + else: + print(" Context length: not auto-detected (will fall back at runtime)") + print() + +def _model_flow_named_custom(config, provider_info): + """Handle a named custom provider from config.yaml custom_providers list. + + Always probes the endpoint's /models API to let the user pick a model. + If a model was previously saved, it is pre-selected in the menu. + Falls back to the saved model if probing fails. 
+ """ + from hermes_cli.main import _custom_provider_api_key_config_value, _custom_provider_base_url_config_value, _save_custom_provider + from hermes_cli.auth import _save_model_choice, deactivate_provider + from hermes_cli.config import load_config, save_config + from hermes_cli.models import fetch_api_models + + name = provider_info["name"] + base_url = provider_info["base_url"] + api_mode = provider_info.get("api_mode", "") + api_key = provider_info.get("api_key", "") + key_env = provider_info.get("key_env", "") + saved_model = provider_info.get("model", "") + provider_key = (provider_info.get("provider_key") or "").strip() + + # Resolve key from env var if api_key not set directly + if not api_key and key_env: + api_key = os.environ.get(key_env, "") + config_api_key = _custom_provider_api_key_config_value(provider_info, api_key) + + # Honor ``discover_models: false`` (default True) — when discovery is + # disabled, use the configured ``models:`` list verbatim and skip the + # live /models probe. This lets operators restrict the picker to the + # subset their plan actually serves instead of the endpoint's full + # catalog (#18726: Baidu Qianfan returns 100+ models for a 2-3 model + # plan). Same semantics as the slash-command picker (model_switch.py + # sections 3 & 4): default discovers, false keeps the explicit list. 
+ discover = provider_info.get("discover_models", True) + if isinstance(discover, str): + discover = discover.lower() not in {"false", "no", "0"} + configured_models: list[str] = [] + cfg_models = provider_info.get("models", {}) + if isinstance(cfg_models, dict): + configured_models = [str(m) for m in cfg_models if str(m).strip()] + elif isinstance(cfg_models, list): + configured_models = [ + str(m) for m in cfg_models if isinstance(m, str) and m.strip() + ] + + print(f" Provider: {name}") + print(f" URL: {base_url}") + if saved_model: + print(f" Current: {saved_model}") + print() + + if not discover and configured_models: + # Discovery disabled with an explicit list — use it verbatim, no probe. + print(f"Using configured models (discover_models: false): {len(configured_models)}") + models = configured_models + else: + print("Fetching available models...") + fetch_kwargs = {"timeout": 8.0} + if api_mode: + fetch_kwargs["api_mode"] = api_mode + models = fetch_api_models(api_key, base_url, **fetch_kwargs) + # If the probe came back empty but the operator configured an explicit + # list, fall back to it rather than forcing manual entry. + if not models and configured_models: + models = configured_models + + if models: + default_idx = 0 + if saved_model and saved_model in models: + default_idx = models.index(saved_model) + + print(f"Found {len(models)} model(s):\n") + try: + from hermes_cli.curses_ui import curses_radiolist + + menu_items = [ + f"{m} (current)" if m == saved_model else m for m in models + ] + ["Cancel"] + idx = curses_radiolist( + f"Select model from {name}:", + menu_items, + selected=default_idx, + cancel_returns=-1, + searchable=True, + ) + print() + if idx < 0 or idx >= len(models): + print("Cancelled.") + return + model_name = models[idx] + except (ImportError, NotImplementedError, OSError, subprocess.SubprocessError): + for i, m in enumerate(models, 1): + suffix = " (current)" if m == saved_model else "" + print(f" {i}. 
{m}{suffix}") + print(f" {len(models) + 1}. Cancel") + print() + try: + val = input(f"Choice [1-{len(models) + 1}]: ").strip() + if not val: + print("Cancelled.") + return + idx = int(val) - 1 + if idx < 0 or idx >= len(models): + print("Cancelled.") + return + model_name = models[idx] + except (ValueError, KeyboardInterrupt, EOFError): + print("\nCancelled.") + return + elif saved_model: + print("Could not fetch models from endpoint.") + try: + model_name = input(f"Model name [{saved_model}]: ").strip() or saved_model + except (KeyboardInterrupt, EOFError): + print("\nCancelled.") + return + else: + print("Could not fetch models from endpoint. Enter model name manually.") + try: + model_name = input("Model name: ").strip() + except (KeyboardInterrupt, EOFError): + print("\nCancelled.") + return + if not model_name: + print("No model specified. Cancelled.") + return + + # Activate and save the model to the custom_providers entry + _save_model_choice(model_name) + + cfg = load_config() + model = cfg.get("model") + if not isinstance(model, dict): + model = {"default": model} if model else {} + cfg["model"] = model + if provider_key: + model["provider"] = provider_key + model.pop("base_url", None) + model.pop("api_key", None) + else: + model["provider"] = "custom" + model["base_url"] = _custom_provider_base_url_config_value( + provider_info, base_url + ) + if config_api_key: + model["api_key"] = config_api_key + # Apply api_mode from custom_providers entry, or clear stale value + custom_api_mode = provider_info.get("api_mode", "") + if custom_api_mode: + model["api_mode"] = custom_api_mode + else: + model.pop("api_mode", None) # let runtime auto-detect from URL + save_config(cfg) + deactivate_provider() + + # Persist the selected model back to whichever schema owns this endpoint. 
def _model_flow_copilot(config, current_model=""):
    """GitHub Copilot flow using env vars, gh CLI, or OAuth device code.

    Steps, in order:

    1. Resolve a token for the ``copilot`` provider. When none is found the
       user may run the device-code login, paste a token, or cancel; a
       successfully obtained token is stored as ``COPILOT_GITHUB_TOKEN``.
    2. Build the model list from the GitHub model catalog, falling back to
       ``fetch_api_models`` and then to the static ``_PROVIDER_MODELS`` list.
    3. Prompt for a model and, when the model advertises reasoning efforts,
       for a reasoning effort.
    4. Persist model, provider, base URL, API mode and (optionally) the
       reasoning effort, then call ``deactivate_provider()``.

    Args:
        config: Not read by this flow.
        current_model: Model id used as the pre-selected entry in the picker
            (after normalisation against the catalog).

    Returns:
        None. Every outcome is reported via ``print``.
    """
    from hermes_cli.main import _current_reasoning_effort, _prompt_reasoning_effort_selection, _set_reasoning_effort
    from hermes_cli.auth import (
        PROVIDER_REGISTRY,
        _prompt_model_selection,
        _save_model_choice,
        deactivate_provider,
        resolve_api_key_provider_credentials,
    )
    from hermes_cli.config import save_env_value, load_config, save_config
    from hermes_cli.models import (
        _PROVIDER_MODELS,
        fetch_api_models,
        fetch_github_model_catalog,
        github_model_reasoning_efforts,
        copilot_model_api_mode,
        normalize_copilot_model_id,
    )

    provider_id = "copilot"
    pconfig = PROVIDER_REGISTRY[provider_id]

    creds = resolve_api_key_provider_credentials(provider_id)
    api_key = creds.get("api_key", "")
    source = creds.get("source", "")

    if not api_key:
        # No credential found anywhere: explain what is accepted and offer
        # the interactive ways of obtaining one.
        print("No GitHub token configured for GitHub Copilot.")
        print()
        print(" Supported token types:")
        print(
            " → OAuth token (gho_*) via `copilot login` or device code flow"
        )
        print(" → Fine-grained PAT (github_pat_*) with Copilot Requests permission")
        print(" → GitHub App token (ghu_*) via environment variable")
        print(" ✗ Classic PAT (ghp_*) NOT supported by Copilot API")
        print()
        print(" Options:")
        print(" 1. Login with GitHub (OAuth device code flow)")
        print(" 2. Enter a token manually")
        print(" 3. Cancel")
        print()
        try:
            choice = input(" Choice [1-3]: ").strip()
        except (KeyboardInterrupt, EOFError):
            print()
            return

        if choice == "1":
            # Device-code login; any failure (including a missing module)
            # is reported and aborts the flow without touching config.
            try:
                from hermes_cli.copilot_auth import copilot_device_code_login

                token = copilot_device_code_login()
                if token:
                    save_env_value("COPILOT_GITHUB_TOKEN", token)
                    print(" Copilot token saved.")
                    print()
                else:
                    print(" Login cancelled or failed.")
                    return
            except Exception as exc:
                print(f" Login failed: {exc}")
                return
        elif choice == "2":
            from hermes_cli.secret_prompt import masked_secret_prompt

            try:
                new_key = masked_secret_prompt(" Token (COPILOT_GITHUB_TOKEN): ").strip()
            except (KeyboardInterrupt, EOFError):
                print()
                return
            if not new_key:
                print(" Cancelled.")
                return
            # Validate token type
            # Validation is skipped (not failed) when the validator module
            # cannot be imported.
            try:
                from hermes_cli.copilot_auth import validate_copilot_token

                valid, msg = validate_copilot_token(new_key)
                if not valid:
                    print(f" ✗ {msg}")
                    return
            except ImportError:
                pass
            save_env_value("COPILOT_GITHUB_TOKEN", new_key)
            print(" Token saved.")
            print()
        else:
            # "3", empty input, or anything unrecognised.
            print(" Cancelled.")
            return

        # Re-resolve so the freshly saved token (and its source) is used below.
        creds = resolve_api_key_provider_credentials(provider_id)
        api_key = creds.get("api_key", "")
        source = creds.get("source", "")
    else:
        if source in {"GITHUB_TOKEN", "GH_TOKEN"}:
            from hermes_cli.env_loader import format_secret_source_suffix
            bw_suffix = format_secret_source_suffix(source)
            # Only the first 8 characters of the token are echoed.
            print(f" GitHub token: {api_key[:8]}... ✓ ({source}{bw_suffix})")
        elif source == "gh auth token":
            print(" GitHub token: ✓ (from `gh auth token`)")
        else:
            print(" GitHub token: ✓")
        print()

    effective_base = pconfig.inference_base_url

    # Prefer the GitHub model catalog; only when it is empty is the generic
    # model listing endpoint queried.
    catalog = fetch_github_model_catalog(api_key)
    live_models = (
        [item.get("id", "") for item in catalog if item.get("id")]
        if catalog
        else fetch_api_models(api_key, effective_base)
    )
    # Fall back to the raw id when normalisation yields nothing.
    normalized_current_model = (
        normalize_copilot_model_id(
            current_model,
            catalog=catalog,
            api_key=api_key,
        )
        or current_model
    )
    if live_models:
        model_list = [model_id for model_id in live_models if model_id]
        print(f" Found {len(model_list)} model(s) from GitHub Copilot")
    else:
        model_list = _PROVIDER_MODELS.get(provider_id, [])
        if model_list:
            print(
                " ⚠ Could not auto-detect models from GitHub Copilot — showing defaults."
            )
            print(' Use "Enter custom model name" if you do not see your model.')

    if model_list:
        selected = _prompt_model_selection(
            model_list, current_model=normalized_current_model
        )
    else:
        # Nothing to pick from: ask for a free-form model name.
        try:
            selected = input("Model name: ").strip()
        except (KeyboardInterrupt, EOFError):
            selected = None

    if selected:
        selected = (
            normalize_copilot_model_id(
                selected,
                catalog=catalog,
                api_key=api_key,
            )
            or selected
        )
        # Read the current effort before _save_model_choice runs so the
        # effort picker can show it.
        initial_cfg = load_config()
        current_effort = _current_reasoning_effort(initial_cfg)
        reasoning_efforts = github_model_reasoning_efforts(
            selected,
            catalog=catalog,
            api_key=api_key,
        )
        selected_effort = None
        if reasoning_efforts:
            print(f" {selected} supports reasoning controls.")
            selected_effort = _prompt_reasoning_effort_selection(
                reasoning_efforts, current_effort=current_effort
            )

        _save_model_choice(selected)

        # Reload after _save_model_choice, then layer the provider fields on
        # top of the "model" section.
        cfg = load_config()
        model = cfg.get("model")
        if not isinstance(model, dict):
            # A bare string (or missing) "model" entry is upgraded to a dict.
            model = {"default": model} if model else {}
            cfg["model"] = model
        model["provider"] = provider_id
        model["base_url"] = effective_base
        model["api_mode"] = copilot_model_api_mode(
            selected,
            catalog=catalog,
            api_key=api_key,
        )
        # None means the effort prompt was not shown or returned nothing;
        # leave the stored effort untouched in that case.
        if selected_effort is not None:
            _set_reasoning_effort(cfg, selected_effort)
        save_config(cfg)
        deactivate_provider()

        print(f"Default model set to: {selected} (via {pconfig.name})")
        if reasoning_efforts:
            if selected_effort == "none":
                print("Reasoning disabled for this model.")
            elif selected_effort:
                print(f"Reasoning effort set to: {selected_effort}")
    else:
        print("No change.")
def _model_flow_copilot_acp(config, current_model=""):
    """Configure the GitHub Copilot ACP provider (local Copilot CLI).

    Shows how the ACP backend is launched, verifies that the external
    process credentials resolve, offers the Copilot model catalog (or the
    static ``copilot`` list when the catalog is empty) and stores the chosen
    model with ``api_mode`` fixed to ``"chat_completions"``.

    Args:
        config: Discarded immediately; this flow never reads it.
        current_model: Model id to pre-select in the picker.

    Returns:
        None.
    """
    from hermes_cli.auth import (
        PROVIDER_REGISTRY,
        _prompt_model_selection,
        _save_model_choice,
        deactivate_provider,
        get_external_process_provider_status,
        resolve_api_key_provider_credentials,
        resolve_external_process_provider_credentials,
    )
    from hermes_cli.models import (
        _PROVIDER_MODELS,
        fetch_github_model_catalog,
        normalize_copilot_model_id,
    )
    from hermes_cli.config import load_config, save_config

    del config

    acp_provider = "copilot-acp"
    provider_cfg = PROVIDER_REGISTRY[acp_provider]

    proc_status = get_external_process_provider_status(acp_provider)
    command = proc_status.get("resolved_command")
    if not command:
        command = proc_status.get("command") or "copilot"
    backend_url = proc_status.get("base_url") or provider_cfg.inference_base_url

    banner = (
        " GitHub Copilot ACP delegates Hermes turns to `copilot --acp`.",
        " Hermes currently starts its own ACP subprocess for each request.",
        " Hermes uses your selected model as a hint for the Copilot ACP session.",
        f" Command: {command}",
        f" Backend marker: {backend_url}",
    )
    for banner_line in banner:
        print(banner_line)
    print()

    try:
        acp_creds = resolve_external_process_provider_credentials(acp_provider)
    except Exception as exc:
        print(f" ⚠ {exc}")
        print(
            " Set HERMES_COPILOT_ACP_COMMAND or COPILOT_CLI_PATH if Copilot CLI is installed elsewhere."
        )
        return

    backend_url = acp_creds.get("base_url") or backend_url

    # The catalog lookup borrows the plain "copilot" provider's token; a
    # failure here just means an unauthenticated catalog request.
    try:
        catalog_token = resolve_api_key_provider_credentials("copilot").get(
            "api_key", ""
        )
    except Exception:
        catalog_token = ""

    catalog = fetch_github_model_catalog(catalog_token)

    def _canonical(model_id):
        # Normalised id when available, otherwise the id as given.
        normalized = normalize_copilot_model_id(
            model_id,
            catalog=catalog,
            api_key=catalog_token,
        )
        return normalized or model_id

    preselected = _canonical(current_model)

    if catalog:
        choices = [entry.get("id", "") for entry in catalog if entry.get("id")]
        print(f" Found {len(choices)} model(s) from GitHub Copilot")
    else:
        choices = _PROVIDER_MODELS.get("copilot", [])
        if choices:
            print(
                " ⚠ Could not auto-detect models from GitHub Copilot — showing defaults."
            )
            print(' Use "Enter custom model name" if you do not see your model.')

    if not choices:
        try:
            picked = input("Model name: ").strip()
        except (KeyboardInterrupt, EOFError):
            picked = None
    else:
        picked = _prompt_model_selection(
            choices,
            current_model=preselected,
        )

    if not picked:
        print("No change.")
        return

    picked = _canonical(picked)
    _save_model_choice(picked)

    cfg = load_config()
    model_section = cfg.get("model")
    if not isinstance(model_section, dict):
        model_section = {"default": model_section} if model_section else {}
    cfg["model"] = model_section
    model_section.update(
        {
            "provider": acp_provider,
            "base_url": backend_url,
            "api_mode": "chat_completions",
        }
    )
    save_config(cfg)
    deactivate_provider()

    print(f"Default model set to: {picked} (via {provider_cfg.name})")
def _model_flow_kimi(config, current_model=""):
    """Kimi / Moonshot model selection with automatic endpoint routing.

    The endpoint is derived from the API key prefix; there is no base URL
    prompt:

    - ``sk-kimi-*`` keys → ``KIMI_CODE_BASE_URL`` (Kimi Coding Plan)
    - other keys → the registry's default inference URL (legacy Moonshot)

    Any value stored in the provider's base-URL env var is blanked so it
    cannot override the auto-detected endpoint.

    Args:
        config: Not read by this flow.
        current_model: Model id marked as current in the picker.

    Returns:
        None. The selection is persisted via ``_save_model_choice`` and
        ``save_config``; "No change." is printed when nothing is chosen.
    """
    from hermes_cli.main import _prompt_api_key
    from hermes_cli.auth import (
        PROVIDER_REGISTRY,
        KIMI_CODE_BASE_URL,
        _prompt_model_selection,
        _save_model_choice,
        deactivate_provider,
    )
    from hermes_cli.config import (
        get_env_value,
        save_env_value,
        load_config,
        save_config,
    )
    from hermes_cli.models import _PROVIDER_MODELS

    provider_id = "kimi-coding"
    pconfig = PROVIDER_REGISTRY[provider_id]
    base_url_env = pconfig.base_url_env_var or ""

    # Step 1: Check / prompt for API key. The first env var that yields a
    # value wins (saved env file first, then the process environment).
    existing_key = ""
    for ev in pconfig.api_key_env_vars:
        existing_key = get_env_value(ev) or os.getenv(ev, "")
        if existing_key:
            break

    existing_key, abort = _prompt_api_key(
        pconfig, existing_key, provider_id=provider_id
    )
    if abort:
        return

    # Step 2: Auto-detect endpoint from key prefix
    is_coding_plan = existing_key.startswith("sk-kimi-")
    if is_coding_plan:
        effective_base = KIMI_CODE_BASE_URL
        print(f" Detected Kimi Coding Plan key → {effective_base}")
    else:
        effective_base = pconfig.inference_base_url
        print(f" Using Moonshot endpoint → {effective_base}")
    # Clear any manual base URL override so auto-detection works at runtime
    if base_url_env and get_env_value(base_url_env):
        save_env_value(base_url_env, "")
    print()

    # Step 3: Model selection — show appropriate models for the endpoint
    if is_coding_plan:
        # Coding Plan models (kimi-k2.6 first)
        model_list = [
            "kimi-k2.6",
            "kimi-k2.5",
            "kimi-for-coding",
            "kimi-k2-thinking",
            "kimi-k2-thinking-turbo",
        ]
    else:
        # Legacy Moonshot models (excludes Coding Plan-only models)
        model_list = _PROVIDER_MODELS.get("moonshot", [])

    if model_list:
        selected = _prompt_model_selection(model_list, current_model=current_model)
    else:
        try:
            selected = input("Enter model name: ").strip()
        except (KeyboardInterrupt, EOFError):
            selected = None

    if not selected:
        print("No change.")
        return

    _save_model_choice(selected)

    # Update config with provider and base URL
    cfg = load_config()
    model = cfg.get("model")
    if not isinstance(model, dict):
        # Upgrade a bare-string (or missing) "model" entry to a dict.
        model = {"default": model} if model else {}
    cfg["model"] = model
    model["provider"] = provider_id
    model["base_url"] = effective_base
    model.pop("api_mode", None)  # let runtime auto-detect from URL
    save_config(cfg)
    deactivate_provider()

    endpoint_label = "Kimi Coding" if is_coding_plan else "Moonshot"
    print(f"Default model set to: {selected} (via {endpoint_label})")
def _model_flow_stepfun(config, current_model=""):
    """StepFun Step Plan flow with region-specific endpoints.

    Prompts for an API key, lets the user choose between the international
    and China endpoints (the currently active one is listed first), stores
    the chosen base URL in the provider's base-URL env var when one is
    defined, and then runs model selection against that endpoint.

    Args:
        config: Loaded config mapping. ``config["model"]["base_url"]`` is
            used to infer the active region when no env override exists, and
            ``config["model"]`` is replaced in place with the saved model
            section after a successful selection.
        current_model: Model id marked as current in the picker.

    Returns:
        None.
    """
    from hermes_cli.main import _infer_stepfun_region, _prompt_api_key, _prompt_provider_choice, _stepfun_base_url_for_region
    from hermes_cli.auth import (
        PROVIDER_REGISTRY,
        _prompt_model_selection,
        _save_model_choice,
        deactivate_provider,
    )
    from hermes_cli.config import (
        get_env_value,
        save_env_value,
        load_config,
        save_config,
    )
    from hermes_cli.models import _PROVIDER_MODELS, fetch_api_models

    provider_id = "stepfun"
    pconfig = PROVIDER_REGISTRY[provider_id]
    base_url_env = pconfig.base_url_env_var or ""

    # First env var that yields a key wins (saved env file, then process env).
    existing_key = ""
    for ev in pconfig.api_key_env_vars:
        existing_key = get_env_value(ev) or os.getenv(ev, "")
        if existing_key:
            break

    existing_key, abort = _prompt_api_key(
        pconfig, existing_key, provider_id=provider_id
    )
    if abort:
        return

    # Work out which region is active now: env override, then the caller's
    # config, then the registry default.
    current_base = ""
    if base_url_env:
        current_base = get_env_value(base_url_env) or os.getenv(base_url_env, "")
    if not current_base:
        model_cfg = config.get("model")
        if isinstance(model_cfg, dict):
            current_base = str(model_cfg.get("base_url") or "").strip()
    current_region = _infer_stepfun_region(current_base or pconfig.inference_base_url)

    region_choices = [
        (
            "international",
            f"International ({_stepfun_base_url_for_region('international')})",
        ),
        ("china", f"China ({_stepfun_base_url_for_region('china')})"),
    ]
    # The active region is moved to the top and annotated; "Cancel" is last.
    ordered_regions = []
    for region_key, label in region_choices:
        if region_key == current_region:
            ordered_regions.insert(0, (region_key, f"{label} ← currently active"))
        else:
            ordered_regions.append((region_key, label))
    ordered_regions.append(("cancel", "Cancel"))

    region_idx = _prompt_provider_choice([label for _, label in ordered_regions])
    if region_idx is None or ordered_regions[region_idx][0] == "cancel":
        print("No change.")
        return

    selected_region = ordered_regions[region_idx][0]
    effective_base = _stepfun_base_url_for_region(selected_region)
    if base_url_env:
        # Persisted as soon as a region is picked, before any model is chosen.
        save_env_value(base_url_env, effective_base)

    live_models = fetch_api_models(existing_key, effective_base)
    if live_models:
        model_list = live_models
        print(f" Found {len(model_list)} model(s) from {pconfig.name} API")
    else:
        model_list = _PROVIDER_MODELS.get(provider_id, [])
        if model_list:
            print(
                f" Could not auto-detect models from {pconfig.name} API — "
                "showing Step Plan fallback catalog."
            )

    if model_list:
        selected = _prompt_model_selection(model_list, current_model=current_model)
    else:
        try:
            selected = input("Model name: ").strip()
        except (KeyboardInterrupt, EOFError):
            selected = None

    if not selected:
        print("No change.")
        return

    _save_model_choice(selected)

    cfg = load_config()
    model = cfg.get("model")
    if not isinstance(model, dict):
        # Upgrade a bare-string (or missing) "model" entry to a dict.
        model = {"default": model} if model else {}
    cfg["model"] = model
    model["provider"] = provider_id
    model["base_url"] = effective_base
    model.pop("api_mode", None)
    save_config(cfg)
    deactivate_provider()

    # Mirror the saved section into the caller's config object (as a copy).
    config["model"] = dict(model)
    print(f"Default model set to: {selected} (via {pconfig.name})")
def _model_flow_bedrock_api_key(config, region, current_model=""):
    """Bedrock API Key mode — uses the OpenAI-compatible bedrock-mantle endpoint.

    For developers who don't have an AWS account but received a Bedrock API
    Key from their AWS admin. The endpoint is configured as a ``custom``
    provider pointing at ``https://bedrock-mantle.<region>.api.aws/v1``.

    Args:
        config: Not read by this flow.
        region: AWS region name interpolated into the endpoint URL and stored
            under ``bedrock.region`` in the config.
        current_model: Model id marked as current in the picker.

    Returns:
        None. On success the key is stored in ``AWS_BEARER_TOKEN_BEDROCK``
        (when newly entered) and mirrored into ``OPENAI_API_KEY``; the
        endpoint is written to ``OPENAI_BASE_URL`` and the config.
    """
    from hermes_cli.auth import (
        _prompt_model_selection,
        _save_model_choice,
        deactivate_provider,
    )
    from hermes_cli.config import (
        load_config,
        save_config,
        get_env_value,
        save_env_value,
    )
    from hermes_cli.models import _PROVIDER_MODELS

    mantle_base_url = f"https://bedrock-mantle.{region}.api.aws/v1"

    # Prompt for API key
    existing_key = get_env_value("AWS_BEARER_TOKEN_BEDROCK") or ""
    if existing_key:
        from hermes_cli.env_loader import format_secret_source_suffix
        source_suffix = format_secret_source_suffix("AWS_BEARER_TOKEN_BEDROCK")
        # Only the first 12 characters of the key are echoed.
        print(f" Bedrock API Key: {existing_key[:12]}... ✓{source_suffix}")
    else:
        print(f" Endpoint: {mantle_base_url}")
        print()
        from hermes_cli.secret_prompt import masked_secret_prompt

        try:
            api_key = masked_secret_prompt(" Bedrock API Key: ").strip()
        except (KeyboardInterrupt, EOFError):
            print()
            return
        if not api_key:
            print(" Cancelled.")
            return
        save_env_value("AWS_BEARER_TOKEN_BEDROCK", api_key)
        existing_key = api_key
        print(" ✓ API key saved.")
    print()

    # Model selection — use static list (mantle doesn't need boto3 for discovery)
    model_list = _PROVIDER_MODELS.get("bedrock", [])

    if model_list:
        # Announce the list only when there is one; an empty list goes
        # straight to the free-form prompt instead of "Showing 0 ...".
        print(f" Showing {len(model_list)} curated models")
        selected = _prompt_model_selection(model_list, current_model=current_model)
    else:
        try:
            selected = input(" Model ID: ").strip()
        except (KeyboardInterrupt, EOFError):
            selected = None

    if not selected:
        print(" No change.")
        return

    _save_model_choice(selected)

    # Save as custom provider pointing to bedrock-mantle
    cfg = load_config()
    model = cfg.get("model")
    if not isinstance(model, dict):
        # Upgrade a bare-string (or missing) "model" entry to a dict.
        model = {"default": model} if model else {}
    cfg["model"] = model
    model["provider"] = "custom"
    model["base_url"] = mantle_base_url
    model.pop("api_mode", None)  # chat_completions is the default

    # Also save region in bedrock config for reference
    bedrock_cfg = cfg.get("bedrock", {})
    if not isinstance(bedrock_cfg, dict):
        bedrock_cfg = {}
    bedrock_cfg["region"] = region
    cfg["bedrock"] = bedrock_cfg

    # Mirror the Bedrock key *value* and the mantle endpoint into the
    # OpenAI-compatible env vars.
    # NOTE(review): this overwrites any OPENAI_API_KEY / OPENAI_BASE_URL the
    # user already had saved — confirm that is intended.
    save_env_value("OPENAI_API_KEY", existing_key)
    save_env_value("OPENAI_BASE_URL", mantle_base_url)

    save_config(cfg)
    deactivate_provider()

    print(f" Default model set to: {selected} (via Bedrock API Key, {region})")
    print(f" Endpoint: {mantle_base_url}")
def _select_bedrock_text_models(live_models):
    """Filter, de-duplicate and rank discovered Bedrock models for the picker.

    Args:
        live_models: Sequence of mappings, each with an ``"id"`` string.

    Returns:
        List of model id strings: non-text/embedding families removed, bare
        foundation ids dropped when a ``us.``/``global.`` inference profile
        for the same model exists, recommended families first, then other
        ``global.`` profiles, then everything else (ties broken by id).
    """
    exclude_prefixes = (
        "stability.",
        "cohere.embed",
        "twelvelabs.",
        "us.stability.",
        "us.cohere.embed",
        "us.twelvelabs.",
        "global.cohere.embed",
        "global.twelvelabs.",
    )
    exclude_substrings = ("safeguard", "voxtral", "palmyra-vision")
    profile_prefixes = ("us.", "global.")
    recommended = [
        "us.anthropic.claude-sonnet-4-6",
        "us.anthropic.claude-opus-4-6",
        "us.anthropic.claude-haiku-4-5",
        "us.amazon.nova-pro",
        "us.amazon.nova-lite",
        "us.amazon.nova-micro",
        "deepseek.v3",
        "us.meta.llama4-maverick",
        "us.meta.llama4-scout",
    ]

    filtered_ids = []
    for m in live_models:
        mid = m["id"]
        if mid.startswith(exclude_prefixes):
            continue
        lowered = mid.lower()
        if any(s in lowered for s in exclude_substrings):
            continue
        filtered_ids.append(mid)

    # Deduplicate: prefer inference profiles (us.*, global.*) over bare
    # foundation model IDs.
    profile_base_ids = set()
    for mid in filtered_ids:
        if mid.startswith(profile_prefixes):
            # Strip the profile prefix when something dotted follows it;
            # otherwise keep the id unchanged.
            base = mid.split(".", 1)[1] if "." in mid[3:] else mid
            profile_base_ids.add(base)

    deduped_ids = [
        mid
        for mid in filtered_ids
        if mid.startswith(profile_prefixes) or mid not in profile_base_ids
    ]

    def _sort_key(mid):
        for i, rec in enumerate(recommended):
            if mid.startswith(rec):
                return (0, i, mid)
        if mid.startswith("global."):
            return (1, 0, mid)
        return (2, 0, mid)

    deduped_ids.sort(key=_sort_key)
    return deduped_ids


def _model_flow_bedrock(config, current_model=""):
    """AWS Bedrock provider: verify credentials, pick region, discover models.

    Uses the native Converse API via boto3 — not the OpenAI-compatible
    endpoint. Auth is handled by the AWS SDK default credential chain (env
    vars, profile, instance role), so no API key prompt is needed unless the
    user explicitly picks the "Bedrock API Key" method, in which case the
    flow is handed to ``_model_flow_bedrock_api_key``.

    Args:
        config: Passed through to ``_model_flow_bedrock_api_key``; not read
            otherwise.
        current_model: Model id marked as current in the picker.

    Returns:
        None.
    """
    from hermes_cli.auth import (
        _prompt_model_selection,
        _save_model_choice,
        deactivate_provider,
    )
    from hermes_cli.config import load_config, save_config
    from hermes_cli.models import _PROVIDER_MODELS

    # 1. Check for AWS credentials
    try:
        from agent.bedrock_adapter import (
            has_aws_credentials,
            resolve_aws_auth_env_var,
            resolve_bedrock_region,
            discover_bedrock_models,
        )
    except ImportError:
        print(" ✗ boto3 is not installed. Install it with:")
        print(" pip install boto3")
        print()
        return

    if not has_aws_credentials():
        # Informational only — the flow continues with boto3's default chain.
        print(" ⚠ No AWS credentials detected via environment variables.")
        print(" Bedrock will use boto3's default credential chain (IMDS, SSO, etc.)")
        print()

    auth_var = resolve_aws_auth_env_var()
    if auth_var:
        print(f" AWS credentials: {auth_var} ✓")
    else:
        print(" AWS credentials: boto3 default chain (instance role / SSO)")
    print()

    # 2. Region selection (Enter keeps the resolved default)
    current_region = resolve_bedrock_region()
    try:
        region_input = input(f" AWS Region [{current_region}]: ").strip()
    except (KeyboardInterrupt, EOFError):
        print()
        return
    region = region_input or current_region

    # 2b. Authentication mode
    print(" Choose authentication method:")
    print()
    print(" 1. IAM credential chain (recommended)")
    print(" Works with EC2 instance roles, SSO, env vars, aws configure")
    print(" 2. Bedrock API Key")
    print(" Enter your Bedrock API Key directly — also supports")
    print(" team scenarios where an admin distributes keys")
    print()
    try:
        auth_choice = input(" Choice [1]: ").strip()
    except (KeyboardInterrupt, EOFError):
        print()
        return

    if auth_choice == "2":
        _model_flow_bedrock_api_key(config, region, current_model)
        return

    # 3. Model discovery — try live API first, fall back to static list
    print(f" Discovering models in {region}...")
    live_models = discover_bedrock_models(region)

    if live_models:
        model_list = _select_bedrock_text_models(live_models)
        print(
            f" Found {len(model_list)} text model(s) (filtered from {len(live_models)} total)"
        )
    else:
        model_list = _PROVIDER_MODELS.get("bedrock", [])
        if model_list:
            print(
                f" Using {len(model_list)} curated models (live discovery unavailable)"
            )
        else:
            print(
                " No models found. Check IAM permissions for bedrock:ListFoundationModels."
            )
            return

    # 4. Model selection. The free-form prompt is reached only when live
    # discovery returned models but every one of them was filtered out.
    if model_list:
        selected = _prompt_model_selection(model_list, current_model=current_model)
    else:
        try:
            selected = input(" Model ID: ").strip()
        except (KeyboardInterrupt, EOFError):
            selected = None

    if not selected:
        print(" No change.")
        return

    _save_model_choice(selected)

    cfg = load_config()
    model = cfg.get("model")
    if not isinstance(model, dict):
        # Upgrade a bare-string (or missing) "model" entry to a dict.
        model = {"default": model} if model else {}
    cfg["model"] = model
    model["provider"] = "bedrock"
    model["base_url"] = f"https://bedrock-runtime.{region}.amazonaws.com"
    model.pop("api_mode", None)  # bedrock_converse is auto-detected

    bedrock_cfg = cfg.get("bedrock", {})
    if not isinstance(bedrock_cfg, dict):
        bedrock_cfg = {}
    bedrock_cfg["region"] = region
    cfg["bedrock"] = bedrock_cfg

    save_config(cfg)
    deactivate_provider()

    print(f" Default model set to: {selected} (via AWS Bedrock, {region})")
def _model_flow_api_key_provider(config, provider_id, current_model=""):
    """Generic flow for API-key providers (z.ai, MiniMax, OpenCode, etc.).

    Steps, in order:

    1. Find or prompt for the provider's API key.
    2. For ``gemini`` only: probe the key's tier and refuse to continue when
       it is reported as ``"free"``.
    3. Offer a base URL override (env var → saved config → registry default).
    4. Build the model list with provider-specific discovery (``lmstudio``,
       ``ollama-cloud``, ``novita``) or the generic models.dev → curated →
       live-probe chain.
    5. Prompt for a model and persist model, provider, base URL and, for the
       OpenCode providers, the API mode.

    Args:
        config: Not read by this flow (the config is re-loaded from disk).
        provider_id: Key into ``PROVIDER_REGISTRY``.
        current_model: Model id marked as current in the picker.

    Returns:
        None.
    """
    from hermes_cli.main import _prompt_api_key
    from hermes_cli.auth import (
        PROVIDER_REGISTRY,
        _prompt_model_selection,
        _save_model_choice,
        deactivate_provider,
    )
    from hermes_cli.config import (
        get_env_value,
        save_env_value,
        load_config,
        save_config,
    )
    from hermes_cli.models import (
        _PROVIDER_MODELS,
        fetch_api_models,
        opencode_model_api_mode,
        normalize_opencode_model_id,
    )

    pconfig = PROVIDER_REGISTRY[provider_id]
    key_env = pconfig.api_key_env_vars[0] if pconfig.api_key_env_vars else ""
    base_url_env = pconfig.base_url_env_var or ""

    # Check / prompt for API key
    # The first env var that yields a value wins (saved env file first, then
    # the process environment).
    existing_key = ""
    for ev in pconfig.api_key_env_vars:
        existing_key = get_env_value(ev) or os.getenv(ev, "")
        if existing_key:
            break

    existing_key, abort = _prompt_api_key(
        pconfig, existing_key, provider_id=provider_id
    )
    if abort:
        return

    # Gemini free-tier gate: free-tier daily quotas (<= 250 RPD for Flash)
    # are exhausted in a handful of agent turns, so refuse to wire up the
    # provider with a free-tier key. Probe is best-effort; network or auth
    # errors fall through without blocking.
    if provider_id == "gemini" and existing_key:
        try:
            from agent.gemini_native_adapter import probe_gemini_tier
        except Exception:
            # Adapter unavailable: skip the gate entirely.
            probe_gemini_tier = None
        if probe_gemini_tier is not None:
            print(" Checking Gemini API tier...")
            probe_base = (
                (get_env_value(base_url_env) if base_url_env else "")
                or os.getenv(base_url_env or "", "")
                or pconfig.inference_base_url
            )
            tier = probe_gemini_tier(existing_key, probe_base)
            if tier == "free":
                print()
                print(
                    "❌ This Google API key is on the free tier "
                    "(<= 250 requests/day for gemini-2.5-flash)."
                )
                print(
                    " Hermes typically makes 3-10 API calls per user turn "
                    "(tool iterations + auxiliary tasks),"
                )
                print(
                    " so the free tier is exhausted after a handful of "
                    "messages and cannot sustain"
                )
                print(" an agent session.")
                print()
                print(
                    " To use Gemini with Hermes, enable billing on your "
                    "Google Cloud project and regenerate"
                )
                print(
                    " the key in a billing-enabled project: "
                    "https://aistudio.google.com/apikey"
                )
                print()
                print(
                    " Alternatives with workable free usage: DeepSeek, "
                    "OpenRouter (free models), Groq, Nous."
                )
                print()
                print("Not saving Gemini as the default provider.")
                return
            if tier == "paid":
                print(" Tier check: paid ✓")
            else:
                # "unknown" -- network issue, auth problem, unexpected response.
                # Don't block; the runtime 429 handler will surface free-tier
                # guidance if the key turns out to be free tier.
                print(" Tier check: could not verify (proceeding anyway).")
            print()

    # Optional base URL override.
    # Precedence: env var → config.yaml model.base_url → registry default.
    # Reading config.yaml prevents silently overwriting a saved remote URL
    # (e.g. a remote LM Studio endpoint) with localhost when the user just
    # presses Enter at the prompt below.
    current_base = ""
    if base_url_env:
        current_base = get_env_value(base_url_env) or os.getenv(base_url_env, "")
    if not current_base:
        try:
            _m = load_config().get("model") or {}
            # Only trust the saved base_url when it belongs to this provider.
            if str(_m.get("provider") or "").strip().lower() == provider_id:
                current_base = str(_m.get("base_url") or "").strip()
        except Exception:
            # Best-effort read; e.g. a non-dict "model" entry lands here.
            pass
    effective_base = current_base or pconfig.inference_base_url

    try:
        override = input(f"Base URL [{effective_base}]: ").strip()
    except (KeyboardInterrupt, EOFError):
        # Interrupting the prompt keeps the current value instead of aborting.
        print()
        override = ""
    # NOTE(review): an override typed for a provider without a base-URL env
    # var is ignored without any message — confirm that is intended.
    if override and base_url_env:
        if not override.startswith(("http://", "https://")):
            print(
                " Invalid URL — must start with http:// or https://. Keeping current value."
            )
        else:
            save_env_value(base_url_env, override)
            effective_base = override

    # Model selection — resolution order:
    # 1. models.dev registry (cached, filtered for agentic/tool-capable models)
    # 2. Curated static fallback list (offline insurance)
    # 3. Live /models endpoint probe (small providers without models.dev data)
    #
    # LM Studio: live /api/v1/models probe (no models.dev catalog).
    # Ollama Cloud: merged discovery (live API + models.dev + disk cache).
    if provider_id == "lmstudio":
        from hermes_cli.auth import AuthError
        from hermes_cli.models import fetch_lmstudio_models

        api_key_for_probe = existing_key or (get_env_value(key_env) if key_env else "")
        try:
            model_list = fetch_lmstudio_models(
                api_key=api_key_for_probe, base_url=effective_base
            )
        except AuthError as exc:
            print(f" LM Studio rejected the request: {exc}")
            print(" Set LM_API_KEY (or update it) to match the server's bearer token.")
            model_list = []
        if model_list:
            print(f" Found {len(model_list)} model(s) from LM Studio")
    elif provider_id == "ollama-cloud":
        from hermes_cli.models import fetch_ollama_cloud_models

        api_key_for_probe = existing_key or (get_env_value(key_env) if key_env else "")
        # During setup, force a live refresh so the picker reflects newly
        # released models (e.g. deepseek v4 flash, kimi k2.6) the moment
        # the user enters their key — not an hour later when the disk
        # cache TTL expires.
        model_list = fetch_ollama_cloud_models(
            api_key=api_key_for_probe,
            base_url=effective_base,
            force_refresh=True,
        )
        if model_list:
            print(f" Found {len(model_list)} model(s) from Ollama Cloud")
    elif provider_id == "novita":
        # NOTE(review): fetch_api_models is already imported at the top of
        # this function; this second import looks redundant.
        from hermes_cli.models import fetch_api_models

        api_key_for_probe = existing_key or (get_env_value(key_env) if key_env else "")
        curated = _PROVIDER_MODELS.get(provider_id, [])
        # Novita order: live API first, then models.dev merged with the
        # curated list, then the curated list alone.
        live_models = fetch_api_models(api_key_for_probe, effective_base)
        if live_models:
            model_list = live_models
            print(f" Found {len(model_list)} model(s) from {pconfig.name} API")
        else:
            mdev_models: list = []
            try:
                from agent.models_dev import list_agentic_models

                mdev_models = list_agentic_models(provider_id)
            except Exception:
                pass
            if mdev_models:
                # Case-insensitive merge; models.dev entries keep priority.
                seen = {m.lower() for m in mdev_models}
                model_list = list(mdev_models)
                for m in curated:
                    if m.lower() not in seen:
                        model_list.append(m)
                        seen.add(m.lower())
                print(f" Found {len(model_list)} model(s) from models.dev registry")
            else:
                model_list = curated
                if model_list:
                    print(
                        f' Showing {len(model_list)} curated models — use "Enter custom model name" for others.'
                    )
    else:
        curated = _PROVIDER_MODELS.get(provider_id, [])

        # Try models.dev first — returns tool-capable models, filtered for noise
        mdev_models: list = []
        try:
            from agent.models_dev import list_agentic_models

            mdev_models = list_agentic_models(provider_id)
        except Exception:
            pass

        if mdev_models:
            # Merge models.dev with curated list so newly added models
            # (not yet in models.dev) still appear in the picker.
            if curated:
                seen = {m.lower() for m in mdev_models}
                merged = list(mdev_models)
                for m in curated:
                    if m.lower() not in seen:
                        merged.append(m)
                        seen.add(m.lower())
                model_list = merged
            else:
                model_list = mdev_models
            print(f" Found {len(model_list)} model(s) from models.dev registry")
        elif curated and len(curated) >= 8:
            # Curated list is substantial — use it directly, skip live probe
            model_list = curated
            print(
                f' Showing {len(model_list)} curated models — use "Enter custom model name" for others.'
            )
        else:
            api_key_for_probe = existing_key or (
                get_env_value(key_env) if key_env else ""
            )
            live_models = fetch_api_models(api_key_for_probe, effective_base)
            # The live result is used only when it is at least as long as
            # the curated list.
            if live_models and len(live_models) >= len(curated):
                model_list = live_models
                print(f" Found {len(model_list)} model(s) from {pconfig.name} API")
            else:
                model_list = curated
                if model_list:
                    print(
                        f' Showing {len(model_list)} curated models — use "Enter custom model name" for others.'
                    )
                # else: no defaults either, will fall through to raw input

    if provider_id in {"opencode-zen", "opencode-go"}:
        # Normalise ids, then drop empties and duplicates while keeping order.
        model_list = [
            normalize_opencode_model_id(provider_id, mid) for mid in model_list
        ]
        current_model = normalize_opencode_model_id(provider_id, current_model)
        model_list = list(dict.fromkeys(mid for mid in model_list if mid))

    if model_list:
        selected = _prompt_model_selection(model_list, current_model=current_model)
    else:
        try:
            selected = input("Model name: ").strip()
        except (KeyboardInterrupt, EOFError):
            selected = None

    if selected:
        if provider_id in {"opencode-zen", "opencode-go"}:
            selected = normalize_opencode_model_id(provider_id, selected)

        _save_model_choice(selected)

        # Update config with provider, base URL, and provider-specific API mode
        cfg = load_config()
        model = cfg.get("model")
        if not isinstance(model, dict):
            # Upgrade a bare-string (or missing) "model" entry to a dict.
            model = {"default": model} if model else {}
            cfg["model"] = model
        model["provider"] = provider_id
        model["base_url"] = effective_base
        if provider_id in {"opencode-zen", "opencode-go"}:
            model["api_mode"] = opencode_model_api_mode(provider_id, selected)
        else:
            # Drop any api_mode left over from a previously selected provider.
            model.pop("api_mode", None)
        save_config(cfg)
        deactivate_provider()

        print(f"Default model set to: {selected} (via {pconfig.name})")
    else:
        print("No change.")
+ ) + # else: no defaults either, will fall through to raw input + + if provider_id in {"opencode-zen", "opencode-go"}: + model_list = [ + normalize_opencode_model_id(provider_id, mid) for mid in model_list + ] + current_model = normalize_opencode_model_id(provider_id, current_model) + model_list = list(dict.fromkeys(mid for mid in model_list if mid)) + + if model_list: + selected = _prompt_model_selection(model_list, current_model=current_model) + else: + try: + selected = input("Model name: ").strip() + except (KeyboardInterrupt, EOFError): + selected = None + + if selected: + if provider_id in {"opencode-zen", "opencode-go"}: + selected = normalize_opencode_model_id(provider_id, selected) + + _save_model_choice(selected) + + # Update config with provider, base URL, and provider-specific API mode + cfg = load_config() + model = cfg.get("model") + if not isinstance(model, dict): + model = {"default": model} if model else {} + cfg["model"] = model + model["provider"] = provider_id + model["base_url"] = effective_base + if provider_id in {"opencode-zen", "opencode-go"}: + model["api_mode"] = opencode_model_api_mode(provider_id, selected) + else: + model.pop("api_mode", None) + save_config(cfg) + deactivate_provider() + + print(f"Default model set to: {selected} (via {pconfig.name})") + else: + print("No change.")
+
+def _model_flow_anthropic(config, current_model=""):
+    """Interactive setup flow for the Anthropic provider.
+
+    Detects existing credentials (a key returned by ``get_anthropic_key()`` or
+    valid Claude Code credentials), lets the user keep them or reauthenticate
+    (OAuth login or a pasted API key), then prompts for a model and writes the
+    choice to the config.
+
+    Args:
+        config: Accepted for signature parity with the other ``_model_flow_*``
+            functions; this flow reloads the config itself and does not read it.
+        current_model: Model passed to the picker as the current selection.
+
+    Returns:
+        None. Returns early, without saving anything, when the user cancels.
+    """
+    from hermes_cli.main import _run_anthropic_oauth_flow
+    from hermes_cli.auth import (
+        _prompt_model_selection,
+        _save_model_choice,
+        deactivate_provider,
+    )
+    from hermes_cli.config import (
+        save_env_value,
+        load_config,
+        save_config,
+        save_anthropic_api_key,
+    )
+    from hermes_cli.models import _PROVIDER_MODELS
+
+    # Check ALL credential sources
+    from hermes_cli.auth import get_anthropic_key
+
+    existing_key = get_anthropic_key()
+    cc_available = False
+    # Pre-bind the helper. The import below lives inside a best-effort ``try``,
+    # so a failed import would otherwise leave this name unbound and the
+    # stale-OAuth check further down would raise UnboundLocalError whenever a
+    # key is configured.
+    _is_oauth_token = None
+    try:
+        from agent.anthropic_adapter import (
+            read_claude_code_credentials,
+            is_claude_code_token_valid,
+            _is_oauth_token,
+        )
+
+        cc_creds = read_claude_code_credentials()
+        if cc_creds and is_claude_code_token_valid(cc_creds):
+            cc_available = True
+    except Exception:
+        # Deliberately best-effort: any failure here simply means no usable
+        # Claude Code credentials were detected.
+        pass
+
+    # Stale-OAuth guard: if the only existing cred is an expired OAuth token
+    # (no valid cc_creds to fall back on), treat it as missing so the re-auth
+    # path is offered instead of silently accepting a broken token.
+    # When the classifier helper could not be imported the key cannot be
+    # inspected, so it is kept as-is rather than crashing the wizard.
+    existing_is_stale_oauth = False
+    if (
+        existing_key
+        and _is_oauth_token is not None
+        and _is_oauth_token(existing_key)
+        and not cc_available
+    ):
+        existing_is_stale_oauth = True
+
+    has_creds = (bool(existing_key) and not existing_is_stale_oauth) or cc_available
+    needs_auth = not has_creds
+
+    if has_creds:
+        # Show what we found
+        if existing_key:
+            from hermes_cli.env_loader import format_secret_source_suffix
+            from hermes_cli.auth import PROVIDER_REGISTRY
+
+            # Surface which env var supplied the key so users with
+            # Bitwarden see "(from Bitwarden)" — without this, a detected
+            # BSM key looks identical to a key in .env and users assume
+            # nothing is wired up.
+            source_suffix = ""
+            for var in PROVIDER_REGISTRY["anthropic"].api_key_env_vars:
+                if os.getenv(var, "").strip() == existing_key:
+                    source_suffix = format_secret_source_suffix(var)
+                    if source_suffix:
+                        break
+            print(
+                f" Anthropic credentials: {existing_key[:12]}... ✓{source_suffix}"
+            )
+        elif cc_available:
+            print(" Claude Code credentials: ✓ (auto-detected)")
+        print()
+        print(" 1. Use existing credentials")
+        print(" 2. Reauthenticate (new OAuth login)")
+        print(" 3. Cancel")
+        print()
+        try:
+            choice = input(" Choice [1/2/3]: ").strip()
+        except (KeyboardInterrupt, EOFError):
+            # An interrupted prompt falls back to keeping the existing creds.
+            choice = "1"
+
+        if choice == "2":
+            needs_auth = True
+        elif choice == "3":
+            return
+        # choice == "1" or default: use existing, proceed to model selection
+
+    if needs_auth:
+        # Show auth method choice
+        print()
+        print(" Choose authentication method:")
+        print()
+        print(" 1. Claude Pro/Max subscription (OAuth login)")
+        print(" 2. Anthropic API key (pay-per-token)")
+        print(" 3. Cancel")
+        print()
+        try:
+            choice = input(" Choice [1/2/3]: ").strip()
+        except (KeyboardInterrupt, EOFError):
+            print()
+            return
+
+        if choice == "1":
+            if not _run_anthropic_oauth_flow(save_env_value):
+                return
+
+        elif choice == "2":
+            print()
+            print(" Get an API key at: https://platform.claude.com/settings/keys")
+            print()
+            from hermes_cli.secret_prompt import masked_secret_prompt
+
+            try:
+                api_key = masked_secret_prompt(" API key (sk-ant-...): ").strip()
+            except (KeyboardInterrupt, EOFError):
+                print()
+                return
+            if not api_key:
+                print(" Cancelled.")
+                return
+            save_anthropic_api_key(api_key, save_fn=save_env_value)
+            print(" ✓ API key saved.")
+
+        else:
+            print(" No change.")
+            return
+        print()
+
+    # Model selection
+    model_list = _PROVIDER_MODELS.get("anthropic", [])
+    if model_list:
+        selected = _prompt_model_selection(model_list, current_model=current_model)
+    else:
+        try:
+            selected = input("Model name (e.g., claude-sonnet-4-20250514): ").strip()
+        except (KeyboardInterrupt, EOFError):
+            selected = None
+
+    if selected:
+        _save_model_choice(selected)
+
+        # Update config with provider — clear base_url since
+        # resolve_runtime_provider() always hardcodes Anthropic's URL.
+        # Leaving a stale base_url in config can contaminate other
+        # providers if the user switches without running 'hermes model'.
+        cfg = load_config()
+        model = cfg.get("model")
+        if not isinstance(model, dict):
+            model = {"default": model} if model else {}
+            cfg["model"] = model
+        model["provider"] = "anthropic"
+        model.pop("base_url", None)
+        save_config(cfg)
+        deactivate_provider()
+
+        print(f"Default model set to: {selected} (via Anthropic)")
+    else:
+        print("No change.")
diff --git a/tests/hermes_cli/test_setup_ollama_cloud_force_refresh.py b/tests/hermes_cli/test_setup_ollama_cloud_force_refresh.py index 60f6ea99341..68870bf700d 100644 --- a/tests/hermes_cli/test_setup_ollama_cloud_force_refresh.py +++ b/tests/hermes_cli/test_setup_ollama_cloud_force_refresh.py @@ -9,10 +9,13 @@ from __future__ import annotations def test_setup_ollama_cloud_passes_force_refresh(monkeypatch): """The provider-setup model-fetch for ollama-cloud must pass ``force_refresh=True``.""" - import hermes_cli.main as main_mod + # The ollama-cloud branch lives in ``_model_flow_api_key_provider``, which was + # extracted from main.py into hermes_cli/model_setup_flows.py (god-file + # decomposition Phase 2). Inspect the module the code now lives in. + import hermes_cli.model_setup_flows as flows_mod import inspect - src = inspect.getsource(main_mod) + src = inspect.getsource(flows_mod) # Locate the ollama-cloud branch in the provider setup flow. marker = 'provider_id == "ollama-cloud"'