hermes-agent/hermes_cli/model_setup_flows.py
teknium1 a77efada5f refactor(cli): extract 18 model-flow wizard functions into model_setup_flows (god-file Phase 2)
Lift the 18 _model_flow_* provider-setup wizard functions out of hermes_cli/main.py
into hermes_cli/model_setup_flows.py. Behavior-neutral; main.py 14050 -> 11479 LOC.

select_provider_and_model (the dispatcher) STAYS in main.py and re-imports the
flows via an explicit 'from hermes_cli.model_setup_flows import (...)' block, so
both its bare-name calls and existing test monkeypatches targeting
hermes_cli.main._model_flow_* keep resolving against main's namespace unchanged.

Imports: 3 neutral deps (argparse, os, subprocess) at the module top; the 14
main.py-internal helpers the flows call (_prompt_api_key, _save_custom_provider,
the reasoning-effort/stepfun/qwen helpers, _run_anthropic_oauth_flow, ...) are
lazy-imported per-flow (from hermes_cli.main import ...) so the new module never
imports main at module scope -> no import cycle.

Repointed one source-inspection change-detector (test_setup_ollama_cloud_force_refresh)
to read the module the ollama-cloud branch moved to.

Validation: 6563/6563 hermes_cli tests pass; live flow-dispatch probe confirms the
lazy main-internal imports resolve at runtime.
2026-06-08 09:42:44 -07:00

2648 lines
98 KiB
Python

"""Per-provider model-selection wizard flows for ``hermes setup`` / ``hermes model``.
Extracted from ``hermes_cli/main.py`` as part of the god-file decomposition
campaign (``~/.hermes/plans/god-file-decomposition.md``, Phase 2 — splitting
main.py handler/flow bodies out of the module). These 18 ``_model_flow_*``
functions are the interactive provider-setup branches dispatched by
``select_provider_and_model`` (which stays in main.py).
Behavior-neutral: each function is lifted verbatim. ``select_provider_and_model``
in main.py re-imports them (``from hermes_cli.model_setup_flows import *``-style
explicit import) so existing call sites — and test monkeypatches that target
``hermes_cli.main._model_flow_*`` — keep resolving against main.py's namespace.
main.py-internal helpers the flows call (``_prompt_api_key``, ``_save_custom_provider``,
the reasoning-effort/stepfun/qwen helpers, ``_run_anthropic_oauth_flow``, …) are
imported lazily inside the flows (``from hermes_cli.main import ...`` resolves at
call time, when main.py is fully loaded) so this module never imports
``hermes_cli.main`` at import time -> no import cycle.
"""
from __future__ import annotations
import argparse
import os
import subprocess
def _model_flow_openrouter(config, current_model=""):
"""OpenRouter provider: ensure API key, then pick model."""
from hermes_cli.main import _prompt_api_key
from hermes_constants import OPENROUTER_BASE_URL
from hermes_cli.auth import (
ProviderConfig,
_prompt_model_selection,
_save_model_choice,
deactivate_provider,
)
from hermes_cli.config import get_env_value
# Route through _prompt_api_key so users can replace a stale/broken key
# in-flow (K/R/C) instead of having to edit ~/.hermes/.env by hand. The
# previous bypass-when-key-exists branch left no way to recover from a
# bad paste short of re-running `hermes setup` from scratch. OpenRouter
# isn't in PROVIDER_REGISTRY so we synthesize a minimal pconfig.
pconfig = ProviderConfig(
id="openrouter",
name="OpenRouter",
auth_type="api_key",
api_key_env_vars=("OPENROUTER_API_KEY",),
)
existing_key = get_env_value("OPENROUTER_API_KEY") or ""
if not existing_key:
print("Get one at: https://openrouter.ai/keys")
print()
_resolved, abort = _prompt_api_key(pconfig, existing_key, provider_id="openrouter")
if abort:
return
from hermes_cli.models import model_ids, get_pricing_for_provider
openrouter_models = model_ids(force_refresh=True)
# Fetch live pricing (non-blocking — returns empty dict on failure)
pricing = get_pricing_for_provider("openrouter", force_refresh=True)
selected = _prompt_model_selection(
openrouter_models, current_model=current_model, pricing=pricing
)
if selected:
_save_model_choice(selected)
# Update config provider and deactivate any OAuth provider
from hermes_cli.config import load_config, save_config
cfg = load_config()
model = cfg.get("model")
if not isinstance(model, dict):
model = {"default": model} if model else {}
cfg["model"] = model
model["provider"] = "openrouter"
model["base_url"] = OPENROUTER_BASE_URL
model["api_mode"] = "chat_completions"
save_config(cfg)
deactivate_provider()
print(f"Default model set to: {selected} (via OpenRouter)")
else:
print("No change.")
def _model_flow_nous(config, current_model="", args=None):
"""Nous Portal provider: ensure logged in, then pick model."""
from hermes_cli.auth import (
get_provider_auth_state,
_prompt_model_selection,
_save_model_choice,
_update_config_for_provider,
resolve_nous_runtime_credentials,
AuthError,
format_auth_error,
_login_nous,
PROVIDER_REGISTRY,
)
from hermes_cli.config import (
get_env_value,
load_config,
save_config,
save_env_value,
)
from hermes_cli.nous_subscription import prompt_enable_tool_gateway
state = get_provider_auth_state("nous")
if not state or not state.get("access_token"):
print("Not logged into Nous Portal. Starting login...")
print()
try:
mock_args = argparse.Namespace(
portal_url=getattr(args, "portal_url", None),
inference_url=getattr(args, "inference_url", None),
client_id=getattr(args, "client_id", None),
scope=getattr(args, "scope", None),
no_browser=bool(getattr(args, "no_browser", False)),
timeout=getattr(args, "timeout", None) or 15.0,
ca_bundle=getattr(args, "ca_bundle", None),
insecure=bool(getattr(args, "insecure", False)),
)
_login_nous(mock_args, PROVIDER_REGISTRY["nous"])
# Offer Tool Gateway enablement for paid subscribers
try:
_refreshed = load_config() or {}
prompt_enable_tool_gateway(_refreshed)
except Exception:
pass
except SystemExit:
print("Login cancelled or failed.")
return
except Exception as exc:
print(f"Login failed: {exc}")
return
# login_nous already handles model selection + config update
return
# Already logged in — use curated model list (same as OpenRouter defaults).
# The live /models endpoint returns hundreds of models; the curated list
# shows only agentic models users recognize from OpenRouter.
from hermes_cli.models import (
get_curated_nous_model_ids,
get_pricing_for_provider,
check_nous_free_tier,
partition_nous_models_by_tier,
union_with_portal_free_recommendations,
union_with_portal_paid_recommendations,
)
model_ids = get_curated_nous_model_ids()
if not model_ids:
print("No curated models available for Nous Portal.")
return
# Verify credentials are still valid (catches expired sessions early)
try:
creds = resolve_nous_runtime_credentials()
except Exception as exc:
relogin = isinstance(exc, AuthError) and exc.relogin_required
msg = format_auth_error(exc) if isinstance(exc, AuthError) else str(exc)
if relogin:
print(f"Session expired: {msg}")
print("Re-authenticating with Nous Portal...\n")
try:
mock_args = argparse.Namespace(
portal_url=None,
inference_url=None,
client_id=None,
scope=None,
no_browser=False,
timeout=15.0,
ca_bundle=None,
insecure=False,
)
_login_nous(mock_args, PROVIDER_REGISTRY["nous"])
except Exception as login_exc:
print(f"Re-login failed: {login_exc}")
return
print(f"Could not verify credentials: {msg}")
return
# Fetch live pricing (non-blocking — returns empty dict on failure)
pricing = get_pricing_for_provider("nous")
# Force fresh account data for model selection so recent credit purchases
# are reflected immediately.
free_tier = check_nous_free_tier(force_fresh=True)
if not free_tier:
try:
refreshed_creds = resolve_nous_runtime_credentials(
force_refresh=True,
)
if refreshed_creds:
creds = refreshed_creds
except Exception:
# Runtime inference has its own paid-entitlement recovery path; do
# not block model selection if this opportunistic refresh fails.
pass
# Resolve portal URL early — needed both for upgrade links and for the
# freeRecommendedModels endpoint below.
_nous_portal_url = ""
try:
_nous_state = get_provider_auth_state("nous")
if _nous_state:
_nous_portal_url = _nous_state.get("portal_base_url", "")
except Exception:
pass
# For free users: partition models into selectable/unavailable based on
# whether they are free per the Portal-reported pricing. First augment
# with the Portal's freeRecommendedModels list so newly-launched free
# models show up even if this CLI build's hardcoded curated list and
# docs-hosted manifest haven't caught up yet.
#
# For paid users: mirror the same idea with paidRecommendedModels so
# newly-launched paid models surface in the picker too — independent
# of CLI release cadence.
unavailable_models: list[str] = []
unavailable_message = ""
if free_tier:
try:
from hermes_cli.nous_account import (
format_nous_portal_entitlement_message,
get_nous_portal_account_info,
)
_account_info = get_nous_portal_account_info(force_fresh=True)
unavailable_message = (
format_nous_portal_entitlement_message(
_account_info,
capability="paid Nous models",
)
or ""
)
except Exception:
unavailable_message = ""
model_ids, pricing = union_with_portal_free_recommendations(
model_ids, pricing, _nous_portal_url,
)
model_ids, unavailable_models = partition_nous_models_by_tier(
model_ids, pricing, free_tier=True
)
else:
model_ids, pricing = union_with_portal_paid_recommendations(
model_ids, pricing, _nous_portal_url,
)
if not model_ids and not unavailable_models:
print("No models available for Nous Portal after filtering.")
return
if free_tier and not model_ids:
print("No free models currently available.")
if unavailable_models:
from hermes_cli.auth import DEFAULT_NOUS_PORTAL_URL
_url = (_nous_portal_url or DEFAULT_NOUS_PORTAL_URL).rstrip("/")
print(unavailable_message or f"Upgrade at {_url} to access paid models.")
return
print(
f'Showing {len(model_ids)} curated models — use "Enter custom model name" for others.'
)
selected = _prompt_model_selection(
model_ids,
current_model=current_model,
pricing=pricing,
unavailable_models=unavailable_models,
portal_url=_nous_portal_url,
unavailable_message=unavailable_message,
)
if selected:
_save_model_choice(selected)
# Reactivate Nous as the provider and update config
inference_url = creds.get("base_url", "")
_update_config_for_provider("nous", inference_url)
current_model_cfg = config.get("model")
if isinstance(current_model_cfg, dict):
model_cfg = dict(current_model_cfg)
elif isinstance(current_model_cfg, str) and current_model_cfg.strip():
model_cfg = {"default": current_model_cfg.strip()}
else:
model_cfg = {}
model_cfg["provider"] = "nous"
model_cfg["default"] = selected
if inference_url and inference_url.strip():
model_cfg["base_url"] = inference_url.rstrip("/")
else:
model_cfg.pop("base_url", None)
config["model"] = model_cfg
# Clear any custom endpoint that might conflict
if get_env_value("OPENAI_BASE_URL"):
save_env_value("OPENAI_BASE_URL", "")
save_env_value("OPENAI_API_KEY", "")
save_config(config)
print(f"Default model set to: {selected} (via Nous Portal)")
# Offer Tool Gateway enablement for paid subscribers
prompt_enable_tool_gateway(config)
else:
print("No change.")
def _model_flow_openai_codex(config, current_model=""):
"""OpenAI Codex provider: ensure logged in, then pick model."""
from hermes_cli.auth import (
get_codex_auth_status,
_prompt_model_selection,
_save_model_choice,
_update_config_for_provider,
_login_openai_codex,
PROVIDER_REGISTRY,
DEFAULT_CODEX_BASE_URL,
)
from hermes_cli.codex_models import get_codex_model_ids
status = get_codex_auth_status()
if status.get("logged_in"):
print(" OpenAI Codex credentials: ✓")
print()
print(" 1. Use existing credentials")
print(" 2. Reauthenticate (new OAuth login)")
print(" 3. Cancel")
print()
try:
choice = input(" Choice [1/2/3]: ").strip()
except (KeyboardInterrupt, EOFError):
choice = "1"
if choice == "2":
print("Starting a fresh OpenAI Codex login...")
print()
try:
mock_args = argparse.Namespace()
_login_openai_codex(
mock_args,
PROVIDER_REGISTRY["openai-codex"],
force_new_login=True,
)
except SystemExit:
print("Login cancelled or failed.")
return
except Exception as exc:
print(f"Login failed: {exc}")
return
status = get_codex_auth_status()
if not status.get("logged_in"):
print("Login failed.")
return
elif choice == "3":
return
else:
print("Not logged into OpenAI Codex. Starting login...")
print()
try:
mock_args = argparse.Namespace()
_login_openai_codex(mock_args, PROVIDER_REGISTRY["openai-codex"])
except SystemExit:
print("Login cancelled or failed.")
return
except Exception as exc:
print(f"Login failed: {exc}")
return
_codex_token = None
# Prefer credential pool (where `hermes auth` stores device_code tokens),
# fall back to legacy provider state.
try:
_codex_status = get_codex_auth_status()
if _codex_status.get("logged_in"):
_codex_token = _codex_status.get("api_key")
except Exception:
pass
if not _codex_token:
try:
from hermes_cli.auth import resolve_codex_runtime_credentials
_codex_creds = resolve_codex_runtime_credentials()
_codex_token = _codex_creds.get("api_key")
except Exception:
pass
codex_models = get_codex_model_ids(access_token=_codex_token)
selected = _prompt_model_selection(codex_models, current_model=current_model)
if selected:
_save_model_choice(selected)
_update_config_for_provider("openai-codex", DEFAULT_CODEX_BASE_URL)
print(f"Default model set to: {selected} (via OpenAI Codex)")
else:
print("No change.")
def _model_flow_xai_oauth(_config, current_model="", *, args=None):
"""xAI Grok OAuth (SuperGrok / Premium+) provider: ensure logged in, then pick model."""
from hermes_cli.auth import (
get_xai_oauth_auth_status,
_prompt_model_selection,
_save_model_choice,
_update_config_for_provider,
resolve_xai_oauth_runtime_credentials,
_login_xai_oauth,
DEFAULT_XAI_OAUTH_BASE_URL,
PROVIDER_REGISTRY,
)
from hermes_cli.models import _PROVIDER_MODELS
status = get_xai_oauth_auth_status()
if status.get("logged_in"):
print(" xAI Grok OAuth (SuperGrok / Premium+) credentials: ✓")
print()
print(" 1. Use existing credentials")
print(" 2. Reauthenticate (new OAuth login)")
print(" 3. Cancel")
print()
try:
choice = input(" Choice [1/2/3]: ").strip()
except (KeyboardInterrupt, EOFError):
choice = "1"
if choice == "2":
print("Starting a fresh xAI OAuth login...")
print()
try:
# Forward CLI flags from ``hermes model --manual-paste``
# / ``--no-browser`` / ``--timeout`` into the loopback
# login. Without this, browser-only remotes (#26923)
# can't reach the manual-paste path via ``hermes model``.
mock_args = argparse.Namespace(
manual_paste=bool(getattr(args, "manual_paste", False)),
no_browser=bool(getattr(args, "no_browser", False)),
timeout=getattr(args, "timeout", None),
)
_login_xai_oauth(
mock_args,
PROVIDER_REGISTRY["xai-oauth"],
force_new_login=True,
)
except SystemExit:
print("Login cancelled or failed.")
return
except Exception as exc:
print(f"Login failed: {exc}")
return
elif choice == "3":
return
else:
print("Not logged into xAI Grok OAuth (SuperGrok / Premium+). Starting login...")
print()
try:
mock_args = argparse.Namespace(
manual_paste=bool(getattr(args, "manual_paste", False)),
no_browser=bool(getattr(args, "no_browser", False)),
timeout=getattr(args, "timeout", None),
)
_login_xai_oauth(mock_args, PROVIDER_REGISTRY["xai-oauth"])
except SystemExit:
print("Login cancelled or failed.")
return
except Exception as exc:
print(f"Login failed: {exc}")
return
# Resolve a usable base URL. ``resolve_xai_oauth_runtime_credentials``
# only reads from the auth.json singleton — but credentials may legitimately
# live only in the pool (e.g. after ``hermes auth add xai-oauth``). Fall
# back to the default base URL in that case so the model picker still
# completes successfully instead of bailing out with
# ``Could not resolve xAI OAuth credentials``.
base_url = DEFAULT_XAI_OAUTH_BASE_URL
try:
creds = resolve_xai_oauth_runtime_credentials()
base_url = (creds.get("base_url") or "").strip().rstrip("/") or base_url
except Exception:
pass
models = list(_PROVIDER_MODELS.get("xai-oauth") or _PROVIDER_MODELS.get("xai") or [])
selected = _prompt_model_selection(models, current_model=current_model or (models[0] if models else "grok-4.3"))
if selected:
_save_model_choice(selected)
_update_config_for_provider("xai-oauth", base_url)
print(f"Default model set to: {selected} (via xAI Grok OAuth — SuperGrok / Premium+)")
else:
print("No change.")
def _model_flow_qwen_oauth(_config, current_model=""):
"""Qwen OAuth provider: reuse local Qwen CLI login, then pick model."""
from hermes_cli.main import _DEFAULT_QWEN_PORTAL_MODELS
from hermes_cli.auth import (
get_qwen_auth_status,
resolve_qwen_runtime_credentials,
_prompt_model_selection,
_save_model_choice,
_update_config_for_provider,
DEFAULT_QWEN_BASE_URL,
)
from hermes_cli.models import fetch_api_models
status = get_qwen_auth_status()
if not status.get("logged_in"):
print("Not logged into Qwen CLI OAuth.")
print("Run: qwen auth qwen-oauth")
auth_file = status.get("auth_file")
if auth_file:
print(f"Expected credentials file: {auth_file}")
if status.get("error"):
print(f"Error: {status.get('error')}")
return
# Try live model discovery, fall back to curated list.
models = None
try:
creds = resolve_qwen_runtime_credentials(refresh_if_expiring=True)
models = fetch_api_models(creds["api_key"], creds["base_url"])
except Exception:
pass
if not models:
models = list(_DEFAULT_QWEN_PORTAL_MODELS)
default = current_model or (models[0] if models else "qwen3-coder-plus")
selected = _prompt_model_selection(models, current_model=default)
if selected:
_save_model_choice(selected)
_update_config_for_provider("qwen-oauth", DEFAULT_QWEN_BASE_URL)
print(f"Default model set to: {selected} (via Qwen OAuth)")
else:
print("No change.")
def _model_flow_minimax_oauth(config, current_model="", args=None):
"""MiniMax OAuth provider: ensure logged in, then pick model."""
from hermes_cli.auth import (
get_provider_auth_state,
_prompt_model_selection,
_save_model_choice,
_update_config_for_provider,
resolve_minimax_oauth_runtime_credentials,
AuthError,
format_auth_error,
_login_minimax_oauth,
PROVIDER_REGISTRY,
)
state = get_provider_auth_state("minimax-oauth")
if not state or not state.get("access_token"):
print("Not logged into MiniMax. Starting OAuth login...")
print()
try:
mock_args = argparse.Namespace(
region=getattr(args, "region", None) or "global",
no_browser=bool(getattr(args, "no_browser", False)),
timeout=getattr(args, "timeout", None) or 15.0,
)
_login_minimax_oauth(mock_args, PROVIDER_REGISTRY["minimax-oauth"])
except SystemExit:
print("Login cancelled or failed.")
return
except Exception as exc:
print(f"Login failed: {exc}")
return
try:
creds = resolve_minimax_oauth_runtime_credentials()
except AuthError as exc:
print(format_auth_error(exc))
return
from hermes_cli.models import _PROVIDER_MODELS
model_ids = _PROVIDER_MODELS.get("minimax-oauth", [])
selected = _prompt_model_selection(model_ids, current_model)
if not selected:
return
_save_model_choice(selected)
_update_config_for_provider("minimax-oauth", creds["base_url"])
print(f"\u2713 Using MiniMax model: {selected}")
def _model_flow_google_gemini_cli(_config, current_model=""):
"""Google Gemini OAuth (PKCE) via Cloud Code Assist — supports free AND paid tiers.
Flow:
1. Show upfront warning about Google's ToS stance (per opencode-gemini-auth).
2. If creds missing, run PKCE browser OAuth via agent.google_oauth.
3. Resolve project context (env -> config -> auto-discover -> free tier).
4. Prompt user to pick a model.
5. Save to ~/.hermes/config.yaml.
"""
from hermes_cli.auth import (
DEFAULT_GEMINI_CLOUDCODE_BASE_URL,
get_gemini_oauth_auth_status,
resolve_gemini_oauth_runtime_credentials,
_prompt_model_selection,
_save_model_choice,
_update_config_for_provider,
)
from hermes_cli.models import _PROVIDER_MODELS
print()
print("⚠ Google considers using the Gemini CLI OAuth client with third-party")
print(" software a policy violation. Some users have reported account")
print(" restrictions. You can use your own API key via 'gemini' provider")
print(" for the lowest-risk experience.")
print()
try:
proceed = input("Continue with OAuth login? [y/N]: ").strip().lower()
except (EOFError, KeyboardInterrupt):
print("Cancelled.")
return
if proceed not in {"y", "yes"}:
print("Cancelled.")
return
status = get_gemini_oauth_auth_status()
if not status.get("logged_in"):
try:
from agent.google_oauth import resolve_project_id_from_env, start_oauth_flow
env_project = resolve_project_id_from_env()
start_oauth_flow(force_relogin=True, project_id=env_project)
except Exception as exc:
print(f"OAuth login failed: {exc}")
return
# Verify creds resolve + trigger project discovery
try:
creds = resolve_gemini_oauth_runtime_credentials(force_refresh=False)
project_id = creds.get("project_id", "")
if project_id:
print(f" Using GCP project: {project_id}")
else:
print(
" No GCP project configured — free tier will be auto-provisioned on first request."
)
except Exception as exc:
print(f"Failed to resolve Gemini credentials: {exc}")
return
models = list(_PROVIDER_MODELS.get("google-gemini-cli") or [])
default = current_model or (models[0] if models else "gemini-3-flash-preview")
selected = _prompt_model_selection(models, current_model=default)
if selected:
_save_model_choice(selected)
_update_config_for_provider(
"google-gemini-cli", DEFAULT_GEMINI_CLOUDCODE_BASE_URL
)
print(
f"Default model set to: {selected} (via Google Gemini OAuth / Code Assist)"
)
else:
print("No change.")
def _model_flow_custom(config):
"""Custom endpoint: collect URL, API key, and model name.
Automatically saves the endpoint to ``custom_providers`` in config.yaml
so it appears in the provider menu on subsequent runs.
"""
from hermes_cli.main import _auto_provider_name, _prompt_custom_api_mode_selection, _save_custom_provider
from hermes_cli.auth import _save_model_choice, deactivate_provider
from hermes_cli.config import get_env_value, load_config, save_config
from hermes_cli.secret_prompt import masked_secret_prompt
current_url = get_env_value("OPENAI_BASE_URL") or ""
current_key = get_env_value("OPENAI_API_KEY") or ""
print("Custom OpenAI-compatible endpoint configuration:")
if current_url:
print(f" Current URL: {current_url}")
if current_key:
print(f" Current key: {current_key[:8]}...")
print()
try:
base_url = input(
f"API base URL [{current_url or 'e.g. https://api.example.com/v1'}]: "
).strip()
api_key = masked_secret_prompt(
f"API key [{current_key[:8] + '...' if current_key else 'optional'}]: "
).strip()
except (KeyboardInterrupt, EOFError):
print("\nCancelled.")
return
if not base_url and not current_url:
print("No URL provided. Cancelled.")
return
# Validate URL format
effective_url = base_url or current_url
if not effective_url.startswith(("http://", "https://")):
print(f"Invalid URL: {effective_url} (must start with http:// or https://)")
return
effective_key = api_key or current_key
# Hint: most local model servers (Ollama, vLLM, llama.cpp) require /v1
# in the base URL for OpenAI-compatible chat completions. Prompt the
# user if the URL looks like a local server without /v1.
_url_lower = effective_url.rstrip("/").lower()
_looks_local = any(
h in _url_lower
for h in ("localhost", "127.0.0.1", "0.0.0.0", ":11434", ":8080", ":5000")
)
if _looks_local and not _url_lower.endswith("/v1"):
print()
print(f" Hint: Did you mean to add /v1 at the end?")
print(f" Most local model servers (Ollama, vLLM, llama.cpp) require it.")
print(f" e.g. {effective_url.rstrip('/')}/v1")
try:
_add_v1 = input(" Add /v1? [Y/n]: ").strip().lower()
except (KeyboardInterrupt, EOFError):
_add_v1 = "n"
if _add_v1 in {"", "y", "yes"}:
effective_url = effective_url.rstrip("/") + "/v1"
if base_url:
base_url = effective_url
print(f" Updated URL: {effective_url}")
print()
from hermes_cli.models import probe_api_models
probe = probe_api_models(effective_key, effective_url)
if probe.get("used_fallback") and probe.get("resolved_base_url"):
print(
f"Warning: endpoint verification worked at {probe['resolved_base_url']}/models, "
f"not the exact URL you entered. Saving the working base URL instead."
)
effective_url = probe["resolved_base_url"]
if base_url:
base_url = effective_url
elif probe.get("models") is not None:
print(
f"Verified endpoint via {probe.get('probed_url')} "
f"({len(probe.get('models') or [])} model(s) visible)"
)
else:
print(
f"Warning: could not verify this endpoint via {probe.get('probed_url')}. "
f"Hermes will still save it."
)
if probe.get("suggested_base_url"):
suggested = probe["suggested_base_url"]
if suggested.endswith("/v1"):
print(
f" If this server expects /v1 in the path, try base URL: {suggested}"
)
else:
print(f" If /v1 should not be in the base URL, try: {suggested}")
# Prompt for API compatibility mode explicitly so codex-compatible custom
# providers don't silently fall back to chat_completions.
current_model_cfg = config.get("model")
current_api_mode = ""
if isinstance(current_model_cfg, dict):
current_api_mode = str(current_model_cfg.get("api_mode") or "").strip()
api_mode = _prompt_custom_api_mode_selection(
effective_url,
current_api_mode=current_api_mode,
)
if api_mode:
print(f" API mode: {api_mode}")
else:
print(" API mode: auto-detect")
# Select model — use probe results when available, fall back to manual input
model_name = ""
detected_models = probe.get("models") or []
try:
if len(detected_models) == 1:
print(f" Detected model: {detected_models[0]}")
confirm = input(" Use this model? [Y/n]: ").strip().lower()
if confirm in {"", "y", "yes"}:
model_name = detected_models[0]
else:
model_name = input("Model name (e.g. gpt-4, llama-3-70b): ").strip()
elif len(detected_models) > 1:
print(" Available models:")
for i, m in enumerate(detected_models, 1):
print(f" {i}. {m}")
pick = input(
f" Select model [1-{len(detected_models)}] or type name: "
).strip()
if pick.isdigit() and 1 <= int(pick) <= len(detected_models):
model_name = detected_models[int(pick) - 1]
elif pick:
model_name = pick
else:
model_name = input("Model name (e.g. gpt-4, llama-3-70b): ").strip()
context_length_str = input(
"Context length in tokens [leave blank for auto-detect]: "
).strip()
# Prompt for a display name — shown in the provider menu on future runs
default_name = _auto_provider_name(effective_url)
display_name = input(f"Display name [{default_name}]: ").strip() or default_name
except (KeyboardInterrupt, EOFError):
print("\nCancelled.")
return
context_length = None
if context_length_str:
try:
context_length = int(
context_length_str.replace(",", "")
.replace("k", "000")
.replace("K", "000")
)
if context_length <= 0:
context_length = None
except ValueError:
print(f"Invalid context length: {context_length_str} — will auto-detect.")
context_length = None
if model_name:
_save_model_choice(model_name)
# Update config and deactivate any OAuth provider
cfg = load_config()
model = cfg.get("model")
if not isinstance(model, dict):
model = {"default": model} if model else {}
cfg["model"] = model
model["provider"] = "custom"
model["base_url"] = effective_url
if effective_key:
model["api_key"] = effective_key
if api_mode:
model["api_mode"] = api_mode
else:
model.pop("api_mode", None)
save_config(cfg)
deactivate_provider()
# Sync the caller's config dict so the setup wizard's final
# save_config(config) preserves our model settings. Without
# this, the wizard overwrites model.provider/base_url with
# the stale values from its own config dict (#4172).
config["model"] = dict(model)
print(f"Default model set to: {model_name} (via {effective_url})")
else:
if base_url or api_key:
deactivate_provider()
# Even without a model name, persist the custom endpoint on the
# caller's config dict so the setup wizard doesn't lose it.
_caller_model = config.get("model")
if not isinstance(_caller_model, dict):
_caller_model = {"default": _caller_model} if _caller_model else {}
_caller_model["provider"] = "custom"
_caller_model["base_url"] = effective_url
if effective_key:
_caller_model["api_key"] = effective_key
if api_mode:
_caller_model["api_mode"] = api_mode
else:
_caller_model.pop("api_mode", None)
config["model"] = _caller_model
print("Endpoint saved. Use `/model` in chat or `hermes model` to set a model.")
# Auto-save to custom_providers so it appears in the menu next time
_save_custom_provider(
effective_url,
effective_key,
model_name or "",
context_length=context_length,
name=display_name,
api_mode=api_mode,
)
def _model_flow_azure_foundry(config, current_model=""):
"""Azure Foundry provider: configure endpoint, auth mode, API mode, and model.
Azure Foundry supports both OpenAI-style (``/v1/chat/completions``) and
Anthropic-style (``/v1/messages``) endpoints, and two authentication
modes:
* **API key** (default) — uses ``AZURE_FOUNDRY_API_KEY`` from .env.
* **Microsoft Entra ID** — keyless, RBAC-based auth via the
``azure-identity`` SDK (Managed Identity / Workload Identity / az
login / VS Code / azd / service principal env vars). Works on both
OpenAI-style and Anthropic-style endpoints — Microsoft RBAC is
per-resource and the same ``Azure AI User`` role grants
both. For OpenAI-style the OpenAI SDK's native callable
``api_key=`` contract is used; for Anthropic-style an
``httpx.Client`` with a request event hook (built by
:func:`agent.azure_identity_adapter.build_bearer_http_client`)
mints a fresh JWT per request because the Anthropic SDK does not
accept a callable ``auth_token`` natively.
The wizard auto-detects the transport and available models when
possible:
* URLs ending in ``/anthropic`` → Anthropic Messages API.
* Successful ``GET <base>/models`` probe → OpenAI-style + populates
a picker with the returned deployment / model IDs.
* Anthropic Messages probe fallback when ``/models`` fails.
* Manual entry when every probe fails (private endpoints, etc.).
Context lengths for the chosen model are resolved via the standard
:func:`agent.model_metadata.get_model_context_length` chain
(models.dev, provider metadata, hardcoded family fallbacks).
"""
from hermes_cli.auth import _save_model_choice, deactivate_provider # noqa: F401
from hermes_cli.config import (
get_env_value,
save_env_value,
load_config,
save_config,
)
from hermes_cli import azure_detect
# ── Load current Azure Foundry configuration ─────────────────────
model_cfg = config.get("model", {})
if isinstance(model_cfg, dict) and model_cfg.get("provider") == "azure-foundry":
current_base_url = str(model_cfg.get("base_url", "") or "")
current_api_mode = str(model_cfg.get("api_mode", "") or "")
current_auth_mode = str(model_cfg.get("auth_mode") or "api_key").strip().lower() or "api_key"
_cur_entra = model_cfg.get("entra") or {}
current_entra = _cur_entra if isinstance(_cur_entra, dict) else {}
else:
current_base_url = ""
current_api_mode = ""
current_auth_mode = "api_key"
current_entra = {}
current_api_key = get_env_value("AZURE_FOUNDRY_API_KEY") or ""
print()
print("Azure Foundry Configuration")
print("=" * 50)
print()
print("Azure Foundry can host models with either OpenAI-style or")
print("Anthropic-style API endpoints. Hermes will probe your")
print("endpoint to auto-detect the transport and the deployed")
print("models when possible.")
print()
if current_base_url:
print(f" Current endpoint: {current_base_url}")
if current_api_mode:
_lbl = (
"OpenAI-style"
if current_api_mode == "chat_completions"
else "Anthropic-style"
)
print(f" Current API mode: {_lbl}")
if current_auth_mode == "entra_id":
print(f" Current auth mode: Microsoft Entra ID (keyless)")
elif current_api_key:
print(f" Current auth mode: API key ({current_api_key[:8]}...)")
print()
# ── Step 1: endpoint URL ─────────────────────────────────────────
try:
_placeholder = (
current_base_url
or "e.g. https://<resource>.openai.azure.com/openai/v1 "
"or https://<resource>.services.ai.azure.com/anthropic"
)
base_url = input(
f"API endpoint URL [{_placeholder}]: "
).strip()
except (KeyboardInterrupt, EOFError):
print("\nCancelled.")
return
effective_url = (base_url or current_base_url).rstrip("/")
if not effective_url:
print("No endpoint URL provided. Cancelled.")
return
if not effective_url.startswith(("http://", "https://")):
print(f"Invalid URL: {effective_url} (must start with http:// or https://)")
return
# ── Step 2: authentication mode ──────────────────────────────────
print()
print("Authentication:")
print(" 1. API key (AZURE_FOUNDRY_API_KEY in .env)")
print(" 2. Microsoft Entra ID (managed identity / workload identity / az login)")
print(" Recommended by Microsoft. Works for both OpenAI-style and Anthropic-style endpoints.")
print(" Requires the 'Azure AI User' role on the Foundry resource.")
try:
_auth_default = "2" if current_auth_mode == "entra_id" else "1"
auth_choice = (
input(f"Authentication mode [1/2] ({_auth_default}): ").strip()
or _auth_default
)
except (KeyboardInterrupt, EOFError):
print("\nCancelled.")
return
use_entra = auth_choice == "2"
auth_mode_label = "entra_id" if use_entra else "api_key"
# ── Step 3: credentials (key OR Entra preflight) ─────────────────
effective_key: str = ""
entra_overrides: dict = {}
token_provider = None # callable when entra
entra_scope = ""
if use_entra:
try:
from agent.azure_identity_adapter import (
EntraIdentityConfig,
SCOPE_AI_AZURE_DEFAULT,
build_token_provider,
describe_active_credential,
has_azure_identity_installed,
)
except ImportError as exc:
print()
print(f"⚠ Could not import azure-identity adapter: {exc}")
print(" Falling back to API key auth.")
use_entra = False
auth_mode_label = "api_key"
if use_entra:
print()
if not has_azure_identity_installed():
print("◐ The 'azure-identity' package is not installed yet.")
print(
" Hermes will install it now (the preflight below "
"triggers the lazy-install). To skip lazy installs, "
"run: pip install azure-identity"
)
# Preserve only the optional scope override. Identity selection
# (tenant, user-assigned MI, workload identity, service principal)
# stays in Azure SDK env vars such as AZURE_CLIENT_ID.
_persisted_scope_override = str(current_entra.get("scope") or "").strip()
entra_scope = _persisted_scope_override or SCOPE_AI_AZURE_DEFAULT
entra_overrides = {}
if _persisted_scope_override:
entra_overrides["scope"] = _persisted_scope_override
print()
print("◐ Probing Microsoft Entra ID credential chain (up to 10s)...")
_config = EntraIdentityConfig(
scope=entra_scope,
)
info = describe_active_credential(config=_config, timeout_seconds=10.0)
if info.get("ok"):
env_sources = info.get("env_sources") or []
tag = ", ".join(env_sources) if env_sources else "default chain"
print(f"✓ Entra ID token acquired ({tag}, scope={entra_scope})")
else:
err = info.get("error") or "credential chain exhausted"
hint = info.get("hint") or (
"Run `az login`, attach a managed identity to this VM, or "
"set AZURE_TENANT_ID/AZURE_CLIENT_ID/AZURE_CLIENT_SECRET."
)
print(f"{err}")
print(f" Hint: {hint}")
try:
ans = input("Save Entra config anyway and validate later? [Y/n]: ").strip().lower()
except (KeyboardInterrupt, EOFError):
print("\nCancelled.")
return
if ans and ans not in ("y", "yes"):
print("Cancelled.")
return
# Build the token provider for the detection probe (best-effort —
# if the credential chain failed above, this will silently return
# None inside azure_detect and the probe falls back to manual).
try:
token_provider = build_token_provider(config=_config)
except Exception as exc:
print(f"⚠ Could not build token provider for probing: {exc}")
token_provider = None
else:
print()
from hermes_cli.secret_prompt import masked_secret_prompt
try:
api_key = masked_secret_prompt(
f"API key [{current_api_key[:8] + '...' if current_api_key else 'required'}]: "
).strip()
except (KeyboardInterrupt, EOFError):
print("\nCancelled.")
return
effective_key = api_key or current_api_key
if not effective_key:
print("No API key provided. Cancelled.")
return
# ── Step 4: auto-detect transport + models ───────────────────────
print()
print("◐ Probing endpoint to auto-detect transport and models...")
detection = azure_detect.detect(
effective_url,
api_key=effective_key,
token_provider=token_provider,
)
discovered_models: list[str] = list(detection.models)
api_mode: str = detection.api_mode or ""
if api_mode:
mode_label = (
"OpenAI-style" if api_mode == "chat_completions" else "Anthropic-style"
)
print(f"✓ Detected API transport: {mode_label}")
if detection.reason:
print(f" ({detection.reason})")
if discovered_models:
print(
f"✓ Found {len(discovered_models)} deployed model(s) on this endpoint"
)
else:
print(f"⚠ Auto-detection incomplete: {detection.reason}")
print()
print("Select the API format your Azure Foundry endpoint uses:")
print(" 1. OpenAI-style (POST /v1/chat/completions)")
print(" For: GPT models, Llama, Mistral, and most open models")
print(" 2. Anthropic-style (POST /v1/messages)")
print(" For: Claude models deployed via Anthropic API format")
try:
default_choice = "2" if current_api_mode == "anthropic_messages" else "1"
mode_choice = (
input(f"API format [1/2] ({default_choice}): ").strip()
or default_choice
)
except (KeyboardInterrupt, EOFError):
print("\nCancelled.")
return
api_mode = "anthropic_messages" if mode_choice == "2" else "chat_completions"
# ── Step 5: model name ───────────────────────────────────────────
print()
effective_model = ""
if discovered_models:
print("Available models on this endpoint:")
for i, mid in enumerate(discovered_models[:30], start=1):
print(f" {i:>2}. {mid}")
if len(discovered_models) > 30:
print(
f" ... and {len(discovered_models) - 30} more (type name manually if not shown)"
)
print()
try:
pick = input(
f"Pick by number, or type a deployment name [{current_model or discovered_models[0]}]: "
).strip()
except (KeyboardInterrupt, EOFError):
print("\nCancelled.")
return
if not pick:
effective_model = current_model or discovered_models[0]
elif pick.isdigit() and 1 <= int(pick) <= min(len(discovered_models), 30):
effective_model = discovered_models[int(pick) - 1]
else:
effective_model = pick
else:
try:
model_name = input(
f"Model / deployment name [{current_model or 'e.g. gpt-5.4, claude-sonnet-4-6'}]: "
).strip()
except (KeyboardInterrupt, EOFError):
print("\nCancelled.")
return
effective_model = model_name or current_model
if not effective_model:
print("No model name provided. Cancelled.")
return
# ── Step 6: context-length lookup ────────────────────────────────
ctx_len = azure_detect.lookup_context_length(
effective_model,
effective_url,
api_key=effective_key,
token_provider=token_provider,
)
# ── Step 7: persist ──────────────────────────────────────────────
if not use_entra:
save_env_value("AZURE_FOUNDRY_API_KEY", effective_key)
cfg = load_config()
model = cfg.get("model")
if not isinstance(model, dict):
model = {"default": model} if model else {}
cfg["model"] = model
model["provider"] = "azure-foundry"
model["base_url"] = effective_url
model["api_mode"] = api_mode
model["default"] = effective_model
model["auth_mode"] = auth_mode_label
if use_entra:
# Persist only the non-default Entra scope so config.yaml stays tidy.
# Azure identity selection stays in standard AZURE_* env vars.
clean_entra: dict = {}
for key in ("scope",):
val = entra_overrides.get(key)
if val:
clean_entra[key] = val
if clean_entra:
model["entra"] = clean_entra
elif "entra" in model:
del model["entra"]
else:
if "entra" in model:
del model["entra"]
if ctx_len:
model["context_length"] = ctx_len
save_config(cfg)
deactivate_provider()
config["model"] = dict(model)
# Clear any conflicting env vars so auxiliary clients don't poison
# themselves with a stale OpenAI base URL / key.
if get_env_value("OPENAI_BASE_URL"):
save_env_value("OPENAI_BASE_URL", "")
if get_env_value("OPENAI_API_KEY"):
save_env_value("OPENAI_API_KEY", "")
mode_label = "OpenAI-style" if api_mode == "chat_completions" else "Anthropic-style"
auth_label = (
"Microsoft Entra ID (keyless)" if use_entra else "API key"
)
print()
print("✓ Azure Foundry configured:")
print(f" Endpoint: {effective_url}")
print(f" API mode: {mode_label}")
print(f" Auth: {auth_label}")
print(f" Model: {effective_model}")
if ctx_len:
print(f" Context length: {ctx_len:,} tokens")
else:
print(" Context length: not auto-detected (will fall back at runtime)")
print()
def _model_flow_named_custom(config, provider_info):
"""Handle a named custom provider from config.yaml custom_providers list.
Always probes the endpoint's /models API to let the user pick a model.
If a model was previously saved, it is pre-selected in the menu.
Falls back to the saved model if probing fails.
"""
from hermes_cli.main import _custom_provider_api_key_config_value, _custom_provider_base_url_config_value, _save_custom_provider
from hermes_cli.auth import _save_model_choice, deactivate_provider
from hermes_cli.config import load_config, save_config
from hermes_cli.models import fetch_api_models
name = provider_info["name"]
base_url = provider_info["base_url"]
api_mode = provider_info.get("api_mode", "")
api_key = provider_info.get("api_key", "")
key_env = provider_info.get("key_env", "")
saved_model = provider_info.get("model", "")
provider_key = (provider_info.get("provider_key") or "").strip()
# Resolve key from env var if api_key not set directly
if not api_key and key_env:
api_key = os.environ.get(key_env, "")
config_api_key = _custom_provider_api_key_config_value(provider_info, api_key)
# Honor ``discover_models: false`` (default True) — when discovery is
# disabled, use the configured ``models:`` list verbatim and skip the
# live /models probe. This lets operators restrict the picker to the
# subset their plan actually serves instead of the endpoint's full
# catalog (#18726: Baidu Qianfan returns 100+ models for a 2-3 model
# plan). Same semantics as the slash-command picker (model_switch.py
# sections 3 & 4): default discovers, false keeps the explicit list.
discover = provider_info.get("discover_models", True)
if isinstance(discover, str):
discover = discover.lower() not in {"false", "no", "0"}
configured_models: list[str] = []
cfg_models = provider_info.get("models", {})
if isinstance(cfg_models, dict):
configured_models = [str(m) for m in cfg_models if str(m).strip()]
elif isinstance(cfg_models, list):
configured_models = [
str(m) for m in cfg_models if isinstance(m, str) and m.strip()
]
print(f" Provider: {name}")
print(f" URL: {base_url}")
if saved_model:
print(f" Current: {saved_model}")
print()
if not discover and configured_models:
# Discovery disabled with an explicit list — use it verbatim, no probe.
print(f"Using configured models (discover_models: false): {len(configured_models)}")
models = configured_models
else:
print("Fetching available models...")
fetch_kwargs = {"timeout": 8.0}
if api_mode:
fetch_kwargs["api_mode"] = api_mode
models = fetch_api_models(api_key, base_url, **fetch_kwargs)
# If the probe came back empty but the operator configured an explicit
# list, fall back to it rather than forcing manual entry.
if not models and configured_models:
models = configured_models
if models:
default_idx = 0
if saved_model and saved_model in models:
default_idx = models.index(saved_model)
print(f"Found {len(models)} model(s):\n")
try:
from hermes_cli.curses_ui import curses_radiolist
menu_items = [
f"{m} (current)" if m == saved_model else m for m in models
] + ["Cancel"]
idx = curses_radiolist(
f"Select model from {name}:",
menu_items,
selected=default_idx,
cancel_returns=-1,
searchable=True,
)
print()
if idx < 0 or idx >= len(models):
print("Cancelled.")
return
model_name = models[idx]
except (ImportError, NotImplementedError, OSError, subprocess.SubprocessError):
for i, m in enumerate(models, 1):
suffix = " (current)" if m == saved_model else ""
print(f" {i}. {m}{suffix}")
print(f" {len(models) + 1}. Cancel")
print()
try:
val = input(f"Choice [1-{len(models) + 1}]: ").strip()
if not val:
print("Cancelled.")
return
idx = int(val) - 1
if idx < 0 or idx >= len(models):
print("Cancelled.")
return
model_name = models[idx]
except (ValueError, KeyboardInterrupt, EOFError):
print("\nCancelled.")
return
elif saved_model:
print("Could not fetch models from endpoint.")
try:
model_name = input(f"Model name [{saved_model}]: ").strip() or saved_model
except (KeyboardInterrupt, EOFError):
print("\nCancelled.")
return
else:
print("Could not fetch models from endpoint. Enter model name manually.")
try:
model_name = input("Model name: ").strip()
except (KeyboardInterrupt, EOFError):
print("\nCancelled.")
return
if not model_name:
print("No model specified. Cancelled.")
return
# Activate and save the model to the custom_providers entry
_save_model_choice(model_name)
cfg = load_config()
model = cfg.get("model")
if not isinstance(model, dict):
model = {"default": model} if model else {}
cfg["model"] = model
if provider_key:
model["provider"] = provider_key
model.pop("base_url", None)
model.pop("api_key", None)
else:
model["provider"] = "custom"
model["base_url"] = _custom_provider_base_url_config_value(
provider_info, base_url
)
if config_api_key:
model["api_key"] = config_api_key
# Apply api_mode from custom_providers entry, or clear stale value
custom_api_mode = provider_info.get("api_mode", "")
if custom_api_mode:
model["api_mode"] = custom_api_mode
else:
model.pop("api_mode", None) # let runtime auto-detect from URL
save_config(cfg)
deactivate_provider()
# Persist the selected model back to whichever schema owns this endpoint.
if provider_key:
cfg = load_config()
providers_cfg = cfg.get("providers")
if isinstance(providers_cfg, dict):
provider_entry = providers_cfg.get(provider_key)
if isinstance(provider_entry, dict):
provider_entry["default_model"] = model_name
# Only persist an inline api_key when the user originally had
# one (either a literal secret or a ``${VAR}`` template). When
# the entry relies on ``key_env``, do not synthesize a
# ``${key_env}`` api_key — the runtime already resolves the
# key from ``key_env`` directly, and writing the resolved
# secret (or even a synthesized template) would silently
# downgrade credential hygiene on entries that intentionally
# keep plaintext out of ``config.yaml``. See issue #15803.
original_api_key_ref = str(
provider_info.get("api_key_ref", "") or ""
).strip()
original_api_key = str(provider_info.get("api_key", "") or "").strip()
had_inline_api_key = bool(original_api_key_ref or original_api_key)
if (
had_inline_api_key
and config_api_key
and not str(provider_entry.get("api_key", "") or "").strip()
):
provider_entry["api_key"] = config_api_key
if key_env and not str(provider_entry.get("key_env", "") or "").strip():
provider_entry["key_env"] = key_env
cfg["providers"] = providers_cfg
save_config(cfg)
else:
# Save model name to the custom_providers entry for next time
_save_custom_provider(base_url, config_api_key, model_name, api_mode=api_mode)
print(f"\n✅ Model set to: {model_name}")
print(f" Provider: {name} ({base_url})")
def _model_flow_copilot(config, current_model=""):
"""GitHub Copilot flow using env vars, gh CLI, or OAuth device code."""
from hermes_cli.main import _current_reasoning_effort, _prompt_reasoning_effort_selection, _set_reasoning_effort
from hermes_cli.auth import (
PROVIDER_REGISTRY,
_prompt_model_selection,
_save_model_choice,
deactivate_provider,
resolve_api_key_provider_credentials,
)
from hermes_cli.config import save_env_value, load_config, save_config
from hermes_cli.models import (
_PROVIDER_MODELS,
fetch_api_models,
fetch_github_model_catalog,
github_model_reasoning_efforts,
copilot_model_api_mode,
normalize_copilot_model_id,
)
provider_id = "copilot"
pconfig = PROVIDER_REGISTRY[provider_id]
creds = resolve_api_key_provider_credentials(provider_id)
api_key = creds.get("api_key", "")
source = creds.get("source", "")
if not api_key:
print("No GitHub token configured for GitHub Copilot.")
print()
print(" Supported token types:")
print(
" → OAuth token (gho_*) via `copilot login` or device code flow"
)
print(" → Fine-grained PAT (github_pat_*) with Copilot Requests permission")
print(" → GitHub App token (ghu_*) via environment variable")
print(" ✗ Classic PAT (ghp_*) NOT supported by Copilot API")
print()
print(" Options:")
print(" 1. Login with GitHub (OAuth device code flow)")
print(" 2. Enter a token manually")
print(" 3. Cancel")
print()
try:
choice = input(" Choice [1-3]: ").strip()
except (KeyboardInterrupt, EOFError):
print()
return
if choice == "1":
try:
from hermes_cli.copilot_auth import copilot_device_code_login
token = copilot_device_code_login()
if token:
save_env_value("COPILOT_GITHUB_TOKEN", token)
print(" Copilot token saved.")
print()
else:
print(" Login cancelled or failed.")
return
except Exception as exc:
print(f" Login failed: {exc}")
return
elif choice == "2":
from hermes_cli.secret_prompt import masked_secret_prompt
try:
new_key = masked_secret_prompt(" Token (COPILOT_GITHUB_TOKEN): ").strip()
except (KeyboardInterrupt, EOFError):
print()
return
if not new_key:
print(" Cancelled.")
return
# Validate token type
try:
from hermes_cli.copilot_auth import validate_copilot_token
valid, msg = validate_copilot_token(new_key)
if not valid:
print(f"{msg}")
return
except ImportError:
pass
save_env_value("COPILOT_GITHUB_TOKEN", new_key)
print(" Token saved.")
print()
else:
print(" Cancelled.")
return
creds = resolve_api_key_provider_credentials(provider_id)
api_key = creds.get("api_key", "")
source = creds.get("source", "")
else:
if source in {"GITHUB_TOKEN", "GH_TOKEN"}:
from hermes_cli.env_loader import format_secret_source_suffix
bw_suffix = format_secret_source_suffix(source)
print(f" GitHub token: {api_key[:8]}... ✓ ({source}{bw_suffix})")
elif source == "gh auth token":
print(" GitHub token: ✓ (from `gh auth token`)")
else:
print(" GitHub token: ✓")
print()
effective_base = pconfig.inference_base_url
catalog = fetch_github_model_catalog(api_key)
live_models = (
[item.get("id", "") for item in catalog if item.get("id")]
if catalog
else fetch_api_models(api_key, effective_base)
)
normalized_current_model = (
normalize_copilot_model_id(
current_model,
catalog=catalog,
api_key=api_key,
)
or current_model
)
if live_models:
model_list = [model_id for model_id in live_models if model_id]
print(f" Found {len(model_list)} model(s) from GitHub Copilot")
else:
model_list = _PROVIDER_MODELS.get(provider_id, [])
if model_list:
print(
" ⚠ Could not auto-detect models from GitHub Copilot — showing defaults."
)
print(' Use "Enter custom model name" if you do not see your model.')
if model_list:
selected = _prompt_model_selection(
model_list, current_model=normalized_current_model
)
else:
try:
selected = input("Model name: ").strip()
except (KeyboardInterrupt, EOFError):
selected = None
if selected:
selected = (
normalize_copilot_model_id(
selected,
catalog=catalog,
api_key=api_key,
)
or selected
)
initial_cfg = load_config()
current_effort = _current_reasoning_effort(initial_cfg)
reasoning_efforts = github_model_reasoning_efforts(
selected,
catalog=catalog,
api_key=api_key,
)
selected_effort = None
if reasoning_efforts:
print(f" {selected} supports reasoning controls.")
selected_effort = _prompt_reasoning_effort_selection(
reasoning_efforts, current_effort=current_effort
)
_save_model_choice(selected)
cfg = load_config()
model = cfg.get("model")
if not isinstance(model, dict):
model = {"default": model} if model else {}
cfg["model"] = model
model["provider"] = provider_id
model["base_url"] = effective_base
model["api_mode"] = copilot_model_api_mode(
selected,
catalog=catalog,
api_key=api_key,
)
if selected_effort is not None:
_set_reasoning_effort(cfg, selected_effort)
save_config(cfg)
deactivate_provider()
print(f"Default model set to: {selected} (via {pconfig.name})")
if reasoning_efforts:
if selected_effort == "none":
print("Reasoning disabled for this model.")
elif selected_effort:
print(f"Reasoning effort set to: {selected_effort}")
else:
print("No change.")
def _model_flow_copilot_acp(config, current_model=""):
"""GitHub Copilot ACP flow using the local Copilot CLI."""
from hermes_cli.auth import (
PROVIDER_REGISTRY,
_prompt_model_selection,
_save_model_choice,
deactivate_provider,
get_external_process_provider_status,
resolve_api_key_provider_credentials,
resolve_external_process_provider_credentials,
)
from hermes_cli.models import (
_PROVIDER_MODELS,
fetch_github_model_catalog,
normalize_copilot_model_id,
)
from hermes_cli.config import load_config, save_config
del config
provider_id = "copilot-acp"
pconfig = PROVIDER_REGISTRY[provider_id]
status = get_external_process_provider_status(provider_id)
resolved_command = (
status.get("resolved_command") or status.get("command") or "copilot"
)
effective_base = status.get("base_url") or pconfig.inference_base_url
print(" GitHub Copilot ACP delegates Hermes turns to `copilot --acp`.")
print(" Hermes currently starts its own ACP subprocess for each request.")
print(" Hermes uses your selected model as a hint for the Copilot ACP session.")
print(f" Command: {resolved_command}")
print(f" Backend marker: {effective_base}")
print()
try:
creds = resolve_external_process_provider_credentials(provider_id)
except Exception as exc:
print(f"{exc}")
print(
" Set HERMES_COPILOT_ACP_COMMAND or COPILOT_CLI_PATH if Copilot CLI is installed elsewhere."
)
return
effective_base = creds.get("base_url") or effective_base
catalog_api_key = ""
try:
catalog_creds = resolve_api_key_provider_credentials("copilot")
catalog_api_key = catalog_creds.get("api_key", "")
except Exception:
pass
catalog = fetch_github_model_catalog(catalog_api_key)
normalized_current_model = (
normalize_copilot_model_id(
current_model,
catalog=catalog,
api_key=catalog_api_key,
)
or current_model
)
if catalog:
model_list = [item.get("id", "") for item in catalog if item.get("id")]
print(f" Found {len(model_list)} model(s) from GitHub Copilot")
else:
model_list = _PROVIDER_MODELS.get("copilot", [])
if model_list:
print(
" ⚠ Could not auto-detect models from GitHub Copilot — showing defaults."
)
print(' Use "Enter custom model name" if you do not see your model.')
if model_list:
selected = _prompt_model_selection(
model_list,
current_model=normalized_current_model,
)
else:
try:
selected = input("Model name: ").strip()
except (KeyboardInterrupt, EOFError):
selected = None
if not selected:
print("No change.")
return
selected = (
normalize_copilot_model_id(
selected,
catalog=catalog,
api_key=catalog_api_key,
)
or selected
)
_save_model_choice(selected)
cfg = load_config()
model = cfg.get("model")
if not isinstance(model, dict):
model = {"default": model} if model else {}
cfg["model"] = model
model["provider"] = provider_id
model["base_url"] = effective_base
model["api_mode"] = "chat_completions"
save_config(cfg)
deactivate_provider()
print(f"Default model set to: {selected} (via {pconfig.name})")
def _model_flow_kimi(config, current_model=""):
"""Kimi / Moonshot model selection with automatic endpoint routing.
- sk-kimi-* keys → api.kimi.com/coding/v1 (Kimi Coding Plan)
- Other keys → api.moonshot.ai/v1 (legacy Moonshot)
No manual base URL prompt — endpoint is determined by key prefix.
"""
from hermes_cli.main import _prompt_api_key
from hermes_cli.auth import (
PROVIDER_REGISTRY,
KIMI_CODE_BASE_URL,
_prompt_model_selection,
_save_model_choice,
deactivate_provider,
)
from hermes_cli.config import (
get_env_value,
save_env_value,
load_config,
save_config,
)
from hermes_cli.models import _PROVIDER_MODELS
provider_id = "kimi-coding"
pconfig = PROVIDER_REGISTRY[provider_id]
key_env = pconfig.api_key_env_vars[0] if pconfig.api_key_env_vars else ""
base_url_env = pconfig.base_url_env_var or ""
# Step 1: Check / prompt for API key
existing_key = ""
for ev in pconfig.api_key_env_vars:
existing_key = get_env_value(ev) or os.getenv(ev, "")
if existing_key:
break
existing_key, abort = _prompt_api_key(
pconfig, existing_key, provider_id=provider_id
)
if abort:
return
# Step 2: Auto-detect endpoint from key prefix
is_coding_plan = existing_key.startswith("sk-kimi-")
if is_coding_plan:
effective_base = KIMI_CODE_BASE_URL
print(f" Detected Kimi Coding Plan key → {effective_base}")
else:
effective_base = pconfig.inference_base_url
print(f" Using Moonshot endpoint → {effective_base}")
# Clear any manual base URL override so auto-detection works at runtime
if base_url_env and get_env_value(base_url_env):
save_env_value(base_url_env, "")
print()
# Step 3: Model selection — show appropriate models for the endpoint
if is_coding_plan:
# Coding Plan models (kimi-k2.6 first)
model_list = [
"kimi-k2.6",
"kimi-k2.5",
"kimi-for-coding",
"kimi-k2-thinking",
"kimi-k2-thinking-turbo",
]
else:
# Legacy Moonshot models (excludes Coding Plan-only models)
model_list = _PROVIDER_MODELS.get("moonshot", [])
if model_list:
selected = _prompt_model_selection(model_list, current_model=current_model)
else:
try:
selected = input("Enter model name: ").strip()
except (KeyboardInterrupt, EOFError):
selected = None
if selected:
_save_model_choice(selected)
# Update config with provider and base URL
cfg = load_config()
model = cfg.get("model")
if not isinstance(model, dict):
model = {"default": model} if model else {}
cfg["model"] = model
model["provider"] = provider_id
model["base_url"] = effective_base
model.pop("api_mode", None) # let runtime auto-detect from URL
save_config(cfg)
deactivate_provider()
endpoint_label = "Kimi Coding" if is_coding_plan else "Moonshot"
print(f"Default model set to: {selected} (via {endpoint_label})")
else:
print("No change.")
def _model_flow_stepfun(config, current_model=""):
"""StepFun Step Plan flow with region-specific endpoints."""
from hermes_cli.main import _infer_stepfun_region, _prompt_api_key, _prompt_provider_choice, _stepfun_base_url_for_region
from hermes_cli.auth import (
PROVIDER_REGISTRY,
_prompt_model_selection,
_save_model_choice,
deactivate_provider,
)
from hermes_cli.config import (
get_env_value,
save_env_value,
load_config,
save_config,
)
from hermes_cli.models import _PROVIDER_MODELS, fetch_api_models
provider_id = "stepfun"
pconfig = PROVIDER_REGISTRY[provider_id]
key_env = pconfig.api_key_env_vars[0] if pconfig.api_key_env_vars else ""
base_url_env = pconfig.base_url_env_var or ""
existing_key = ""
for ev in pconfig.api_key_env_vars:
existing_key = get_env_value(ev) or os.getenv(ev, "")
if existing_key:
break
existing_key, abort = _prompt_api_key(
pconfig, existing_key, provider_id=provider_id
)
if abort:
return
current_base = ""
if base_url_env:
current_base = get_env_value(base_url_env) or os.getenv(base_url_env, "")
if not current_base:
model_cfg = config.get("model")
if isinstance(model_cfg, dict):
current_base = str(model_cfg.get("base_url") or "").strip()
current_region = _infer_stepfun_region(current_base or pconfig.inference_base_url)
region_choices = [
(
"international",
f"International ({_stepfun_base_url_for_region('international')})",
),
("china", f"China ({_stepfun_base_url_for_region('china')})"),
]
ordered_regions = []
for region_key, label in region_choices:
if region_key == current_region:
ordered_regions.insert(0, (region_key, f"{label} ← currently active"))
else:
ordered_regions.append((region_key, label))
ordered_regions.append(("cancel", "Cancel"))
region_idx = _prompt_provider_choice([label for _, label in ordered_regions])
if region_idx is None or ordered_regions[region_idx][0] == "cancel":
print("No change.")
return
selected_region = ordered_regions[region_idx][0]
effective_base = _stepfun_base_url_for_region(selected_region)
if base_url_env:
save_env_value(base_url_env, effective_base)
live_models = fetch_api_models(existing_key, effective_base)
if live_models:
model_list = live_models
print(f" Found {len(model_list)} model(s) from {pconfig.name} API")
else:
model_list = _PROVIDER_MODELS.get(provider_id, [])
if model_list:
print(
f" Could not auto-detect models from {pconfig.name} API — "
"showing Step Plan fallback catalog."
)
if model_list:
selected = _prompt_model_selection(model_list, current_model=current_model)
else:
try:
selected = input("Model name: ").strip()
except (KeyboardInterrupt, EOFError):
selected = None
if selected:
_save_model_choice(selected)
cfg = load_config()
model = cfg.get("model")
if not isinstance(model, dict):
model = {"default": model} if model else {}
cfg["model"] = model
model["provider"] = provider_id
model["base_url"] = effective_base
model.pop("api_mode", None)
save_config(cfg)
deactivate_provider()
config["model"] = dict(model)
print(f"Default model set to: {selected} (via {pconfig.name})")
else:
print("No change.")
def _model_flow_bedrock_api_key(config, region, current_model=""):
"""Bedrock API Key mode — uses the OpenAI-compatible bedrock-mantle endpoint.
For developers who don't have an AWS account but received a Bedrock API Key
from their AWS admin. Works like any OpenAI-compatible endpoint.
"""
from hermes_cli.auth import (
_prompt_model_selection,
_save_model_choice,
deactivate_provider,
)
from hermes_cli.config import (
load_config,
save_config,
get_env_value,
save_env_value,
)
from hermes_cli.models import _PROVIDER_MODELS
mantle_base_url = f"https://bedrock-mantle.{region}.api.aws/v1"
# Prompt for API key
existing_key = get_env_value("AWS_BEARER_TOKEN_BEDROCK") or ""
if existing_key:
from hermes_cli.env_loader import format_secret_source_suffix
source_suffix = format_secret_source_suffix("AWS_BEARER_TOKEN_BEDROCK")
print(f" Bedrock API Key: {existing_key[:12]}... ✓{source_suffix}")
else:
print(f" Endpoint: {mantle_base_url}")
print()
from hermes_cli.secret_prompt import masked_secret_prompt
try:
api_key = masked_secret_prompt(" Bedrock API Key: ").strip()
except (KeyboardInterrupt, EOFError):
print()
return
if not api_key:
print(" Cancelled.")
return
save_env_value("AWS_BEARER_TOKEN_BEDROCK", api_key)
existing_key = api_key
print(" ✓ API key saved.")
print()
# Model selection — use static list (mantle doesn't need boto3 for discovery)
model_list = _PROVIDER_MODELS.get("bedrock", [])
print(f" Showing {len(model_list)} curated models")
if model_list:
selected = _prompt_model_selection(model_list, current_model=current_model)
else:
try:
selected = input(" Model ID: ").strip()
except (KeyboardInterrupt, EOFError):
selected = None
if selected:
_save_model_choice(selected)
# Save as custom provider pointing to bedrock-mantle
cfg = load_config()
model = cfg.get("model")
if not isinstance(model, dict):
model = {"default": model} if model else {}
cfg["model"] = model
model["provider"] = "custom"
model["base_url"] = mantle_base_url
model.pop("api_mode", None) # chat_completions is the default
# Also save region in bedrock config for reference
bedrock_cfg = cfg.get("bedrock", {})
if not isinstance(bedrock_cfg, dict):
bedrock_cfg = {}
bedrock_cfg["region"] = region
cfg["bedrock"] = bedrock_cfg
# Save the API key env var name so hermes knows where to find it
save_env_value("OPENAI_API_KEY", existing_key)
save_env_value("OPENAI_BASE_URL", mantle_base_url)
save_config(cfg)
deactivate_provider()
print(f" Default model set to: {selected} (via Bedrock API Key, {region})")
print(f" Endpoint: {mantle_base_url}")
else:
print(" No change.")
def _model_flow_bedrock(config, current_model=""):
"""AWS Bedrock provider: verify credentials, pick region, discover models.
Uses the native Converse API via boto3 — not the OpenAI-compatible endpoint.
Auth is handled by the AWS SDK default credential chain (env vars, profile,
instance role), so no API key prompt is needed.
"""
from hermes_cli.auth import (
_prompt_model_selection,
_save_model_choice,
deactivate_provider,
)
from hermes_cli.config import load_config, save_config
from hermes_cli.models import _PROVIDER_MODELS
# 1. Check for AWS credentials
try:
from agent.bedrock_adapter import (
has_aws_credentials,
resolve_aws_auth_env_var,
resolve_bedrock_region,
discover_bedrock_models,
)
except ImportError:
print(" ✗ boto3 is not installed. Install it with:")
print(" pip install boto3")
print()
return
if not has_aws_credentials():
print(" ⚠ No AWS credentials detected via environment variables.")
print(" Bedrock will use boto3's default credential chain (IMDS, SSO, etc.)")
print()
auth_var = resolve_aws_auth_env_var()
if auth_var:
print(f" AWS credentials: {auth_var}")
else:
print(" AWS credentials: boto3 default chain (instance role / SSO)")
print()
# 2. Region selection
current_region = resolve_bedrock_region()
try:
region_input = input(f" AWS Region [{current_region}]: ").strip()
except (KeyboardInterrupt, EOFError):
print()
return
region = region_input or current_region
# 2b. Authentication mode
print(" Choose authentication method:")
print()
print(" 1. IAM credential chain (recommended)")
print(" Works with EC2 instance roles, SSO, env vars, aws configure")
print(" 2. Bedrock API Key")
print(" Enter your Bedrock API Key directly — also supports")
print(" team scenarios where an admin distributes keys")
print()
try:
auth_choice = input(" Choice [1]: ").strip()
except (KeyboardInterrupt, EOFError):
print()
return
if auth_choice == "2":
_model_flow_bedrock_api_key(config, region, current_model)
return
# 3. Model discovery — try live API first, fall back to static list
print(f" Discovering models in {region}...")
live_models = discover_bedrock_models(region)
if live_models:
_EXCLUDE_PREFIXES = (
"stability.",
"cohere.embed",
"twelvelabs.",
"us.stability.",
"us.cohere.embed",
"us.twelvelabs.",
"global.cohere.embed",
"global.twelvelabs.",
)
_EXCLUDE_SUBSTRINGS = ("safeguard", "voxtral", "palmyra-vision")
filtered = []
for m in live_models:
mid = m["id"]
if any(mid.startswith(p) for p in _EXCLUDE_PREFIXES):
continue
if any(s in mid.lower() for s in _EXCLUDE_SUBSTRINGS):
continue
filtered.append(m)
# Deduplicate: prefer inference profiles (us.*, global.*) over bare
# foundation model IDs.
profile_base_ids = set()
for m in filtered:
mid = m["id"]
if mid.startswith(("us.", "global.")):
base = mid.split(".", 1)[1] if "." in mid[3:] else mid
profile_base_ids.add(base)
deduped = []
for m in filtered:
mid = m["id"]
if not mid.startswith(("us.", "global.")) and mid in profile_base_ids:
continue
deduped.append(m)
_RECOMMENDED = [
"us.anthropic.claude-sonnet-4-6",
"us.anthropic.claude-opus-4-6",
"us.anthropic.claude-haiku-4-5",
"us.amazon.nova-pro",
"us.amazon.nova-lite",
"us.amazon.nova-micro",
"deepseek.v3",
"us.meta.llama4-maverick",
"us.meta.llama4-scout",
]
def _sort_key(m):
mid = m["id"]
for i, rec in enumerate(_RECOMMENDED):
if mid.startswith(rec):
return (0, i, mid)
if mid.startswith("global."):
return (1, 0, mid)
return (2, 0, mid)
deduped.sort(key=_sort_key)
model_list = [m["id"] for m in deduped]
print(
f" Found {len(model_list)} text model(s) (filtered from {len(live_models)} total)"
)
else:
model_list = _PROVIDER_MODELS.get("bedrock", [])
if model_list:
print(
f" Using {len(model_list)} curated models (live discovery unavailable)"
)
else:
print(
" No models found. Check IAM permissions for bedrock:ListFoundationModels."
)
return
# 4. Model selection
if model_list:
selected = _prompt_model_selection(model_list, current_model=current_model)
else:
try:
selected = input(" Model ID: ").strip()
except (KeyboardInterrupt, EOFError):
selected = None
if selected:
_save_model_choice(selected)
cfg = load_config()
model = cfg.get("model")
if not isinstance(model, dict):
model = {"default": model} if model else {}
cfg["model"] = model
model["provider"] = "bedrock"
model["base_url"] = f"https://bedrock-runtime.{region}.amazonaws.com"
model.pop("api_mode", None) # bedrock_converse is auto-detected
bedrock_cfg = cfg.get("bedrock", {})
if not isinstance(bedrock_cfg, dict):
bedrock_cfg = {}
bedrock_cfg["region"] = region
cfg["bedrock"] = bedrock_cfg
save_config(cfg)
deactivate_provider()
print(f" Default model set to: {selected} (via AWS Bedrock, {region})")
else:
print(" No change.")
def _model_flow_api_key_provider(config, provider_id, current_model=""):
"""Generic flow for API-key providers (z.ai, MiniMax, OpenCode, etc.)."""
from hermes_cli.main import _prompt_api_key
from hermes_cli.auth import (
PROVIDER_REGISTRY,
_prompt_model_selection,
_save_model_choice,
deactivate_provider,
)
from hermes_cli.config import (
get_env_value,
save_env_value,
load_config,
save_config,
)
from hermes_cli.models import (
_PROVIDER_MODELS,
fetch_api_models,
opencode_model_api_mode,
normalize_opencode_model_id,
)
pconfig = PROVIDER_REGISTRY[provider_id]
key_env = pconfig.api_key_env_vars[0] if pconfig.api_key_env_vars else ""
base_url_env = pconfig.base_url_env_var or ""
# Check / prompt for API key
existing_key = ""
for ev in pconfig.api_key_env_vars:
existing_key = get_env_value(ev) or os.getenv(ev, "")
if existing_key:
break
existing_key, abort = _prompt_api_key(
pconfig, existing_key, provider_id=provider_id
)
if abort:
return
# Gemini free-tier gate: free-tier daily quotas (<= 250 RPD for Flash)
# are exhausted in a handful of agent turns, so refuse to wire up the
# provider with a free-tier key. Probe is best-effort; network or auth
# errors fall through without blocking.
if provider_id == "gemini" and existing_key:
try:
from agent.gemini_native_adapter import probe_gemini_tier
except Exception:
probe_gemini_tier = None
if probe_gemini_tier is not None:
print(" Checking Gemini API tier...")
probe_base = (
(get_env_value(base_url_env) if base_url_env else "")
or os.getenv(base_url_env or "", "")
or pconfig.inference_base_url
)
tier = probe_gemini_tier(existing_key, probe_base)
if tier == "free":
print()
print(
"❌ This Google API key is on the free tier "
"(<= 250 requests/day for gemini-2.5-flash)."
)
print(
" Hermes typically makes 3-10 API calls per user turn "
"(tool iterations + auxiliary tasks),"
)
print(
" so the free tier is exhausted after a handful of "
"messages and cannot sustain"
)
print(" an agent session.")
print()
print(
" To use Gemini with Hermes, enable billing on your "
"Google Cloud project and regenerate"
)
print(
" the key in a billing-enabled project: "
"https://aistudio.google.com/apikey"
)
print()
print(
" Alternatives with workable free usage: DeepSeek, "
"OpenRouter (free models), Groq, Nous."
)
print()
print("Not saving Gemini as the default provider.")
return
if tier == "paid":
print(" Tier check: paid ✓")
else:
# "unknown" -- network issue, auth problem, unexpected response.
# Don't block; the runtime 429 handler will surface free-tier
# guidance if the key turns out to be free tier.
print(" Tier check: could not verify (proceeding anyway).")
print()
# Optional base URL override.
# Precedence: env var → config.yaml model.base_url → registry default.
# Reading config.yaml prevents silently overwriting a saved remote URL
# (e.g. a remote LM Studio endpoint) with localhost when the user just
# presses Enter at the prompt below.
current_base = ""
if base_url_env:
current_base = get_env_value(base_url_env) or os.getenv(base_url_env, "")
if not current_base:
try:
_m = load_config().get("model") or {}
if str(_m.get("provider") or "").strip().lower() == provider_id:
current_base = str(_m.get("base_url") or "").strip()
except Exception:
pass
effective_base = current_base or pconfig.inference_base_url
try:
override = input(f"Base URL [{effective_base}]: ").strip()
except (KeyboardInterrupt, EOFError):
print()
override = ""
if override and base_url_env:
if not override.startswith(("http://", "https://")):
print(
" Invalid URL — must start with http:// or https://. Keeping current value."
)
else:
save_env_value(base_url_env, override)
effective_base = override
# Model selection — resolution order:
# 1. models.dev registry (cached, filtered for agentic/tool-capable models)
# 2. Curated static fallback list (offline insurance)
# 3. Live /models endpoint probe (small providers without models.dev data)
#
# LM Studio: live /api/v1/models probe (no models.dev catalog).
# Ollama Cloud: merged discovery (live API + models.dev + disk cache).
if provider_id == "lmstudio":
from hermes_cli.auth import AuthError
from hermes_cli.models import fetch_lmstudio_models
api_key_for_probe = existing_key or (get_env_value(key_env) if key_env else "")
try:
model_list = fetch_lmstudio_models(
api_key=api_key_for_probe, base_url=effective_base
)
except AuthError as exc:
print(f" LM Studio rejected the request: {exc}")
print(" Set LM_API_KEY (or update it) to match the server's bearer token.")
model_list = []
if model_list:
print(f" Found {len(model_list)} model(s) from LM Studio")
elif provider_id == "ollama-cloud":
from hermes_cli.models import fetch_ollama_cloud_models
api_key_for_probe = existing_key or (get_env_value(key_env) if key_env else "")
# During setup, force a live refresh so the picker reflects newly
# released models (e.g. deepseek v4 flash, kimi k2.6) the moment
# the user enters their key — not an hour later when the disk
# cache TTL expires.
model_list = fetch_ollama_cloud_models(
api_key=api_key_for_probe,
base_url=effective_base,
force_refresh=True,
)
if model_list:
print(f" Found {len(model_list)} model(s) from Ollama Cloud")
elif provider_id == "novita":
from hermes_cli.models import fetch_api_models
api_key_for_probe = existing_key or (get_env_value(key_env) if key_env else "")
curated = _PROVIDER_MODELS.get(provider_id, [])
live_models = fetch_api_models(api_key_for_probe, effective_base)
if live_models:
model_list = live_models
print(f" Found {len(model_list)} model(s) from {pconfig.name} API")
else:
mdev_models: list = []
try:
from agent.models_dev import list_agentic_models
mdev_models = list_agentic_models(provider_id)
except Exception:
pass
if mdev_models:
seen = {m.lower() for m in mdev_models}
model_list = list(mdev_models)
for m in curated:
if m.lower() not in seen:
model_list.append(m)
seen.add(m.lower())
print(f" Found {len(model_list)} model(s) from models.dev registry")
else:
model_list = curated
if model_list:
print(
f' Showing {len(model_list)} curated models — use "Enter custom model name" for others.'
)
else:
curated = _PROVIDER_MODELS.get(provider_id, [])
# Try models.dev first — returns tool-capable models, filtered for noise
mdev_models: list = []
try:
from agent.models_dev import list_agentic_models
mdev_models = list_agentic_models(provider_id)
except Exception:
pass
if mdev_models:
# Merge models.dev with curated list so newly added models
# (not yet in models.dev) still appear in the picker.
if curated:
seen = {m.lower() for m in mdev_models}
merged = list(mdev_models)
for m in curated:
if m.lower() not in seen:
merged.append(m)
seen.add(m.lower())
model_list = merged
else:
model_list = mdev_models
print(f" Found {len(model_list)} model(s) from models.dev registry")
elif curated and len(curated) >= 8:
# Curated list is substantial — use it directly, skip live probe
model_list = curated
print(
f' Showing {len(model_list)} curated models — use "Enter custom model name" for others.'
)
else:
api_key_for_probe = existing_key or (
get_env_value(key_env) if key_env else ""
)
live_models = fetch_api_models(api_key_for_probe, effective_base)
if live_models and len(live_models) >= len(curated):
model_list = live_models
print(f" Found {len(model_list)} model(s) from {pconfig.name} API")
else:
model_list = curated
if model_list:
print(
f' Showing {len(model_list)} curated models — use "Enter custom model name" for others.'
)
# else: no defaults either, will fall through to raw input
if provider_id in {"opencode-zen", "opencode-go"}:
model_list = [
normalize_opencode_model_id(provider_id, mid) for mid in model_list
]
current_model = normalize_opencode_model_id(provider_id, current_model)
model_list = list(dict.fromkeys(mid for mid in model_list if mid))
if model_list:
selected = _prompt_model_selection(model_list, current_model=current_model)
else:
try:
selected = input("Model name: ").strip()
except (KeyboardInterrupt, EOFError):
selected = None
if selected:
if provider_id in {"opencode-zen", "opencode-go"}:
selected = normalize_opencode_model_id(provider_id, selected)
_save_model_choice(selected)
# Update config with provider, base URL, and provider-specific API mode
cfg = load_config()
model = cfg.get("model")
if not isinstance(model, dict):
model = {"default": model} if model else {}
cfg["model"] = model
model["provider"] = provider_id
model["base_url"] = effective_base
if provider_id in {"opencode-zen", "opencode-go"}:
model["api_mode"] = opencode_model_api_mode(provider_id, selected)
else:
model.pop("api_mode", None)
save_config(cfg)
deactivate_provider()
print(f"Default model set to: {selected} (via {pconfig.name})")
else:
print("No change.")
def _model_flow_anthropic(config, current_model=""):
"""Flow for Anthropic provider — OAuth subscription, API key, or Claude Code creds."""
from hermes_cli.main import _run_anthropic_oauth_flow
from hermes_cli.auth import (
_prompt_model_selection,
_save_model_choice,
deactivate_provider,
)
from hermes_cli.config import (
save_env_value,
load_config,
save_config,
save_anthropic_api_key,
)
from hermes_cli.models import _PROVIDER_MODELS
# Check ALL credential sources
from hermes_cli.auth import get_anthropic_key
existing_key = get_anthropic_key()
cc_available = False
try:
from agent.anthropic_adapter import (
read_claude_code_credentials,
is_claude_code_token_valid,
_is_oauth_token,
)
cc_creds = read_claude_code_credentials()
if cc_creds and is_claude_code_token_valid(cc_creds):
cc_available = True
except Exception:
pass
# Stale-OAuth guard: if the only existing cred is an expired OAuth token
# (no valid cc_creds to fall back on), treat it as missing so the re-auth
# path is offered instead of silently accepting a broken token.
existing_is_stale_oauth = False
if existing_key and _is_oauth_token(existing_key) and not cc_available:
existing_is_stale_oauth = True
has_creds = (bool(existing_key) and not existing_is_stale_oauth) or cc_available
needs_auth = not has_creds
if has_creds:
# Show what we found
if existing_key:
from hermes_cli.env_loader import format_secret_source_suffix
from hermes_cli.auth import PROVIDER_REGISTRY
# Surface which env var supplied the key so users with
# Bitwarden see "(from Bitwarden)" — without this, a detected
# BSM key looks identical to a key in .env and users assume
# nothing is wired up.
source_suffix = ""
for var in PROVIDER_REGISTRY["anthropic"].api_key_env_vars:
if os.getenv(var, "").strip() == existing_key:
source_suffix = format_secret_source_suffix(var)
if source_suffix:
break
print(
f" Anthropic credentials: {existing_key[:12]}... ✓{source_suffix}"
)
elif cc_available:
print(" Claude Code credentials: ✓ (auto-detected)")
print()
print(" 1. Use existing credentials")
print(" 2. Reauthenticate (new OAuth login)")
print(" 3. Cancel")
print()
try:
choice = input(" Choice [1/2/3]: ").strip()
except (KeyboardInterrupt, EOFError):
choice = "1"
if choice == "2":
needs_auth = True
elif choice == "3":
return
# choice == "1" or default: use existing, proceed to model selection
if needs_auth:
# Show auth method choice
print()
print(" Choose authentication method:")
print()
print(" 1. Claude Pro/Max subscription (OAuth login)")
print(" 2. Anthropic API key (pay-per-token)")
print(" 3. Cancel")
print()
try:
choice = input(" Choice [1/2/3]: ").strip()
except (KeyboardInterrupt, EOFError):
print()
return
if choice == "1":
if not _run_anthropic_oauth_flow(save_env_value):
return
elif choice == "2":
print()
print(" Get an API key at: https://platform.claude.com/settings/keys")
print()
from hermes_cli.secret_prompt import masked_secret_prompt
try:
api_key = masked_secret_prompt(" API key (sk-ant-...): ").strip()
except (KeyboardInterrupt, EOFError):
print()
return
if not api_key:
print(" Cancelled.")
return
save_anthropic_api_key(api_key, save_fn=save_env_value)
print(" ✓ API key saved.")
else:
print(" No change.")
return
print()
# Model selection
model_list = _PROVIDER_MODELS.get("anthropic", [])
if model_list:
selected = _prompt_model_selection(model_list, current_model=current_model)
else:
try:
selected = input("Model name (e.g., claude-sonnet-4-20250514): ").strip()
except (KeyboardInterrupt, EOFError):
selected = None
if selected:
_save_model_choice(selected)
# Update config with provider — clear base_url since
# resolve_runtime_provider() always hardcodes Anthropic's URL.
# Leaving a stale base_url in config can contaminate other
# providers if the user switches without running 'hermes model'.
cfg = load_config()
model = cfg.get("model")
if not isinstance(model, dict):
model = {"default": model} if model else {}
cfg["model"] = model
model["provider"] = "anthropic"
model.pop("base_url", None)
save_config(cfg)
deactivate_provider()
print(f"Default model set to: {selected} (via Anthropic)")
else:
print("No change.")