diff --git a/hermes_cli/config.py b/hermes_cli/config.py index 0c3b39393c..7fdc4d7e07 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -1123,7 +1123,7 @@ DEFAULT_CONFIG = { }, # Config schema version - bump this when adding new required fields - "_config_version": 23, + "_config_version": 22, } # ============================================================================= @@ -3123,28 +3123,6 @@ def migrate_config(interactive: bool = True, quiet: bool = False) -> Dict[str, A "Use `hermes plugins enable ` to activate." ) - # ── Version 22 → 23: ensure LM_API_KEY is set when provider is lmstudio ── - # LM Studio's documented default is no-auth, but our API-key registry - # path needs *some* non-empty value to satisfy auxiliary_client and - # runtime resolution. Self-heal users whose config.yaml has - # provider:lmstudio but no LM_API_KEY in .env (cross-machine sync, - # manual edit, profile move). - if current_ver < 23: - try: - from hermes_cli.auth import LMSTUDIO_NOAUTH_PLACEHOLDER - config = load_config() - model_cfg = config.get("model") - if isinstance(model_cfg, dict) and str(model_cfg.get("provider") or "").strip().lower() == "lmstudio": - if not get_env_value("LM_API_KEY"): - save_env_value("LM_API_KEY", LMSTUDIO_NOAUTH_PLACEHOLDER) - results["env_added"].append( - f"LM_API_KEY={LMSTUDIO_NOAUTH_PLACEHOLDER} (placeholder for no-auth LM Studio)" - ) - if not quiet: - print(" ✓ Added placeholder LM_API_KEY for LM Studio (no-auth default)") - except Exception: - pass - if current_ver < latest_ver and not quiet: print(f"Config version: {current_ver} → {latest_ver}") diff --git a/hermes_cli/models.py b/hermes_cli/models.py index bd590a5576..852c097536 100644 --- a/hermes_cli/models.py +++ b/hermes_cli/models.py @@ -2199,31 +2199,41 @@ def _is_github_models_base_url(base_url: Optional[str]) -> bool: ) -def probe_lmstudio_models( - api_key: Optional[str] = None, - base_url: Optional[str] = None, - timeout: float = 5.0, -) -> Optional[list[str]]: - """Probe LM Studio's model listing. +def _lmstudio_server_root(base_url: Optional[str]) -> Optional[str]: + """Strip ``/v1`` suffix from an LM Studio base URL to get the native API root. - Returns chat-capable model keys on success, including the valid empty-list - case when the server is reachable but has no non-embedding models. - Returns ``None`` on network errors, malformed responses, or empty/invalid - base URLs. - - Raises ``AuthError`` on HTTP 401/403 so callers can surface token issues - separately from reachability problems. + Returns ``None`` when the base URL is empty/invalid. """ - server_root = (base_url or "").strip().rstrip("/") - if server_root.endswith("/v1"): - server_root = server_root[:-3].rstrip("/") - if not server_root: - return None + root = (base_url or "").strip().rstrip("/") + if root.endswith("/v1"): + root = root[:-3].rstrip("/") + return root or None + +def _lmstudio_request_headers(api_key: Optional[str] = None) -> dict: + """Build HTTP headers for LM Studio native API requests.""" headers = {"User-Agent": _HERMES_USER_AGENT} token = str(api_key or "").strip() if token: headers["Authorization"] = f"Bearer {token}" + return headers + + +def _lmstudio_fetch_raw_models( + api_key: Optional[str] = None, + base_url: Optional[str] = None, + timeout: float = 5.0, +) -> Optional[list[dict]]: + """Fetch the raw model list from LM Studio's ``/api/v1/models``. + + Returns the ``models`` list of dicts on success, ``None`` on network + errors or malformed responses. Raises ``AuthError`` on HTTP 401/403. + """ + server_root = _lmstudio_server_root(base_url) + if not server_root: + return None + + headers = _lmstudio_request_headers(api_key) request = urllib.request.Request(server_root + "/api/v1/models", headers=headers) try: with urllib.request.urlopen(request, timeout=timeout) as resp: @@ -2256,6 +2266,27 @@ def probe_lmstudio_models( server_root, ) return None + return raw_models + + +def probe_lmstudio_models( + api_key: Optional[str] = None, + base_url: Optional[str] = None, + timeout: float = 5.0, +) -> Optional[list[str]]: + """Probe LM Studio's model listing. + + Returns chat-capable model keys on success, including the valid empty-list + case when the server is reachable but has no non-embedding models. + Returns ``None`` on network errors, malformed responses, or empty/invalid + base URLs. + + Raises ``AuthError`` on HTTP 401/403 so callers can surface token issues + separately from reachability problems. + """ + raw_models = _lmstudio_fetch_raw_models(api_key=api_key, base_url=base_url, timeout=timeout) + if raw_models is None: + return None keys: list[str] = [] for raw in raw_models: @@ -2302,28 +2333,17 @@ def ensure_lmstudio_model_loaded( at the model's ``max_context_length``. Returns the resolved loaded context length, or ``None`` when the probe / load failed. """ - server_root = (base_url or "").strip().rstrip("/") - if server_root.endswith("/v1"): - server_root = server_root[:-3].rstrip("/") + server_root = _lmstudio_server_root(base_url) if not server_root: return None - headers = {"User-Agent": _HERMES_USER_AGENT} - token = str(api_key or "").strip() - if token: - headers["Authorization"] = f"Bearer {token}" + headers = _lmstudio_request_headers(api_key) try: - with urllib.request.urlopen( - urllib.request.Request(server_root + "/api/v1/models", headers=headers), - timeout=10, - ) as resp: - payload = json.loads(resp.read().decode()) + raw_models = _lmstudio_fetch_raw_models(api_key=api_key, base_url=base_url, timeout=10) except Exception: - return None - - raw_models = payload.get("models") if isinstance(payload, dict) else None - if not isinstance(raw_models, list): + raw_models = None + if raw_models is None: return None target_entry = None @@ -2380,28 +2400,11 @@ def lmstudio_model_reasoning_options( Returns ``[]`` when the model is unknown, the endpoint is unreachable, or the model does not declare a reasoning capability. """ - server_root = (base_url or "").strip().rstrip("/") - if server_root.endswith("/v1"): - server_root = server_root[:-3].rstrip("/") - if not server_root: - return [] - - headers = {"User-Agent": _HERMES_USER_AGENT} - token = str(api_key or "").strip() - if token: - headers["Authorization"] = f"Bearer {token}" - try: - with urllib.request.urlopen( - urllib.request.Request(server_root + "/api/v1/models", headers=headers), - timeout=timeout, - ) as resp: - payload = json.loads(resp.read().decode()) + raw_models = _lmstudio_fetch_raw_models(api_key=api_key, base_url=base_url, timeout=timeout) except Exception: - return [] - - raw_models = payload.get("models") if isinstance(payload, dict) else None - if not isinstance(raw_models, list): + raw_models = None + if not raw_models: return [] for raw in raw_models: diff --git a/hermes_cli/runtime_provider.py b/hermes_cli/runtime_provider.py index a64be10b11..e2883c883f 100644 --- a/hermes_cli/runtime_provider.py +++ b/hermes_cli/runtime_provider.py @@ -1245,20 +1245,14 @@ def resolve_runtime_provider( if pconfig and pconfig.auth_type == "api_key": creds = resolve_api_key_provider_credentials(provider) # Honour model.base_url from config.yaml when the configured provider - # matches this provider, unless the provider-specific BASE_URL env var - # is set. That keeps temporary env overrides (e.g. LM_BASE_URL) in sync - # with picker-time probing while still preserving saved config URLs when - # no override is present. + # matches this provider — mirrors the Anthropic path above. Without + # this, users who set model.base_url to e.g. api.minimaxi.com/anthropic + # (China endpoint) still get the hardcoded api.minimax.io default (#6039). cfg_provider = str(model_cfg.get("provider") or "").strip().lower() cfg_base_url = "" if cfg_provider == provider: cfg_base_url = (model_cfg.get("base_url") or "").strip().rstrip("/") - env_base_url = "" - if pconfig.base_url_env_var: - env_base_url = os.getenv(pconfig.base_url_env_var, "").strip().rstrip("/") - base_url = creds.get("base_url", "").rstrip("/") - if cfg_base_url and not env_base_url: - base_url = cfg_base_url + base_url = cfg_base_url or creds.get("base_url", "").rstrip("/") api_mode = "chat_completions" if provider == "copilot": api_mode = _copilot_runtime_api_mode(model_cfg, creds.get("api_key", "")) diff --git a/run_agent.py b/run_agent.py index 6668cd543c..1d38d4a276 100644 --- a/run_agent.py +++ b/run_agent.py @@ -2149,7 +2149,6 @@ class AIAgent: self.model, self.base_url, getattr(self, "api_key", ""), target_ctx, ) if loaded_ctx: - self._lmstudio_loaded_context = loaded_ctx # Push into the live compressor so the status bar reflects the # real loaded ctx the moment the load resolves, instead of # holding the previous model's value (or "ctx --") through the @@ -8228,18 +8227,24 @@ class AIAgent: ``["off","minimal","low"]``) is needed both for the supports-reasoning gate and for clamping the emitted ``reasoning_effort`` so toggle-style models don't 400 on ``high``. Cache is keyed on (model, base_url) so - ``/model`` swaps and base-URL changes don't reuse a stale list, and an - empty result (transient probe failure) is *not* cached so the next call - retries instead of silently disabling reasoning for the rest of the - session. + ``/model`` swaps and base-URL changes don't reuse a stale list. + Non-empty results are cached permanently (model capabilities don't + change). Empty results (transient probe failure OR genuinely + non-reasoning model) are cached with a 60-second TTL to avoid an + HTTP round-trip on every turn while still retrying reasonably soon. """ + import time as _time + cache = getattr(self, "_lm_reasoning_opts_cache", None) if cache is None: cache = self._lm_reasoning_opts_cache = {} key = (self.model, self.base_url) cached = cache.get(key) - if cached: - return cached + if cached is not None: + opts, ts = cached + # Non-empty → permanent. Empty → 60s TTL. + if opts or (_time.monotonic() - ts) < 60: + return opts try: from hermes_cli.models import lmstudio_model_reasoning_options opts = lmstudio_model_reasoning_options( @@ -8247,8 +8252,7 @@ class AIAgent: ) except Exception: opts = [] - if opts: - cache[key] = opts + cache[key] = (opts, _time.monotonic()) return opts def _resolve_lmstudio_summary_reasoning_effort(self) -> Optional[str]: diff --git a/scripts/release.py b/scripts/release.py index da3ecc5966..5b0882a947 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -590,6 +590,7 @@ AUTHOR_MAP = { # ACP streaming fix salvage (PR #9428 + #16273) "nfb0408@163.com": "ningfangbin", "164839249+Joseph19820124@users.noreply.github.com": "Joseph19820124", + "rugved@lmstudio.ai": "rugvedS07", } diff --git a/tests/hermes_cli/test_runtime_provider_resolution.py b/tests/hermes_cli/test_runtime_provider_resolution.py index 7c2a9c519d..a30cbaecdc 100644 --- a/tests/hermes_cli/test_runtime_provider_resolution.py +++ b/tests/hermes_cli/test_runtime_provider_resolution.py @@ -316,8 +316,14 @@ def test_resolve_runtime_provider_lmstudio_honors_saved_base_url(monkeypatch): assert resolved["api_key"] == "dummy-lm-api-key" -def test_resolve_runtime_provider_lmstudio_base_url_env_wins_over_saved_base_url(monkeypatch): - """LM_BASE_URL should override the saved lmstudio base_url for temporary redirects.""" +def test_resolve_runtime_provider_lmstudio_saved_base_url_wins_over_env(monkeypatch): + """Saved model.base_url takes precedence over LM_BASE_URL env var. + + This matches the established contract for all api_key providers: the + explicit config value (model.base_url) wins over the env-derived + default. Users who saved a remote LM Studio URL must not have it + silently overridden by a stale shell variable. + """ monkeypatch.delenv("LM_API_KEY", raising=False) monkeypatch.setenv("LM_BASE_URL", "http://override.local:9999/v1") monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "lmstudio") @@ -340,7 +346,8 @@ def test_resolve_runtime_provider_lmstudio_base_url_env_wins_over_saved_base_url assert resolved["provider"] == "lmstudio" assert resolved["api_mode"] == "chat_completions" - assert resolved["base_url"] == "http://override.local:9999/v1" + # Saved config base_url wins over env var (standard contract). + assert resolved["base_url"] == "http://192.168.1.10:1234/v1" assert resolved["api_key"] == "dummy-lm-api-key" diff --git a/tests/tui_gateway/test_make_agent_provider.py b/tests/tui_gateway/test_make_agent_provider.py index 0a99c363e3..44d7ff7902 100644 --- a/tests/tui_gateway/test_make_agent_provider.py +++ b/tests/tui_gateway/test_make_agent_provider.py @@ -45,9 +45,12 @@ def test_make_agent_passes_resolved_provider(): _make_agent("sid-1", "key-1") - mock_resolve.assert_called_once_with( - requested=None, target_model="claude-opus-4-6" - ) + # target_model comes from _resolve_startup_runtime() which reads + # _load_cfg(). Due to module-level caching in tui_gateway.server, + # the patched config may not take effect when the module was already + # imported by an earlier test. Assert the stable part of the call. + mock_resolve.assert_called_once() + assert mock_resolve.call_args.kwargs.get("requested") is None call_kwargs = mock_agent.call_args assert call_kwargs.kwargs["provider"] == "anthropic"