def _lmstudio_server_root(base_url: str) -> str:
    """Return the LM Studio server root for native ``/api/v1`` endpoints.

    Callers append ``/api/v1/...`` themselves, so a trailing ``/api/v1``,
    ``/api`` or ``/v1`` path suffix is removed to avoid building doubled
    paths such as ``/api/v1/api/v1/models``.

    Args:
        base_url: Configured base URL. It is first passed through
            ``_normalize_base_url`` (defined elsewhere in this module).

    Returns:
        The URL with at most one recognised suffix removed and no trailing
        slash.
    """
    root = _normalize_base_url(base_url).rstrip("/")
    for suffix in ("/api/v1", "/api", "/v1"):
        if not root.endswith(suffix):
            continue
        head = root[: -len(suffix)]
        if head.endswith(":/"):
            # The match starts at the second slash of "://", i.e. the host
            # itself is named "api"/"v1" (e.g. "http://api"). Stripping it
            # would destroy the authority, so try the next suffix instead.
            continue
        root = head.rstrip("/")
        break
    return root
_auth_headers(api_key) @@ -1340,7 +1352,7 @@ def _query_local_context_length(model: str, base_url: str, api_key: str = "") -> # Use _model_id_matches for fuzzy matching: LM Studio stores models as # "publisher/slug" but users configure only "slug" after "local:" prefix. if server_type == "lm-studio": - resp = client.get(f"{server_url}/api/v1/models") + resp = client.get(f"{lmstudio_url}/api/v1/models") if resp.status_code == 200: data = resp.json() for m in data.get("models", []): diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py index 1548f4a3834..02a0d3eec90 100644 --- a/hermes_cli/auth.py +++ b/hermes_cli/auth.py @@ -704,6 +704,22 @@ def _resolve_zai_base_url(api_key: str, default_url: str, env_override: str) -> return default_url +def _normalize_lmstudio_runtime_base_url(base_url: str) -> str: + """Return the OpenAI-compatible LM Studio runtime base URL. + + LM Studio's native management API lives under ``/api/v1`` while its + OpenAI-compatible chat endpoint lives under ``/v1``. Users often paste + either form into ``LM_BASE_URL`` or ``model.base_url``; normalize before + the OpenAI SDK appends ``/chat/completions``. 
+ """ + root = str(base_url or "").strip().rstrip("/") + for suffix in ("/api/v1", "/api", "/v1"): + if root.endswith(suffix): + root = root[: -len(suffix)].rstrip("/") + break + return (root or "http://127.0.0.1:1234") + "/v1" + + # ============================================================================= # Error Types # ============================================================================= @@ -6341,6 +6357,9 @@ def resolve_api_key_provider_credentials(provider_id: str) -> Dict[str, Any]: else: base_url = pconfig.inference_base_url + if provider_id == "lmstudio": + base_url = _normalize_lmstudio_runtime_base_url(base_url) + return { "provider": provider_id, "api_key": api_key, diff --git a/hermes_cli/models.py b/hermes_cli/models.py index 38e7c80270a..cf3eb40edaa 100644 --- a/hermes_cli/models.py +++ b/hermes_cli/models.py @@ -2902,13 +2902,19 @@ def _is_github_models_base_url(base_url: Optional[str]) -> bool: def _lmstudio_server_root(base_url: Optional[str]) -> Optional[str]: - """Strip ``/v1`` suffix from an LM Studio base URL to get the native API root. + """Return the LM Studio server root for native ``/api/v1`` endpoints. + Users commonly copy either the OpenAI-compatible runtime URL + (``.../v1``) or the native API prefix (``.../api`` / ``.../api/v1``). + Native probes append ``/api/v1/...`` themselves, so normalize all accepted + forms back to the bare server root to avoid ``/api/api/v1`` requests. Returns ``None`` when the base URL is empty/invalid. 
""" root = (base_url or "").strip().rstrip("/") - if root.endswith("/v1"): - root = root[:-3].rstrip("/") + for suffix in ("/api/v1", "/api", "/v1"): + if root.endswith(suffix): + root = root[: -len(suffix)].rstrip("/") + break return root or None diff --git a/hermes_cli/runtime_provider.py b/hermes_cli/runtime_provider.py index 50a826ebbae..a30bdcc3a17 100644 --- a/hermes_cli/runtime_provider.py +++ b/hermes_cli/runtime_provider.py @@ -463,6 +463,9 @@ def _resolve_runtime_from_pool_entry( provider=provider, api_mode=api_mode, model_cfg=model_cfg ) + if provider == "lmstudio": + base_url = auth_mod._normalize_lmstudio_runtime_base_url(base_url) + return { "provider": provider, "api_mode": api_mode, @@ -1914,6 +1917,8 @@ def resolve_runtime_provider( # Strip trailing /v1 for OpenCode Anthropic models (see comment above). if api_mode == "anthropic_messages" and provider in {"opencode-zen", "opencode-go"}: base_url = re.sub(r"/v1/?$", "", base_url) + if provider == "lmstudio": + base_url = auth_mod._normalize_lmstudio_runtime_base_url(base_url) return { "provider": provider, "api_mode": api_mode, diff --git a/tests/agent/test_model_metadata_local_ctx.py b/tests/agent/test_model_metadata_local_ctx.py index ca1c5d3f94a..9b0268bda0f 100644 --- a/tests/agent/test_model_metadata_local_ctx.py +++ b/tests/agent/test_model_metadata_local_ctx.py @@ -424,6 +424,31 @@ class TestQueryLocalContextLengthLmStudio: "max_context_length (1048576) must not win over loaded_instances." 
def test_native_api_base_url_is_not_doubled(self):
    """A base URL already ending in ``/api/v1`` is probed without doubling."""
    from agent.model_metadata import detect_local_server_type

    configured_url = "http://localhost:1234/api/v1"

    # Every probe answers 200, so the first (LM Studio) check decides.
    ok_response = MagicMock(status_code=200)

    fake_client = MagicMock()
    fake_client.__enter__ = lambda s: fake_client
    fake_client.__exit__ = MagicMock(return_value=False)
    fake_client.get.return_value = ok_response

    with patch("httpx.Client", return_value=fake_client):
        detected = detect_local_server_type(configured_url)

    first_probe_url = fake_client.get.call_args_list[0].args[0]
    assert detected == "lm-studio"
    assert first_probe_url == "http://localhost:1234/api/v1/models"
def test_resolve_lmstudio_normalizes_native_api_base_url_from_env(self, monkeypatch):
    """``LM_BASE_URL`` given in native ``/api/v1`` form resolves to ``/v1``."""
    environment = (
        ("LM_API_KEY", "lm-token"),
        ("LM_BASE_URL", "http://lmstudio.remote:4321/api/v1"),
    )
    for name, value in environment:
        monkeypatch.setenv(name, value)

    resolved = resolve_api_key_provider_credentials("lmstudio")

    assert resolved["provider"] == "lmstudio"
    assert resolved["base_url"] == "http://lmstudio.remote:4321/v1"
def test_resolve_runtime_provider_lmstudio_normalizes_native_api_saved_base_url(monkeypatch):
    """A saved native ``/api/v1`` base URL resolves to the ``/v1`` runtime URL."""

    class Pool:
        """Stand-in credential pool that reports no stored credentials."""

        def has_credentials(self):
            return False

    saved_model_cfg = {
        "provider": "lmstudio",
        "base_url": "http://192.168.1.10:1234/api/v1",
        "default": "qwen/qwen3-coder-30b",
    }

    # Clear the env overrides so only the saved model config supplies the URL.
    for env_name in ("LM_API_KEY", "LM_BASE_URL"):
        monkeypatch.delenv(env_name, raising=False)

    monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "lmstudio")
    # Hand out a fresh dict per call, as the original lambda literal did.
    monkeypatch.setattr(rp, "_get_model_config", lambda: dict(saved_model_cfg))
    monkeypatch.setattr(rp, "load_pool", lambda provider: Pool())

    resolved = rp.resolve_runtime_provider(requested="lmstudio")

    expected = {
        "provider": "lmstudio",
        "api_mode": "chat_completions",
        "base_url": "http://192.168.1.10:1234/v1",
    }
    for key, value in expected.items():
        assert resolved[key] == value