fix: normalize lmstudio base urls

This commit is contained in:
lkevincc 2026-06-27 08:06:51 +10:00 committed by Teknium
parent 43eaf79ae6
commit 163562bf88
8 changed files with 166 additions and 6 deletions

View file

@ -478,6 +478,16 @@ def _infer_provider_from_url(base_url: str) -> Optional[str]:
return None
def _lmstudio_server_root(base_url: str) -> str:
    """Compute the bare LM Studio server root used for native ``/api/v1`` calls.

    The URL is passed through ``_normalize_base_url`` and stripped of trailing
    slashes.  If the result ends in ``/api/v1``, ``/api`` or ``/v1`` (checked
    in that order), that single suffix is removed along with any slashes left
    in front of it.  At most one suffix is removed, so callers can append
    ``/api/v1/...`` without producing a doubled prefix.
    """
    cleaned = _normalize_base_url(base_url).rstrip("/")
    # Longest suffix first so "/api/v1" is never mistaken for a bare "/v1".
    matched = next(
        (tail for tail in ("/api/v1", "/api", "/v1") if cleaned.endswith(tail)),
        None,
    )
    if matched is None:
        return cleaned
    return cleaned[: -len(matched)].rstrip("/")
def _is_known_provider_base_url(base_url: str) -> bool:
    """Report whether ``_infer_provider_from_url`` recognises ``base_url``."""
    inferred = _infer_provider_from_url(base_url)
    return inferred is not None
@ -549,6 +559,7 @@ def detect_local_server_type(base_url: str, api_key: str = "") -> Optional[str]:
server_url = normalized
if server_url.endswith("/v1"):
server_url = server_url[:-3]
lmstudio_url = _lmstudio_server_root(base_url)
headers = _auth_headers(api_key)
@ -556,7 +567,7 @@ def detect_local_server_type(base_url: str, api_key: str = "") -> Optional[str]:
with httpx.Client(timeout=2.0, headers=headers) as client:
# LM Studio exposes /api/v1/models — check first (most specific)
try:
r = client.get(f"{server_url}/api/v1/models")
r = client.get(f"{lmstudio_url}/api/v1/models")
if r.status_code == 200:
return "lm-studio"
except Exception:
@ -774,7 +785,7 @@ def fetch_endpoint_model_metadata(
if is_local_endpoint(normalized):
try:
if detect_local_server_type(normalized, api_key=api_key) == "lm-studio":
server_url = normalized[:-3].rstrip("/") if normalized.endswith("/v1") else normalized
server_url = _lmstudio_server_root(normalized)
response = requests.get(
server_url.rstrip("/") + "/api/v1/models",
headers=headers,
@ -1297,6 +1308,7 @@ def _query_local_context_length(model: str, base_url: str, api_key: str = "") ->
server_url = base_url.rstrip("/")
if server_url.endswith("/v1"):
server_url = server_url[:-3]
lmstudio_url = _lmstudio_server_root(base_url)
headers = _auth_headers(api_key)
@ -1340,7 +1352,7 @@ def _query_local_context_length(model: str, base_url: str, api_key: str = "") ->
# Use _model_id_matches for fuzzy matching: LM Studio stores models as
# "publisher/slug" but users configure only "slug" after "local:" prefix.
if server_type == "lm-studio":
resp = client.get(f"{server_url}/api/v1/models")
resp = client.get(f"{lmstudio_url}/api/v1/models")
if resp.status_code == 200:
data = resp.json()
for m in data.get("models", []):

View file

@ -704,6 +704,22 @@ def _resolve_zai_base_url(api_key: str, default_url: str, env_override: str) ->
return default_url
def _normalize_lmstudio_runtime_base_url(base_url: str) -> str:
"""Return the OpenAI-compatible LM Studio runtime base URL.
LM Studio's native management API lives under ``/api/v1`` while its
OpenAI-compatible chat endpoint lives under ``/v1``. Users often paste
either form into ``LM_BASE_URL`` or ``model.base_url``; normalize before
the OpenAI SDK appends ``/chat/completions``.
"""
root = str(base_url or "").strip().rstrip("/")
for suffix in ("/api/v1", "/api", "/v1"):
if root.endswith(suffix):
root = root[: -len(suffix)].rstrip("/")
break
return (root or "http://127.0.0.1:1234") + "/v1"
# =============================================================================
# Error Types
# =============================================================================
@ -6341,6 +6357,9 @@ def resolve_api_key_provider_credentials(provider_id: str) -> Dict[str, Any]:
else:
base_url = pconfig.inference_base_url
if provider_id == "lmstudio":
base_url = _normalize_lmstudio_runtime_base_url(base_url)
return {
"provider": provider_id,
"api_key": api_key,

View file

@ -2902,13 +2902,19 @@ def _is_github_models_base_url(base_url: Optional[str]) -> bool:
def _lmstudio_server_root(base_url: Optional[str]) -> Optional[str]:
"""Strip ``/v1`` suffix from an LM Studio base URL to get the native API root.
"""Return the LM Studio server root for native ``/api/v1`` endpoints.
Users commonly copy either the OpenAI-compatible runtime URL
(``.../v1``) or the native API prefix (``.../api`` / ``.../api/v1``).
Native probes append ``/api/v1/...`` themselves, so normalize all accepted
forms back to the bare server root to avoid ``/api/api/v1`` requests.
Returns ``None`` when the base URL is empty/invalid.
"""
root = (base_url or "").strip().rstrip("/")
if root.endswith("/v1"):
root = root[:-3].rstrip("/")
for suffix in ("/api/v1", "/api", "/v1"):
if root.endswith(suffix):
root = root[: -len(suffix)].rstrip("/")
break
return root or None

View file

@ -463,6 +463,9 @@ def _resolve_runtime_from_pool_entry(
provider=provider, api_mode=api_mode, model_cfg=model_cfg
)
if provider == "lmstudio":
base_url = auth_mod._normalize_lmstudio_runtime_base_url(base_url)
return {
"provider": provider,
"api_mode": api_mode,
@ -1914,6 +1917,8 @@ def resolve_runtime_provider(
# Strip trailing /v1 for OpenCode Anthropic models (see comment above).
if api_mode == "anthropic_messages" and provider in {"opencode-zen", "opencode-go"}:
base_url = re.sub(r"/v1/?$", "", base_url)
if provider == "lmstudio":
base_url = auth_mod._normalize_lmstudio_runtime_base_url(base_url)
return {
"provider": provider,
"api_mode": api_mode,

View file

@ -424,6 +424,31 @@ class TestQueryLocalContextLengthLmStudio:
"max_context_length (1048576) must not win over loaded_instances."
)
def test_lmstudio_native_api_base_url_is_not_doubled(self):
    """A ``.../api/v1`` base URL must be probed at ``/api/v1/models`` exactly once."""
    from agent.model_metadata import _query_local_context_length

    model_entry = {
        "key": "publisher/model-a",
        "id": "publisher/model-a",
        "loaded_instances": [{"config": {"context_length": 32768}}],
    }
    lmstudio_resp = self._make_resp(200, {"models": [model_entry]})
    # Follow-up responses handed to the fake client after the native one.
    fallback_resps = [self._make_resp(404, {}), self._make_resp(404, {})]
    fake_client = self._make_client(lmstudio_resp, *fallback_resps)

    with patch("agent.model_metadata.detect_local_server_type", return_value="lm-studio"):
        with patch("httpx.Client", return_value=fake_client):
            context_length = _query_local_context_length(
                "publisher/model-a",
                "http://localhost:1234/api/v1",
            )

    first_url = fake_client.get.call_args_list[0].args[0]
    assert context_length == 32768
    assert first_url == "http://localhost:1234/api/v1/models"
class TestDetectLocalServerTypeAuth:
def test_passes_bearer_token_to_probe_requests(self):
@ -445,6 +470,24 @@ class TestDetectLocalServerTypeAuth:
"Authorization": "Bearer lm-token"
}
def test_native_api_base_url_is_not_doubled(self):
    """Detection with a ``.../api/v1`` base URL must not request ``/api/v1/api/v1``."""
    from agent.model_metadata import detect_local_server_type

    ok_resp = MagicMock()
    ok_resp.status_code = 200

    # Fake httpx client usable as a context manager; every GET returns 200.
    fake_client = MagicMock()
    fake_client.__enter__ = lambda _ctx: fake_client
    fake_client.__exit__ = MagicMock(return_value=False)
    fake_client.get.return_value = ok_resp

    detected = None
    with patch("httpx.Client", return_value=fake_client):
        detected = detect_local_server_type("http://localhost:1234/api/v1")

    first_url = fake_client.get.call_args_list[0].args[0]
    assert detected == "lm-studio"
    assert first_url == "http://localhost:1234/api/v1/models"
class TestFetchEndpointModelMetadataLmStudio:
"""fetch_endpoint_model_metadata should use LM Studio's native models endpoint."""
@ -489,6 +532,33 @@ class TestFetchEndpointModelMetadataLmStudio:
assert result["lmstudio-community/Qwen3.5-27B-GGUF/Qwen3.5-27B-Q8_0.gguf"]["context_length"] == 131072
assert result["Qwen3.5-27B-GGUF/Qwen3.5-27B-Q8_0.gguf"]["context_length"] == 131072
def test_native_api_base_url_is_not_doubled(self):
    """Metadata fetch with a ``.../api/v1`` base URL hits ``/api/v1/models`` once."""
    from agent.model_metadata import fetch_endpoint_model_metadata

    model_entry = {
        "key": "publisher/model-a",
        "id": "publisher/model-a",
        "loaded_instances": [
            {"config": {"context_length": 65536}}
        ],
    }
    lmstudio_resp = self._make_resp({"models": [model_entry]})

    with patch("agent.model_metadata.detect_local_server_type", return_value="lm-studio"):
        with patch("agent.model_metadata.requests.get", return_value=lmstudio_resp) as fake_get:
            metadata = fetch_endpoint_model_metadata(
                "http://localhost:1234/api/v1",
                force_refresh=True,
            )

    requested_url = fake_get.call_args[0][0]
    assert requested_url == "http://localhost:1234/api/v1/models"
    assert metadata["publisher/model-a"]["context_length"] == 65536
class TestQueryLocalContextLengthNetworkError:
"""_query_local_context_length handles network failures gracefully."""

View file

@ -427,6 +427,15 @@ class TestResolveApiKeyProviderCredentials:
assert creds["api_key"] == "lm-token"
assert creds["base_url"] == "http://lmstudio.remote:4321/v1"
def test_resolve_lmstudio_normalizes_native_api_base_url_from_env(self, monkeypatch):
    """An ``LM_BASE_URL`` ending in ``/api/v1`` resolves to the ``/v1`` runtime URL."""
    env = {
        "LM_API_KEY": "lm-token",
        "LM_BASE_URL": "http://lmstudio.remote:4321/api/v1",
    }
    for name, value in env.items():
        monkeypatch.setenv(name, value)

    resolved = resolve_api_key_provider_credentials("lmstudio")

    assert resolved["provider"] == "lmstudio"
    assert resolved["base_url"] == "http://lmstudio.remote:4321/v1"
def test_resolve_lmstudio_no_api_key_substitutes_placeholder(self, monkeypatch):
# No-auth LM Studio: when LM_API_KEY isn't set, runtime credentials
# carry a placeholder so gateway/TUI/cron paths see the local server

View file

@ -677,6 +677,19 @@ class TestValidateApiFallback:
assert models == ["publisher/chat-model"]
def test_fetch_lmstudio_models_normalizes_native_api_base_url(self):
    """Listing models with a ``.../api/v1`` base URL requests ``/api/v1/models``."""
    # Fake urlopen response usable as a context manager.
    fake_resp = MagicMock()
    fake_resp.__enter__.return_value = fake_resp
    fake_resp.__exit__.return_value = False
    fake_resp.read.return_value = b'{"models":[{"key":"publisher/chat-model","type":"llm"}]}'

    with patch("hermes_cli.models.urllib.request.urlopen", return_value=fake_resp) as fake_urlopen:
        listed = fetch_lmstudio_models(base_url="http://localhost:1234/api/v1")

    sent_request = fake_urlopen.call_args[0][0]
    assert sent_request.full_url == "http://localhost:1234/api/v1/models"
    assert listed == ["publisher/chat-model"]
def test_validate_lmstudio_rejects_embedding_models(self):
mock_resp = MagicMock()
mock_resp.__enter__.return_value = mock_resp

View file

@ -416,6 +416,32 @@ def test_resolve_runtime_provider_lmstudio_saved_base_url_wins_over_env(monkeypa
assert resolved["api_key"] == "dummy-lm-api-key"
def test_resolve_runtime_provider_lmstudio_normalizes_native_api_saved_base_url(monkeypatch):
    """A saved ``model.base_url`` ending in ``/api/v1`` resolves to the ``/v1`` runtime URL."""
    for env_name in ("LM_API_KEY", "LM_BASE_URL"):
        monkeypatch.delenv(env_name, raising=False)

    saved_model_cfg = {
        "provider": "lmstudio",
        "base_url": "http://192.168.1.10:1234/api/v1",
        "default": "qwen/qwen3-coder-30b",
    }
    # Credential pool stand-in that always reports having no credentials.
    empty_pool_cls = type("Pool", (), {"has_credentials": lambda self: False})

    monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "lmstudio")
    # Hand out a fresh copy per call, as the inline dict literal would.
    monkeypatch.setattr(rp, "_get_model_config", lambda: dict(saved_model_cfg))
    monkeypatch.setattr(rp, "load_pool", lambda provider: empty_pool_cls())

    runtime = rp.resolve_runtime_provider(requested="lmstudio")

    assert runtime["provider"] == "lmstudio"
    assert runtime["api_mode"] == "chat_completions"
    assert runtime["base_url"] == "http://192.168.1.10:1234/v1"
def test_resolve_runtime_provider_openrouter_explicit(monkeypatch):
monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "openrouter")
monkeypatch.setattr(rp, "_get_model_config", lambda: {})