mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-30 11:52:04 +00:00
fix: normalize lmstudio base urls
This commit is contained in:
parent
43eaf79ae6
commit
163562bf88
8 changed files with 166 additions and 6 deletions
|
|
@ -478,6 +478,16 @@ def _infer_provider_from_url(base_url: str) -> Optional[str]:
|
|||
return None
|
||||
|
||||
|
||||
def _lmstudio_server_root(base_url: str) -> str:
    """Return the bare LM Studio server root for native ``/api/v1`` endpoints.

    ``base_url`` is passed through ``_normalize_base_url`` and stripped of
    trailing slashes. At most one of the suffixes ``/api/v1``, ``/api`` or
    ``/v1`` (checked in that order) is then removed, together with any
    slashes left dangling in front of it.
    """
    server_root = _normalize_base_url(base_url).rstrip("/")
    # Longest suffix first, so ``/api/v1`` is removed whole instead of
    # leaving a stray ``/api`` behind.
    matched = next(
        (s for s in ("/api/v1", "/api", "/v1") if server_root.endswith(s)),
        None,
    )
    if matched is None:
        return server_root
    return server_root[: -len(matched)].rstrip("/")
|
||||
|
||||
|
||||
def _is_known_provider_base_url(base_url: str) -> bool:
    """Report whether ``_infer_provider_from_url`` returns a provider for ``base_url``."""
    inferred = _infer_provider_from_url(base_url)
    return inferred is not None
|
||||
|
||||
|
|
@ -549,6 +559,7 @@ def detect_local_server_type(base_url: str, api_key: str = "") -> Optional[str]:
|
|||
server_url = normalized
|
||||
if server_url.endswith("/v1"):
|
||||
server_url = server_url[:-3]
|
||||
lmstudio_url = _lmstudio_server_root(base_url)
|
||||
|
||||
headers = _auth_headers(api_key)
|
||||
|
||||
|
|
@ -556,7 +567,7 @@ def detect_local_server_type(base_url: str, api_key: str = "") -> Optional[str]:
|
|||
with httpx.Client(timeout=2.0, headers=headers) as client:
|
||||
# LM Studio exposes /api/v1/models — check first (most specific)
|
||||
try:
|
||||
r = client.get(f"{server_url}/api/v1/models")
|
||||
r = client.get(f"{lmstudio_url}/api/v1/models")
|
||||
if r.status_code == 200:
|
||||
return "lm-studio"
|
||||
except Exception:
|
||||
|
|
@ -774,7 +785,7 @@ def fetch_endpoint_model_metadata(
|
|||
if is_local_endpoint(normalized):
|
||||
try:
|
||||
if detect_local_server_type(normalized, api_key=api_key) == "lm-studio":
|
||||
server_url = normalized[:-3].rstrip("/") if normalized.endswith("/v1") else normalized
|
||||
server_url = _lmstudio_server_root(normalized)
|
||||
response = requests.get(
|
||||
server_url.rstrip("/") + "/api/v1/models",
|
||||
headers=headers,
|
||||
|
|
@ -1297,6 +1308,7 @@ def _query_local_context_length(model: str, base_url: str, api_key: str = "") ->
|
|||
server_url = base_url.rstrip("/")
|
||||
if server_url.endswith("/v1"):
|
||||
server_url = server_url[:-3]
|
||||
lmstudio_url = _lmstudio_server_root(base_url)
|
||||
|
||||
headers = _auth_headers(api_key)
|
||||
|
||||
|
|
@ -1340,7 +1352,7 @@ def _query_local_context_length(model: str, base_url: str, api_key: str = "") ->
|
|||
# Use _model_id_matches for fuzzy matching: LM Studio stores models as
|
||||
# "publisher/slug" but users configure only "slug" after "local:" prefix.
|
||||
if server_type == "lm-studio":
|
||||
resp = client.get(f"{server_url}/api/v1/models")
|
||||
resp = client.get(f"{lmstudio_url}/api/v1/models")
|
||||
if resp.status_code == 200:
|
||||
data = resp.json()
|
||||
for m in data.get("models", []):
|
||||
|
|
|
|||
|
|
@ -704,6 +704,22 @@ def _resolve_zai_base_url(api_key: str, default_url: str, env_override: str) ->
|
|||
return default_url
|
||||
|
||||
|
||||
def _normalize_lmstudio_runtime_base_url(base_url: str) -> str:
    """Normalize an LM Studio URL to its OpenAI-compatible ``/v1`` base.

    LM Studio serves its native management API under ``/api/v1`` and its
    OpenAI-compatible chat endpoint under ``/v1``. Either form may end up in
    ``LM_BASE_URL`` or ``model.base_url``, so the value is reduced to the
    server root and ``/v1`` is re-appended before the OpenAI SDK adds
    ``/chat/completions``.

    Falsy input is treated as an empty string; when nothing remains after
    stripping, ``http://127.0.0.1:1234`` is used as the root.
    """
    trimmed = str(base_url or "").strip().rstrip("/")
    # Longest suffix first; only the first match is removed.
    matched = next(
        (s for s in ("/api/v1", "/api", "/v1") if trimmed.endswith(s)),
        "",
    )
    if matched:
        trimmed = trimmed[: -len(matched)].rstrip("/")
    if not trimmed:
        trimmed = "http://127.0.0.1:1234"
    return trimmed + "/v1"
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Error Types
|
||||
# =============================================================================
|
||||
|
|
@ -6341,6 +6357,9 @@ def resolve_api_key_provider_credentials(provider_id: str) -> Dict[str, Any]:
|
|||
else:
|
||||
base_url = pconfig.inference_base_url
|
||||
|
||||
if provider_id == "lmstudio":
|
||||
base_url = _normalize_lmstudio_runtime_base_url(base_url)
|
||||
|
||||
return {
|
||||
"provider": provider_id,
|
||||
"api_key": api_key,
|
||||
|
|
|
|||
|
|
@ -2902,13 +2902,19 @@ def _is_github_models_base_url(base_url: Optional[str]) -> bool:
|
|||
|
||||
|
||||
def _lmstudio_server_root(base_url: Optional[str]) -> Optional[str]:
    """Return the LM Studio server root for native ``/api/v1`` endpoints.

    Users commonly copy either the OpenAI-compatible runtime URL
    (``.../v1``) or the native API prefix (``.../api`` / ``.../api/v1``).
    Native probes append ``/api/v1/...`` themselves, so normalize all accepted
    forms back to the bare server root to avoid ``/api/api/v1`` requests.

    Returns ``None`` when the base URL is empty or reduces to nothing after
    stripping whitespace, trailing slashes and the recognised suffix.
    """
    root = (base_url or "").strip().rstrip("/")
    # Longest suffix first, and stop at the first match, so ``/api/v1`` is
    # removed as a unit rather than being peeled apart piece by piece.
    for suffix in ("/api/v1", "/api", "/v1"):
        if root.endswith(suffix):
            root = root[: -len(suffix)].rstrip("/")
            break
    return root or None
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -463,6 +463,9 @@ def _resolve_runtime_from_pool_entry(
|
|||
provider=provider, api_mode=api_mode, model_cfg=model_cfg
|
||||
)
|
||||
|
||||
if provider == "lmstudio":
|
||||
base_url = auth_mod._normalize_lmstudio_runtime_base_url(base_url)
|
||||
|
||||
return {
|
||||
"provider": provider,
|
||||
"api_mode": api_mode,
|
||||
|
|
@ -1914,6 +1917,8 @@ def resolve_runtime_provider(
|
|||
# Strip trailing /v1 for OpenCode Anthropic models (see comment above).
|
||||
if api_mode == "anthropic_messages" and provider in {"opencode-zen", "opencode-go"}:
|
||||
base_url = re.sub(r"/v1/?$", "", base_url)
|
||||
if provider == "lmstudio":
|
||||
base_url = auth_mod._normalize_lmstudio_runtime_base_url(base_url)
|
||||
return {
|
||||
"provider": provider,
|
||||
"api_mode": api_mode,
|
||||
|
|
|
|||
|
|
@ -424,6 +424,31 @@ class TestQueryLocalContextLengthLmStudio:
|
|||
"max_context_length (1048576) must not win over loaded_instances."
|
||||
)
|
||||
|
||||
def test_lmstudio_native_api_base_url_is_not_doubled(self):
    """A base URL ending in ``/api/v1`` is probed at ``/api/v1/models`` exactly once."""
    from agent.model_metadata import _query_local_context_length

    model_entry = {
        "key": "publisher/model-a",
        "id": "publisher/model-a",
        "loaded_instances": [{"config": {"context_length": 32768}}],
    }
    native_resp = self._make_resp(200, {"models": [model_entry]})
    client_mock = self._make_client(
        native_resp,
        self._make_resp(404, {}),
        self._make_resp(404, {}),
    )

    with patch("agent.model_metadata.detect_local_server_type", return_value="lm-studio"):
        with patch("httpx.Client", return_value=client_mock):
            result = _query_local_context_length(
                "publisher/model-a",
                "http://localhost:1234/api/v1",
            )

    assert result == 32768
    first_call = client_mock.get.call_args_list[0]
    assert first_call.args[0] == "http://localhost:1234/api/v1/models"
|
||||
|
||||
|
||||
class TestDetectLocalServerTypeAuth:
|
||||
def test_passes_bearer_token_to_probe_requests(self):
|
||||
|
|
@ -445,6 +470,24 @@ class TestDetectLocalServerTypeAuth:
|
|||
"Authorization": "Bearer lm-token"
|
||||
}
|
||||
|
||||
def test_native_api_base_url_is_not_doubled(self):
    """Detection with a ``/api/v1`` base URL probes ``/api/v1/models`` first."""
    from agent.model_metadata import detect_local_server_type

    ok_response = MagicMock(status_code=200)

    # Context-manager mock that yields itself and answers every GET with 200.
    client_mock = MagicMock()
    client_mock.__enter__ = lambda _self: client_mock
    client_mock.__exit__ = MagicMock(return_value=False)
    client_mock.get.return_value = ok_response

    with patch("httpx.Client", return_value=client_mock):
        result = detect_local_server_type("http://localhost:1234/api/v1")

    assert result == "lm-studio"
    first_call = client_mock.get.call_args_list[0]
    assert first_call.args[0] == "http://localhost:1234/api/v1/models"
|
||||
|
||||
|
||||
class TestFetchEndpointModelMetadataLmStudio:
|
||||
"""fetch_endpoint_model_metadata should use LM Studio's native models endpoint."""
|
||||
|
|
@ -489,6 +532,33 @@ class TestFetchEndpointModelMetadataLmStudio:
|
|||
assert result["lmstudio-community/Qwen3.5-27B-GGUF/Qwen3.5-27B-Q8_0.gguf"]["context_length"] == 131072
|
||||
assert result["Qwen3.5-27B-GGUF/Qwen3.5-27B-Q8_0.gguf"]["context_length"] == 131072
|
||||
|
||||
def test_native_api_base_url_is_not_doubled(self):
    """Metadata fetch with a ``/api/v1`` base URL requests ``/api/v1/models``."""
    from agent.model_metadata import fetch_endpoint_model_metadata

    model_entry = {
        "key": "publisher/model-a",
        "id": "publisher/model-a",
        "loaded_instances": [
            {"config": {"context_length": 65536}},
        ],
    }
    native_resp = self._make_resp({"models": [model_entry]})

    with patch("agent.model_metadata.detect_local_server_type", return_value="lm-studio"):
        with patch("agent.model_metadata.requests.get", return_value=native_resp) as mock_get:
            result = fetch_endpoint_model_metadata(
                "http://localhost:1234/api/v1",
                force_refresh=True,
            )

    requested_url = mock_get.call_args[0][0]
    assert requested_url == "http://localhost:1234/api/v1/models"
    assert result["publisher/model-a"]["context_length"] == 65536
|
||||
|
||||
|
||||
class TestQueryLocalContextLengthNetworkError:
|
||||
"""_query_local_context_length handles network failures gracefully."""
|
||||
|
|
|
|||
|
|
@ -427,6 +427,15 @@ class TestResolveApiKeyProviderCredentials:
|
|||
assert creds["api_key"] == "lm-token"
|
||||
assert creds["base_url"] == "http://lmstudio.remote:4321/v1"
|
||||
|
||||
def test_resolve_lmstudio_normalizes_native_api_base_url_from_env(self, monkeypatch):
    """An ``/api/v1`` value in ``LM_BASE_URL`` is resolved to the ``/v1`` runtime URL."""
    env = (
        ("LM_API_KEY", "lm-token"),
        ("LM_BASE_URL", "http://lmstudio.remote:4321/api/v1"),
    )
    for name, value in env:
        monkeypatch.setenv(name, value)

    creds = resolve_api_key_provider_credentials("lmstudio")

    assert creds["provider"] == "lmstudio"
    assert creds["base_url"] == "http://lmstudio.remote:4321/v1"
|
||||
|
||||
def test_resolve_lmstudio_no_api_key_substitutes_placeholder(self, monkeypatch):
|
||||
# No-auth LM Studio: when LM_API_KEY isn't set, runtime credentials
|
||||
# carry a placeholder so gateway/TUI/cron paths see the local server
|
||||
|
|
|
|||
|
|
@ -677,6 +677,19 @@ class TestValidateApiFallback:
|
|||
|
||||
assert models == ["publisher/chat-model"]
|
||||
|
||||
def test_fetch_lmstudio_models_normalizes_native_api_base_url(self):
    """Model listing with a ``/api/v1`` base URL requests ``/api/v1/models``."""
    payload = b'{"models":[{"key":"publisher/chat-model","type":"llm"}]}'

    # Response mock usable as a context manager that yields itself.
    mock_resp = MagicMock()
    mock_resp.__enter__.return_value = mock_resp
    mock_resp.__exit__.return_value = False
    mock_resp.read.return_value = payload

    with patch("hermes_cli.models.urllib.request.urlopen", return_value=mock_resp) as mock_urlopen:
        models = fetch_lmstudio_models(base_url="http://localhost:1234/api/v1")

    sent_request = mock_urlopen.call_args[0][0]
    assert sent_request.full_url == "http://localhost:1234/api/v1/models"
    assert models == ["publisher/chat-model"]
|
||||
|
||||
def test_validate_lmstudio_rejects_embedding_models(self):
|
||||
mock_resp = MagicMock()
|
||||
mock_resp.__enter__.return_value = mock_resp
|
||||
|
|
|
|||
|
|
@ -416,6 +416,32 @@ def test_resolve_runtime_provider_lmstudio_saved_base_url_wins_over_env(monkeypa
|
|||
assert resolved["api_key"] == "dummy-lm-api-key"
|
||||
|
||||
|
||||
def test_resolve_runtime_provider_lmstudio_normalizes_native_api_saved_base_url(monkeypatch):
    """A saved ``/api/v1`` base URL is resolved to the ``/v1`` runtime URL."""
    for env_name in ("LM_API_KEY", "LM_BASE_URL"):
        monkeypatch.delenv(env_name, raising=False)

    saved_model_cfg = {
        "provider": "lmstudio",
        "base_url": "http://192.168.1.10:1234/api/v1",
        "default": "qwen/qwen3-coder-30b",
    }

    class Pool:
        """Stand-in credential pool that reports no credentials."""

        def has_credentials(self):
            return False

    monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "lmstudio")
    # Hand out a fresh copy per call, as the original inline dict literal did.
    monkeypatch.setattr(rp, "_get_model_config", lambda: dict(saved_model_cfg))
    monkeypatch.setattr(rp, "load_pool", lambda provider: Pool())

    resolved = rp.resolve_runtime_provider(requested="lmstudio")

    assert resolved["provider"] == "lmstudio"
    assert resolved["api_mode"] == "chat_completions"
    assert resolved["base_url"] == "http://192.168.1.10:1234/v1"
|
||||
|
||||
|
||||
def test_resolve_runtime_provider_openrouter_explicit(monkeypatch):
|
||||
monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "openrouter")
|
||||
monkeypatch.setattr(rp, "_get_model_config", lambda: {})
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue