mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-01 01:51:44 +00:00
fix: follow-up for salvaged PR #17061
- Remove dead _lmstudio_loaded_context attribute from run_agent.py (set but never read — the loaded context is pushed to context_compressor.update_model which is the actual consumer)
- Cache empty reasoning options with 60s TTL to avoid per-turn HTTP probe for non-reasoning LM Studio models. Non-empty results cached permanently.
- Extract _lmstudio_server_root(), _lmstudio_request_headers(), and _lmstudio_fetch_raw_models() shared helpers in models.py — eliminates URL-strip + auth-header + HTTP-call duplication across probe_lmstudio_models, ensure_lmstudio_model_loaded, and lmstudio_model_reasoning_options
- Revert runtime_provider.py base_url precedence change: preserve the established contract (saved config.base_url > env var > default) for all api_key providers
- Remove unnecessary config version bump 22→23
- Fix TUI test: relax target_model assertion to avoid module-cache flake
- AUTHOR_MAP: added rugved@lmstudio.ai → rugvedS07
This commit is contained in:
parent
433d38da09
commit
5d2f9b5d7d
7 changed files with 92 additions and 102 deletions
|
|
@ -2199,31 +2199,41 @@ def _is_github_models_base_url(base_url: Optional[str]) -> bool:
|
|||
)
|
||||
|
||||
|
||||
def probe_lmstudio_models(
|
||||
api_key: Optional[str] = None,
|
||||
base_url: Optional[str] = None,
|
||||
timeout: float = 5.0,
|
||||
) -> Optional[list[str]]:
|
||||
"""Probe LM Studio's model listing.
|
||||
def _lmstudio_server_root(base_url: Optional[str]) -> Optional[str]:
|
||||
"""Strip ``/v1`` suffix from an LM Studio base URL to get the native API root.
|
||||
|
||||
Returns chat-capable model keys on success, including the valid empty-list
|
||||
case when the server is reachable but has no non-embedding models.
|
||||
Returns ``None`` on network errors, malformed responses, or empty/invalid
|
||||
base URLs.
|
||||
|
||||
Raises ``AuthError`` on HTTP 401/403 so callers can surface token issues
|
||||
separately from reachability problems.
|
||||
Returns ``None`` when the base URL is empty/invalid.
|
||||
"""
|
||||
server_root = (base_url or "").strip().rstrip("/")
|
||||
if server_root.endswith("/v1"):
|
||||
server_root = server_root[:-3].rstrip("/")
|
||||
if not server_root:
|
||||
return None
|
||||
root = (base_url or "").strip().rstrip("/")
|
||||
if root.endswith("/v1"):
|
||||
root = root[:-3].rstrip("/")
|
||||
return root or None
|
||||
|
||||
|
||||
def _lmstudio_request_headers(api_key: Optional[str] = None) -> dict:
    """Build HTTP headers for LM Studio native API requests.

    Always sets the Hermes ``User-Agent``; a ``Bearer`` Authorization header
    is included only when *api_key* is a non-blank string.
    """
    bearer = str(api_key or "").strip()
    if bearer:
        return {
            "User-Agent": _HERMES_USER_AGENT,
            "Authorization": f"Bearer {bearer}",
        }
    return {"User-Agent": _HERMES_USER_AGENT}
|
||||
|
||||
|
||||
def _lmstudio_fetch_raw_models(
|
||||
api_key: Optional[str] = None,
|
||||
base_url: Optional[str] = None,
|
||||
timeout: float = 5.0,
|
||||
) -> Optional[list[dict]]:
|
||||
"""Fetch the raw model list from LM Studio's ``/api/v1/models``.
|
||||
|
||||
Returns the ``models`` list of dicts on success, ``None`` on network
|
||||
errors or malformed responses. Raises ``AuthError`` on HTTP 401/403.
|
||||
"""
|
||||
server_root = _lmstudio_server_root(base_url)
|
||||
if not server_root:
|
||||
return None
|
||||
|
||||
headers = _lmstudio_request_headers(api_key)
|
||||
request = urllib.request.Request(server_root + "/api/v1/models", headers=headers)
|
||||
try:
|
||||
with urllib.request.urlopen(request, timeout=timeout) as resp:
|
||||
|
|
@ -2256,6 +2266,27 @@ def probe_lmstudio_models(
|
|||
server_root,
|
||||
)
|
||||
return None
|
||||
return raw_models
|
||||
|
||||
|
||||
def probe_lmstudio_models(
|
||||
api_key: Optional[str] = None,
|
||||
base_url: Optional[str] = None,
|
||||
timeout: float = 5.0,
|
||||
) -> Optional[list[str]]:
|
||||
"""Probe LM Studio's model listing.
|
||||
|
||||
Returns chat-capable model keys on success, including the valid empty-list
|
||||
case when the server is reachable but has no non-embedding models.
|
||||
Returns ``None`` on network errors, malformed responses, or empty/invalid
|
||||
base URLs.
|
||||
|
||||
Raises ``AuthError`` on HTTP 401/403 so callers can surface token issues
|
||||
separately from reachability problems.
|
||||
"""
|
||||
raw_models = _lmstudio_fetch_raw_models(api_key=api_key, base_url=base_url, timeout=timeout)
|
||||
if raw_models is None:
|
||||
return None
|
||||
|
||||
keys: list[str] = []
|
||||
for raw in raw_models:
|
||||
|
|
@ -2302,28 +2333,17 @@ def ensure_lmstudio_model_loaded(
|
|||
at the model's ``max_context_length``. Returns the resolved loaded context
|
||||
length, or ``None`` when the probe / load failed.
|
||||
"""
|
||||
server_root = (base_url or "").strip().rstrip("/")
|
||||
if server_root.endswith("/v1"):
|
||||
server_root = server_root[:-3].rstrip("/")
|
||||
server_root = _lmstudio_server_root(base_url)
|
||||
if not server_root:
|
||||
return None
|
||||
|
||||
headers = {"User-Agent": _HERMES_USER_AGENT}
|
||||
token = str(api_key or "").strip()
|
||||
if token:
|
||||
headers["Authorization"] = f"Bearer {token}"
|
||||
headers = _lmstudio_request_headers(api_key)
|
||||
|
||||
try:
|
||||
with urllib.request.urlopen(
|
||||
urllib.request.Request(server_root + "/api/v1/models", headers=headers),
|
||||
timeout=10,
|
||||
) as resp:
|
||||
payload = json.loads(resp.read().decode())
|
||||
raw_models = _lmstudio_fetch_raw_models(api_key=api_key, base_url=base_url, timeout=10)
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
raw_models = payload.get("models") if isinstance(payload, dict) else None
|
||||
if not isinstance(raw_models, list):
|
||||
raw_models = None
|
||||
if raw_models is None:
|
||||
return None
|
||||
|
||||
target_entry = None
|
||||
|
|
@ -2380,28 +2400,11 @@ def lmstudio_model_reasoning_options(
|
|||
Returns ``[]`` when the model is unknown, the endpoint is unreachable,
|
||||
or the model does not declare a reasoning capability.
|
||||
"""
|
||||
server_root = (base_url or "").strip().rstrip("/")
|
||||
if server_root.endswith("/v1"):
|
||||
server_root = server_root[:-3].rstrip("/")
|
||||
if not server_root:
|
||||
return []
|
||||
|
||||
headers = {"User-Agent": _HERMES_USER_AGENT}
|
||||
token = str(api_key or "").strip()
|
||||
if token:
|
||||
headers["Authorization"] = f"Bearer {token}"
|
||||
|
||||
try:
|
||||
with urllib.request.urlopen(
|
||||
urllib.request.Request(server_root + "/api/v1/models", headers=headers),
|
||||
timeout=timeout,
|
||||
) as resp:
|
||||
payload = json.loads(resp.read().decode())
|
||||
raw_models = _lmstudio_fetch_raw_models(api_key=api_key, base_url=base_url, timeout=timeout)
|
||||
except Exception:
|
||||
return []
|
||||
|
||||
raw_models = payload.get("models") if isinstance(payload, dict) else None
|
||||
if not isinstance(raw_models, list):
|
||||
raw_models = None
|
||||
if not raw_models:
|
||||
return []
|
||||
|
||||
for raw in raw_models:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue