fix(model_metadata): use /v1/props endpoint for llama.cpp context detection

Recent versions of llama.cpp moved the server properties endpoint from
/props to /v1/props (consistent with the /v1 API prefix convention).

The server-type detection path and the n_ctx reading path both used the
old /props URL, which returns 404 on current builds. This caused the
allocated context window size to fall back to a hardcoded default,
resulting in an incorrect (too small) value being displayed in the TUI
context bar.

Fix: try /v1/props first, fall back to /props for backward compatibility
with older llama.cpp builds. Both paths are now handled gracefully.
This commit is contained in:
Teknium 2026-03-21 18:07:18 -07:00
parent 3ab50376b0
commit 29d0541ac9
No known key found for this signature in database

View file

@@ -260,9 +260,11 @@ def detect_local_server_type(base_url: str) -> Optional[str]:
pass
except Exception:
pass
# llama.cpp exposes /props
# llama.cpp exposes /v1/props (older builds used /props without the /v1 prefix)
try:
r = client.get(f"{server_url}/props")
r = client.get(f"{server_url}/v1/props")
if r.status_code != 200:
r = client.get(f"{server_url}/props") # fallback for older builds
if r.status_code == 200 and "default_generation_settings" in r.text:
return "llamacpp"
except Exception:
@@ -455,8 +457,11 @@ def fetch_endpoint_model_metadata(
)
if is_llamacpp:
try:
props_url = candidate.rstrip("/").replace("/v1", "") + "/props"
props_resp = requests.get(props_url, headers=headers, timeout=5)
# Try /v1/props first (current llama.cpp); fall back to /props for older builds
base = candidate.rstrip("/").replace("/v1", "")
props_resp = requests.get(base + "/v1/props", headers=headers, timeout=5)
if not props_resp.ok:
props_resp = requests.get(base + "/props", headers=headers, timeout=5)
if props_resp.ok:
props = props_resp.json()
gen_settings = props.get("default_generation_settings", {})