This commit is contained in:
Roy Sun 2026-04-24 19:25:06 -05:00 committed by GitHub
commit f0a5021da2
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 74 additions and 0 deletions

View file

@ -833,6 +833,40 @@ _EMPTY_MODEL_INFO: dict = {
}
def _resolve_custom_provider_model_context_length(cfg: dict, *, model_name: str, base_url: str) -> int:
"""Return custom_providers per-model context_length override, if any."""
if not model_name or not base_url:
return 0
try:
from hermes_cli.config import get_compatible_custom_providers
custom_providers = get_compatible_custom_providers(cfg)
except Exception:
custom_providers = cfg.get("custom_providers")
if not isinstance(custom_providers, list):
custom_providers = []
normalized_base_url = str(base_url or "").rstrip("/")
for entry in custom_providers:
if not isinstance(entry, dict):
continue
if str(entry.get("base_url") or "").rstrip("/") != normalized_base_url:
continue
models = entry.get("models", {})
if not isinstance(models, dict):
break
model_cfg = models.get(model_name, {})
if not isinstance(model_cfg, dict):
break
ctx = model_cfg.get("context_length")
try:
return int(ctx) if ctx is not None and int(ctx) > 0 else 0
except (TypeError, ValueError):
return 0
return 0
@app.get("/api/model/info")
def get_model_info():
"""Return resolved model metadata for the currently configured model.
@ -876,6 +910,12 @@ def get_model_info():
config_ctx_int = 0
if isinstance(config_ctx, int) and config_ctx > 0:
config_ctx_int = config_ctx
elif provider:
config_ctx_int = _resolve_custom_provider_model_context_length(
cfg,
model_name=model_name,
base_url=base_url,
)
# Effective is what the agent actually uses
effective_ctx = config_ctx_int if config_ctx_int > 0 else auto_ctx

View file

@ -125,6 +125,40 @@ class TestWebServerEndpoints:
assert "hermes_home" in data
assert "active_sessions" in data
def test_get_model_info_honors_custom_provider_context_length(self):
    """``/api/model/info`` reports a provider-level per-model context length.

    Saves a config whose active model ``gpt-5.4`` is served by provider
    ``roy``, with a ``context_length`` of 400000 declared for that model under
    the provider's ``models`` mapping. Auto-detection
    (``agent.model_metadata.get_model_context_length``) is patched to return
    128000, so the response must expose both numbers and pick the configured
    one as the effective value.
    """
    from hermes_cli.config import save_config

    endpoint = "http://127.0.0.1:8317/v1"
    active_model = {
        "default": "gpt-5.4",
        "provider": "roy",
        "base_url": endpoint,
    }
    roy_provider = {
        "type": "custom-openai",
        "base_url": endpoint,
        "api_key_env": "ROY_API_KEY",
        "models": {
            "gpt-5.4": {
                "context_length": 400000,
            },
        },
    }
    save_config({"model": active_model, "providers": {"roy": roy_provider}})

    # Pin the auto-detected value so it is distinguishable from the override.
    with patch("agent.model_metadata.get_model_context_length", return_value=128000):
        response = self.client.get("/api/model/info")

    assert response.status_code == 200
    payload = response.json()
    assert payload["model"] == "gpt-5.4"
    assert payload["provider"] == "roy"
    assert payload["auto_context_length"] == 128000
    assert payload["config_context_length"] == 400000
    assert payload["effective_context_length"] == 400000
def test_get_status_filters_unconfigured_gateway_platforms(self, monkeypatch):
import gateway.config as gateway_config
import hermes_cli.web_server as web_server