From 92fbab300faea83d868291cee3e42ba851a881fd Mon Sep 17 00:00:00 2001 From: Roy Sun Date: Fri, 24 Apr 2026 18:10:03 +0800 Subject: [PATCH] fix: honor custom provider context in web model info --- hermes_cli/web_server.py | 40 +++++++++++++++++++++++++++++ tests/hermes_cli/test_web_server.py | 34 ++++++++++++++++++++++++ 2 files changed, 74 insertions(+) diff --git a/hermes_cli/web_server.py b/hermes_cli/web_server.py index 083e0714f..ad89e02e3 100644 --- a/hermes_cli/web_server.py +++ b/hermes_cli/web_server.py @@ -829,6 +829,40 @@ _EMPTY_MODEL_INFO: dict = { } +def _resolve_custom_provider_model_context_length(cfg: dict, *, model_name: str, base_url: str) -> int: + """Return custom_providers per-model context_length override, if any.""" + if not model_name or not base_url: + return 0 + + try: + from hermes_cli.config import get_compatible_custom_providers + + custom_providers = get_compatible_custom_providers(cfg) + except Exception: + custom_providers = cfg.get("custom_providers") + if not isinstance(custom_providers, list): + custom_providers = [] + + normalized_base_url = str(base_url or "").rstrip("/") + for entry in custom_providers: + if not isinstance(entry, dict): + continue + if str(entry.get("base_url") or "").rstrip("/") != normalized_base_url: + continue + models = entry.get("models", {}) + if not isinstance(models, dict): + break + model_cfg = models.get(model_name, {}) + if not isinstance(model_cfg, dict): + break + ctx = model_cfg.get("context_length") + try: + return int(ctx) if ctx is not None and int(ctx) > 0 else 0 + except (TypeError, ValueError): + return 0 + return 0 + + @app.get("/api/model/info") def get_model_info(): """Return resolved model metadata for the currently configured model. @@ -872,6 +906,12 @@ def get_model_info(): config_ctx_int = 0 if isinstance(config_ctx, int) and config_ctx > 0: config_ctx_int = config_ctx + elif provider: + config_ctx_int = _resolve_custom_provider_model_context_length( + cfg, + model_name=model_name, + base_url=base_url, + ) # Effective is what the agent actually uses effective_ctx = config_ctx_int if config_ctx_int > 0 else auto_ctx diff --git a/tests/hermes_cli/test_web_server.py b/tests/hermes_cli/test_web_server.py index a92f0c8d1..1acce595a 100644 --- a/tests/hermes_cli/test_web_server.py +++ b/tests/hermes_cli/test_web_server.py @@ -125,6 +125,40 @@ class TestWebServerEndpoints: assert "hermes_home" in data assert "active_sessions" in data + def test_get_model_info_honors_custom_provider_context_length(self): + from hermes_cli.config import save_config + + save_config({ + "model": { + "default": "gpt-5.4", + "provider": "roy", + "base_url": "http://127.0.0.1:8317/v1", + }, + "providers": { + "roy": { + "type": "custom-openai", + "base_url": "http://127.0.0.1:8317/v1", + "api_key_env": "ROY_API_KEY", + "models": { + "gpt-5.4": { + "context_length": 400000, + } + }, + } + }, + }) + + with patch("agent.model_metadata.get_model_context_length", return_value=128000): + resp = self.client.get("/api/model/info") + + assert resp.status_code == 200 + data = resp.json() + assert data["model"] == "gpt-5.4" + assert data["provider"] == "roy" + assert data["auto_context_length"] == 128000 + assert data["config_context_length"] == 400000 + assert data["effective_context_length"] == 400000 + def test_get_status_filters_unconfigured_gateway_platforms(self, monkeypatch): import gateway.config as gateway_config import hermes_cli.web_server as web_server