From cf786593cd83f8cffd76d9faed561f7d92e24454 Mon Sep 17 00:00:00 2001 From: ViewWay <834740219@qq.com> Date: Wed, 6 May 2026 23:41:56 +0800 Subject: [PATCH] fix(gateway): propagate max_tokens from config.yaml to AIAgent max_tokens set under model: in config.yaml was silently ignored. The value was never read from config, never passed through _resolve_runtime_agent_kwargs(), _resolve_turn_agent_config(), or the session override path. Added it to all three code paths so custom/Ollama endpoints receive the correct output cap. Closes #20741 --- gateway/run.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/gateway/run.py b/gateway/run.py index 45d8f6a7a8c..6444c857a79 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -1179,6 +1179,7 @@ def _resolve_runtime_agent_kwargs() -> dict: from hermes_cli.runtime_provider import ( resolve_runtime_provider, format_runtime_provider_error, + _get_model_config, ) from hermes_cli.auth import AuthError, is_rate_limited_auth_error @@ -1200,6 +1201,13 @@ def _resolve_runtime_agent_kwargs() -> dict: except Exception as exc: raise RuntimeError(format_runtime_provider_error(exc)) from exc + model_cfg = _get_model_config() + max_tokens = None + if isinstance(model_cfg, dict): + mt = model_cfg.get("max_tokens") + if isinstance(mt, int): + max_tokens = mt + return { "api_key": runtime.get("api_key"), "base_url": runtime.get("base_url"), @@ -1208,6 +1216,7 @@ def _resolve_runtime_agent_kwargs() -> dict: "command": runtime.get("command"), "args": list(runtime.get("args") or []), "credential_pool": runtime.get("credential_pool"), + "max_tokens": max_tokens, } @@ -2596,6 +2605,7 @@ class GatewayRunner: "api_key": override.get("api_key"), "base_url": override.get("base_url"), "api_mode": override.get("api_mode"), + "max_tokens": override.get("max_tokens"), } if override_runtime.get("api_key"): logger.debug( @@ -2693,6 +2703,7 @@ class GatewayRunner: "command": runtime_kwargs.get("command"), "args": list(runtime_kwargs.get("args") or []), "credential_pool": runtime_kwargs.get("credential_pool"), + "max_tokens": runtime_kwargs.get("max_tokens"), } route = { "model": model,