fix(gateway): propagate max_tokens from config.yaml to AIAgent

max_tokens set under model: in config.yaml was silently ignored:
the value was never read from config and never passed through
_resolve_runtime_agent_kwargs(), _resolve_turn_agent_config(),
or the session override path. max_tokens is now propagated through
all three code paths so custom/Ollama endpoints receive the correct
output cap.

Closes #20741
This commit is contained in:
ViewWay 2026-05-06 23:41:56 +08:00 committed by Teknium
parent 9af54b2f8c
commit cf786593cd

View file

@ -1179,6 +1179,7 @@ def _resolve_runtime_agent_kwargs() -> dict:
from hermes_cli.runtime_provider import (
resolve_runtime_provider,
format_runtime_provider_error,
_get_model_config,
)
from hermes_cli.auth import AuthError, is_rate_limited_auth_error
@ -1200,6 +1201,13 @@ def _resolve_runtime_agent_kwargs() -> dict:
except Exception as exc:
raise RuntimeError(format_runtime_provider_error(exc)) from exc
model_cfg = _get_model_config()
max_tokens = None
if isinstance(model_cfg, dict):
mt = model_cfg.get("max_tokens")
if isinstance(mt, int):
max_tokens = mt
return {
"api_key": runtime.get("api_key"),
"base_url": runtime.get("base_url"),
@ -1208,6 +1216,7 @@ def _resolve_runtime_agent_kwargs() -> dict:
"command": runtime.get("command"),
"args": list(runtime.get("args") or []),
"credential_pool": runtime.get("credential_pool"),
"max_tokens": max_tokens,
}
@ -2596,6 +2605,7 @@ class GatewayRunner:
"api_key": override.get("api_key"),
"base_url": override.get("base_url"),
"api_mode": override.get("api_mode"),
"max_tokens": override.get("max_tokens"),
}
if override_runtime.get("api_key"):
logger.debug(
@ -2693,6 +2703,7 @@ class GatewayRunner:
"command": runtime_kwargs.get("command"),
"args": list(runtime_kwargs.get("args") or []),
"credential_pool": runtime_kwargs.get("credential_pool"),
"max_tokens": runtime_kwargs.get("max_tokens"),
}
route = {
"model": model,