fix(cli,gateway): complete max_tokens propagation — CLI path + env var override

Previous commit only covered the gateway runtime path. This adds:
- CLI __init__: read max_tokens from the model config; when HERMES_MAX_TOKENS is set it takes precedence (a non-integer value yields None rather than falling back to the config)
- CLI AIAgent() calls (interactive + background): pass max_tokens
- Gateway _resolve_runtime_agent_kwargs: add HERMES_MAX_TOKENS env override

All three code paths (CLI, gateway runtime, session override) now
consistently propagate max_tokens to AIAgent.
This commit is contained in:
ViewWay 2026-05-07 00:04:12 +08:00 committed by Teknium
parent cf786593cd
commit 1c909e75e1
2 changed files with 21 additions and 1 deletion

14
cli.py
View file

@@ -3194,6 +3194,18 @@ class HermesCLI:
_config_model = (_model_config.get("default") or _model_config.get("model") or "") if isinstance(_model_config, dict) else (_model_config or "")
_DEFAULT_CONFIG_MODEL = ""
self.model = model or _config_model or _DEFAULT_CONFIG_MODEL
# Read max_tokens from config (env var override: HERMES_MAX_TOKENS)
_env_mt = os.environ.get("HERMES_MAX_TOKENS")
if _env_mt:
try:
self.max_tokens = int(_env_mt)
except (ValueError, TypeError):
self.max_tokens = None
elif isinstance(_model_config, dict):
_mt = _model_config.get("max_tokens")
self.max_tokens = _mt if isinstance(_mt, int) else None
else:
self.max_tokens = None
# Auto-detect model from local server if still on default
if self.model == _DEFAULT_CONFIG_MODEL:
_base_url = (_model_config.get("base_url") or "") if isinstance(_model_config, dict) else ""
@@ -5168,6 +5180,7 @@ class HermesCLI:
acp_command=runtime.get("command"),
acp_args=runtime.get("args"),
credential_pool=runtime.get("credential_pool"),
max_tokens=self.max_tokens,
max_iterations=self.max_turns,
enabled_toolsets=self.enabled_toolsets,
disabled_toolsets=self.disabled_toolsets,
@@ -9284,6 +9297,7 @@ class HermesCLI:
api_mode=turn_route["runtime"].get("api_mode"),
acp_command=turn_route["runtime"].get("command"),
acp_args=turn_route["runtime"].get("args"),
max_tokens=turn_route["runtime"].get("max_tokens"),
max_iterations=self.max_turns,
enabled_toolsets=self.enabled_toolsets,
quiet_mode=True,

View file

@@ -1203,7 +1203,13 @@ def _resolve_runtime_agent_kwargs() -> dict:
model_cfg = _get_model_config()
max_tokens = None
if isinstance(model_cfg, dict):
_env_mt = os.environ.get("HERMES_MAX_TOKENS")
if _env_mt:
try:
max_tokens = int(_env_mt)
except (ValueError, TypeError):
max_tokens = None
elif isinstance(model_cfg, dict):
mt = model_cfg.get("max_tokens")
if isinstance(mt, int):
max_tokens = mt