From 1c909e75e1a0ba6e5fde07da804066a1e42450e9 Mon Sep 17 00:00:00 2001 From: ViewWay <834740219@qq.com> Date: Thu, 7 May 2026 00:04:12 +0800 Subject: [PATCH] =?UTF-8?q?fix(cli,gateway):=20complete=20max=5Ftokens=20p?= =?UTF-8?q?ropagation=20=E2=80=94=20CLI=20path=20+=20env=20var=20override?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previous commit only covered the gateway runtime path. This adds: - CLI __init__: read max_tokens from model config with HERMES_MAX_TOKENS env override - CLI AIAgent() calls (interactive + background): pass max_tokens - Gateway _resolve_runtime_agent_kwargs: add HERMES_MAX_TOKENS env override All three code paths (CLI, gateway runtime, session override) now consistently propagate max_tokens to AIAgent. --- cli.py | 14 ++++++++++++++ gateway/run.py | 8 +++++++- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/cli.py b/cli.py index 8910e2d8c5d..177a1e44e97 100644 --- a/cli.py +++ b/cli.py @@ -3194,6 +3194,18 @@ class HermesCLI: _config_model = (_model_config.get("default") or _model_config.get("model") or "") if isinstance(_model_config, dict) else (_model_config or "") _DEFAULT_CONFIG_MODEL = "" self.model = model or _config_model or _DEFAULT_CONFIG_MODEL + # Read max_tokens from config (env var override: HERMES_MAX_TOKENS) + _env_mt = os.environ.get("HERMES_MAX_TOKENS") + if _env_mt: + try: + self.max_tokens = int(_env_mt) + except (ValueError, TypeError): + self.max_tokens = None + elif isinstance(_model_config, dict): + _mt = _model_config.get("max_tokens") + self.max_tokens = _mt if isinstance(_mt, int) else None + else: + self.max_tokens = None # Auto-detect model from local server if still on default if self.model == _DEFAULT_CONFIG_MODEL: _base_url = (_model_config.get("base_url") or "") if isinstance(_model_config, dict) else "" @@ -5168,6 +5180,7 @@ class HermesCLI: acp_command=runtime.get("command"), acp_args=runtime.get("args"), credential_pool=runtime.get("credential_pool"), + max_tokens=self.max_tokens, max_iterations=self.max_turns, enabled_toolsets=self.enabled_toolsets, disabled_toolsets=self.disabled_toolsets, @@ -9284,6 +9297,7 @@ class HermesCLI: api_mode=turn_route["runtime"].get("api_mode"), acp_command=turn_route["runtime"].get("command"), acp_args=turn_route["runtime"].get("args"), + max_tokens=turn_route["runtime"].get("max_tokens"), max_iterations=self.max_turns, enabled_toolsets=self.enabled_toolsets, quiet_mode=True, diff --git a/gateway/run.py b/gateway/run.py index 6444c857a79..ef3fd3be5ed 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -1203,7 +1203,13 @@ def _resolve_runtime_agent_kwargs() -> dict: model_cfg = _get_model_config() max_tokens = None - if isinstance(model_cfg, dict): + _env_mt = os.environ.get("HERMES_MAX_TOKENS") + if _env_mt: + try: + max_tokens = int(_env_mt) + except (ValueError, TypeError): + max_tokens = None + elif isinstance(model_cfg, dict): mt = model_cfg.get("max_tokens") if isinstance(mt, int): max_tokens = mt