diff --git a/cli.py b/cli.py index 8910e2d8c5d..177a1e44e97 100644 --- a/cli.py +++ b/cli.py @@ -3194,6 +3194,18 @@ class HermesCLI: _config_model = (_model_config.get("default") or _model_config.get("model") or "") if isinstance(_model_config, dict) else (_model_config or "") _DEFAULT_CONFIG_MODEL = "" self.model = model or _config_model or _DEFAULT_CONFIG_MODEL + # Read max_tokens from config (env var override: HERMES_MAX_TOKENS) + _env_mt = os.environ.get("HERMES_MAX_TOKENS") + if _env_mt: + try: + self.max_tokens = int(_env_mt) + except (ValueError, TypeError): + self.max_tokens = None + elif isinstance(_model_config, dict): + _mt = _model_config.get("max_tokens") + self.max_tokens = _mt if isinstance(_mt, int) else None + else: + self.max_tokens = None # Auto-detect model from local server if still on default if self.model == _DEFAULT_CONFIG_MODEL: _base_url = (_model_config.get("base_url") or "") if isinstance(_model_config, dict) else "" @@ -5168,6 +5180,7 @@ class HermesCLI: acp_command=runtime.get("command"), acp_args=runtime.get("args"), credential_pool=runtime.get("credential_pool"), + max_tokens=self.max_tokens, max_iterations=self.max_turns, enabled_toolsets=self.enabled_toolsets, disabled_toolsets=self.disabled_toolsets, @@ -9284,6 +9297,7 @@ class HermesCLI: api_mode=turn_route["runtime"].get("api_mode"), acp_command=turn_route["runtime"].get("command"), acp_args=turn_route["runtime"].get("args"), + max_tokens=turn_route["runtime"].get("max_tokens"), max_iterations=self.max_turns, enabled_toolsets=self.enabled_toolsets, quiet_mode=True, diff --git a/gateway/run.py b/gateway/run.py index 6444c857a79..ef3fd3be5ed 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -1203,7 +1203,13 @@ def _resolve_runtime_agent_kwargs() -> dict: model_cfg = _get_model_config() max_tokens = None - if isinstance(model_cfg, dict): + _env_mt = os.environ.get("HERMES_MAX_TOKENS") + if _env_mt: + try: + max_tokens = int(_env_mt) + except (ValueError, TypeError): + max_tokens = None + elif isinstance(model_cfg, dict): mt = model_cfg.get("max_tokens") if isinstance(mt, int): max_tokens = mt