diff --git a/gateway/run.py b/gateway/run.py index 45d8f6a7a8c..6444c857a79 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -1179,6 +1179,7 @@ def _resolve_runtime_agent_kwargs() -> dict: from hermes_cli.runtime_provider import ( resolve_runtime_provider, format_runtime_provider_error, + _get_model_config, ) from hermes_cli.auth import AuthError, is_rate_limited_auth_error @@ -1200,6 +1201,13 @@ def _resolve_runtime_agent_kwargs() -> dict: except Exception as exc: raise RuntimeError(format_runtime_provider_error(exc)) from exc + model_cfg = _get_model_config() + max_tokens = None + if isinstance(model_cfg, dict): + mt = model_cfg.get("max_tokens") + if isinstance(mt, int): + max_tokens = mt + return { "api_key": runtime.get("api_key"), "base_url": runtime.get("base_url"), @@ -1208,6 +1216,7 @@ def _resolve_runtime_agent_kwargs() -> dict: "command": runtime.get("command"), "args": list(runtime.get("args") or []), "credential_pool": runtime.get("credential_pool"), + "max_tokens": max_tokens, } @@ -2596,6 +2605,7 @@ class GatewayRunner: "api_key": override.get("api_key"), "base_url": override.get("base_url"), "api_mode": override.get("api_mode"), + "max_tokens": override.get("max_tokens"), } if override_runtime.get("api_key"): logger.debug( @@ -2693,6 +2703,7 @@ class GatewayRunner: "command": runtime_kwargs.get("command"), "args": list(runtime_kwargs.get("args") or []), "credential_pool": runtime_kwargs.get("credential_pool"), + "max_tokens": runtime_kwargs.get("max_tokens"), } route = { "model": model,