From cf786593cd83f8cffd76d9faed561f7d92e24454 Mon Sep 17 00:00:00 2001
From: ViewWay <834740219@qq.com>
Date: Wed, 6 May 2026 23:41:56 +0800
Subject: [PATCH] fix(gateway): propagate max_tokens from config.yaml to
 AIAgent

max_tokens set under model: in config.yaml was silently ignored.
The value was never read from config, never passed through
_resolve_runtime_agent_kwargs(), _resolve_turn_agent_config(),
or the session override path.  Added it to all three code paths
so custom/Ollama endpoints receive the correct output cap.

Closes #20741
---
 gateway/run.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/gateway/run.py b/gateway/run.py
index 45d8f6a7a8c..6444c857a79 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -1179,6 +1179,7 @@ def _resolve_runtime_agent_kwargs() -> dict:
     from hermes_cli.runtime_provider import (
         resolve_runtime_provider,
         format_runtime_provider_error,
+        _get_model_config,
     )
     from hermes_cli.auth import AuthError, is_rate_limited_auth_error
 
@@ -1200,6 +1201,13 @@ def _resolve_runtime_agent_kwargs() -> dict:
     except Exception as exc:
         raise RuntimeError(format_runtime_provider_error(exc)) from exc
 
+    model_cfg = _get_model_config()
+    max_tokens = None
+    if isinstance(model_cfg, dict):
+        mt = model_cfg.get("max_tokens")
+        if isinstance(mt, int):
+            max_tokens = mt
+
     return {
         "api_key": runtime.get("api_key"),
         "base_url": runtime.get("base_url"),
@@ -1208,6 +1216,7 @@ def _resolve_runtime_agent_kwargs() -> dict:
         "command": runtime.get("command"),
         "args": list(runtime.get("args") or []),
         "credential_pool": runtime.get("credential_pool"),
+        "max_tokens": max_tokens,
     }
 
 
@@ -2596,6 +2605,7 @@ class GatewayRunner:
                 "api_key": override.get("api_key"),
                 "base_url": override.get("base_url"),
                 "api_mode": override.get("api_mode"),
+                "max_tokens": override.get("max_tokens"),
             }
             if override_runtime.get("api_key"):
                 logger.debug(
@@ -2693,6 +2703,7 @@ class GatewayRunner:
             "command": runtime_kwargs.get("command"),
             "args": list(runtime_kwargs.get("args") or []),
             "credential_pool": runtime_kwargs.get("credential_pool"),
+            "max_tokens": runtime_kwargs.get("max_tokens"),
         }
         route = {
             "model": model,