mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-09 08:21:50 +00:00
fix(gateway): propagate max_tokens from config.yaml to AIAgent
The max_tokens value set under model: in config.yaml was silently ignored: it was never read from the config, and it was never passed through _resolve_runtime_agent_kwargs(), _resolve_turn_agent_config(), or the session override path. This change adds it to all three code paths so that custom/Ollama endpoints receive the correct output cap. Closes #20741
This commit is contained in:
parent
9af54b2f8c
commit
cf786593cd
1 changed file with 11 additions and 0 deletions
|
|
@ -1179,6 +1179,7 @@ def _resolve_runtime_agent_kwargs() -> dict:
|
|||
from hermes_cli.runtime_provider import (
|
||||
resolve_runtime_provider,
|
||||
format_runtime_provider_error,
|
||||
_get_model_config,
|
||||
)
|
||||
from hermes_cli.auth import AuthError, is_rate_limited_auth_error
|
||||
|
||||
|
|
@ -1200,6 +1201,13 @@ def _resolve_runtime_agent_kwargs() -> dict:
|
|||
except Exception as exc:
|
||||
raise RuntimeError(format_runtime_provider_error(exc)) from exc
|
||||
|
||||
model_cfg = _get_model_config()
|
||||
max_tokens = None
|
||||
if isinstance(model_cfg, dict):
|
||||
mt = model_cfg.get("max_tokens")
|
||||
if isinstance(mt, int):
|
||||
max_tokens = mt
|
||||
|
||||
return {
|
||||
"api_key": runtime.get("api_key"),
|
||||
"base_url": runtime.get("base_url"),
|
||||
|
|
@ -1208,6 +1216,7 @@ def _resolve_runtime_agent_kwargs() -> dict:
|
|||
"command": runtime.get("command"),
|
||||
"args": list(runtime.get("args") or []),
|
||||
"credential_pool": runtime.get("credential_pool"),
|
||||
"max_tokens": max_tokens,
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -2596,6 +2605,7 @@ class GatewayRunner:
|
|||
"api_key": override.get("api_key"),
|
||||
"base_url": override.get("base_url"),
|
||||
"api_mode": override.get("api_mode"),
|
||||
"max_tokens": override.get("max_tokens"),
|
||||
}
|
||||
if override_runtime.get("api_key"):
|
||||
logger.debug(
|
||||
|
|
@ -2693,6 +2703,7 @@ class GatewayRunner:
|
|||
"command": runtime_kwargs.get("command"),
|
||||
"args": list(runtime_kwargs.get("args") or []),
|
||||
"credential_pool": runtime_kwargs.get("credential_pool"),
|
||||
"max_tokens": runtime_kwargs.get("max_tokens"),
|
||||
}
|
||||
route = {
|
||||
"model": model,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue