fix(gateway): refresh max_turns before resolving runtime budget

This commit is contained in:
infinitycrew39 2026-06-18 07:28:28 +07:00 committed by Teknium
parent 2c3aebcadc
commit 460b1e50e5
2 changed files with 19 additions and 10 deletions

View file

@ -1033,7 +1033,13 @@ class APIServerAdapter(BasePlatformAdapter):
matching the semantics of the native gateway's ``session_key``.
"""
from run_agent import AIAgent
from gateway.run import _resolve_runtime_agent_kwargs, _resolve_gateway_model, _load_gateway_config, GatewayRunner
from gateway.run import (
_current_max_iterations,
_resolve_runtime_agent_kwargs,
_resolve_gateway_model,
_load_gateway_config,
GatewayRunner,
)
from hermes_cli.tools_config import _get_platform_tools
runtime_kwargs = _resolve_runtime_agent_kwargs()
@ -1043,7 +1049,7 @@ class APIServerAdapter(BasePlatformAdapter):
user_config = _load_gateway_config()
enabled_toolsets = sorted(_get_platform_tools(user_config, "api_server"))
max_iterations = int(os.getenv("HERMES_MAX_ITERATIONS", "90"))
max_iterations = _current_max_iterations()
# Load fallback provider chain so the API server platform has the
# same fallback behaviour as Telegram/Discord/Slack (fixes #4954).

View file

@ -1196,6 +1196,15 @@ def _reload_runtime_env_preserving_config_authority() -> None:
os.environ["HERMES_MAX_ITERATIONS"] = str(agent_cfg["max_turns"])
def _current_max_iterations() -> int:
    """Return the iteration budget currently configured for the gateway.

    The runtime environment is refreshed first via
    ``_reload_runtime_env_preserving_config_authority()`` so the value read
    from ``HERMES_MAX_ITERATIONS`` reflects any change made since the
    process started.

    Returns:
        The integer value of ``HERMES_MAX_ITERATIONS``, or ``90`` when the
        variable is unset or cannot be parsed as an integer.
    """
    # Refresh before reading: the reload helper may rewrite the env var.
    _reload_runtime_env_preserving_config_authority()

    raw_budget = os.getenv("HERMES_MAX_ITERATIONS", "90")
    try:
        budget = int(raw_budget)
    except (TypeError, ValueError):
        # Malformed value (e.g. empty or non-numeric): use the default.
        budget = 90
    return budget
# Matches a volume spec of the form ``host:container[:options]``.
# The ``container`` group must start with "/" and contain no ":"; the
# optional ``options`` group likewise contains no ":". The ``host`` group is
# greedy and may itself contain ":" characters.
_DOCKER_VOLUME_SPEC_RE = re.compile(r"^(?P<host>.+):(?P<container>/[^:]+?)(?::(?P<options>[^:]+))?$")
# Container-side paths; presumably the mount targets treated as media output
# directories — confirm against the code that consults this set.
_DOCKER_MEDIA_OUTPUT_CONTAINER_PATHS = {"/output", "/outputs"}
@ -10633,7 +10642,7 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
disabled_toolsets = agent_cfg.get("disabled_toolsets") or None
pr = self._provider_routing
max_iterations = int(os.getenv("HERMES_MAX_ITERATIONS", "90"))
max_iterations = _current_max_iterations()
reasoning_config = self._resolve_session_reasoning_config(source=source)
self._reasoning_config = reasoning_config
self._service_tier = self._load_service_tier()
@ -14581,9 +14590,6 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
# session_key is now set via contextvars in _set_session_env()
# (concurrency-safe). Keep os.environ as fallback for CLI/cron.
os.environ["HERMES_SESSION_KEY"] = session_key or ""
# Read from env var or use default (same as CLI)
max_iterations = int(os.getenv("HERMES_MAX_ITERATIONS", "90"))
# Map platform enum to the platform hint key the agent understands.
# Platform.LOCAL ("local") maps to "cli"; others pass through as-is.
@ -14598,10 +14604,7 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
if self._ephemeral_system_prompt:
combined_ephemeral = (combined_ephemeral + "\n\n" + self._ephemeral_system_prompt).strip()
# Re-read .env and config for fresh credentials (gateway is long-lived,
# keys may change without restart). Keep config.yaml authoritative for
# runtime budget settings bridged into env vars.
_reload_runtime_env_preserving_config_authority()
max_iterations = _current_max_iterations()
try:
model, runtime_kwargs = self._resolve_session_agent_runtime(