From 460b1e50e515fd9b0b8f472f66f8773336862d88 Mon Sep 17 00:00:00 2001 From: infinitycrew39 Date: Thu, 18 Jun 2026 07:28:28 +0700 Subject: [PATCH] fix(gateway): refresh max_turns before resolving runtime budget --- gateway/platforms/api_server.py | 10 ++++++++-- gateway/run.py | 19 +++++++++++-------- 2 files changed, 19 insertions(+), 10 deletions(-) diff --git a/gateway/platforms/api_server.py b/gateway/platforms/api_server.py index da86952a09d..54720f2b300 100644 --- a/gateway/platforms/api_server.py +++ b/gateway/platforms/api_server.py @@ -1033,7 +1033,13 @@ class APIServerAdapter(BasePlatformAdapter): — matching the semantics of the native gateway's ``session_key``. """ from run_agent import AIAgent - from gateway.run import _resolve_runtime_agent_kwargs, _resolve_gateway_model, _load_gateway_config, GatewayRunner + from gateway.run import ( + _current_max_iterations, + _resolve_runtime_agent_kwargs, + _resolve_gateway_model, + _load_gateway_config, + GatewayRunner, + ) from hermes_cli.tools_config import _get_platform_tools runtime_kwargs = _resolve_runtime_agent_kwargs() @@ -1043,7 +1049,7 @@ class APIServerAdapter(BasePlatformAdapter): user_config = _load_gateway_config() enabled_toolsets = sorted(_get_platform_tools(user_config, "api_server")) - max_iterations = int(os.getenv("HERMES_MAX_ITERATIONS", "90")) + max_iterations = _current_max_iterations() # Load fallback provider chain so the API server platform has the # same fallback behaviour as Telegram/Discord/Slack (fixes #4954). diff --git a/gateway/run.py b/gateway/run.py index e24afd035e7..59dd890f8c9 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -1196,6 +1196,15 @@ def _reload_runtime_env_preserving_config_authority() -> None: os.environ["HERMES_MAX_ITERATIONS"] = str(agent_cfg["max_turns"]) +def _current_max_iterations() -> int: + """Return the current per-turn iteration budget after runtime env refresh.""" + _reload_runtime_env_preserving_config_authority() + try: + return int(os.getenv("HERMES_MAX_ITERATIONS", "90")) + except (TypeError, ValueError): + return 90 + + _DOCKER_VOLUME_SPEC_RE = re.compile(r"^(?P.+):(?P/[^:]+?)(?::(?P[^:]+))?$") _DOCKER_MEDIA_OUTPUT_CONTAINER_PATHS = {"/output", "/outputs"} @@ -10633,7 +10642,7 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew disabled_toolsets = agent_cfg.get("disabled_toolsets") or None pr = self._provider_routing - max_iterations = int(os.getenv("HERMES_MAX_ITERATIONS", "90")) + max_iterations = _current_max_iterations() reasoning_config = self._resolve_session_reasoning_config(source=source) self._reasoning_config = reasoning_config self._service_tier = self._load_service_tier() @@ -14581,9 +14590,6 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew # session_key is now set via contextvars in _set_session_env() # (concurrency-safe). Keep os.environ as fallback for CLI/cron. os.environ["HERMES_SESSION_KEY"] = session_key or "" - - # Read from env var or use default (same as CLI) - max_iterations = int(os.getenv("HERMES_MAX_ITERATIONS", "90")) # Map platform enum to the platform hint key the agent understands. # Platform.LOCAL ("local") maps to "cli"; others pass through as-is. @@ -14598,10 +14604,7 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew if self._ephemeral_system_prompt: combined_ephemeral = (combined_ephemeral + "\n\n" + self._ephemeral_system_prompt).strip() - # Re-read .env and config for fresh credentials (gateway is long-lived, - # keys may change without restart). Keep config.yaml authoritative for - # runtime budget settings bridged into env vars. - _reload_runtime_env_preserving_config_authority() + max_iterations = _current_max_iterations() try: model, runtime_kwargs = self._resolve_session_agent_runtime(