From 06ca1e9980fed6009dc442a9468247fac32e5581 Mon Sep 17 00:00:00 2001 From: annguyenNous Date: Sat, 20 Jun 2026 14:00:07 +0530 Subject: [PATCH 1/2] fix(utils): add env_float helper for safe float env var parsing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mirrors the existing env_int() helper: returns the default when the variable is unset or non-numeric instead of raising ValueError. Used by the follow-up commit to guard malformed float env vars across the gateway. Salvaged from #48735 (@annguyenNous). The PR's api_server.py change is now redundant — main guards HERMES_MAX_ITERATIONS via _current_max_iterations(). --- utils.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/utils.py b/utils.py index ad7f28f8dba..5e1b964debc 100644 --- a/utils.py +++ b/utils.py @@ -323,6 +323,17 @@ def env_int(key: str, default: int = 0) -> int: return default +def env_float(key: str, default: float = 0.0) -> float: + """Read an environment variable as a float, with fallback.""" + raw = os.getenv(key, "").strip() + if not raw: + return default + try: + return float(raw) + except (ValueError, TypeError): + return default + + def env_bool(key: str, default: bool = False) -> bool: """Read an environment variable as a boolean.""" return is_truthy_value(os.getenv(key, ""), default=default) From a7dd98c8609c0d944e3c5dd0c5b9ee31dd99eb29 Mon Sep 17 00:00:00 2001 From: kshitijk4poor <82637225+kshitijk4poor@users.noreply.github.com> Date: Sat, 20 Jun 2026 14:54:36 +0530 Subject: [PATCH 2/2] fix(env): guard remaining malformed int/float env var casts with utils helpers Widen the env_float() guard from #48735 across the whole bug class: a non-numeric value (e.g. a stale .env "HERMES_API_TIMEOUT=abc" or a typo'd port) raised an unhandled ValueError and crashed adapter/agent init. 
Converts 27 genuinely-unguarded first-party int/float(os.getenv()) sites to the canonical utils.env_int / utils.env_float helpers (the established house pattern), instead of duplicating per-module helpers or inline try/except: - gateway/config.py: WECOM_CALLBACK_PORT, BLUEBUBBLES_WEBHOOK_PORT - gateway/platforms/email.py: EMAIL_IMAP/SMTP_PORT, EMAIL_POLL_INTERVAL - gateway/platforms/feishu.py: dedup cache + text/media batch settings - gateway/platforms/wecom.py, plugins/platforms/discord/adapter.py: text batch delays - gateway/platforms/telegram.py: media batch delay, TELEGRAM_WEBHOOK_PORT - gateway/platforms/whatsapp.py: WHATSAPP_NPM_INSTALL_TIMEOUT - hermes_cli/auth.py: CODEX/XAI refresh timeouts - agent/chat_completion_helpers.py: API/stream read/stale timeouts - run_agent.py, agent/auxiliary_client.py: API + nous timeouts Sites already guarded by try/except or local helpers are left untouched. The HERMES_MAX_ITERATIONS sites are already guarded on main via _current_max_iterations(), so they are not included. 
--- agent/auxiliary_client.py | 6 +++--- agent/chat_completion_helpers.py | 8 ++++---- gateway/config.py | 6 +++--- gateway/platforms/email.py | 7 ++++--- gateway/platforms/feishu.py | 20 ++++++++++---------- gateway/platforms/telegram.py | 6 +++--- gateway/platforms/wecom.py | 5 +++-- gateway/platforms/whatsapp.py | 3 ++- hermes_cli/auth.py | 6 +++--- plugins/platforms/discord/adapter.py | 6 +++--- run_agent.py | 6 +++--- 11 files changed, 41 insertions(+), 38 deletions(-) diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index f28b5f60156..0af56a7473d 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -102,7 +102,7 @@ OpenAI = _OpenAIProxy() # module-level name, resolves lazily on call/isinstance from agent.credential_pool import load_pool from hermes_cli.config import get_hermes_home from hermes_constants import OPENROUTER_BASE_URL -from utils import base_url_host_matches, base_url_hostname, model_forces_max_completion_tokens, normalize_proxy_env_vars +from utils import base_url_host_matches, base_url_hostname, env_float, model_forces_max_completion_tokens, normalize_proxy_env_vars logger = logging.getLogger(__name__) @@ -1312,7 +1312,7 @@ def _resolve_nous_runtime_api(*, force_refresh: bool = False) -> Optional[tuple[ from hermes_cli.auth import resolve_nous_runtime_credentials creds = resolve_nous_runtime_credentials( - timeout_seconds=float(os.getenv("HERMES_NOUS_TIMEOUT_SECONDS", "15")), + timeout_seconds=env_float("HERMES_NOUS_TIMEOUT_SECONDS", 15), force_refresh=force_refresh, ) except Exception as exc: @@ -2905,7 +2905,7 @@ def _refresh_provider_credentials(provider: str) -> bool: from hermes_cli.auth import resolve_nous_runtime_credentials creds = resolve_nous_runtime_credentials( - timeout_seconds=float(os.getenv("HERMES_NOUS_TIMEOUT_SECONDS", "15")), + timeout_seconds=env_float("HERMES_NOUS_TIMEOUT_SECONDS", 15), force_refresh=True, ) if not str(creds.get("api_key", "") or "").strip(): diff --git 
a/agent/chat_completion_helpers.py b/agent/chat_completion_helpers.py index 1ee1702b45e..c9272c76266 100644 --- a/agent/chat_completion_helpers.py +++ b/agent/chat_completion_helpers.py @@ -34,7 +34,7 @@ from agent.message_sanitization import ( _repair_tool_call_arguments, ) from tools.terminal_tool import is_persistent_env -from utils import base_url_host_matches, base_url_hostname, env_int +from utils import base_url_host_matches, base_url_hostname, env_float, env_int logger = logging.getLogger(__name__) @@ -1761,14 +1761,14 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta= _base_timeout = ( _provider_timeout_cfg if _provider_timeout_cfg is not None - else float(os.getenv("HERMES_API_TIMEOUT", 1800.0)) + else env_float("HERMES_API_TIMEOUT", 1800.0) ) # Read timeout: config wins here too. Otherwise use # HERMES_STREAM_READ_TIMEOUT (default 120s) for cloud providers. if _provider_timeout_cfg is not None: _stream_read_timeout = _provider_timeout_cfg else: - _stream_read_timeout = float(os.getenv("HERMES_STREAM_READ_TIMEOUT", 120.0)) + _stream_read_timeout = env_float("HERMES_STREAM_READ_TIMEOUT", 120.0) # Local providers (Ollama, llama.cpp, vLLM) can take minutes for # prefill on large contexts before producing the first token. # Auto-increase the httpx read timeout unless the user explicitly @@ -2508,7 +2508,7 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta= if _cfg_stale is not None: _stream_stale_timeout_base = _cfg_stale else: - _stream_stale_timeout_base = float(os.getenv("HERMES_STREAM_STALE_TIMEOUT", 180.0)) + _stream_stale_timeout_base = env_float("HERMES_STREAM_STALE_TIMEOUT", 180.0) # Local providers (Ollama, oMLX, llama-cpp) can take 300+ seconds # for prefill on large contexts. Disable the stale detector unless # the user explicitly set HERMES_STREAM_STALE_TIMEOUT. 
diff --git a/gateway/config.py b/gateway/config.py index 13d262e792d..8b459c32420 100644 --- a/gateway/config.py +++ b/gateway/config.py @@ -17,7 +17,7 @@ from typing import Dict, List, Optional, Any, Callable from enum import Enum from hermes_cli.config import get_hermes_home -from utils import is_truthy_value +from utils import env_int, is_truthy_value logger = logging.getLogger(__name__) @@ -1860,7 +1860,7 @@ def _apply_env_overrides(config: GatewayConfig) -> None: "token": os.getenv("WECOM_CALLBACK_TOKEN", ""), "encoding_aes_key": os.getenv("WECOM_CALLBACK_ENCODING_AES_KEY", ""), "host": os.getenv("WECOM_CALLBACK_HOST", "0.0.0.0"), - "port": int(os.getenv("WECOM_CALLBACK_PORT", "8645")), + "port": env_int("WECOM_CALLBACK_PORT", 8645), }) # Weixin (personal WeChat via iLink Bot API) @@ -1916,7 +1916,7 @@ def _apply_env_overrides(config: GatewayConfig) -> None: "server_url": bluebubbles_server_url.rstrip("/"), "password": bluebubbles_password, "webhook_host": os.getenv("BLUEBUBBLES_WEBHOOK_HOST", "127.0.0.1"), - "webhook_port": int(os.getenv("BLUEBUBBLES_WEBHOOK_PORT", "8645")), + "webhook_port": env_int("BLUEBUBBLES_WEBHOOK_PORT", 8645), "webhook_path": os.getenv("BLUEBUBBLES_WEBHOOK_PATH", "/bluebubbles-webhook"), "send_read_receipts": os.getenv("BLUEBUBBLES_SEND_READ_RECEIPTS", "true").lower() in {"true", "1", "yes"}, }) diff --git a/gateway/platforms/email.py b/gateway/platforms/email.py index d2f7e64ac61..3ce41d5fe17 100644 --- a/gateway/platforms/email.py +++ b/gateway/platforms/email.py @@ -43,6 +43,7 @@ from gateway.platforms.base import ( cache_image_from_bytes, ) from gateway.config import Platform, PlatformConfig +from utils import env_int logger = logging.getLogger(__name__) # Automated sender patterns — emails from these are silently ignored @@ -309,10 +310,10 @@ class EmailAdapter(BasePlatformAdapter): self._address = os.getenv("EMAIL_ADDRESS", "") self._password = os.getenv("EMAIL_PASSWORD", "") self._imap_host = os.getenv("EMAIL_IMAP_HOST", "") - 
self._imap_port = int(os.getenv("EMAIL_IMAP_PORT", "993")) + self._imap_port = env_int("EMAIL_IMAP_PORT", 993) self._smtp_host = os.getenv("EMAIL_SMTP_HOST", "") - self._smtp_port = int(os.getenv("EMAIL_SMTP_PORT", "587")) - self._poll_interval = int(os.getenv("EMAIL_POLL_INTERVAL", "15")) + self._smtp_port = env_int("EMAIL_SMTP_PORT", 587) + self._poll_interval = env_int("EMAIL_POLL_INTERVAL", 15) # Skip attachments — configured via config.yaml: # platforms: diff --git a/gateway/platforms/feishu.py b/gateway/platforms/feishu.py index 4814107bacd..7b29ba13528 100644 --- a/gateway/platforms/feishu.py +++ b/gateway/platforms/feishu.py @@ -142,7 +142,7 @@ from gateway.platforms.base import ( ) from gateway.status import acquire_scoped_lock, release_scoped_lock from hermes_constants import get_hermes_home -from utils import atomic_json_write +from utils import atomic_json_write, env_float, env_int logger = logging.getLogger(__name__) @@ -1535,24 +1535,24 @@ class FeishuAdapter(BasePlatformAdapter): bot_name=os.getenv("FEISHU_BOT_NAME", "").strip(), dedup_cache_size=max( 32, - int(os.getenv("HERMES_FEISHU_DEDUP_CACHE_SIZE", str(_DEFAULT_DEDUP_CACHE_SIZE))), + env_int("HERMES_FEISHU_DEDUP_CACHE_SIZE", _DEFAULT_DEDUP_CACHE_SIZE), ), - text_batch_delay_seconds=float( - os.getenv("HERMES_FEISHU_TEXT_BATCH_DELAY_SECONDS", str(_DEFAULT_TEXT_BATCH_DELAY_SECONDS)) + text_batch_delay_seconds=env_float( + "HERMES_FEISHU_TEXT_BATCH_DELAY_SECONDS", _DEFAULT_TEXT_BATCH_DELAY_SECONDS ), - text_batch_split_delay_seconds=float( - os.getenv("HERMES_FEISHU_TEXT_BATCH_SPLIT_DELAY_SECONDS", "2.0") + text_batch_split_delay_seconds=env_float( + "HERMES_FEISHU_TEXT_BATCH_SPLIT_DELAY_SECONDS", 2.0 ), text_batch_max_messages=max( 1, - int(os.getenv("HERMES_FEISHU_TEXT_BATCH_MAX_MESSAGES", str(_DEFAULT_TEXT_BATCH_MAX_MESSAGES))), + env_int("HERMES_FEISHU_TEXT_BATCH_MAX_MESSAGES", _DEFAULT_TEXT_BATCH_MAX_MESSAGES), ), text_batch_max_chars=max( 1, - 
int(os.getenv("HERMES_FEISHU_TEXT_BATCH_MAX_CHARS", str(_DEFAULT_TEXT_BATCH_MAX_CHARS))), + env_int("HERMES_FEISHU_TEXT_BATCH_MAX_CHARS", _DEFAULT_TEXT_BATCH_MAX_CHARS), ), - media_batch_delay_seconds=float( - os.getenv("HERMES_FEISHU_MEDIA_BATCH_DELAY_SECONDS", str(_DEFAULT_MEDIA_BATCH_DELAY_SECONDS)) + media_batch_delay_seconds=env_float( + "HERMES_FEISHU_MEDIA_BATCH_DELAY_SECONDS", _DEFAULT_MEDIA_BATCH_DELAY_SECONDS ), webhook_host=str( extra.get("webhook_host") or os.getenv("FEISHU_WEBHOOK_HOST", _DEFAULT_WEBHOOK_HOST) diff --git a/gateway/platforms/telegram.py b/gateway/platforms/telegram.py index 2a2bdb68641..d5228d873c1 100644 --- a/gateway/platforms/telegram.py +++ b/gateway/platforms/telegram.py @@ -87,7 +87,7 @@ from gateway.platforms.telegram_network import ( discover_fallback_ips, parse_fallback_ip_env, ) -from utils import atomic_replace +from utils import atomic_replace, env_float, env_int _TELEGRAM_IMAGE_EXTENSIONS = {".png", ".jpg", ".jpeg", ".webp", ".gif"} _TELEGRAM_IMAGE_MIME_TO_EXT = { @@ -433,7 +433,7 @@ class TelegramAdapter(BasePlatformAdapter): self._rich_draft_disabled: bool = False # Buffer rapid/album photo updates so Telegram image bursts are handled # as a single MessageEvent instead of self-interrupting multiple turns. - self._media_batch_delay_seconds = float(os.getenv("HERMES_TELEGRAM_MEDIA_BATCH_DELAY_SECONDS", "0.8")) + self._media_batch_delay_seconds = env_float("HERMES_TELEGRAM_MEDIA_BATCH_DELAY_SECONDS", 0.8) self._pending_photo_batches: Dict[str, MessageEvent] = {} self._pending_photo_batch_tasks: Dict[str, asyncio.Task] = {} self._media_group_events: Dict[str, MessageEvent] = {} @@ -2153,7 +2153,7 @@ class TelegramAdapter(BasePlatformAdapter): # inject forged updates as if from Telegram. Refuse to # start rather than silently run in fail-open mode. # See GHSA-3vpc-7q5r-276h. 
- webhook_port = int(os.getenv("TELEGRAM_WEBHOOK_PORT", "8443")) + webhook_port = env_int("TELEGRAM_WEBHOOK_PORT", 8443) webhook_secret = os.getenv("TELEGRAM_WEBHOOK_SECRET", "").strip() if not webhook_secret: raise RuntimeError( diff --git a/gateway/platforms/wecom.py b/gateway/platforms/wecom.py index 5bec5baca92..bb8b422cdcf 100644 --- a/gateway/platforms/wecom.py +++ b/gateway/platforms/wecom.py @@ -68,6 +68,7 @@ from gateway.platforms.base import ( cache_document_from_bytes, cache_image_from_bytes, ) +from utils import env_float logger = logging.getLogger(__name__) @@ -186,8 +187,8 @@ class WeComAdapter(BasePlatformAdapter): # Text batching: merge rapid successive messages (Telegram-style). # WeCom clients split long messages around 4000 chars. - self._text_batch_delay_seconds = float(os.getenv("HERMES_WECOM_TEXT_BATCH_DELAY_SECONDS", "0.6")) - self._text_batch_split_delay_seconds = float(os.getenv("HERMES_WECOM_TEXT_BATCH_SPLIT_DELAY_SECONDS", "2.0")) + self._text_batch_delay_seconds = env_float("HERMES_WECOM_TEXT_BATCH_DELAY_SECONDS", 0.6) + self._text_batch_split_delay_seconds = env_float("HERMES_WECOM_TEXT_BATCH_SPLIT_DELAY_SECONDS", 2.0) self._pending_text_batches: Dict[str, MessageEvent] = {} self._pending_text_batch_tasks: Dict[str, asyncio.Task] = {} self._device_id = uuid.uuid4().hex diff --git a/gateway/platforms/whatsapp.py b/gateway/platforms/whatsapp.py index d6490662684..f31d21cae4a 100644 --- a/gateway/platforms/whatsapp.py +++ b/gateway/platforms/whatsapp.py @@ -191,6 +191,7 @@ from gateway.platforms.base import ( cache_image_from_url, cache_audio_from_url, ) +from utils import env_int def _file_content_hash(path: Path) -> str: @@ -412,7 +413,7 @@ class WhatsAppAdapter(WhatsAppBehaviorMixin, BasePlatformAdapter): try: # Read timeout from environment variable, default to 300 seconds (5 minutes) # to accommodate slower systems like Unraid NAS - npm_install_timeout = int(os.environ.get("WHATSAPP_NPM_INSTALL_TIMEOUT", "300")) + npm_install_timeout 
= env_int("WHATSAPP_NPM_INSTALL_TIMEOUT", 300) install_result = subprocess.run( [_npm_bin, "install", "--silent"], cwd=str(bridge_dir), diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py index 7a08e2165bf..647779f6e82 100644 --- a/hermes_cli/auth.py +++ b/hermes_cli/auth.py @@ -46,7 +46,7 @@ import httpx from hermes_cli.config import get_hermes_home, get_config_path, read_raw_config from hermes_constants import OPENROUTER_BASE_URL, secure_parent_dir from agent.credential_persistence import sanitize_borrowed_credential_payload -from utils import atomic_replace, atomic_yaml_write, is_truthy_value +from utils import atomic_replace, atomic_yaml_write, env_float, is_truthy_value logger = logging.getLogger(__name__) @@ -3838,7 +3838,7 @@ def resolve_codex_runtime_credentials( tokens = dict(data["tokens"]) access_token = str(tokens.get("access_token", "") or "").strip() - refresh_timeout_seconds = float(os.getenv("HERMES_CODEX_REFRESH_TIMEOUT_SECONDS", "20")) + refresh_timeout_seconds = env_float("HERMES_CODEX_REFRESH_TIMEOUT_SECONDS", 20) should_refresh = bool(force_refresh) if (not should_refresh) and refresh_if_expiring: @@ -4475,7 +4475,7 @@ def resolve_xai_oauth_runtime_credentials( data = _read_xai_oauth_tokens() tokens = dict(data["tokens"]) access_token = str(tokens.get("access_token", "") or "").strip() - refresh_timeout_seconds = float(os.getenv("HERMES_XAI_REFRESH_TIMEOUT_SECONDS", "20")) + refresh_timeout_seconds = env_float("HERMES_XAI_REFRESH_TIMEOUT_SECONDS", 20) discovery = dict(data.get("discovery") or {}) token_endpoint = str(discovery.get("token_endpoint", "") or "").strip() redirect_uri = str(data.get("redirect_uri", "") or "").strip() diff --git a/plugins/platforms/discord/adapter.py b/plugins/platforms/discord/adapter.py index a2c2660136e..642f2f12b3b 100644 --- a/plugins/platforms/discord/adapter.py +++ b/plugins/platforms/discord/adapter.py @@ -103,7 +103,7 @@ sys.path.insert(0, str(_Path(__file__).resolve().parents[2])) from gateway.config 
import Platform, PlatformConfig from gateway.platforms.helpers import MessageDeduplicator, ThreadParticipationTracker -from utils import atomic_json_write +from utils import atomic_json_write, env_float from gateway.platforms.base import ( BasePlatformAdapter, MessageEvent, @@ -746,8 +746,8 @@ class DiscordAdapter(BasePlatformAdapter): self._voice_clients: Dict[int, Any] = {} # guild_id -> VoiceClient self._voice_locks: Dict[int, asyncio.Lock] = {} # guild_id -> serialize join/leave # Text batching: merge rapid successive messages (Telegram-style) - self._text_batch_delay_seconds = float(os.getenv("HERMES_DISCORD_TEXT_BATCH_DELAY_SECONDS", "0.6")) - self._text_batch_split_delay_seconds = float(os.getenv("HERMES_DISCORD_TEXT_BATCH_SPLIT_DELAY_SECONDS", "2.0")) + self._text_batch_delay_seconds = env_float("HERMES_DISCORD_TEXT_BATCH_DELAY_SECONDS", 0.6) + self._text_batch_split_delay_seconds = env_float("HERMES_DISCORD_TEXT_BATCH_SPLIT_DELAY_SECONDS", 2.0) self._pending_text_batches: Dict[str, MessageEvent] = {} self._pending_text_batch_tasks: Dict[str, asyncio.Task] = {} self._voice_text_channels: Dict[int, int] = {} # guild_id -> text_channel_id diff --git a/run_agent.py b/run_agent.py index 2c78123829c..87ad09dd915 100644 --- a/run_agent.py +++ b/run_agent.py @@ -209,7 +209,7 @@ from agent.tool_dispatch_helpers import ( _extract_error_preview, _trajectory_normalize_msg, # noqa: F401 # re-exported for tests that `from run_agent import _trajectory_normalize_msg` ) -from utils import atomic_json_write, base_url_host_matches, base_url_hostname, is_truthy_value, model_forces_max_completion_tokens +from utils import atomic_json_write, base_url_host_matches, base_url_hostname, env_float, is_truthy_value, model_forces_max_completion_tokens @@ -1109,7 +1109,7 @@ class AIAgent: cfg = get_provider_request_timeout(self.provider, self.model) if cfg is not None: return cfg - return float(os.getenv("HERMES_API_TIMEOUT", 1800.0)) + return env_float("HERMES_API_TIMEOUT", 1800.0) 
def _resolved_api_call_stale_timeout_base(self) -> tuple[float, bool]: """Resolve the base non-stream stale timeout and whether it is implicit. @@ -3839,7 +3839,7 @@ class AIAgent: from hermes_cli.auth import resolve_nous_runtime_credentials creds = resolve_nous_runtime_credentials( - timeout_seconds=float(os.getenv("HERMES_NOUS_TIMEOUT_SECONDS", "15")), + timeout_seconds=env_float("HERMES_NOUS_TIMEOUT_SECONDS", 15), force_refresh=force, ) except Exception as exc: