fix(env): guard remaining malformed int/float env var casts with utils helpers

Widen the env_float() guard from #48735 across the whole bug class: a
non-numeric value (e.g. a stale .env "HERMES_API_TIMEOUT=abc" or a typo'd
port) raised an unhandled ValueError and crashed adapter/agent init.

Converts 27 genuinely-unguarded first-party int(os.getenv(...)) /
float(os.getenv(...)) call sites to the canonical utils.env_int /
utils.env_float helpers (the established house pattern), instead of
duplicating per-module helpers or inline try/except:

- gateway/config.py: WECOM_CALLBACK_PORT, BLUEBUBBLES_WEBHOOK_PORT
- gateway/platforms/email.py: EMAIL_IMAP_PORT, EMAIL_SMTP_PORT,
  EMAIL_POLL_INTERVAL
- gateway/platforms/feishu.py: dedup cache + text/media batch settings
- gateway/platforms/wecom.py, discord/adapter.py: text batch delays
- gateway/platforms/telegram.py: media batch delay, TELEGRAM_WEBHOOK_PORT
- gateway/platforms/whatsapp.py: WHATSAPP_NPM_INSTALL_TIMEOUT
- hermes_cli/auth.py: CODEX/XAI refresh timeouts
- agent/chat_completion_helpers.py: HERMES_API_TIMEOUT,
  HERMES_STREAM_READ_TIMEOUT, HERMES_STREAM_STALE_TIMEOUT
- run_agent.py: HERMES_API_TIMEOUT, HERMES_NOUS_TIMEOUT_SECONDS
- agent/auxiliary_client.py: HERMES_NOUS_TIMEOUT_SECONDS (two call sites)

Sites already guarded by try/except or local helpers are left untouched.
The HERMES_MAX_ITERATIONS sites are already guarded on main via
_current_max_iterations(), so they are not included.
This commit is contained in:
kshitijk4poor 2026-06-20 14:54:36 +05:30
parent 06ca1e9980
commit a7dd98c860
11 changed files with 41 additions and 38 deletions

View file

@ -102,7 +102,7 @@ OpenAI = _OpenAIProxy() # module-level name, resolves lazily on call/isinstance
from agent.credential_pool import load_pool
from hermes_cli.config import get_hermes_home
from hermes_constants import OPENROUTER_BASE_URL
from utils import base_url_host_matches, base_url_hostname, model_forces_max_completion_tokens, normalize_proxy_env_vars
from utils import base_url_host_matches, base_url_hostname, env_float, model_forces_max_completion_tokens, normalize_proxy_env_vars
logger = logging.getLogger(__name__)
@ -1312,7 +1312,7 @@ def _resolve_nous_runtime_api(*, force_refresh: bool = False) -> Optional[tuple[
from hermes_cli.auth import resolve_nous_runtime_credentials
creds = resolve_nous_runtime_credentials(
timeout_seconds=float(os.getenv("HERMES_NOUS_TIMEOUT_SECONDS", "15")),
timeout_seconds=env_float("HERMES_NOUS_TIMEOUT_SECONDS", 15),
force_refresh=force_refresh,
)
except Exception as exc:
@ -2905,7 +2905,7 @@ def _refresh_provider_credentials(provider: str) -> bool:
from hermes_cli.auth import resolve_nous_runtime_credentials
creds = resolve_nous_runtime_credentials(
timeout_seconds=float(os.getenv("HERMES_NOUS_TIMEOUT_SECONDS", "15")),
timeout_seconds=env_float("HERMES_NOUS_TIMEOUT_SECONDS", 15),
force_refresh=True,
)
if not str(creds.get("api_key", "") or "").strip():

View file

@ -34,7 +34,7 @@ from agent.message_sanitization import (
_repair_tool_call_arguments,
)
from tools.terminal_tool import is_persistent_env
from utils import base_url_host_matches, base_url_hostname, env_int
from utils import base_url_host_matches, base_url_hostname, env_float, env_int
logger = logging.getLogger(__name__)
@ -1761,14 +1761,14 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
_base_timeout = (
_provider_timeout_cfg
if _provider_timeout_cfg is not None
else float(os.getenv("HERMES_API_TIMEOUT", 1800.0))
else env_float("HERMES_API_TIMEOUT", 1800.0)
)
# Read timeout: config wins here too. Otherwise use
# HERMES_STREAM_READ_TIMEOUT (default 120s) for cloud providers.
if _provider_timeout_cfg is not None:
_stream_read_timeout = _provider_timeout_cfg
else:
_stream_read_timeout = float(os.getenv("HERMES_STREAM_READ_TIMEOUT", 120.0))
_stream_read_timeout = env_float("HERMES_STREAM_READ_TIMEOUT", 120.0)
# Local providers (Ollama, llama.cpp, vLLM) can take minutes for
# prefill on large contexts before producing the first token.
# Auto-increase the httpx read timeout unless the user explicitly
@ -2508,7 +2508,7 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
if _cfg_stale is not None:
_stream_stale_timeout_base = _cfg_stale
else:
_stream_stale_timeout_base = float(os.getenv("HERMES_STREAM_STALE_TIMEOUT", 180.0))
_stream_stale_timeout_base = env_float("HERMES_STREAM_STALE_TIMEOUT", 180.0)
# Local providers (Ollama, oMLX, llama-cpp) can take 300+ seconds
# for prefill on large contexts. Disable the stale detector unless
# the user explicitly set HERMES_STREAM_STALE_TIMEOUT.

View file

@ -17,7 +17,7 @@ from typing import Dict, List, Optional, Any, Callable
from enum import Enum
from hermes_cli.config import get_hermes_home
from utils import is_truthy_value
from utils import env_int, is_truthy_value
logger = logging.getLogger(__name__)
@ -1860,7 +1860,7 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
"token": os.getenv("WECOM_CALLBACK_TOKEN", ""),
"encoding_aes_key": os.getenv("WECOM_CALLBACK_ENCODING_AES_KEY", ""),
"host": os.getenv("WECOM_CALLBACK_HOST", "0.0.0.0"),
"port": int(os.getenv("WECOM_CALLBACK_PORT", "8645")),
"port": env_int("WECOM_CALLBACK_PORT", 8645),
})
# Weixin (personal WeChat via iLink Bot API)
@ -1916,7 +1916,7 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
"server_url": bluebubbles_server_url.rstrip("/"),
"password": bluebubbles_password,
"webhook_host": os.getenv("BLUEBUBBLES_WEBHOOK_HOST", "127.0.0.1"),
"webhook_port": int(os.getenv("BLUEBUBBLES_WEBHOOK_PORT", "8645")),
"webhook_port": env_int("BLUEBUBBLES_WEBHOOK_PORT", 8645),
"webhook_path": os.getenv("BLUEBUBBLES_WEBHOOK_PATH", "/bluebubbles-webhook"),
"send_read_receipts": os.getenv("BLUEBUBBLES_SEND_READ_RECEIPTS", "true").lower() in {"true", "1", "yes"},
})

View file

@ -43,6 +43,7 @@ from gateway.platforms.base import (
cache_image_from_bytes,
)
from gateway.config import Platform, PlatformConfig
from utils import env_int
logger = logging.getLogger(__name__)
# Automated sender patterns — emails from these are silently ignored
@ -309,10 +310,10 @@ class EmailAdapter(BasePlatformAdapter):
self._address = os.getenv("EMAIL_ADDRESS", "")
self._password = os.getenv("EMAIL_PASSWORD", "")
self._imap_host = os.getenv("EMAIL_IMAP_HOST", "")
self._imap_port = int(os.getenv("EMAIL_IMAP_PORT", "993"))
self._imap_port = env_int("EMAIL_IMAP_PORT", 993)
self._smtp_host = os.getenv("EMAIL_SMTP_HOST", "")
self._smtp_port = int(os.getenv("EMAIL_SMTP_PORT", "587"))
self._poll_interval = int(os.getenv("EMAIL_POLL_INTERVAL", "15"))
self._smtp_port = env_int("EMAIL_SMTP_PORT", 587)
self._poll_interval = env_int("EMAIL_POLL_INTERVAL", 15)
# Skip attachments — configured via config.yaml:
# platforms:

View file

@ -142,7 +142,7 @@ from gateway.platforms.base import (
)
from gateway.status import acquire_scoped_lock, release_scoped_lock
from hermes_constants import get_hermes_home
from utils import atomic_json_write
from utils import atomic_json_write, env_float, env_int
logger = logging.getLogger(__name__)
@ -1535,24 +1535,24 @@ class FeishuAdapter(BasePlatformAdapter):
bot_name=os.getenv("FEISHU_BOT_NAME", "").strip(),
dedup_cache_size=max(
32,
int(os.getenv("HERMES_FEISHU_DEDUP_CACHE_SIZE", str(_DEFAULT_DEDUP_CACHE_SIZE))),
env_int("HERMES_FEISHU_DEDUP_CACHE_SIZE", _DEFAULT_DEDUP_CACHE_SIZE),
),
text_batch_delay_seconds=float(
os.getenv("HERMES_FEISHU_TEXT_BATCH_DELAY_SECONDS", str(_DEFAULT_TEXT_BATCH_DELAY_SECONDS))
text_batch_delay_seconds=env_float(
"HERMES_FEISHU_TEXT_BATCH_DELAY_SECONDS", _DEFAULT_TEXT_BATCH_DELAY_SECONDS
),
text_batch_split_delay_seconds=float(
os.getenv("HERMES_FEISHU_TEXT_BATCH_SPLIT_DELAY_SECONDS", "2.0")
text_batch_split_delay_seconds=env_float(
"HERMES_FEISHU_TEXT_BATCH_SPLIT_DELAY_SECONDS", 2.0
),
text_batch_max_messages=max(
1,
int(os.getenv("HERMES_FEISHU_TEXT_BATCH_MAX_MESSAGES", str(_DEFAULT_TEXT_BATCH_MAX_MESSAGES))),
env_int("HERMES_FEISHU_TEXT_BATCH_MAX_MESSAGES", _DEFAULT_TEXT_BATCH_MAX_MESSAGES),
),
text_batch_max_chars=max(
1,
int(os.getenv("HERMES_FEISHU_TEXT_BATCH_MAX_CHARS", str(_DEFAULT_TEXT_BATCH_MAX_CHARS))),
env_int("HERMES_FEISHU_TEXT_BATCH_MAX_CHARS", _DEFAULT_TEXT_BATCH_MAX_CHARS),
),
media_batch_delay_seconds=float(
os.getenv("HERMES_FEISHU_MEDIA_BATCH_DELAY_SECONDS", str(_DEFAULT_MEDIA_BATCH_DELAY_SECONDS))
media_batch_delay_seconds=env_float(
"HERMES_FEISHU_MEDIA_BATCH_DELAY_SECONDS", _DEFAULT_MEDIA_BATCH_DELAY_SECONDS
),
webhook_host=str(
extra.get("webhook_host") or os.getenv("FEISHU_WEBHOOK_HOST", _DEFAULT_WEBHOOK_HOST)

View file

@ -87,7 +87,7 @@ from gateway.platforms.telegram_network import (
discover_fallback_ips,
parse_fallback_ip_env,
)
from utils import atomic_replace
from utils import atomic_replace, env_float, env_int
_TELEGRAM_IMAGE_EXTENSIONS = {".png", ".jpg", ".jpeg", ".webp", ".gif"}
_TELEGRAM_IMAGE_MIME_TO_EXT = {
@ -433,7 +433,7 @@ class TelegramAdapter(BasePlatformAdapter):
self._rich_draft_disabled: bool = False
# Buffer rapid/album photo updates so Telegram image bursts are handled
# as a single MessageEvent instead of self-interrupting multiple turns.
self._media_batch_delay_seconds = float(os.getenv("HERMES_TELEGRAM_MEDIA_BATCH_DELAY_SECONDS", "0.8"))
self._media_batch_delay_seconds = env_float("HERMES_TELEGRAM_MEDIA_BATCH_DELAY_SECONDS", 0.8)
self._pending_photo_batches: Dict[str, MessageEvent] = {}
self._pending_photo_batch_tasks: Dict[str, asyncio.Task] = {}
self._media_group_events: Dict[str, MessageEvent] = {}
@ -2153,7 +2153,7 @@ class TelegramAdapter(BasePlatformAdapter):
# inject forged updates as if from Telegram. Refuse to
# start rather than silently run in fail-open mode.
# See GHSA-3vpc-7q5r-276h.
webhook_port = int(os.getenv("TELEGRAM_WEBHOOK_PORT", "8443"))
webhook_port = env_int("TELEGRAM_WEBHOOK_PORT", 8443)
webhook_secret = os.getenv("TELEGRAM_WEBHOOK_SECRET", "").strip()
if not webhook_secret:
raise RuntimeError(

View file

@ -68,6 +68,7 @@ from gateway.platforms.base import (
cache_document_from_bytes,
cache_image_from_bytes,
)
from utils import env_float
logger = logging.getLogger(__name__)
@ -186,8 +187,8 @@ class WeComAdapter(BasePlatformAdapter):
# Text batching: merge rapid successive messages (Telegram-style).
# WeCom clients split long messages around 4000 chars.
self._text_batch_delay_seconds = float(os.getenv("HERMES_WECOM_TEXT_BATCH_DELAY_SECONDS", "0.6"))
self._text_batch_split_delay_seconds = float(os.getenv("HERMES_WECOM_TEXT_BATCH_SPLIT_DELAY_SECONDS", "2.0"))
self._text_batch_delay_seconds = env_float("HERMES_WECOM_TEXT_BATCH_DELAY_SECONDS", 0.6)
self._text_batch_split_delay_seconds = env_float("HERMES_WECOM_TEXT_BATCH_SPLIT_DELAY_SECONDS", 2.0)
self._pending_text_batches: Dict[str, MessageEvent] = {}
self._pending_text_batch_tasks: Dict[str, asyncio.Task] = {}
self._device_id = uuid.uuid4().hex

View file

@ -191,6 +191,7 @@ from gateway.platforms.base import (
cache_image_from_url,
cache_audio_from_url,
)
from utils import env_int
def _file_content_hash(path: Path) -> str:
@ -412,7 +413,7 @@ class WhatsAppAdapter(WhatsAppBehaviorMixin, BasePlatformAdapter):
try:
# Read timeout from environment variable, default to 300 seconds (5 minutes)
# to accommodate slower systems like Unraid NAS
npm_install_timeout = int(os.environ.get("WHATSAPP_NPM_INSTALL_TIMEOUT", "300"))
npm_install_timeout = env_int("WHATSAPP_NPM_INSTALL_TIMEOUT", 300)
install_result = subprocess.run(
[_npm_bin, "install", "--silent"],
cwd=str(bridge_dir),

View file

@ -46,7 +46,7 @@ import httpx
from hermes_cli.config import get_hermes_home, get_config_path, read_raw_config
from hermes_constants import OPENROUTER_BASE_URL, secure_parent_dir
from agent.credential_persistence import sanitize_borrowed_credential_payload
from utils import atomic_replace, atomic_yaml_write, is_truthy_value
from utils import atomic_replace, atomic_yaml_write, env_float, is_truthy_value
logger = logging.getLogger(__name__)
@ -3838,7 +3838,7 @@ def resolve_codex_runtime_credentials(
tokens = dict(data["tokens"])
access_token = str(tokens.get("access_token", "") or "").strip()
refresh_timeout_seconds = float(os.getenv("HERMES_CODEX_REFRESH_TIMEOUT_SECONDS", "20"))
refresh_timeout_seconds = env_float("HERMES_CODEX_REFRESH_TIMEOUT_SECONDS", 20)
should_refresh = bool(force_refresh)
if (not should_refresh) and refresh_if_expiring:
@ -4475,7 +4475,7 @@ def resolve_xai_oauth_runtime_credentials(
data = _read_xai_oauth_tokens()
tokens = dict(data["tokens"])
access_token = str(tokens.get("access_token", "") or "").strip()
refresh_timeout_seconds = float(os.getenv("HERMES_XAI_REFRESH_TIMEOUT_SECONDS", "20"))
refresh_timeout_seconds = env_float("HERMES_XAI_REFRESH_TIMEOUT_SECONDS", 20)
discovery = dict(data.get("discovery") or {})
token_endpoint = str(discovery.get("token_endpoint", "") or "").strip()
redirect_uri = str(data.get("redirect_uri", "") or "").strip()

View file

@ -103,7 +103,7 @@ sys.path.insert(0, str(_Path(__file__).resolve().parents[2]))
from gateway.config import Platform, PlatformConfig
from gateway.platforms.helpers import MessageDeduplicator, ThreadParticipationTracker
from utils import atomic_json_write
from utils import atomic_json_write, env_float
from gateway.platforms.base import (
BasePlatformAdapter,
MessageEvent,
@ -746,8 +746,8 @@ class DiscordAdapter(BasePlatformAdapter):
self._voice_clients: Dict[int, Any] = {} # guild_id -> VoiceClient
self._voice_locks: Dict[int, asyncio.Lock] = {} # guild_id -> serialize join/leave
# Text batching: merge rapid successive messages (Telegram-style)
self._text_batch_delay_seconds = float(os.getenv("HERMES_DISCORD_TEXT_BATCH_DELAY_SECONDS", "0.6"))
self._text_batch_split_delay_seconds = float(os.getenv("HERMES_DISCORD_TEXT_BATCH_SPLIT_DELAY_SECONDS", "2.0"))
self._text_batch_delay_seconds = env_float("HERMES_DISCORD_TEXT_BATCH_DELAY_SECONDS", 0.6)
self._text_batch_split_delay_seconds = env_float("HERMES_DISCORD_TEXT_BATCH_SPLIT_DELAY_SECONDS", 2.0)
self._pending_text_batches: Dict[str, MessageEvent] = {}
self._pending_text_batch_tasks: Dict[str, asyncio.Task] = {}
self._voice_text_channels: Dict[int, int] = {} # guild_id -> text_channel_id

View file

@ -209,7 +209,7 @@ from agent.tool_dispatch_helpers import (
_extract_error_preview,
_trajectory_normalize_msg, # noqa: F401 # re-exported for tests that `from run_agent import _trajectory_normalize_msg`
)
from utils import atomic_json_write, base_url_host_matches, base_url_hostname, is_truthy_value, model_forces_max_completion_tokens
from utils import atomic_json_write, base_url_host_matches, base_url_hostname, env_float, is_truthy_value, model_forces_max_completion_tokens
@ -1109,7 +1109,7 @@ class AIAgent:
cfg = get_provider_request_timeout(self.provider, self.model)
if cfg is not None:
return cfg
return float(os.getenv("HERMES_API_TIMEOUT", 1800.0))
return env_float("HERMES_API_TIMEOUT", 1800.0)
def _resolved_api_call_stale_timeout_base(self) -> tuple[float, bool]:
"""Resolve the base non-stream stale timeout and whether it is implicit.
@ -3839,7 +3839,7 @@ class AIAgent:
from hermes_cli.auth import resolve_nous_runtime_credentials
creds = resolve_nous_runtime_credentials(
timeout_seconds=float(os.getenv("HERMES_NOUS_TIMEOUT_SECONDS", "15")),
timeout_seconds=env_float("HERMES_NOUS_TIMEOUT_SECONDS", 15),
force_refresh=force,
)
except Exception as exc: