mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-08 03:01:47 +00:00
fix: WhatsApp bridge process leak and disable config asymmetry
- Add PID file mechanism to track bridge processes and kill stale ones on startup
- Improve _kill_port_process() with lsof fallback when fuser is not available
- Support explicit WhatsApp disable via config.yaml (whatsapp.enabled: false)
- Respect WHATSAPP_ENABLED=false env var to disable WhatsApp

Fixes #19124
This commit is contained in:
parent
0214858ef5
commit
0d3593e514
2 changed files with 85 additions and 11 deletions
|
|
@ -1152,10 +1152,17 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
|
||||||
|
|
||||||
# WhatsApp (typically uses different auth mechanism)
|
# WhatsApp (typically uses different auth mechanism)
|
||||||
whatsapp_enabled = os.getenv("WHATSAPP_ENABLED", "").lower() in ("true", "1", "yes")
|
whatsapp_enabled = os.getenv("WHATSAPP_ENABLED", "").lower() in ("true", "1", "yes")
|
||||||
if whatsapp_enabled:
|
whatsapp_disabled_explicitly = os.getenv("WHATSAPP_ENABLED", "").lower() in ("false", "0", "no")
|
||||||
if Platform.WHATSAPP not in config.platforms:
|
if Platform.WHATSAPP in config.platforms:
|
||||||
config.platforms[Platform.WHATSAPP] = PlatformConfig()
|
# YAML config exists — respect explicit disable
|
||||||
config.platforms[Platform.WHATSAPP].enabled = True
|
wa_cfg = config.platforms[Platform.WHATSAPP]
|
||||||
|
if whatsapp_disabled_explicitly:
|
||||||
|
wa_cfg.enabled = False
|
||||||
|
elif whatsapp_enabled:
|
||||||
|
wa_cfg.enabled = True
|
||||||
|
# else: keep whatever the YAML set
|
||||||
|
elif whatsapp_enabled:
|
||||||
|
config.platforms[Platform.WHATSAPP] = PlatformConfig(enabled=True)
|
||||||
whatsapp_home = os.getenv("WHATSAPP_HOME_CHANNEL")
|
whatsapp_home = os.getenv("WHATSAPP_HOME_CHANNEL")
|
||||||
if whatsapp_home and Platform.WHATSAPP in config.platforms:
|
if whatsapp_home and Platform.WHATSAPP in config.platforms:
|
||||||
config.platforms[Platform.WHATSAPP].home_channel = HomeChannel(
|
config.platforms[Platform.WHATSAPP].home_channel = HomeChannel(
|
||||||
|
|
|
||||||
|
|
@ -21,6 +21,7 @@ import logging
|
||||||
import os
|
import os
|
||||||
import platform
|
import platform
|
||||||
import re
|
import re
|
||||||
|
import signal
|
||||||
import subprocess
|
import subprocess
|
||||||
|
|
||||||
_IS_WINDOWS = platform.system() == "Windows"
|
_IS_WINDOWS = platform.system() == "Windows"
|
||||||
|
|
@ -54,19 +55,77 @@ def _kill_port_process(port: int) -> None:
|
||||||
except subprocess.SubprocessError:
|
except subprocess.SubprocessError:
|
||||||
pass
|
pass
|
||||||
else:
|
else:
|
||||||
result = subprocess.run(
|
# Try fuser first (Linux), fall back to lsof (macOS / WSL2)
|
||||||
["fuser", f"{port}/tcp"],
|
killed = False
|
||||||
capture_output=True, timeout=5,
|
try:
|
||||||
)
|
result = subprocess.run(
|
||||||
if result.returncode == 0:
|
["fuser", f"{port}/tcp"],
|
||||||
subprocess.run(
|
|
||||||
["fuser", "-k", f"{port}/tcp"],
|
|
||||||
capture_output=True, timeout=5,
|
capture_output=True, timeout=5,
|
||||||
)
|
)
|
||||||
|
if result.returncode == 0:
|
||||||
|
subprocess.run(
|
||||||
|
["fuser", "-k", f"{port}/tcp"],
|
||||||
|
capture_output=True, timeout=5,
|
||||||
|
)
|
||||||
|
killed = True
|
||||||
|
except FileNotFoundError:
|
||||||
|
pass # fuser not installed
|
||||||
|
|
||||||
|
if not killed:
|
||||||
|
try:
|
||||||
|
result = subprocess.run(
|
||||||
|
["lsof", "-ti", f":{port}"],
|
||||||
|
capture_output=True, text=True, timeout=5,
|
||||||
|
)
|
||||||
|
for pid_str in result.stdout.strip().splitlines():
|
||||||
|
try:
|
||||||
|
os.kill(int(pid_str), signal.SIGTERM)
|
||||||
|
except (ValueError, ProcessLookupError, PermissionError):
|
||||||
|
pass
|
||||||
|
except FileNotFoundError:
|
||||||
|
pass # lsof not installed either
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
def _kill_stale_bridge_by_pidfile(session_path: Path) -> None:
    """Kill a bridge process recorded in a PID file from a previous run.

    The bridge PID is recorded in ``bridge.pid`` inside the session directory
    when the bridge is started.  If the gateway crashed without a clean
    shutdown the old bridge process becomes orphaned; this helper sends it
    SIGTERM and removes the PID file.

    The whole operation is best-effort: a missing, unreadable or corrupt PID
    file and a process that cannot be signalled are all ignored.

    Args:
        session_path: Session directory that may contain ``bridge.pid``.
    """
    pid_file = session_path / "bridge.pid"

    # Read directly instead of checking exists() first: one syscall fewer and
    # no window in which the file can vanish between the check and the read.
    try:
        pid = int(pid_file.read_text().strip())
    except FileNotFoundError:
        return  # nothing recorded by a previous run
    except (ValueError, OSError):
        pid = None  # corrupt or unreadable: just clean the file up below

    # Never signal a non-positive PID: kill(0, ...) targets the caller's own
    # process group and kill(-1, ...) targets every process the user may
    # signal.  Also never signal ourselves, which can happen when PIDs are
    # recycled (e.g. after a container restart).
    if pid is not None and pid > 0 and pid != os.getpid():
        try:
            # No separate "signal 0" existence probe: SIGTERM raises the same
            # ProcessLookupError / PermissionError on POSIX, and on Windows a
            # signal-0 os.kill() would itself terminate the process.
            os.kill(pid, signal.SIGTERM)
        except (OSError, OverflowError):
            # Already gone, not ours to kill, or a value that does not fit
            # in a pid_t; nothing more to do.
            pass
        else:
            logger.info("[whatsapp] Killed stale bridge PID %d from pidfile", pid)

    # The record is stale (or invalid) either way, so drop it.
    try:
        pid_file.unlink(missing_ok=True)
    except OSError:
        pass
|
||||||
|
|
||||||
|
|
||||||
|
def _write_bridge_pidfile(session_path: Path, pid: int) -> None:
    """Record the bridge PID in ``bridge.pid`` for later cleanup.

    Best-effort: a failure to write never propagates to the caller, but it is
    logged because without the file an orphaned bridge cannot be found via
    its PID on the next start-up.

    Args:
        session_path: Session directory in which ``bridge.pid`` is written.
        pid: Process ID of the bridge process.
    """
    pid_file = session_path / "bridge.pid"
    try:
        pid_file.write_text(str(pid))
    except OSError as exc:
        logger.warning(
            "[whatsapp] Could not write bridge PID file %s: %s", pid_file, exc
        )
|
||||||
|
|
||||||
|
|
||||||
def _terminate_bridge_process(proc, *, force: bool = False) -> None:
|
def _terminate_bridge_process(proc, *, force: bool = False) -> None:
|
||||||
"""Terminate the bridge process using process-tree semantics where possible."""
|
"""Terminate the bridge process using process-tree semantics where possible."""
|
||||||
if _IS_WINDOWS:
|
if _IS_WINDOWS:
|
||||||
|
|
@ -428,6 +487,7 @@ class WhatsAppAdapter(BasePlatformAdapter):
|
||||||
pass # Bridge not running, start a new one
|
pass # Bridge not running, start a new one
|
||||||
|
|
||||||
# Kill any orphaned bridge from a previous gateway run
|
# Kill any orphaned bridge from a previous gateway run
|
||||||
|
_kill_stale_bridge_by_pidfile(self._session_path)
|
||||||
_kill_port_process(self._bridge_port)
|
_kill_port_process(self._bridge_port)
|
||||||
await asyncio.sleep(1)
|
await asyncio.sleep(1)
|
||||||
|
|
||||||
|
|
@ -459,6 +519,7 @@ class WhatsAppAdapter(BasePlatformAdapter):
|
||||||
preexec_fn=None if _IS_WINDOWS else os.setsid,
|
preexec_fn=None if _IS_WINDOWS else os.setsid,
|
||||||
env=bridge_env,
|
env=bridge_env,
|
||||||
)
|
)
|
||||||
|
_write_bridge_pidfile(self._session_path, self._bridge_process.pid)
|
||||||
|
|
||||||
# Wait for the bridge to connect to WhatsApp.
|
# Wait for the bridge to connect to WhatsApp.
|
||||||
# Phase 1: wait for the HTTP server to come up (up to 15s).
|
# Phase 1: wait for the HTTP server to come up (up to 15s).
|
||||||
|
|
@ -609,6 +670,12 @@ class WhatsAppAdapter(BasePlatformAdapter):
|
||||||
# Bridge was not started by us, don't kill it
|
# Bridge was not started by us, don't kill it
|
||||||
print(f"[{self.name}] Disconnecting (external bridge left running)")
|
print(f"[{self.name}] Disconnecting (external bridge left running)")
|
||||||
|
|
||||||
|
# Clean up PID file
|
||||||
|
try:
|
||||||
|
(self._session_path / "bridge.pid").unlink(missing_ok=True)
|
||||||
|
except OSError:
|
||||||
|
pass
|
||||||
|
|
||||||
# Cancel the poll task explicitly
|
# Cancel the poll task explicitly
|
||||||
if self._poll_task and not self._poll_task.done():
|
if self._poll_task and not self._poll_task.done():
|
||||||
self._poll_task.cancel()
|
self._poll_task.cancel()
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue