diff --git a/gateway/config.py b/gateway/config.py index 8eb39ba54a..da370541bb 100644 --- a/gateway/config.py +++ b/gateway/config.py @@ -1152,10 +1152,17 @@ def _apply_env_overrides(config: GatewayConfig) -> None: # WhatsApp (typically uses different auth mechanism) whatsapp_enabled = os.getenv("WHATSAPP_ENABLED", "").lower() in ("true", "1", "yes") - if whatsapp_enabled: - if Platform.WHATSAPP not in config.platforms: - config.platforms[Platform.WHATSAPP] = PlatformConfig() - config.platforms[Platform.WHATSAPP].enabled = True + whatsapp_disabled_explicitly = os.getenv("WHATSAPP_ENABLED", "").lower() in ("false", "0", "no") + if Platform.WHATSAPP in config.platforms: + # YAML config exists — respect explicit disable + wa_cfg = config.platforms[Platform.WHATSAPP] + if whatsapp_disabled_explicitly: + wa_cfg.enabled = False + elif whatsapp_enabled: + wa_cfg.enabled = True + # else: keep whatever the YAML set + elif whatsapp_enabled: + config.platforms[Platform.WHATSAPP] = PlatformConfig(enabled=True) whatsapp_home = os.getenv("WHATSAPP_HOME_CHANNEL") if whatsapp_home and Platform.WHATSAPP in config.platforms: config.platforms[Platform.WHATSAPP].home_channel = HomeChannel( diff --git a/gateway/platforms/whatsapp.py b/gateway/platforms/whatsapp.py index 921dd70d72..3aff6bfd37 100644 --- a/gateway/platforms/whatsapp.py +++ b/gateway/platforms/whatsapp.py @@ -21,6 +21,7 @@ import logging import os import platform import re +import signal import subprocess _IS_WINDOWS = platform.system() == "Windows" @@ -54,19 +55,77 @@ def _kill_port_process(port: int) -> None: except subprocess.SubprocessError: pass else: - result = subprocess.run( - ["fuser", f"{port}/tcp"], - capture_output=True, timeout=5, - ) - if result.returncode == 0: - subprocess.run( - ["fuser", "-k", f"{port}/tcp"], + # Try fuser first (Linux), fall back to lsof (macOS / WSL2) + killed = False + try: + result = subprocess.run( + ["fuser", f"{port}/tcp"], capture_output=True, timeout=5, ) + if 
def _kill_stale_bridge_by_pidfile(session_path: Path) -> None:
    """Terminate a bridge process recorded in a PID file by a previous run.

    The bridge's PID is stored in ``bridge.pid`` inside the session
    directory when the bridge is started.  If the gateway crashed without
    a clean shutdown, the old bridge process is orphaned; this helper
    sends it ``SIGTERM`` and removes the PID file.

    The helper is best-effort and never raises for a missing, unreadable
    or corrupt PID file.  PIDs that must never be signalled are ignored:

    * ``pid <= 0`` -- on POSIX, ``kill(0, ...)`` signals the caller's whole
      process group and ``kill(-1, ...)`` signals every process the user is
      allowed to signal.
    * the gateway's own PID -- it cannot be a stale bridge.

    Args:
        session_path: Directory that holds ``bridge.pid``.
    """
    pid_file = session_path / "bridge.pid"

    # EAFP: read directly instead of exists()+read, which races with
    # concurrent removal of the file.
    try:
        pid = int(pid_file.read_text().strip())
    except FileNotFoundError:
        return  # nothing recorded by a previous run
    except (ValueError, OSError):
        # Unreadable or non-numeric content (UnicodeDecodeError is a
        # ValueError): treat as "no usable PID" and just drop the file.
        pid = -1

    if pid > 0 and pid != os.getpid():
        # NOTE(review): the PID may have been recycled by an unrelated
        # process since the crash; the stdlib offers no portable way to
        # verify process identity here.
        try:
            # No signal-0 existence probe: it is racy on POSIX, and on
            # Windows os.kill() with any signal other than the CTRL_*
            # events terminates the target unconditionally.
            os.kill(pid, signal.SIGTERM)
        except ProcessLookupError:
            pass  # process already gone -- nothing to clean up
        except (OSError, OverflowError) as exc:
            # PermissionError (someone else's process), Windows errors for
            # unknown PIDs, or a PID too large for the platform's pid_t.
            logger.debug(
                "[whatsapp] Could not signal stale bridge PID %d: %s", pid, exc
            )
        else:
            logger.info("[whatsapp] Killed stale bridge PID %d from pidfile", pid)

    try:
        pid_file.unlink(missing_ok=True)
    except OSError:
        pass  # best-effort cleanup; a leftover file is re-checked next start


def _write_bridge_pidfile(session_path: Path, pid: int) -> None:
    """Record the bridge PID in ``bridge.pid`` for later orphan cleanup.

    Failure to write is not fatal for the running bridge, but it is
    logged because it means an orphaned bridge cannot be found through the
    PID file on the next start.

    Args:
        session_path: Directory in which ``bridge.pid`` is created.
        pid: Process ID of the bridge that was just started.
    """
    try:
        (session_path / "bridge.pid").write_text(str(pid))
    except OSError as exc:
        logger.warning(
            "[whatsapp] Could not write bridge pidfile in %s: %s",
            session_path, exc,
        )
_kill_stale_bridge_by_pidfile(self._session_path) _kill_port_process(self._bridge_port) await asyncio.sleep(1) @@ -459,6 +519,7 @@ class WhatsAppAdapter(BasePlatformAdapter): preexec_fn=None if _IS_WINDOWS else os.setsid, env=bridge_env, ) + _write_bridge_pidfile(self._session_path, self._bridge_process.pid) # Wait for the bridge to connect to WhatsApp. # Phase 1: wait for the HTTP server to come up (up to 15s). @@ -609,6 +670,12 @@ class WhatsAppAdapter(BasePlatformAdapter): # Bridge was not started by us, don't kill it print(f"[{self.name}] Disconnecting (external bridge left running)") + # Clean up PID file + try: + (self._session_path / "bridge.pid").unlink(missing_ok=True) + except OSError: + pass + # Cancel the poll task explicitly if self._poll_task and not self._poll_task.done(): self._poll_task.cancel()