fix(whatsapp): wait for connected status and log bridge output

The gateway health check broke out of the polling loop as soon as
the bridge HTTP server returned 200, regardless of the actual
WhatsApp connection status. This meant 'Bridge ready (status:
disconnected)' was printed and the gateway moved on, even when
WhatsApp never connected.

Additionally, bridge stdout/stderr were piped to DEVNULL, so if the
session had expired and the bridge needed a QR re-scan, the user had
no way to see that. The 'Scan QR code if prompted (check bridge
output)' message was misleading since there was no output to check.

Changes:
- Health check now has two phases: wait for HTTP (15s), then wait
  for status:connected (15s more). Total 30s budget.
- Bridge output routes to ~/.hermes/whatsapp/bridge.log instead of
  DEVNULL — QR codes, errors, reconnection msgs are preserved.
- Clear warnings with actionable steps if connection fails after 30s
  (check bridge.log, re-pair with hermes whatsapp).
- Removed misleading 'Scan QR code' message.
- Log file handle properly cleaned up on disconnect.

Fixes #365
This commit is contained in:
teknium1 2026-03-04 04:58:21 -08:00
parent ee7fde6531
commit c45aeb45b1

View file

@ -100,6 +100,8 @@ class WhatsAppAdapter(BasePlatformAdapter):
Path.home() / ".hermes" / "whatsapp" / "session"
))
self._message_queue: asyncio.Queue = asyncio.Queue()
self._bridge_log_fh = None
self._bridge_log: Optional[Path] = None
async def connect(self) -> bool:
"""
@ -159,8 +161,13 @@ class WhatsAppAdapter(BasePlatformAdapter):
except Exception:
pass
# Start the bridge process in its own process group
# Start the bridge process in its own process group.
# Route output to a log file so QR codes, errors, and reconnection
# messages are preserved for troubleshooting.
whatsapp_mode = os.getenv("WHATSAPP_MODE", "self-chat")
self._bridge_log = self._session_path.parent / "bridge.log"
bridge_log_fh = open(self._bridge_log, "a")
self._bridge_log_fh = bridge_log_fh
self._bridge_process = subprocess.Popen(
[
"node",
@ -169,17 +176,21 @@ class WhatsAppAdapter(BasePlatformAdapter):
"--session", str(self._session_path),
"--mode", whatsapp_mode,
],
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
stdout=bridge_log_fh,
stderr=bridge_log_fh,
preexec_fn=None if _IS_WINDOWS else os.setsid,
)
# Wait for bridge to be ready via HTTP health check
# Wait for the bridge to connect to WhatsApp.
# Phase 1: wait for the HTTP server to come up (up to 15s).
# Phase 2: wait for WhatsApp status: connected (up to 15s more).
import aiohttp
http_ready = False
for attempt in range(15):
await asyncio.sleep(1)
if self._bridge_process.poll() is not None:
print(f"[{self.name}] Bridge process died (exit code {self._bridge_process.returncode})")
print(f"[{self.name}] Check log: {self._bridge_log}")
return False
try:
async with aiohttp.ClientSession() as session:
@ -188,21 +199,54 @@ class WhatsAppAdapter(BasePlatformAdapter):
timeout=aiohttp.ClientTimeout(total=2)
) as resp:
if resp.status == 200:
http_ready = True
data = await resp.json()
print(f"[{self.name}] Bridge ready (status: {data.get('status', '?')})")
break
if data.get("status") == "connected":
print(f"[{self.name}] Bridge ready (status: connected)")
break
except Exception:
continue
else:
print(f"[{self.name}] Bridge did not become ready in 15s")
if not http_ready:
print(f"[{self.name}] Bridge HTTP server did not start in 15s")
print(f"[{self.name}] Check log: {self._bridge_log}")
return False
# Phase 2: HTTP is up but WhatsApp may still be connecting.
# Give it more time to authenticate with saved credentials.
if data.get("status") != "connected":
print(f"[{self.name}] Bridge HTTP ready, waiting for WhatsApp connection...")
for attempt in range(15):
await asyncio.sleep(1)
if self._bridge_process.poll() is not None:
print(f"[{self.name}] Bridge process died during connection")
print(f"[{self.name}] Check log: {self._bridge_log}")
return False
try:
async with aiohttp.ClientSession() as session:
async with session.get(
f"http://localhost:{self._bridge_port}/health",
timeout=aiohttp.ClientTimeout(total=2)
) as resp:
if resp.status == 200:
data = await resp.json()
if data.get("status") == "connected":
print(f"[{self.name}] Bridge ready (status: connected)")
break
except Exception:
continue
else:
# Still not connected — warn but proceed (bridge may
# auto-reconnect later, e.g. after a code 515 restart).
print(f"[{self.name}] ⚠ WhatsApp not connected after 30s")
print(f"[{self.name}] Bridge log: {self._bridge_log}")
print(f"[{self.name}] If session expired, re-pair: hermes whatsapp")
# Start message polling task
asyncio.create_task(self._poll_messages())
self._running = True
print(f"[{self.name}] Bridge started on port {self._bridge_port}")
print(f"[{self.name}] Scan QR code if prompted (check bridge output)")
return True
except Exception as e:
@ -245,6 +289,12 @@ class WhatsAppAdapter(BasePlatformAdapter):
self._running = False
self._bridge_process = None
if self._bridge_log_fh:
try:
self._bridge_log_fh.close()
except Exception:
pass
self._bridge_log_fh = None
print(f"[{self.name}] Disconnected")
async def send(