mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
fix(whatsapp): wait for connected status and log bridge output
The gateway health check broke out of the polling loop as soon as the bridge HTTP server returned 200, regardless of the actual WhatsApp connection status. This meant 'Bridge ready (status: disconnected)' was printed and the gateway moved on, even when WhatsApp never connected. Additionally, bridge stdout/stderr were piped to DEVNULL, so if the session had expired and the bridge needed a QR re-scan, the user had no way to see that. The 'Scan QR code if prompted (check bridge output)' message was misleading since there was no output to check. Changes: - Health check now has two phases: wait for HTTP (15s), then wait for status:connected (15s more). Total 30s budget. - Bridge output routes to ~/.hermes/whatsapp/bridge.log instead of DEVNULL — QR codes, errors, reconnection msgs are preserved. - Clear warnings with actionable steps if connection fails after 30s (check bridge.log, re-pair with hermes whatsapp). - Removed misleading 'Scan QR code' message. - Log file handle properly cleaned up on disconnect. Fixes #365
This commit is contained in:
parent
ee7fde6531
commit
c45aeb45b1
1 changed files with 59 additions and 9 deletions
|
|
@ -100,6 +100,8 @@ class WhatsAppAdapter(BasePlatformAdapter):
|
|||
Path.home() / ".hermes" / "whatsapp" / "session"
|
||||
))
|
||||
self._message_queue: asyncio.Queue = asyncio.Queue()
|
||||
self._bridge_log_fh = None
|
||||
self._bridge_log: Optional[Path] = None
|
||||
|
||||
async def connect(self) -> bool:
|
||||
"""
|
||||
|
|
@ -159,8 +161,13 @@ class WhatsAppAdapter(BasePlatformAdapter):
|
|||
except Exception:
|
||||
pass
|
||||
|
||||
# Start the bridge process in its own process group
|
||||
# Start the bridge process in its own process group.
|
||||
# Route output to a log file so QR codes, errors, and reconnection
|
||||
# messages are preserved for troubleshooting.
|
||||
whatsapp_mode = os.getenv("WHATSAPP_MODE", "self-chat")
|
||||
self._bridge_log = self._session_path.parent / "bridge.log"
|
||||
bridge_log_fh = open(self._bridge_log, "a")
|
||||
self._bridge_log_fh = bridge_log_fh
|
||||
self._bridge_process = subprocess.Popen(
|
||||
[
|
||||
"node",
|
||||
|
|
@ -169,17 +176,21 @@ class WhatsAppAdapter(BasePlatformAdapter):
|
|||
"--session", str(self._session_path),
|
||||
"--mode", whatsapp_mode,
|
||||
],
|
||||
stdout=subprocess.DEVNULL,
|
||||
stderr=subprocess.DEVNULL,
|
||||
stdout=bridge_log_fh,
|
||||
stderr=bridge_log_fh,
|
||||
preexec_fn=None if _IS_WINDOWS else os.setsid,
|
||||
)
|
||||
|
||||
# Wait for bridge to be ready via HTTP health check
|
||||
# Wait for the bridge to connect to WhatsApp.
|
||||
# Phase 1: wait for the HTTP server to come up (up to 15s).
|
||||
# Phase 2: wait for WhatsApp status: connected (up to 15s more).
|
||||
import aiohttp
|
||||
http_ready = False
|
||||
for attempt in range(15):
|
||||
await asyncio.sleep(1)
|
||||
if self._bridge_process.poll() is not None:
|
||||
print(f"[{self.name}] Bridge process died (exit code {self._bridge_process.returncode})")
|
||||
print(f"[{self.name}] Check log: {self._bridge_log}")
|
||||
return False
|
||||
try:
|
||||
async with aiohttp.ClientSession() as session:
|
||||
|
|
@ -188,21 +199,54 @@ class WhatsAppAdapter(BasePlatformAdapter):
|
|||
timeout=aiohttp.ClientTimeout(total=2)
|
||||
) as resp:
|
||||
if resp.status == 200:
|
||||
http_ready = True
|
||||
data = await resp.json()
|
||||
print(f"[{self.name}] Bridge ready (status: {data.get('status', '?')})")
|
||||
break
|
||||
if data.get("status") == "connected":
|
||||
print(f"[{self.name}] Bridge ready (status: connected)")
|
||||
break
|
||||
except Exception:
|
||||
continue
|
||||
else:
|
||||
print(f"[{self.name}] Bridge did not become ready in 15s")
|
||||
|
||||
if not http_ready:
|
||||
print(f"[{self.name}] Bridge HTTP server did not start in 15s")
|
||||
print(f"[{self.name}] Check log: {self._bridge_log}")
|
||||
return False
|
||||
|
||||
# Phase 2: HTTP is up but WhatsApp may still be connecting.
|
||||
# Give it more time to authenticate with saved credentials.
|
||||
if data.get("status") != "connected":
|
||||
print(f"[{self.name}] Bridge HTTP ready, waiting for WhatsApp connection...")
|
||||
for attempt in range(15):
|
||||
await asyncio.sleep(1)
|
||||
if self._bridge_process.poll() is not None:
|
||||
print(f"[{self.name}] Bridge process died during connection")
|
||||
print(f"[{self.name}] Check log: {self._bridge_log}")
|
||||
return False
|
||||
try:
|
||||
async with aiohttp.ClientSession() as session:
|
||||
async with session.get(
|
||||
f"http://localhost:{self._bridge_port}/health",
|
||||
timeout=aiohttp.ClientTimeout(total=2)
|
||||
) as resp:
|
||||
if resp.status == 200:
|
||||
data = await resp.json()
|
||||
if data.get("status") == "connected":
|
||||
print(f"[{self.name}] Bridge ready (status: connected)")
|
||||
break
|
||||
except Exception:
|
||||
continue
|
||||
else:
|
||||
# Still not connected — warn but proceed (bridge may
|
||||
# auto-reconnect later, e.g. after a code 515 restart).
|
||||
print(f"[{self.name}] ⚠ WhatsApp not connected after 30s")
|
||||
print(f"[{self.name}] Bridge log: {self._bridge_log}")
|
||||
print(f"[{self.name}] If session expired, re-pair: hermes whatsapp")
|
||||
|
||||
# Start message polling task
|
||||
asyncio.create_task(self._poll_messages())
|
||||
|
||||
self._running = True
|
||||
print(f"[{self.name}] Bridge started on port {self._bridge_port}")
|
||||
print(f"[{self.name}] Scan QR code if prompted (check bridge output)")
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
|
|
@ -245,6 +289,12 @@ class WhatsAppAdapter(BasePlatformAdapter):
|
|||
|
||||
self._running = False
|
||||
self._bridge_process = None
|
||||
if self._bridge_log_fh:
|
||||
try:
|
||||
self._bridge_log_fh.close()
|
||||
except Exception:
|
||||
pass
|
||||
self._bridge_log_fh = None
|
||||
print(f"[{self.name}] Disconnected")
|
||||
|
||||
async def send(
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue