fix(whatsapp): restart stale bridge processes instead of silently reusing them (#44205)

A long-lived Baileys bridge survives gateway restarts AND hermes update:
connect() adopted any bridge already listening with status connected, and
disconnect() only kills bridges the adapter spawned itself. Users who
updated to get inbound media support kept talking to a bridge process
serving months-old bridge.js — images and voice notes still arrived as
placeholders with no cached file path (refs #19105 follow-up reports).

Three fixes in the same stale-bridge class:

- Staleness handshake: bridge.js reports a sha256 self-hash in /health
  (scriptHash); connect() compares it against bridge.js on disk and
  restarts the bridge on mismatch. Pre-handshake bridges report no hash
  and are treated as stale, so every existing stale bridge gets recycled
  exactly once on the next gateway start.
- npm dep refresh: deps reinstall when package.json changes (stamp file
  in node_modules), not only when node_modules is missing — a Baileys
  pin bump now actually lands.
- Cache-dir passthrough: the gateway passes profile-aware
  HERMES_{IMAGE,AUDIO,DOCUMENT}_CACHE_DIR to the bridge instead of the
  bridge hardcoding ~/.hermes/image_cache etc., fixing media paths under
  HERMES_HOME overrides, profiles, and the new cache/ layout.
This commit is contained in:
Teknium 2026-06-11 03:47:29 -07:00 committed by GitHub
parent 875aa8f162
commit 3edd09a46f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 435 additions and 12 deletions

View file

@ -24,7 +24,8 @@ import { Boom } from '@hapi/boom';
import pino from 'pino';
import path from 'path';
import { mkdirSync, readFileSync, writeFileSync, existsSync, readdirSync, unlinkSync } from 'fs';
import { randomBytes } from 'crypto';
import { fileURLToPath } from 'url';
import { randomBytes, createHash } from 'crypto';
import { execSync } from 'child_process';
import { tmpdir } from 'os';
import qrcode from 'qrcode-terminal';
@ -45,9 +46,28 @@ const WHATSAPP_DEBUG =
// Port number, parsed as a base-10 integer from the `port` option; '3000' is
// passed as getArg's second argument (presumably the default — confirm
// against getArg, which is defined elsewhere in this file).
const PORT = parseInt(getArg('port', '3000'), 10);
// Session directory from the `session` option; the second argument is
// <HOME>/.hermes/whatsapp/session. NOTE(review): when HOME is unset the
// literal string '~' is used as the first path segment — nothing here
// expands it to a home directory.
const SESSION_DIR = getArg('session', path.join(process.env.HOME || '~', '.hermes', 'whatsapp', 'session'));
const IMAGE_CACHE_DIR = path.join(process.env.HOME || '~', '.hermes', 'image_cache');
const DOCUMENT_CACHE_DIR = path.join(process.env.HOME || '~', '.hermes', 'document_cache');
const AUDIO_CACHE_DIR = path.join(process.env.HOME || '~', '.hermes', 'audio_cache');
// Media cache locations. Each one is taken from its HERMES_*_CACHE_DIR
// environment variable when that is set to a non-empty value (the Python
// gateway passes the profile-aware, HERMES_HOME-aware paths this way).
// Otherwise the legacy hardcoded <HOME>/.hermes/<name> location is used, for
// bridges launched outside the gateway.
const legacyCacheDir = (dirName) =>
  path.join(process.env.HOME || '~', '.hermes', dirName);

const IMAGE_CACHE_DIR =
  process.env.HERMES_IMAGE_CACHE_DIR || legacyCacheDir('image_cache');
const DOCUMENT_CACHE_DIR =
  process.env.HERMES_DOCUMENT_CACHE_DIR || legacyCacheDir('document_cache');
const AUDIO_CACHE_DIR =
  process.env.HERMES_AUDIO_CACHE_DIR || legacyCacheDir('audio_cache');
// Self-hash of this script file: the first 16 hex characters of the SHA-256
// of the file this module was loaded from. Reported in /health so the Python
// gateway can detect a running bridge that predates the current bridge.js and
// restart it instead of silently reusing stale code (stale-bridge trap:
// `hermes update` updates bridge.js on disk but a long-lived bridge process
// keeps serving the old behavior forever).
let SCRIPT_HASH = '';
try {
  SCRIPT_HASH = createHash('sha256')
    .update(readFileSync(fileURLToPath(import.meta.url)))
    .digest('hex')
    .slice(0, 16);
} catch (err) {
  // Best effort: the bridge must still start when it cannot read its own
  // source, so the hash stays empty. Surface the reason instead of swallowing
  // it, otherwise an empty scriptHash in /health gives no hint why.
  console.warn(
    `Could not compute bridge script hash: ${err instanceof Error ? err.message : err}`,
  );
}
// True when the `--pair-only` flag is present in `args`.
const PAIR_ONLY = args.includes('--pair-only');
// Mode from the `mode` option; the WHATSAPP_MODE env var, or 'self-chat' when
// that is unset/empty, is passed as getArg's second argument (presumably the
// fallback value — confirm against getArg).
const WHATSAPP_MODE = getArg('mode', process.env.WHATSAPP_MODE || 'self-chat'); // "bot" or "self-chat"
// Result of parseAllowedUsers on the WHATSAPP_ALLOWED_USERS env var; an
// unset or empty variable is passed through as ''.
const ALLOWED_USERS = parseAllowedUsers(process.env.WHATSAPP_ALLOWED_USERS || '');
@ -700,6 +720,7 @@ app.get('/health', (req, res) => {
status: connectionState,
queueLength: messageQueue.length,
uptime: process.uptime(),
scriptHash: SCRIPT_HASH,
});
});