mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-14 04:02:26 +00:00
fix(whatsapp): auto-convert mp3/wav to ogg/opus in send-media for native voice bubbles
The WhatsApp bridge (bridge.js) only sets ptt: true when the file extension is .ogg or .opus, so mp3/wav files (from Edge TTS, NeuTTS, etc.) arrive as file attachments instead of voice bubbles — silently, with no error. Fix: when the media type is audio and the format is not ogg/opus, convert the file to ogg/opus with ffmpeg via a temp file before sending. This makes send_voice() self-sufficient regardless of the format the caller provides. Fallback: if ffmpeg is unavailable or the conversion fails, the original buffer is sent (previous behaviour) with a console.warn — no crash. Addresses veloguardian's review comment on PR #4992.
This commit is contained in:
parent
45949e944a
commit
dad62c4c47
1 changed file with 28 additions and 3 deletions
|
|
@ -23,8 +23,10 @@ import express from 'express';
|
||||||
import { Boom } from '@hapi/boom';
|
import { Boom } from '@hapi/boom';
|
||||||
import pino from 'pino';
|
import pino from 'pino';
|
||||||
import path from 'path';
|
import path from 'path';
|
||||||
import { mkdirSync, readFileSync, writeFileSync, existsSync, readdirSync } from 'fs';
|
import { mkdirSync, readFileSync, writeFileSync, existsSync, readdirSync, unlinkSync } from 'fs';
|
||||||
import { randomBytes } from 'crypto';
|
import { randomBytes } from 'crypto';
|
||||||
|
import { execSync } from 'child_process';
|
||||||
|
import { tmpdir } from 'os';
|
||||||
import qrcode from 'qrcode-terminal';
|
import qrcode from 'qrcode-terminal';
|
||||||
import { matchesAllowedUser, parseAllowedUsers } from './allowlist.js';
|
import { matchesAllowedUser, parseAllowedUsers } from './allowlist.js';
|
||||||
|
|
||||||
|
|
@ -505,8 +507,31 @@ app.post('/send-media', async (req, res) => {
|
||||||
msgPayload = { video: buffer, caption: caption || undefined, mimetype: MIME_MAP[ext] || 'video/mp4' };
|
msgPayload = { video: buffer, caption: caption || undefined, mimetype: MIME_MAP[ext] || 'video/mp4' };
|
||||||
break;
|
break;
|
||||||
case 'audio': {
|
case 'audio': {
|
||||||
const audioMime = (ext === 'ogg' || ext === 'opus') ? 'audio/ogg; codecs=opus' : 'audio/mpeg';
|
// WhatsApp only renders a native voice bubble (ptt) when the file is ogg/opus.
|
||||||
msgPayload = { audio: buffer, mimetype: audioMime, ptt: ext === 'ogg' || ext === 'opus' };
|
// If the caller passes mp3, wav, m4a etc. (e.g. from Edge TTS / NeuTTS),
|
||||||
|
// silently convert to ogg/opus via ffmpeg so ptt is always honoured.
|
||||||
|
let audioBuffer = buffer;
|
||||||
|
let audioExt = ext;
|
||||||
|
const needsConversion = !['ogg', 'opus'].includes(ext);
|
||||||
|
let tmpPath = null;
|
||||||
|
if (needsConversion) {
|
||||||
|
tmpPath = path.join(tmpdir(), `hermes_voice_${randomBytes(6).toString('hex')}.ogg`);
|
||||||
|
try {
|
||||||
|
execSync(
|
||||||
|
`ffmpeg -y -i ${JSON.stringify(filePath)} -ar 48000 -ac 1 -c:a libopus ${JSON.stringify(tmpPath)}`,
|
||||||
|
{ timeout: 30000, stdio: 'pipe' }
|
||||||
|
);
|
||||||
|
audioBuffer = readFileSync(tmpPath);
|
||||||
|
audioExt = 'ogg';
|
||||||
|
} catch (convErr) {
|
||||||
|
// ffmpeg not available or conversion failed — fall back to original format
|
||||||
|
console.warn('[bridge] ffmpeg conversion failed, sending as file attachment:', convErr.message);
|
||||||
|
} finally {
|
||||||
|
try { if (tmpPath && existsSync(tmpPath)) unlinkSync(tmpPath); } catch (_) {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
const audioMime = (audioExt === 'ogg' || audioExt === 'opus') ? 'audio/ogg; codecs=opus' : 'audio/mpeg';
|
||||||
|
msgPayload = { audio: audioBuffer, mimetype: audioMime, ptt: audioExt === 'ogg' || audioExt === 'opus' };
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case 'document':
|
case 'document':
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue