mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-08 03:01:47 +00:00
fix(whatsapp): auto-convert mp3/wav to ogg/opus in send-media for native voice bubbles
The WhatsApp bridge (bridge.js) only sets ptt: true when the file extension is .ogg or .opus, so mp3/wav files (from Edge TTS, NeuTTS, etc.) arrive as file attachments instead of voice bubbles — silently, with no error. Fix: when the media type is audio and the format is not ogg/opus, convert the file to ogg/opus with ffmpeg via a temporary file before sending. This makes send_voice() self-sufficient regardless of the format the caller provides. Fallback: if ffmpeg is unavailable or the conversion fails, the original buffer is sent (the previous behaviour) with a console.warn — no crash. Addresses veloguardian's review comment on PR #4992.
This commit is contained in:
parent
45949e944a
commit
dad62c4c47
1 changed file with 28 additions and 3 deletions
|
|
@ -23,8 +23,10 @@ import express from 'express';
|
|||
import { Boom } from '@hapi/boom';
|
||||
import pino from 'pino';
|
||||
import path from 'path';
|
||||
import { mkdirSync, readFileSync, writeFileSync, existsSync, readdirSync } from 'fs';
|
||||
import { mkdirSync, readFileSync, writeFileSync, existsSync, readdirSync, unlinkSync } from 'fs';
|
||||
import { randomBytes } from 'crypto';
|
||||
import { execSync } from 'child_process';
|
||||
import { tmpdir } from 'os';
|
||||
import qrcode from 'qrcode-terminal';
|
||||
import { matchesAllowedUser, parseAllowedUsers } from './allowlist.js';
|
||||
|
||||
|
|
@ -505,8 +507,31 @@ app.post('/send-media', async (req, res) => {
|
|||
msgPayload = { video: buffer, caption: caption || undefined, mimetype: MIME_MAP[ext] || 'video/mp4' };
|
||||
break;
|
||||
case 'audio': {
|
||||
const audioMime = (ext === 'ogg' || ext === 'opus') ? 'audio/ogg; codecs=opus' : 'audio/mpeg';
|
||||
msgPayload = { audio: buffer, mimetype: audioMime, ptt: ext === 'ogg' || ext === 'opus' };
|
||||
// WhatsApp only renders a native voice bubble (ptt) when the file is ogg/opus.
|
||||
// If the caller passes mp3, wav, m4a etc. (e.g. from Edge TTS / NeuTTS),
|
||||
// silently convert to ogg/opus via ffmpeg so ptt is honoured (if conversion fails, the original audio is sent as-is).
|
||||
let audioBuffer = buffer;
|
||||
let audioExt = ext;
|
||||
const needsConversion = !['ogg', 'opus'].includes(ext);
|
||||
let tmpPath = null;
|
||||
if (needsConversion) {
|
||||
tmpPath = path.join(tmpdir(), `hermes_voice_${randomBytes(6).toString('hex')}.ogg`);
|
||||
try {
|
||||
execSync(
|
||||
`ffmpeg -y -i ${JSON.stringify(filePath)} -ar 48000 -ac 1 -c:a libopus ${JSON.stringify(tmpPath)}`,
|
||||
{ timeout: 30000, stdio: 'pipe' }
|
||||
);
|
||||
audioBuffer = readFileSync(tmpPath);
|
||||
audioExt = 'ogg';
|
||||
} catch (convErr) {
|
||||
// ffmpeg not available or conversion failed — fall back to original format
|
||||
console.warn('[bridge] ffmpeg conversion failed, sending as file attachment:', convErr.message);
|
||||
} finally {
|
||||
try { if (tmpPath && existsSync(tmpPath)) unlinkSync(tmpPath); } catch (_) {}
|
||||
}
|
||||
}
|
||||
const audioMime = (audioExt === 'ogg' || audioExt === 'opus') ? 'audio/ogg; codecs=opus' : 'audio/mpeg';
|
||||
msgPayload = { audio: audioBuffer, mimetype: audioMime, ptt: audioExt === 'ogg' || audioExt === 'opus' };
|
||||
break;
|
||||
}
|
||||
case 'document':
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue