fix: harden web gateway security and fix error swallowing

- Use hmac.compare_digest for timing-safe token comparison (3 endpoints)
- Default bind to 127.0.0.1 instead of 0.0.0.0
- Sanitize upload filenames with Path.name to prevent path traversal
- Add DOMPurify to sanitize marked.parse() output against XSS
- Replace add_static with authenticated media handler
- Hide token in group chats for /remote-control command
- Use ctypes.util.find_library for Opus instead of hardcoded paths
- Add force=True to 5 interrupt _vprint calls for visibility
- Log Opus decode errors and voice restart failures instead of swallowing
This commit is contained in:
0xbyt4 2026-03-13 15:29:18 +03:00
parent d646442692
commit 0ff1b4ade2
8 changed files with 59 additions and 30 deletions

View file

@ -217,7 +217,7 @@ VOICE_TOOLS_OPENAI_KEY=
# Access from phone/tablet/desktop at http://<your-ip>:8765 # Access from phone/tablet/desktop at http://<your-ip>:8765
# WEB_UI_ENABLED=false # WEB_UI_ENABLED=false
# WEB_UI_PORT=8765 # WEB_UI_PORT=8765
# WEB_UI_HOST=0.0.0.0 # WEB_UI_HOST=127.0.0.1 # Use 0.0.0.0 to expose on LAN
# WEB_UI_TOKEN= # Auto-generated if empty # WEB_UI_TOKEN= # Auto-generated if empty
# Gateway-wide: allow ALL users without an allowlist (default: false = deny) # Gateway-wide: allow ALL users without an allowlist (default: false = deny)

4
cli.py
View file

@ -3709,8 +3709,8 @@ class HermesCLI:
self._voice_start_recording() self._voice_start_recording()
if hasattr(self, '_app') and self._app: if hasattr(self, '_app') and self._app:
self._app.invalidate() self._app.invalidate()
except Exception: except Exception as e:
pass _cprint(f"{_DIM}Voice auto-restart failed: {e}{_RST}")
threading.Thread(target=_restart_recording, daemon=True).start() threading.Thread(target=_restart_recording, daemon=True).start()
def _voice_speak_response(self, text: str): def _voice_speak_response(self, text: str):

View file

@ -478,7 +478,7 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
config.platforms[Platform.WEB].enabled = True config.platforms[Platform.WEB].enabled = True
config.platforms[Platform.WEB].extra.update({ config.platforms[Platform.WEB].extra.update({
"port": int(os.getenv("WEB_UI_PORT", "8765")), "port": int(os.getenv("WEB_UI_PORT", "8765")),
"host": os.getenv("WEB_UI_HOST", "0.0.0.0"), "host": os.getenv("WEB_UI_HOST", "127.0.0.1"),
"token": os.getenv("WEB_UI_TOKEN", ""), "token": os.getenv("WEB_UI_TOKEN", ""),
}) })

View file

@ -294,7 +294,8 @@ class VoiceReceiver:
with self._lock: with self._lock:
self._buffers[ssrc].extend(pcm) self._buffers[ssrc].extend(pcm)
self._last_packet_time[ssrc] = time.monotonic() self._last_packet_time[ssrc] = time.monotonic()
except Exception: except Exception as e:
logger.debug("Opus decode error for SSRC %s: %s", ssrc, e)
return return
# ------------------------------------------------------------------ # ------------------------------------------------------------------
@ -406,14 +407,15 @@ class DiscordAdapter(BasePlatformAdapter):
# Load opus codec for voice channel support # Load opus codec for voice channel support
if not discord.opus.is_loaded(): if not discord.opus.is_loaded():
try: import ctypes.util
discord.opus.load_opus("/opt/homebrew/lib/libopus.dylib") opus_path = ctypes.util.find_library("opus")
except Exception: if opus_path:
# Try common Linux path as fallback
try: try:
discord.opus.load_opus("libopus.so.0") discord.opus.load_opus(opus_path)
except Exception: except Exception:
logger.warning("Opus codec not found — voice channel playback disabled") logger.warning("Opus codec found at %s but failed to load", opus_path)
if not discord.opus.is_loaded():
logger.warning("Opus codec not found — voice channel playback disabled")
if not self.config.token: if not self.config.token:
logger.error("[%s] No bot token configured", self.name) logger.error("[%s] No bot token configured", self.name)

View file

@ -10,6 +10,7 @@ No external dependencies beyond aiohttp (already in messaging extra).
import asyncio import asyncio
import base64 import base64
import hmac
import json import json
import logging import logging
import os import os
@ -63,7 +64,7 @@ class WebAdapter(BasePlatformAdapter):
self._site: Optional[web.TCPSite] = None self._site: Optional[web.TCPSite] = None
# Config # Config
self._host: str = config.extra.get("host", "0.0.0.0") self._host: str = config.extra.get("host", "127.0.0.1")
self._port: int = config.extra.get("port", 8765) self._port: int = config.extra.get("port", 8765)
self._token: str = config.extra.get("token", "") or secrets.token_hex(16) self._token: str = config.extra.get("token", "") or secrets.token_hex(16)
@ -87,7 +88,7 @@ class WebAdapter(BasePlatformAdapter):
self._app.router.add_get("/", self._handle_index) self._app.router.add_get("/", self._handle_index)
self._app.router.add_get("/ws", self._handle_websocket) self._app.router.add_get("/ws", self._handle_websocket)
self._app.router.add_post("/upload", self._handle_upload) self._app.router.add_post("/upload", self._handle_upload)
self._app.router.add_static("/media", str(self._media_dir), show_index=False) self._app.router.add_get("/media/{filename}", self._handle_media)
self._runner = web.AppRunner(self._app) self._runner = web.AppRunner(self._app)
await self._runner.setup() await self._runner.setup()
@ -316,7 +317,7 @@ class WebAdapter(BasePlatformAdapter):
# Auth handshake # Auth handshake
if msg_type == "auth": if msg_type == "auth":
if data.get("token") == self._token: if hmac.compare_digest(data.get("token", ""), self._token):
authenticated = True authenticated = True
self._clients[session_id] = ws self._clients[session_id] = ws
await ws.send_str(json.dumps({ await ws.send_str(json.dumps({
@ -356,7 +357,7 @@ class WebAdapter(BasePlatformAdapter):
async def _handle_upload(self, request: web.Request) -> web.Response: async def _handle_upload(self, request: web.Request) -> web.Response:
"""Handle file uploads (images, voice recordings).""" """Handle file uploads (images, voice recordings)."""
token = request.headers.get("Authorization", "").replace("Bearer ", "") token = request.headers.get("Authorization", "").replace("Bearer ", "")
if token != self._token: if not hmac.compare_digest(token, self._token):
return web.json_response({"error": "Unauthorized"}, status=401) return web.json_response({"error": "Unauthorized"}, status=401)
reader = await request.multipart() reader = await request.multipart()
@ -364,7 +365,8 @@ class WebAdapter(BasePlatformAdapter):
if not field: if not field:
return web.json_response({"error": "No file"}, status=400) return web.json_response({"error": "No file"}, status=400)
orig_name = field.filename or "file" # Sanitize filename to prevent path traversal attacks
orig_name = Path(field.filename or "file").name
filename = f"upload_{uuid.uuid4().hex[:8]}_{orig_name}" filename = f"upload_{uuid.uuid4().hex[:8]}_{orig_name}"
dest = self._media_dir / filename dest = self._media_dir / filename
@ -377,6 +379,19 @@ class WebAdapter(BasePlatformAdapter):
return web.json_response({"url": f"/media/{filename}", "filename": filename}) return web.json_response({"url": f"/media/{filename}", "filename": filename})
async def _handle_media(self, request: web.Request) -> web.Response:
    """Serve a file from the media directory, gated by the access token.

    The token is read from the ``token`` query parameter and compared
    against ``self._token``. Only the final path component of the requested
    ``filename`` is used, so directory parts in the request cannot select a
    file outside ``self._media_dir``.

    Returns:
        A 401 response when the token does not match, a 404 response when
        no regular file with that name exists in the media directory, and
        otherwise a ``web.FileResponse`` for the file.
    """
    token = request.query.get("token", "")
    # hmac.compare_digest() raises TypeError when given str arguments that
    # contain non-ASCII characters. Comparing encoded bytes instead means a
    # malformed token is rejected with 401 rather than surfacing as an
    # unhandled exception. "surrogatepass" lets any str value be encoded.
    supplied = token.encode("utf-8", "surrogatepass")
    expected = self._token.encode("utf-8", "surrogatepass")
    if not hmac.compare_digest(supplied, expected):
        return web.Response(status=401, text="Unauthorized")

    # Drop any directory components before joining onto the media directory.
    filename = Path(request.match_info["filename"]).name
    filepath = self._media_dir / filename
    # is_file() is already False for a path that does not exist, and also
    # rejects directories (e.g. when the sanitized name is empty).
    if not filepath.is_file():
        return web.Response(status=404, text="Not found")
    return web.FileResponse(filepath)
# ---- Message Processing ---- # ---- Message Processing ----
async def _process_user_message(self, session_id: str, text: str) -> None: async def _process_user_message(self, session_id: str, text: str) -> None:
@ -570,6 +585,7 @@ def _build_chat_html() -> str:
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/styles/github-dark.min.css"> <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/styles/github-dark.min.css">
<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/highlight.min.js"></script> <script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/highlight.min.js"></script>
<script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script> <script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
<script src="https://cdn.jsdelivr.net/npm/dompurify@3/dist/purify.min.js"></script>
<style> <style>
:root { :root {
--bg: #08090d; --bg: #08090d;
@ -1157,7 +1173,7 @@ function handleServerMessage(data) {
case 'play_audio': case 'play_audio':
// Invisible TTS playback no UI element, just play audio // Invisible TTS playback no UI element, just play audio
{ {
const a = new Audio(data.url); const a = new Audio(mediaUrl(data.url));
currentTtsAudio = a; currentTtsAudio = a;
voiceAwaitingResponse = false; voiceAwaitingResponse = false;
a.onended = () => { a.onended = () => {
@ -1357,7 +1373,7 @@ function addImageMessage(id, url, caption, ts) {
div.id = 'msg-' + id; div.id = 'msg-' + id;
if (caption) div.innerHTML = renderMarkdown(caption); if (caption) div.innerHTML = renderMarkdown(caption);
const img = document.createElement('img'); const img = document.createElement('img');
img.src = url; img.src = mediaUrl(url);
img.alt = caption || 'Image'; img.alt = caption || 'Image';
img.onclick = () => window.open(url, '_blank'); img.onclick = () => window.open(url, '_blank');
div.appendChild(img); div.appendChild(img);
@ -1381,7 +1397,7 @@ function addVoiceMessage(id, url, caption, ts) {
div.appendChild(p); div.appendChild(p);
} }
const audio = new Audio(url); const audio = new Audio(mediaUrl(url));
audio.preload = 'metadata'; audio.preload = 'metadata';
// Build voice bubble // Build voice bubble
@ -1481,7 +1497,7 @@ function addDocumentMessage(id, url, filename, caption, ts) {
if (caption) div.innerHTML = renderMarkdown(caption); if (caption) div.innerHTML = renderMarkdown(caption);
const a = document.createElement('a'); const a = document.createElement('a');
a.className = 'file-download'; a.className = 'file-download';
a.href = url; a.href = mediaUrl(url);
a.download = filename; a.download = filename;
a.innerHTML = '<svg width="16" height="16" viewBox="0 0 24 24" fill="currentColor"><path d="M14 2H6c-1.1 0-2 .9-2 2v16c0 1.1.9 2 2 2h12c1.1 0 2-.9 2-2V8l-6-6zm4 18H6V4h7v5h5v11z"/></svg>' + filename; a.innerHTML = '<svg width="16" height="16" viewBox="0 0 24 24" fill="currentColor"><path d="M14 2H6c-1.1 0-2 .9-2 2v16c0 1.1.9 2 2 2h12c1.1 0 2-.9 2-2V8l-6-6zm4 18H6V4h7v5h5v11z"/></svg>' + filename;
div.appendChild(a); div.appendChild(a);
@ -1516,9 +1532,16 @@ function addTranscriptMessage(text) {
scrollToBottom(); scrollToBottom();
} }
function mediaUrl(url) {
  // Only server-hosted /media/ paths get the token appended; any other
  // value (including empty or missing) is returned unchanged.
  if (!url || !url.startsWith('/media/')) {
    return url;
  }
  // Extend an existing query string if there is one, otherwise start one.
  const separator = url.includes('?') ? '&' : '?';
  return url + separator + 'token=' + encodeURIComponent(authToken);
}
function renderMarkdown(text) { function renderMarkdown(text) {
try { try {
return marked.parse(text); return DOMPurify.sanitize(marked.parse(text));
} catch (e) { } catch (e) {
return text.replace(/</g, '&lt;').replace(/>/g, '&gt;'); return text.replace(/</g, '&lt;').replace(/>/g, '&gt;');
} }

View file

@ -2472,14 +2472,17 @@ class GatewayRunner:
"""Handle /remote-control — start or show the web UI for remote access.""" """Handle /remote-control — start or show the web UI for remote access."""
from gateway.config import Platform, PlatformConfig from gateway.config import Platform, PlatformConfig
is_dm = event.source and event.source.chat_type == "dm"
# Already running? # Already running?
if Platform.WEB in self.adapters: if Platform.WEB in self.adapters:
adapter = self.adapters[Platform.WEB] adapter = self.adapters[Platform.WEB]
local_ip = adapter._get_local_ip() local_ip = adapter._get_local_ip()
token_display = adapter._token if is_dm else "(hidden — use in DM to see token)"
return ( return (
f"Web UI already running.\n" f"Web UI already running.\n"
f"URL: http://{local_ip}:{adapter._port}\n" f"URL: http://{local_ip}:{adapter._port}\n"
f"Token: {adapter._token}" f"Token: {token_display}"
) )
# Start web adapter on the fly # Start web adapter on the fly
@ -2499,7 +2502,7 @@ class GatewayRunner:
web_config = PlatformConfig( web_config = PlatformConfig(
enabled=True, enabled=True,
extra={"port": port, "host": "0.0.0.0", "token": token}, extra={"port": port, "host": "127.0.0.1", "token": token},
) )
adapter = WebAdapter(web_config) adapter = WebAdapter(web_config)
adapter.set_message_handler(self._handle_message) adapter.set_message_handler(self._handle_message)
@ -2510,10 +2513,11 @@ class GatewayRunner:
self.adapters[Platform.WEB] = adapter self.adapters[Platform.WEB] = adapter
local_ip = adapter._get_local_ip() local_ip = adapter._get_local_ip()
token_display = adapter._token if is_dm else "(hidden — use in DM to see token)"
return ( return (
f"Web UI started!\n" f"Web UI started!\n"
f"URL: http://{local_ip}:{adapter._port}\n" f"URL: http://{local_ip}:{adapter._port}\n"
f"Token: {adapter._token}\n" f"Token: {token_display}\n"
f"Open this URL on your phone or any device on the same network." f"Open this URL on your phone or any device on the same network."
) )
except Exception as e: except Exception as e:

View file

@ -3625,7 +3625,7 @@ class AIAgent:
if self._interrupt_requested: if self._interrupt_requested:
remaining_calls = assistant_message.tool_calls[i-1:] remaining_calls = assistant_message.tool_calls[i-1:]
if remaining_calls: if remaining_calls:
self._vprint(f"{self.log_prefix}⚡ Interrupt: skipping {len(remaining_calls)} tool call(s)") self._vprint(f"{self.log_prefix}⚡ Interrupt: skipping {len(remaining_calls)} tool call(s)", force=True)
for skipped_tc in remaining_calls: for skipped_tc in remaining_calls:
skipped_name = skipped_tc.function.name skipped_name = skipped_tc.function.name
skip_msg = { skip_msg = {
@ -3849,7 +3849,7 @@ class AIAgent:
if self._interrupt_requested and i < len(assistant_message.tool_calls): if self._interrupt_requested and i < len(assistant_message.tool_calls):
remaining = len(assistant_message.tool_calls) - i remaining = len(assistant_message.tool_calls) - i
self._vprint(f"{self.log_prefix}⚡ Interrupt: skipping {remaining} remaining tool call(s)") self._vprint(f"{self.log_prefix}⚡ Interrupt: skipping {remaining} remaining tool call(s)", force=True)
for skipped_tc in assistant_message.tool_calls[i:]: for skipped_tc in assistant_message.tool_calls[i:]:
skipped_name = skipped_tc.function.name skipped_name = skipped_tc.function.name
skip_msg = { skip_msg = {
@ -4559,7 +4559,7 @@ class AIAgent:
sleep_end = time.time() + wait_time sleep_end = time.time() + wait_time
while time.time() < sleep_end: while time.time() < sleep_end:
if self._interrupt_requested: if self._interrupt_requested:
self._vprint(f"{self.log_prefix}⚡ Interrupt detected during retry wait, aborting.") self._vprint(f"{self.log_prefix}⚡ Interrupt detected during retry wait, aborting.", force=True)
self._persist_session(messages, conversation_history) self._persist_session(messages, conversation_history)
self.clear_interrupt() self.clear_interrupt()
return { return {
@ -4801,7 +4801,7 @@ class AIAgent:
# Check for interrupt before deciding to retry # Check for interrupt before deciding to retry
if self._interrupt_requested: if self._interrupt_requested:
self._vprint(f"{self.log_prefix}⚡ Interrupt detected during error handling, aborting retries.") self._vprint(f"{self.log_prefix}⚡ Interrupt detected during error handling, aborting retries.", force=True)
self._persist_session(messages, conversation_history) self._persist_session(messages, conversation_history)
self.clear_interrupt() self.clear_interrupt()
return { return {
@ -4993,7 +4993,7 @@ class AIAgent:
sleep_end = time.time() + wait_time sleep_end = time.time() + wait_time
while time.time() < sleep_end: while time.time() < sleep_end:
if self._interrupt_requested: if self._interrupt_requested:
self._vprint(f"{self.log_prefix}⚡ Interrupt detected during retry wait, aborting.") self._vprint(f"{self.log_prefix}⚡ Interrupt detected during retry wait, aborting.", force=True)
self._persist_session(messages, conversation_history) self._persist_session(messages, conversation_history)
self.clear_interrupt() self.clear_interrupt()
return { return {

View file

@ -143,7 +143,7 @@ cloudflared tunnel --url http://localhost:8765
|----------|---------|-------------| |----------|---------|-------------|
| `WEB_UI_ENABLED` | `false` | Enable the web gateway | | `WEB_UI_ENABLED` | `false` | Enable the web gateway |
| `WEB_UI_PORT` | `8765` | HTTP server port | | `WEB_UI_PORT` | `8765` | HTTP server port |
| `WEB_UI_HOST` | `0.0.0.0` | Bind address (`0.0.0.0` = LAN, `127.0.0.1` = localhost) | | `WEB_UI_HOST` | `127.0.0.1` | Bind address (`0.0.0.0` = LAN, `127.0.0.1` = localhost) |
| `WEB_UI_TOKEN` | (auto) | Access token. Auto-generated if empty. | | `WEB_UI_TOKEN` | (auto) | Access token. Auto-generated if empty. |
--- ---