From 514f5020c7978cad3d8c50cd8ae17419481a1c6d Mon Sep 17 00:00:00 2001 From: helix4u <4317663+helix4u@users.noreply.github.com> Date: Sun, 24 May 2026 16:04:39 -0600 Subject: [PATCH] fix(debug): redact BlueBubbles webhook secrets --- agent/redact.py | 42 +++++++++++++++++++++++++++++++ gateway/platforms/bluebubbles.py | 22 ++++++++++++---- hermes_cli/debug.py | 10 +++++++- tests/agent/test_redact.py | 22 ++++++++++++++++ tests/gateway/test_bluebubbles.py | 8 ++++++ tests/hermes_cli/test_debug.py | 34 +++++++++++++++++++++++++ 6 files changed, 132 insertions(+), 6 deletions(-) diff --git a/agent/redact.py b/agent/redact.py index 1beb10450fd..7ed241c5efd 100644 --- a/agent/redact.py +++ b/agent/redact.py @@ -176,6 +176,15 @@ _URL_USERINFO_RE = re.compile( r"(https?|wss?|ftp)://([^/\s:@]+):([^/\s@]+)@", ) +# HTTP access logs often use a relative request target rather than a full URL: +# `"POST /webhook?password=... HTTP/1.1"`. The full-URL redactor above only +# sees strings containing `://`, so handle request-target query strings too. +_HTTP_REQUEST_TARGET_QUERY_RE = re.compile( + r"\b((?:GET|POST|PUT|PATCH|DELETE|HEAD|OPTIONS|TRACE|CONNECT)\s+[^ \t\r\n\"']*?)" + r"\?([^ \t\r\n\"']+)", + re.IGNORECASE, +) + # Form-urlencoded body detection: conservative — only applies when the entire # text looks like a query string (k=v&k=v pattern with no newlines). _FORM_BODY_RE = re.compile( @@ -293,6 +302,15 @@ def _redact_url_userinfo(text: str) -> str: ) +def _redact_http_request_target_query_params(text: str) -> str: + """Redact sensitive query params in HTTP access-log request targets.""" + def _sub(m: re.Match) -> str: + prefix = m.group(1) + query = _redact_query_string(m.group(2)) + return f"{prefix}?{query}" + return _HTTP_REQUEST_TARGET_QUERY_RE.sub(_sub, text) + + def _redact_form_body(text: str) -> str: """Redact sensitive values in a form-urlencoded body. @@ -397,6 +415,11 @@ def redact_sensitive_text(text: str, *, force: bool = False, code_file: bool = F if "?" in text: text = _redact_url_query_params(text) + # HTTP access logs can contain relative request targets with query params + # and no URL scheme, e.g. `"POST /hook?password=... HTTP/1.1"`. + if "?" in text and "=" in text and _has_http_method_substring(text): + text = _redact_http_request_target_query_params(text) + # Form-urlencoded bodies (only triggers on clean k=v&k=v inputs). if "&" in text and "=" in text: text = _redact_form_body(text) @@ -456,6 +479,25 @@ def _has_known_prefix_substring(text: str) -> bool: return any(p in text for p in _PREFIX_SUBSTRINGS) +_HTTP_METHOD_SUBSTRINGS = ( + "GET ", + "POST ", + "PUT ", + "PATCH ", + "DELETE ", + "HEAD ", + "OPTIONS ", + "TRACE ", + "CONNECT ", +) + + +def _has_http_method_substring(text: str) -> bool: + """Cheap pre-check before scanning for access-log request targets.""" + upper = text.upper() + return any(method in upper for method in _HTTP_METHOD_SUBSTRINGS) + + class RedactingFormatter(logging.Formatter): """Log formatter that redacts secrets from all log messages.""" diff --git a/gateway/platforms/bluebubbles.py b/gateway/platforms/bluebubbles.py index 7a4af3ad685..ec852e3d610 100644 --- a/gateway/platforms/bluebubbles.py +++ b/gateway/platforms/bluebubbles.py @@ -189,7 +189,10 @@ class BlueBubblesAdapter(BasePlatformAdapter): app = web.Application() app.router.add_get("/health", lambda _: web.Response(text="ok")) app.router.add_post(self.webhook_path, self._handle_webhook) - self._runner = web.AppRunner(app) + # The webhook auth value is carried in the query string because the + # BlueBubbles webhook API cannot send custom headers. Do not let + # aiohttp access logs write that request target to agent.log. + self._runner = web.AppRunner(app, access_log=None) await self._runner.setup() site = web.TCPSite(self._runner, self.webhook_host, self.webhook_port) await site.start() @@ -242,6 +245,14 @@ class BlueBubblesAdapter(BasePlatformAdapter): return f"{base}?password={quote(self.password, safe='')}" return base + @property + def _webhook_register_url_for_log(self) -> str: + """Webhook registration URL safe for logs.""" + base = self._webhook_url + if self.password: + return f"{base}?password=***" + return base + async def _find_registered_webhooks(self, url: str) -> list: """Return list of BB webhook entries matching *url*.""" try: @@ -269,7 +280,8 @@ class BlueBubblesAdapter(BasePlatformAdapter): existing = await self._find_registered_webhooks(webhook_url) if existing: logger.info( - "[bluebubbles] webhook already registered: %s", webhook_url + "[bluebubbles] webhook already registered: %s", + self._webhook_register_url_for_log, ) return True @@ -284,7 +296,7 @@ class BlueBubblesAdapter(BasePlatformAdapter): if 200 <= status < 300: logger.info( "[bluebubbles] webhook registered with server: %s", - webhook_url, + self._webhook_register_url_for_log, ) return True else: @@ -324,7 +336,8 @@ class BlueBubblesAdapter(BasePlatformAdapter): removed = True if removed: logger.info( - "[bluebubbles] webhook unregistered: %s", webhook_url + "[bluebubbles] webhook unregistered: %s", + self._webhook_register_url_for_log, ) except Exception as exc: logger.debug( @@ -934,4 +947,3 @@ class BlueBubblesAdapter(BasePlatformAdapter): asyncio.create_task(self.mark_read(session_chat_id)) return web.Response(text="ok") - diff --git a/hermes_cli/debug.py b/hermes_cli/debug.py index a7338e4ba82..b309ee37c54 100644 --- a/hermes_cli/debug.py +++ b/hermes_cli/debug.py @@ -14,6 +14,7 @@ Currently supports: import io import json import logging +import re import sys import time import urllib.error @@ -36,6 +37,12 @@ _REDACTION_BANNER = ( "run with --no-redact to disable]\n" ) +_EMAIL_ADDRESS_RE = re.compile( + r"(? str: return text from agent.redact import redact_sensitive_text - return redact_sensitive_text(text, force=True) + text = redact_sensitive_text(text, force=True) + return _EMAIL_ADDRESS_RE.sub("[REDACTED_EMAIL]", text) def _capture_log_snapshot( diff --git a/tests/agent/test_redact.py b/tests/agent/test_redact.py index 928eb1ff357..ea79ea9ce39 100644 --- a/tests/agent/test_redact.py +++ b/tests/agent/test_redact.py @@ -451,6 +451,28 @@ class TestUrlQueryParamRedaction: result = redact_sensitive_text(text) assert "opaqueWsToken123" not in result + def test_http_access_log_relative_request_target_query(self): + text = ( + 'INFO aiohttp.access: 127.0.0.1 "POST ' + '/bluebubbles-webhook?password=webhookSecret123&event=new-message ' + 'HTTP/1.1" 200 173 "-" "test-client"' + ) + result = redact_sensitive_text(text) + assert "webhookSecret123" not in result + assert "password=***" in result + assert "event=new-message" in result + + def test_http_access_log_absolute_request_target_query(self): + text = ( + 'INFO aiohttp.access: 127.0.0.1 "GET ' + 'https://example.com/callback?code=oauthCode123&state=csrf-ok ' + 'HTTP/1.1" 200 173 "-" "test-client"' + ) + result = redact_sensitive_text(text) + assert "oauthCode123" not in result + assert "code=***" in result + assert "state=csrf-ok" in result + class TestUrlUserinfoRedaction: """URL userinfo (`scheme://user:pass@host`) for non-DB schemes.""" diff --git a/tests/gateway/test_bluebubbles.py b/tests/gateway/test_bluebubbles.py index 6f93c1d4dba..dea806fe66b 100644 --- a/tests/gateway/test_bluebubbles.py +++ b/tests/gateway/test_bluebubbles.py @@ -452,6 +452,14 @@ class TestBlueBubblesWebhookUrl: adapter = _make_adapter(monkeypatch, password="W9fTC&L5JL*@") assert "password=W9fTC%26L5JL%2A%40" in adapter._webhook_register_url + def test_register_url_for_log_masks_password(self, monkeypatch): + """Log-safe webhook URLs must never expose the webhook password.""" + adapter = _make_adapter(monkeypatch, password="W9fTC&L5JL*@") + safe_url = adapter._webhook_register_url_for_log + assert safe_url.endswith("?password=***") + assert "W9fTC" not in safe_url + assert "%26" not in safe_url + def test_register_url_omits_query_when_no_password(self, monkeypatch): """If no password is configured, the register URL should be the bare URL.""" monkeypatch.delenv("BLUEBUBBLES_PASSWORD", raising=False) diff --git a/tests/hermes_cli/test_debug.py b/tests/hermes_cli/test_debug.py index 1996e7fce98..aad1c8e92a5 100644 --- a/tests/hermes_cli/test_debug.py +++ b/tests/hermes_cli/test_debug.py @@ -353,6 +353,40 @@ class TestCaptureLogSnapshotRedaction: assert snap.full_text is not None assert _REDACT_FIXTURE_TOKEN not in snap.full_text + def test_default_redacts_email_addresses_for_public_share( + self, hermes_home_with_secret + ): + from hermes_cli.debug import _capture_log_snapshot + + log_path = hermes_home_with_secret / "logs" / "agent.log" + log_path.write_text( + "2026-04-12 17:00:00 INFO gateway.run: " + "inbound message: platform=bluebubbles " + "user=person@example.com chat=iMessage;-;person@example.com msg='hello'\n" + ) + + snap = _capture_log_snapshot("agent", tail_lines=10) + + assert "person@example.com" not in snap.tail_text + assert "[REDACTED_EMAIL]" in snap.tail_text + assert snap.full_text is not None + assert "person@example.com" not in snap.full_text + + def test_no_redact_preserves_email_addresses(self, hermes_home_with_secret): + from hermes_cli.debug import _capture_log_snapshot + + log_path = hermes_home_with_secret / "logs" / "agent.log" + log_path.write_text( + "2026-04-12 17:00:00 INFO gateway.run: " + "inbound message: platform=bluebubbles " + "user=person@example.com chat=iMessage;-;person@example.com msg='hello'\n" + ) + + snap = _capture_log_snapshot("agent", tail_lines=10, redact=False) + + assert "person@example.com" in snap.tail_text + assert "person@example.com" in (snap.full_text or "") + def test_capture_default_log_snapshots_threads_redact( self, hermes_home_with_secret ):