fix(security): require source CIDR allowlisting for public msgraph webhook binds

This commit is contained in:
Dusk1e 2026-05-25 18:40:36 +03:00 committed by Teknium
parent 986abb3cf7
commit 43abc51f66
4 changed files with 84 additions and 11 deletions

View file

@ -25,6 +25,7 @@ from gateway.platforms.base import (
MessageEvent,
MessageType,
SendResult,
is_network_accessible,
)
logger = logging.getLogger(__name__)
@ -132,12 +133,24 @@ class MSGraphWebhookAdapter(BasePlatformAdapter):
def set_notification_scheduler(self, scheduler: Optional[NotificationScheduler]) -> None:
self._notification_scheduler = scheduler
def _source_allowlist_required_but_missing(self) -> bool:
return is_network_accessible(self._host) and not self._allowed_source_networks
async def connect(self) -> bool:
if self._client_state is None:
logger.error(
"[msgraph_webhook] Refusing to start without extra.client_state configured"
)
return False
if self._source_allowlist_required_but_missing():
logger.error(
"[msgraph_webhook] Refusing to start: binding to %s requires "
"extra.allowed_source_cidrs. Configure the Microsoft Graph "
"source CIDRs or bind to loopback (127.0.0.1/::1) behind a "
"tunnel or reverse proxy.",
self._host,
)
return False
app = web.Application()
app.router.add_get(self._health_path, self._handle_health)
@ -177,6 +190,8 @@ class MSGraphWebhookAdapter(BasePlatformAdapter):
return {"name": chat_id, "type": "webhook"}
async def _handle_health(self, request: "web.Request") -> "web.Response":
if not self._source_ip_allowed(request):
return web.Response(status=403)
return web.json_response(
{
"status": "ok",
@ -271,9 +286,12 @@ class MSGraphWebhookAdapter(BasePlatformAdapter):
def _source_ip_allowed(self, request: "web.Request") -> bool:
"""Return True if the request's source IP is in the configured allowlist.
When ``allowed_source_cidrs`` is empty (the default), everything is
allowed preserves behavior for dev tunnels / localhost setups.
Loopback-only binds may omit ``allowed_source_cidrs`` for local reverse
proxies and dev tunnels. Network-accessible binds fail closed until an
explicit CIDR allowlist is configured.
"""
if self._source_allowlist_required_but_missing():
return False
if not self._allowed_source_networks:
return True
peer = request.remote or ""

View file

@ -11,6 +11,7 @@ from gateway.platforms.msgraph_webhook import AIOHTTP_AVAILABLE, MSGraphWebhookA
def _make_adapter(**extra_overrides) -> MSGraphWebhookAdapter:
extra = {
"host": "127.0.0.1",
"client_state": "expected-client-state",
"accepted_resources": ["communications/onlineMeetings"],
}
@ -80,6 +81,27 @@ class TestMSGraphValidationHandshake:
# is_connected is a @property on the base adapter, not a method.
assert adapter.is_connected is False
@pytest.mark.anyio
async def test_connect_requires_source_allowlist_on_public_bind(self):
if not AIOHTTP_AVAILABLE:
pytest.skip("aiohttp not installed")
adapter = _make_adapter(host="0.0.0.0", port=0, allowed_source_cidrs=[])
connected = await adapter.connect()
assert connected is False
assert adapter.is_connected is False
@pytest.mark.anyio
async def test_connect_allows_loopback_without_source_allowlist(self):
if not AIOHTTP_AVAILABLE:
pytest.skip("aiohttp not installed")
adapter = _make_adapter(host="127.0.0.1", port=0, allowed_source_cidrs=[])
try:
connected = await adapter.connect()
assert connected is True
assert adapter.is_connected is True
finally:
await adapter.disconnect()
@pytest.mark.anyio
async def test_validation_token_echo_on_get(self):
adapter = _make_adapter()
@ -381,9 +403,9 @@ class TestMSGraphNotifications:
class TestMSGraphSourceIPAllowlist:
@pytest.mark.anyio
async def test_disabled_by_default_allows_all(self):
"""Empty allowlist preserves pre-existing behavior (dev tunnels, localhost)."""
adapter = _make_adapter() # no allowed_source_cidrs set
async def test_public_bind_without_allowlist_fails_closed(self):
"""Public binds must not accept requests until a source allowlist is configured."""
adapter = _make_adapter(host="0.0.0.0", allowed_source_cidrs=[])
payload = {
"value": [
{
@ -396,6 +418,24 @@ class TestMSGraphSourceIPAllowlist:
resp = await adapter._handle_notification(
_FakeRequest(json_payload=payload, remote="203.0.113.99")
)
assert resp.status == 403
@pytest.mark.anyio
async def test_loopback_bind_without_allowlist_still_accepts_local_requests(self):
"""Loopback-only listeners may rely on local proxying/tunnels instead of CIDRs."""
adapter = _make_adapter(host="127.0.0.1", allowed_source_cidrs=[])
payload = {
"value": [
{
"id": "notif-ip-local",
"resource": "communications/onlineMeetings/m",
"clientState": "expected-client-state",
}
]
}
resp = await adapter._handle_notification(
_FakeRequest(json_payload=payload, remote="127.0.0.1")
)
assert resp.status == 202
@pytest.mark.anyio
@ -441,6 +481,13 @@ class TestMSGraphSourceIPAllowlist:
)
assert resp.status == 403
@pytest.mark.anyio
async def test_health_endpoint_also_respects_allowlist(self):
"""The readiness endpoint should not leak counters to arbitrary sources."""
adapter = _make_adapter(allowed_source_cidrs=["10.0.0.0/8"])
resp = await adapter._handle_health(_FakeRequest(remote="203.0.113.99"))
assert resp.status == 403
@pytest.mark.anyio
async def test_invalid_cidr_entries_are_ignored_at_init(self):
"""Malformed CIDR strings should log a warning and be ignored, not crash."""

View file

@ -25,6 +25,7 @@ platforms:
msgraph_webhook:
enabled: true
extra:
host: 127.0.0.1
port: 8646
client_state: "replace-with-a-strong-secret"
accepted_resources:
@ -35,6 +36,7 @@ Or via env vars in `~/.hermes/.env` (auto-merged on startup):
```bash
MSGRAPH_WEBHOOK_ENABLED=true
MSGRAPH_WEBHOOK_HOST=127.0.0.1
MSGRAPH_WEBHOOK_PORT=8646
MSGRAPH_WEBHOOK_CLIENT_STATE=<generate-with-openssl-rand-hex-32>
MSGRAPH_WEBHOOK_ACCEPTED_RESOURCES=communications/onlineMeetings
@ -58,14 +60,14 @@ All settings go under `platforms.msgraph_webhook.extra`:
| Setting | Default | Description |
|---------|---------|-------------|
| `host` | `0.0.0.0` | Bind address for the HTTP listener. |
| `host` | `0.0.0.0` | Bind address for the HTTP listener. Non-loopback binds require `allowed_source_cidrs`; loopback (`127.0.0.1` / `::1`) is the easiest dev-tunnel / reverse-proxy setup. |
| `port` | `8646` | Bind port. |
| `webhook_path` | `/msgraph/webhook` | URL path Graph POSTs to. |
| `health_path` | `/health` | Readiness endpoint. |
| `client_state` | — | Shared secret Graph echoes in every notification. Compared with `hmac.compare_digest` — generate with `openssl rand -hex 32`. |
| `accepted_resources` | `[]` (accept all) | Allowlist of Graph resource paths/patterns. Trailing `*` acts as prefix match. Leading `/` is tolerated. Example: `["communications/onlineMeetings", "chats/*/messages"]`. |
| `max_seen_receipts` | `5000` | Dedupe cache size for notification IDs. Oldest entries evicted when the cap is hit. |
| `allowed_source_cidrs` | `[]` (allow all) | Optional source-IP allowlist. See below. |
| `allowed_source_cidrs` | `[]` | Required for non-loopback binds. Leave empty only when the listener is bound to loopback and fronted by a local tunnel / reverse proxy. |
Each setting also has an equivalent env var (`MSGRAPH_WEBHOOK_*`) that merges into the config at gateway startup — see the [environment variables reference](/reference/environment-variables#microsoft-graph-teams-meetings).
@ -75,7 +77,7 @@ Each setting also has an equivalent env var (`MSGRAPH_WEBHOOK_*`) that merges in
Every Graph notification includes the `clientState` string your subscription registered with. The listener rejects any notification whose `clientState` doesn't match, using timing-safe comparison. This is Microsoft's documented mechanism — treat the value as a strong shared secret.
If `client_state` is unset, the listener accepts every well-formed POST. **Don't run without it in production.**
If `client_state` is unset, the listener refuses to start.
### Source-IP allowlisting (production deployments)
@ -86,6 +88,7 @@ platforms:
msgraph_webhook:
enabled: true
extra:
host: 0.0.0.0
client_state: "..."
allowed_source_cidrs:
- "52.96.0.0/14"
@ -99,7 +102,7 @@ Or as an env var:
MSGRAPH_WEBHOOK_ALLOWED_SOURCE_CIDRS="52.96.0.0/14,52.104.0.0/14"
```
Empty allowlist = accept from anywhere (default; preserves dev-tunnel workflows). Invalid CIDR strings log a warning and are ignored. **Review the Microsoft IP list quarterly** — it changes.
Binding a non-loopback host such as `0.0.0.0`, `::`, or a LAN IP without `allowed_source_cidrs` is refused at startup. If you're using a dev tunnel or reverse proxy on the same machine, bind Hermes to `127.0.0.1` or `::1` and leave the allowlist empty there. Invalid CIDR strings log a warning and are ignored. **Review the Microsoft IP list quarterly** — it changes.
### HTTPS termination
@ -107,7 +110,7 @@ The listener speaks plain HTTP. Terminate TLS at your reverse proxy (Caddy, Ngin
### Response hygiene
On success the listener returns `202 Accepted` with an empty body — internal counters stay out of the wire response. Operators can observe counts via `/health`.
On success the listener returns `202 Accepted` with an empty body — internal counters stay out of the wire response. Operators can observe counts via `/health`, which is guarded by the same source-IP rules as the webhook path.
Status code table:
@ -127,8 +130,9 @@ Status code table:
| Graph subscription validation fails | Public URL is reachable, `/msgraph/webhook` path matches, GET with `validationToken` echoes the token verbatim as `text/plain` within 10 seconds. |
| Notifications POST but nothing ingests | `client_state` matches what you registered the subscription with. Re-run `openssl rand -hex 32` and create a new subscription if the value drifted. Check `accepted_resources` includes the resource path Graph is sending. |
| Every notification 403s | `clientState` mismatch (forged, or subscription registered with a different value). Re-create the subscription with `hermes teams-pipeline subscribe --client-state "$MSGRAPH_WEBHOOK_CLIENT_STATE" ...` (ships with the pipeline runtime PR). |
| Listener refuses to start on `0.0.0.0` | Set `allowed_source_cidrs` to Microsoft's current webhook egress ranges, or bind Hermes to `127.0.0.1` / `::1` behind your tunnel or reverse proxy. |
| Listener starts but `curl http://localhost:8646/health` hangs | Port binding collision. Check `ss -tlnp \| grep 8646` and change `port:` if needed. |
| Real Graph requests from Microsoft get 403'd | Source IP allowlist is too narrow. Remove `allowed_source_cidrs` temporarily, confirm traffic flows, then widen the list to include the current Microsoft egress ranges. |
| Real Graph requests from Microsoft get 403'd | Source IP allowlist is too narrow. Widen the list to include the current Microsoft egress ranges. If you're still validating the tunnel path, bind Hermes to loopback and let the tunnel handle public exposure. |
## Related Docs

View file

@ -65,6 +65,7 @@ The webhook listener is a gateway platform named `msgraph_webhook`. At minimum,
```bash
MSGRAPH_WEBHOOK_ENABLED=true
MSGRAPH_WEBHOOK_HOST=127.0.0.1
MSGRAPH_WEBHOOK_PORT=8646
MSGRAPH_WEBHOOK_CLIENT_STATE=<random-shared-secret>
MSGRAPH_WEBHOOK_ACCEPTED_RESOURCES=communications/onlineMeetings
@ -91,6 +92,7 @@ platforms:
msgraph_webhook:
enabled: true
extra:
host: 127.0.0.1
port: 8646
client_state: "replace-me"
accepted_resources:
@ -120,6 +122,8 @@ platforms:
enabled: false
```
If you bind the listener to a non-loopback host such as `0.0.0.0`, you must also set `allowed_source_cidrs` to Microsoft's webhook egress ranges. Loopback binds (`127.0.0.1` / `::1`) are the intended dev-tunnel and local reverse-proxy setup.
## Teams Delivery Modes
The pipeline supports two Teams summary-delivery modes inside the existing Teams plugin.