From 6cbda6c1027468552c404cac5a399fe30133bf96 Mon Sep 17 00:00:00 2001 From: sgaofen <135070653+sgaofen@users.noreply.github.com> Date: Mon, 20 Apr 2026 22:08:47 -0700 Subject: [PATCH] fix(webhook): cap chunked request bodies --- gateway/platforms/webhook.py | 41 ++++++++++++++++++++++----- tests/gateway/test_webhook_adapter.py | 41 +++++++++++++++++++++++++++ 2 files changed, 75 insertions(+), 7 deletions(-) diff --git a/gateway/platforms/webhook.py b/gateway/platforms/webhook.py index e3a736a45..b01f05d06 100644 --- a/gateway/platforms/webhook.py +++ b/gateway/platforms/webhook.py @@ -60,6 +60,10 @@ _INSECURE_NO_AUTH = "INSECURE_NO_AUTH" _DYNAMIC_ROUTES_FILENAME = "webhook_subscriptions.json" +class _PayloadTooLarge(ValueError): + """Raised when an inbound webhook body exceeds the configured limit.""" + + def check_webhook_requirements() -> bool: """Check if webhook adapter dependencies are available.""" return AIOHTTP_AVAILABLE @@ -259,6 +263,31 @@ class WebhookAdapter(BasePlatformAdapter): """GET /health — simple health check.""" return web.json_response({"status": "ok", "platform": "webhook"}) + async def _read_body_with_limit(self, request: "web.Request") -> bytes: + """Read a webhook body while enforcing max_body_bytes for chunked uploads.""" + content_length = request.content_length + if content_length is not None and content_length > self._max_body_bytes: + raise _PayloadTooLarge + + if content_length is None: + content = getattr(request, "content", None) + iter_chunked = getattr(content, "iter_chunked", None) + if iter_chunked is not None: + chunks: list[bytes] = [] + total = 0 + chunk_size = min(64 * 1024, self._max_body_bytes + 1) + async for chunk in iter_chunked(chunk_size): + total += len(chunk) + if total > self._max_body_bytes: + raise _PayloadTooLarge + chunks.append(bytes(chunk)) + return b"".join(chunks) + + raw_body = await request.read() + if len(raw_body) > self._max_body_bytes: + raise _PayloadTooLarge + return raw_body + def _reload_dynamic_routes(self) -> None: """Reload agent-created subscriptions from disk if the file changed.""" from hermes_constants import get_hermes_home @@ -306,16 +335,14 @@ class WebhookAdapter(BasePlatformAdapter): ) # ── Auth-before-body ───────────────────────────────────── - # Check Content-Length before reading the full payload. - content_length = request.content_length or 0 - if content_length > self._max_body_bytes: + # Enforce max size before reading known-length bodies and while + # streaming chunked/no-length bodies. + try: + raw_body = await self._read_body_with_limit(request) + except _PayloadTooLarge: return web.json_response( {"error": "Payload too large"}, status=413 ) - - # Read body (must be done before any validation) - try: - raw_body = await request.read() except Exception as e: logger.error("[webhook] Failed to read body: %s", e) return web.json_response({"error": "Bad request"}, status=400) diff --git a/tests/gateway/test_webhook_adapter.py b/tests/gateway/test_webhook_adapter.py index bedf254a1..49fd3f849 100644 --- a/tests/gateway/test_webhook_adapter.py +++ b/tests/gateway/test_webhook_adapter.py @@ -88,6 +88,29 @@ def _mock_request(headers=None, body=b"", content_length=None, match_info=None): return req +class _ChunkedBody: + def __init__(self, chunks): + self._chunks = chunks + + async def iter_chunked(self, _chunk_size): + for chunk in self._chunks: + yield chunk + + +class _ChunkedRequest: + def __init__(self, *, chunks, headers=None, match_info=None): + self.headers = headers or {} + self.content_length = None + self.match_info = match_info or {} + self.method = "POST" + self.content = _ChunkedBody(chunks) + self.read_called = False + + async def read(self): + self.read_called = True + return b"".join(self.content._chunks) + + def _github_signature(body: bytes, secret: str) -> str: """Compute X-Hub-Signature-256 for *body* using *secret*.""" return "sha256=" + hmac.new( @@ -516,6 +539,24 @@ class TestBodySize: ) assert resp.status == 413 + @pytest.mark.asyncio + async def test_chunked_payload_without_content_length_rejected(self): + """Chunked/no-length bodies are capped while streaming.""" + routes = {"big": {"secret": _INSECURE_NO_AUTH, "prompt": "test"}} + adapter = _make_adapter(routes=routes, max_body_bytes=100) + adapter.handle_message = AsyncMock() + request = _ChunkedRequest( + chunks=[b'{"data":"', b"x" * 128, b'"}'], + headers={"Content-Type": "application/json"}, + match_info={"route_name": "big"}, + ) + + resp = await adapter._handle_webhook(request) + + assert resp.status == 413 + assert request.read_called is False + adapter.handle_message.assert_not_called() + # =================================================================== # INSECURE_NO_AUTH