fix(msgraph): stream download_to_file body instead of buffering

The prior implementation routed download_to_file through the shared _request() path, which uses httpx.AsyncClient.request() inside a context manager that closes before aiter_bytes() iterates. The body was read into memory first, and the chunked write loop replayed it from the buffer. On small test payloads this was invisible; on real Teams meeting recordings (hundreds of MB) it would force the full artifact into RAM per download. This commit rewrites download_to_file to open its own AsyncClient and use client.stream(), keeping the context open across the aiter_bytes iteration so the body is actually streamed chunk-by-chunk to disk. Retry/token-refresh/Retry-After semantics are preserved by handling them inline on the stream path. Partial .part files are cleaned up on transport errors and on exhausted retries. It adds three tests: large-payload streaming verifies the chunk loop runs multiple times (discriminator: 512 KiB at chunk_size=65536 yields 8 chunks under streaming, 1 under buffering), transient-5xx retry recovers after a single retry, and exhausted-retry cleans up the partial file.
2026-05-10 03:22:05 +00:00 · 2026-05-08 08:00:45 -07:00 · 2026-05-08 08:00:45 -07:00 · 45d860d424
commit 45d860d424
parent b878f89f66
2 changed files with 197 additions and 11 deletions
--- a/tests/tools/test_microsoft_graph_client.py
+++ b/tests/tools/test_microsoft_graph_client.py
@ -135,6 +135,111 @@ class TestMicrosoftGraphClient:
        assert result["content_type"] == "video/mp4"
        assert result["size_bytes"] == len(b"meeting-recording")

+    async def test_download_to_file_streams_large_payload_in_chunks(
+        self, tmp_path: Path, monkeypatch
+    ):
+        """Recordings can be hundreds of MB; verify the body is streamed.
+
+        Counting ``aiter_bytes`` chunks alone cannot detect buffering:
+        ``httpx.Response(content=<bytes>)`` is loaded eagerly, and
+        ``aiter_bytes(chunk_size)`` slices an already-buffered body into
+        ``chunk_size`` pieces, so a buffering client would also produce
+        eight chunks for this payload.
+
+        Instead, the handler returns a lazily produced body and the test
+        records, every time ``aiter_bytes`` starts iterating, whether the
+        response stream had already been consumed. A client that buffers
+        (``request()`` / ``aread()``) drains the stream before the download
+        loop runs, so the loop would start on a consumed stream.
+        """
+        piece = b"x" * 65536
+        pieces = 8
+        payload = piece * pieces  # 512 KiB
+
+        async def lazy_body():
+            # Async-iterable content is not pre-loaded by httpx.Response.
+            for _ in range(pieces):
+                yield piece
+
+        def handler(request: httpx.Request) -> httpx.Response:
+            return httpx.Response(
+                200,
+                content=lazy_body(),
+                headers={
+                    "content-type": "video/mp4",
+                    # Eager bytes content gets this header automatically;
+                    # set it explicitly so the fixture's headers are unchanged.
+                    "content-length": str(len(payload)),
+                },
+            )
+
+        chunk_calls: list[int] = []
+        consumed_at_start: list[bool] = []
+        original_aiter_bytes = httpx.Response.aiter_bytes
+
+        async def counting_aiter_bytes(self, chunk_size: int | None = None):
+            # Evaluated on first iteration: True means something already
+            # read the whole body into memory before this loop started.
+            consumed_at_start.append(self.is_stream_consumed)
+            async for chunk in original_aiter_bytes(self, chunk_size):
+                chunk_calls.append(len(chunk))
+                yield chunk
+
+        monkeypatch.setattr(httpx.Response, "aiter_bytes", counting_aiter_bytes)
+
+        client = MicrosoftGraphClient(
+            _make_provider(),
+            transport=httpx.MockTransport(handler),
+        )
+        destination = tmp_path / "big-recording.mp4"
+        result = await client.download_to_file(
+            "/drive/item/content", destination, chunk_size=65536
+        )
+
+        assert destination.read_bytes() == payload
+        assert result["size_bytes"] == len(payload)
+        assert consumed_at_start == [False], (
+            "Expected a single aiter_bytes pass over an unconsumed stream; "
+            f"the body appears to have been buffered first: {consumed_at_start}"
+        )
+        assert len(chunk_calls) >= 2, f"Expected multiple chunks: {chunk_calls}"
+        assert not (tmp_path / "big-recording.mp4.part").exists()
+
+    async def test_download_to_file_retries_on_transient_server_error(
+        self, tmp_path: Path
+    ):
+        """One 503 followed by a 200 is retried once and the file is written."""
+        body = b"payload"
+        attempts: list[int] = []
+        recorded_delays: list[float] = []
+
+        def handler(request: httpx.Request) -> httpx.Response:
+            attempts.append(1)
+            # Only the very first attempt fails; later attempts succeed.
+            if len(attempts) > 1:
+                return httpx.Response(
+                    200,
+                    content=body,
+                    headers={"content-type": "application/octet-stream"},
+                )
+            return httpx.Response(503, json={"error": {"message": "unavailable"}})
+
+        async def fake_sleep(delay: float) -> None:
+            recorded_delays.append(delay)
+
+        graph = MicrosoftGraphClient(
+            _make_provider(),
+            transport=httpx.MockTransport(handler),
+            sleep=fake_sleep,
+            max_retries=2,
+        )
+        target = tmp_path / "artifact.bin"
+        outcome = await graph.download_to_file("/drive/item/content", target)
+
+        assert target.read_bytes() == body
+        assert outcome["size_bytes"] == len(body)
+        assert len(attempts) == 2
+        assert recorded_delays == [0.5]
+        assert not target.with_name("artifact.bin.part").exists()
+
+    async def test_download_to_file_cleans_partial_file_on_exhausted_retries(
+        self, tmp_path: Path
+    ):
+        """Persistent 503s raise and leave neither the file nor a ``.part``."""
+        def handler(request: httpx.Request) -> httpx.Response:
+            error_body = {"error": {"message": "unavailable"}}
+            return httpx.Response(503, json=error_body)
+
+        async def fake_sleep(delay: float) -> None:
+            return None
+
+        target = tmp_path / "artifact.bin"
+        graph = MicrosoftGraphClient(
+            _make_provider(),
+            transport=httpx.MockTransport(handler),
+            sleep=fake_sleep,
+            max_retries=1,
+        )
+        with pytest.raises(MicrosoftGraphAPIError):
+            await graph.download_to_file("/drive/item/content", target)
+        assert not target.exists()
+        assert not target.with_name("artifact.bin.part").exists()
+
    async def test_invalid_json_response_raises_client_error(self):
        def handler(request: httpx.Request) -> httpx.Response:
            return httpx.Response(