feat: add streaming LLM response support across all platforms

Cherry-picked from PR #828, resolved conflicts with main.
This commit is contained in:
teknium1 2026-03-11 08:56:37 -07:00
parent b2a4092783
commit 95d221c31c
6 changed files with 696 additions and 22 deletions

View file

@@ -314,11 +314,18 @@ class TestChatCompletionsEndpoint:
"""stream=true returns SSE format with the full response."""
app = _create_app(adapter)
async with TestClient(TestServer(app)) as cli:
with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run:
mock_run.return_value = (
async def _mock_run_agent(**kwargs):
# Simulate streaming: invoke stream_callback with tokens
cb = kwargs.get("stream_callback")
if cb:
cb("Hello!")
cb(None) # End signal
return (
{"final_response": "Hello!", "messages": [], "api_calls": 1},
{"input_tokens": 10, "output_tokens": 5, "total_tokens": 15},
)
with patch.object(adapter, "_run_agent", side_effect=_mock_run_agent) as mock_run:
resp = await cli.post(
"/v1/chat/completions",
json={