fix(gateway/api_server): deduplicate concurrent idempotent requests

This commit is contained in:
Yukipukii1 2026-04-21 03:12:57 +03:00 committed by Teknium
parent f81c0394d0
commit 3f10c27cc0
2 changed files with 113 additions and 5 deletions

View file

@@ -12,6 +12,7 @@ Tests cover:
- Error handling (invalid JSON, missing fields)
"""
import asyncio
import json
import time
import uuid
@@ -25,6 +26,7 @@ from gateway.config import GatewayConfig, Platform, PlatformConfig
from gateway.platforms.api_server import (
APIServerAdapter,
ResponseStore,
_IdempotencyCache,
_CORS_HEADERS,
_derive_chat_session_id,
check_api_server_requirements,
@@ -104,6 +106,95 @@ class TestResponseStore:
assert store.delete("resp_missing") is False
# ---------------------------------------------------------------------------
# _IdempotencyCache
# ---------------------------------------------------------------------------
class TestIdempotencyCache:
    """Concurrency tests for ``_IdempotencyCache.get_or_set``.

    Each test parks the compute coroutine on an ``asyncio.Event`` so that
    several ``get_or_set`` calls overlap while the computation is still
    in flight, then counts how many times the coroutine was entered.
    """

    @pytest.mark.asyncio
    async def test_concurrent_same_key_and_fingerprint_runs_once(self):
        """Overlapping calls with one key and fingerprint share a single run."""
        idem_cache = _IdempotencyCache()
        release = asyncio.Event()
        entered = asyncio.Event()
        invocations = 0

        async def compute():
            nonlocal invocations
            invocations += 1
            entered.set()
            # Stay in flight until the test body lets the computation finish.
            await release.wait()
            return ("response", {"total_tokens": 1})

        leader = asyncio.create_task(
            idem_cache.get_or_set("idem-key", "fp-1", compute)
        )
        follower = asyncio.create_task(
            idem_cache.get_or_set("idem-key", "fp-1", compute)
        )

        await entered.wait()
        assert invocations == 1

        release.set()
        leader_result, follower_result = await asyncio.gather(leader, follower)
        assert leader_result == follower_result
        assert follower_result == ("response", {"total_tokens": 1})

    @pytest.mark.asyncio
    async def test_different_fingerprint_does_not_reuse_inflight_task(self):
        """Same key with a different fingerprint triggers its own computation."""
        idem_cache = _IdempotencyCache()
        release = asyncio.Event()
        both_entered = asyncio.Event()
        invocations = 0

        async def compute():
            nonlocal invocations
            invocations += 1
            # Each invocation returns its own ordinal so the results differ.
            ordinal = invocations
            if invocations == 2:
                # Only signal once the second, independent run has begun.
                both_entered.set()
            await release.wait()
            return ordinal

        fp1_task = asyncio.create_task(
            idem_cache.get_or_set("idem-key", "fp-1", compute)
        )
        fp2_task = asyncio.create_task(
            idem_cache.get_or_set("idem-key", "fp-2", compute)
        )

        await both_entered.wait()
        assert invocations == 2

        release.set()
        outcomes = await asyncio.gather(fp1_task, fp2_task)
        assert sorted(outcomes) == [1, 2]

    @pytest.mark.asyncio
    async def test_cancelled_waiter_does_not_drop_shared_inflight_task(self):
        """Cancelling one waiter leaves the in-flight work usable by the next."""
        idem_cache = _IdempotencyCache()
        release = asyncio.Event()
        entered = asyncio.Event()
        invocations = 0

        async def compute():
            nonlocal invocations
            invocations += 1
            entered.set()
            await release.wait()
            return "response"

        cancelled_waiter = asyncio.create_task(
            idem_cache.get_or_set("idem-key", "fp-1", compute)
        )
        await entered.wait()
        assert invocations == 1

        # Cancel the original caller while compute is still blocked.
        cancelled_waiter.cancel()
        with pytest.raises(asyncio.CancelledError):
            await cancelled_waiter

        late_waiter = asyncio.create_task(
            idem_cache.get_or_set("idem-key", "fp-1", compute)
        )
        # Yield once so the new waiter gets a chance to run before checking.
        await asyncio.sleep(0)
        assert invocations == 1

        release.set()
        assert await late_waiter == "response"
# ---------------------------------------------------------------------------
# Adapter initialization
# ---------------------------------------------------------------------------