fix(copilot): send vision header for Copilot vision requests

Thread a vision-request flag through auxiliary provider resolution so Copilot clients can include Copilot-Vision-Request only for vision tasks. This preserves normal text requests while ensuring Copilot vision payloads reach the vision-capable route.

Add regression coverage for Copilot vision routing and keep cached text and vision clients separate so a text client without the header is not reused for vision.

Co-authored-by: dhabibi <9087935+dhabibi@users.noreply.github.com>
This commit is contained in:
hermes-agent-dhabibi 2026-04-26 21:29:55 +00:00 committed by Teknium
parent 512c610058
commit 8402ba150e
3 changed files with 156 additions and 40 deletions

View file

@@ -103,7 +103,7 @@ class TestCleanupStaleAsyncClients:
mock_client._client = MagicMock()
mock_client._client.is_closed = False
-key = ("test_stale", True, "", "", "", ())
+key = ("test_stale", True, "", "", "", (), False)
with _client_cache_lock:
_client_cache[key] = (mock_client, "test-model", loop)
@@ -127,7 +127,7 @@ class TestCleanupStaleAsyncClients:
loop = asyncio.new_event_loop() # NOT closed
mock_client = MagicMock()
-key = ("test_live", True, "", "", "", ())
+key = ("test_live", True, "", "", "", (), False)
with _client_cache_lock:
_client_cache[key] = (mock_client, "test-model", loop)
@@ -149,7 +149,7 @@ class TestCleanupStaleAsyncClients:
)
mock_client = MagicMock()
-key = ("test_sync", False, "", "", "", ())
+key = ("test_sync", False, "", "", "", (), False)
with _client_cache_lock:
_client_cache[key] = (mock_client, "test-model", None)
@@ -182,7 +182,7 @@ class TestClientCacheBoundedGrowth:
_get_cached_client,
)
-key = ("test_replace", True, "", "", "", ())
+key = ("test_replace", True, "", "", "", (), False)
# Simulate a stale entry from a closed loop
old_loop = asyncio.new_event_loop()
@@ -217,7 +217,7 @@ class TestClientCacheBoundedGrowth:
_client_cache_lock,
)
-key = ("test_no_grow", True, "", "", "", ())
+key = ("test_no_grow", True, "", "", "", (), False)
loops = []
try:
@@ -269,7 +269,7 @@ class TestClientCacheBoundedGrowth:
mock_client = MagicMock()
mock_client._client = MagicMock()
mock_client._client.is_closed = False
-key = (f"evict_test_{i}", False, "", "", "", ())
+key = (f"evict_test_{i}", False, "", "", "", (), False)
with _client_cache_lock:
# Inline the eviction logic (same as _get_cached_client)
while len(_client_cache) >= _CLIENT_CACHE_MAX_SIZE:
@@ -281,9 +281,9 @@ class TestClientCacheBoundedGrowth:
assert len(_client_cache) <= _CLIENT_CACHE_MAX_SIZE, \
f"Cache size {len(_client_cache)} exceeds max {_CLIENT_CACHE_MAX_SIZE}"
# The earliest entries should have been evicted
-assert ("evict_test_0", False, "", "", "", ()) not in _client_cache
+assert ("evict_test_0", False, "", "", "", (), False) not in _client_cache
# The latest entries should be present
-assert (f"evict_test_{_CLIENT_CACHE_MAX_SIZE + 4}", False, "", "", "", ()) in _client_cache
+assert (f"evict_test_{_CLIENT_CACHE_MAX_SIZE + 4}", False, "", "", "", (), False) in _client_cache
finally:
with _client_cache_lock:
_client_cache.clear()