fix(gateway): harden kanban and provider cleanup races

This commit is contained in:
helix4u 2026-05-20 14:58:01 -06:00 committed by Teknium
parent 31a0100104
commit 1a7bb988fc
6 changed files with 259 additions and 101 deletions

View file

@ -48,6 +48,27 @@ def test_init_creates_expected_tables(kanban_home):
assert {"tasks", "task_links", "task_comments", "task_events"} <= names
def test_connect_rejects_tls_record_in_sqlite_header(tmp_path, monkeypatch):
    """Kanban should classify TLS-looking page-0 clobbers before WAL setup.

    A ``kanban.db`` file is planted whose first five bytes are the start of
    the SQLite magic (``SQLit``) followed by ``17 03 03 00 13`` and padding.
    ``kb.connect`` must refuse it with ``sqlite3.DatabaseError`` and the
    message must name the offset and show a hex dump of the first ten bytes.
    """
    # Point every home/location lookup at an isolated temp directory so the
    # board resolved for "default" is the file planted below.
    hermes_home = tmp_path / ".hermes"
    hermes_home.mkdir()
    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
    for override in ("HERMES_KANBAN_DB", "HERMES_KANBAN_HOME"):
        monkeypatch.delenv(override, raising=False)
    monkeypatch.setattr(Path, "home", lambda: tmp_path)

    # Truncated SQLite magic, then a TLS-looking record header at offset 5.
    tls_record_header = bytes.fromhex("17 03 03 00 13")
    clobbered_db = hermes_home / "kanban.db"
    clobbered_db.write_bytes(b"SQLit" + tls_record_header + b"x" * 32)

    with pytest.raises(sqlite3.DatabaseError) as exc_info:
        kb.connect(board="default")

    message = str(exc_info.value)
    assert "file is not a database" in message
    assert "TLS record header detected at byte offset 5" in message
    # Hex of b"SQLit" followed by the five injected header bytes.
    assert "53 51 4c 69 74 17 03 03 00 13" in message
def test_connect_migrates_legacy_db_before_optional_column_indexes(tmp_path):
"""Legacy DBs missing additive indexed columns must migrate cleanly.

View file

@ -16,6 +16,7 @@ with ``APIConnectionError('Connection error.')`` whose cause was
That is the exact scenario this test reproduces at object level without a
network, so it runs in CI on every PR.
"""
from types import SimpleNamespace
from unittest.mock import MagicMock, patch
from run_agent import AIAgent
@ -186,3 +187,32 @@ def test_replace_primary_openai_client_survives_repeated_rebuilds():
"Some _create_openai_client calls returned the same object across "
"a teardown — rebuild is not producing fresh clients"
)
def test_force_close_tcp_sockets_descends_httpcore_1_connection_wrapper():
    """httpcore 1.x stores the real stream below conn._connection.

    Builds a fake object graph shaped as
    ``client._client._transport._pool._connections[0]._connection
    ._network_stream._sock`` and checks that ``force_close_tcp_sockets``
    reports one socket and calls ``shutdown`` and ``close`` on it exactly
    once each.
    """
    from agent.agent_runtime_helpers import force_close_tcp_sockets

    class FakeSocket:
        """Socket stand-in that only counts shutdown/close invocations."""

        def __init__(self):
            self.shutdown_calls = 0
            self.close_calls = 0

        def shutdown(self, _how):
            self.shutdown_calls += 1

        def close(self):
            self.close_calls += 1

    fake_sock = FakeSocket()

    # Assemble the wrapper chain from the innermost layer outwards.
    inner_connection = SimpleNamespace(
        _network_stream=SimpleNamespace(_sock=fake_sock)
    )
    connection_pool = SimpleNamespace(
        _connections=[SimpleNamespace(_connection=inner_connection)]
    )
    fake_openai_client = SimpleNamespace(
        _client=SimpleNamespace(
            _transport=SimpleNamespace(_pool=connection_pool)
        )
    )

    closed_count = force_close_tcp_sockets(fake_openai_client)

    assert closed_count == 1
    assert fake_sock.shutdown_calls == 1
    assert fake_sock.close_calls == 1

View file

@ -1,5 +1,6 @@
import sys
import threading
import time
import types
from types import SimpleNamespace
@ -64,6 +65,7 @@ def _build_agent(shared_client=None):
agent.stream_delta_callback = None
agent._stream_callback = None
agent.reasoning_callback = None
agent.status_callback = None
return agent
@ -93,6 +95,24 @@ def test_retry_after_api_connection_error_recreates_request_client(monkeypatch):
assert second_request.close_calls >= 1
def test_stale_non_stream_close_is_single_owner(monkeypatch):
    """A stale non-streaming request must close its client exactly once.

    The fake responder sleeps for 0.1 s and then raises a connection error,
    while the agent's stale timeout is forced down to 0.01 s. The call is
    expected to surface ``APIConnectionError`` and the request client must
    record a single ``close`` call.
    """

    def slow_responder(**kwargs):
        # Outlast the 0.01 s stale timeout before failing.
        # NOTE(review): presumably this lets the stale-call path and the
        # error path both reach cleanup -- confirm against run_agent.
        time.sleep(0.1)
        raise _connection_error()

    def tiny_stale_timeout(_messages):
        return 0.01

    request_client = FakeRequestClient(slow_responder)
    monkeypatch.setattr(run_agent, "OpenAI", OpenAIFactory([request_client]))

    agent = _build_agent()
    agent._compute_non_stream_stale_timeout = tiny_stale_timeout

    with pytest.raises(APIConnectionError):
        agent._interruptible_api_call({"model": agent.model, "messages": []})

    assert request_client.close_calls == 1
def test_closed_shared_client_is_recreated_before_request(monkeypatch):
stale_shared = FakeSharedClient(lambda **kwargs: (_ for _ in ()).throw(AssertionError("stale shared client used")))
stale_shared._client.is_closed = True