test: mock retry backoff and compression sleeps in slow tests

Cuts ~65s off shard 3's local runtime (108s → 48s) by neutralizing
real wall-clock waits in backoff/compression/retry paths. Tests assert
behavior (retry count, final result, error handling), never timing.

Changes:
- tests/run_agent/conftest.py (NEW): autouse fixture mocks
  run_agent.jittered_backoff to 0.0 for all tests in the directory.
  Collapses the `while time.time() < sleep_end` busy-loop to a no-op.
  Does NOT mock time.sleep globally (breaks threading tests).
- test_anthropic_error_handling.py: per-file fixture mocks time.sleep
  and asyncio.sleep for this test's retry paths (6 tests × 10s → ~2s each).
- test_413_compression.py: mocks time.sleep for the 2s compression retry
  pauses (9 tests × 2s → millisecond range).
- test_run_agent_codex_responses.py: mocks time.sleep for Codex retry
  path (6.8s → 0.24s on the empty-output retry test).
- test_fallback_model.py: mocks time.sleep for transport-recovery path.
- test_retaindb_plugin.py: caps retaindb module's time.sleep to 0.05s
  so background writer-thread sleeps don't block tests. Replaces
  arbitrary time.sleep(N) waits with polling loops.

Validation:
- tests/run_agent/ + tests/plugins/test_retaindb_plugin.py: 827 passed,
  0 failed, 22.9s (was ~75s before).
- Matrix shard 3 local: 3098 passed, 48.2s (was 108s).
- No test's timing-assertion contract is changed (tests still verify
  retry happens, just don't wait 5s for it).
This commit is contained in:
Teknium 2026-04-17 13:19:00 -07:00
parent d0e1388ca9
commit 6cae0744f0
No known key found for this signature in database
6 changed files with 164 additions and 15 deletions

View file

@ -31,6 +31,31 @@ def _isolate_env(tmp_path, monkeypatch):
monkeypatch.delenv("RETAINDB_PROJECT", raising=False)
@pytest.fixture(autouse=True)
def _cap_retaindb_sleeps(monkeypatch):
    """Cap production-code sleeps so background-thread tests run fast.

    The retaindb ``_WriteQueue._flush_row`` does ``time.sleep(2)`` after
    errors. Across multiple tests that trigger the retry path, that adds
    up. Cap the module's bound ``time.sleep`` to 0.05s — tests don't care
    about the exact retry delay, only that it happens. The test file's
    own ``time.sleep`` stays real since it uses a different reference.
    """
    try:
        from plugins.memory import retaindb as _retaindb
    except ImportError:
        # Plugin not importable in this environment — nothing to patch.
        return

    import time as _real_time

    real_sleep = _retaindb.time.sleep

    class _CappedTime:
        """Stand-in for the ``time`` module with a capped ``sleep``.

        A plain ``SimpleNamespace(sleep=..., time=...)`` would expose ONLY
        those two names, so any other attribute the plugin touched
        (``monotonic``, ``perf_counter``, ...) would raise AttributeError.
        Delegating unknown attributes to the real module keeps the full
        time-module surface available.
        """

        @staticmethod
        def sleep(seconds):
            # Never wait longer than 50ms, whatever production asks for.
            return real_sleep(min(float(seconds), 0.05))

        def __getattr__(self, name):
            return getattr(_real_time, name)

    monkeypatch.setattr(_retaindb, "time", _CappedTime())
# We need the repo root on sys.path so the plugin can import agent.memory_provider
import sys
_repo_root = str(Path(__file__).resolve().parents[2])
@ -130,16 +155,18 @@ class TestWriteQueue:
def test_enqueue_creates_row(self, tmp_path):
q, client, db_path = self._make_queue(tmp_path)
q.enqueue("user1", "sess1", [{"role": "user", "content": "hi"}])
# Give the writer thread a moment to process
time.sleep(1)
# shutdown() blocks until the writer thread drains the queue — no need
# to pre-sleep (the old 1s sleep was a just-in-case wait, but shutdown
# does the right thing).
q.shutdown()
# If ingest succeeded, the row should be deleted
client.ingest_session.assert_called_once()
def test_enqueue_persists_to_sqlite(self, tmp_path):
client = MagicMock()
# Make ingest hang so the row stays in SQLite
client.ingest_session = MagicMock(side_effect=lambda *a, **kw: time.sleep(5))
# Make ingest slow so the row is still in SQLite when we peek.
# 0.5s is plenty — the test just needs the flush to still be in-flight.
client.ingest_session = MagicMock(side_effect=lambda *a, **kw: time.sleep(0.5))
db_path = tmp_path / "test_queue.db"
q = _WriteQueue(client, db_path)
q.enqueue("user1", "sess1", [{"role": "user", "content": "test"}])
@ -154,8 +181,7 @@ class TestWriteQueue:
def test_flush_deletes_row_on_success(self, tmp_path):
q, client, db_path = self._make_queue(tmp_path)
q.enqueue("user1", "sess1", [{"role": "user", "content": "hi"}])
time.sleep(1)
q.shutdown()
q.shutdown() # blocks until drain
# Row should be gone
conn = sqlite3.connect(str(db_path))
rows = conn.execute("SELECT COUNT(*) FROM pending").fetchone()[0]
@ -168,14 +194,20 @@ class TestWriteQueue:
db_path = tmp_path / "test_queue.db"
q = _WriteQueue(client, db_path)
q.enqueue("user1", "sess1", [{"role": "user", "content": "hi"}])
time.sleep(3) # Allow retry + sleep(2) in _flush_row
# Poll for the error to be recorded (max 2s), instead of a fixed 3s wait.
deadline = time.time() + 2.0
last_error = None
while time.time() < deadline:
conn = sqlite3.connect(str(db_path))
row = conn.execute("SELECT last_error FROM pending").fetchone()
conn.close()
if row and row[0]:
last_error = row[0]
break
time.sleep(0.05)
q.shutdown()
# Row should still exist with error recorded
conn = sqlite3.connect(str(db_path))
row = conn.execute("SELECT last_error FROM pending").fetchone()
conn.close()
assert row is not None
assert "API down" in row[0]
assert last_error is not None
assert "API down" in last_error
def test_thread_local_connection_reuse(self, tmp_path):
q, _, _ = self._make_queue(tmp_path)
@ -193,14 +225,27 @@ class TestWriteQueue:
client1.ingest_session = MagicMock(side_effect=RuntimeError("fail"))
q1 = _WriteQueue(client1, db_path)
q1.enqueue("user1", "sess1", [{"role": "user", "content": "lost turn"}])
time.sleep(3)
# Wait until the error is recorded (poll with short interval).
deadline = time.time() + 2.0
while time.time() < deadline:
conn = sqlite3.connect(str(db_path))
row = conn.execute("SELECT last_error FROM pending").fetchone()
conn.close()
if row and row[0]:
break
time.sleep(0.05)
q1.shutdown()
# Now create a new queue — it should replay the pending rows
client2 = MagicMock()
client2.ingest_session = MagicMock(return_value={"status": "ok"})
q2 = _WriteQueue(client2, db_path)
time.sleep(2)
# Poll for the replay to happen.
deadline = time.time() + 2.0
while time.time() < deadline:
if client2.ingest_session.called:
break
time.sleep(0.05)
q2.shutdown()
# The replayed row should have been ingested via client2

View file

@ -0,0 +1,34 @@
"""Fast-path fixtures shared across tests/run_agent/.
Many tests in this directory exercise the retry/backoff paths in the
agent loop. Production code uses ``jittered_backoff(base_delay=5.0)``
with a ``while time.time() < sleep_end`` loop a single retry test
spends 5+ seconds of real wall-clock time on backoff waits.
Mocking ``jittered_backoff`` to return 0.0 collapses the while-loop
to a no-op (``time.time() < time.time() + 0`` is false immediately),
which handles the most common case without touching ``time.sleep``.
We deliberately DO NOT mock ``time.sleep`` here — some tests
(test_interrupt_propagation, test_primary_runtime_restore, etc.) use
the real ``time.sleep`` for threading coordination or assert that it
was called with specific values. Tests that want to additionally
fast-path direct ``time.sleep(N)`` calls in production code should
monkeypatch ``run_agent.time.sleep`` locally (see
``test_anthropic_error_handling.py`` for the pattern).
"""
from __future__ import annotations
import pytest
@pytest.fixture(autouse=True)
def _fast_retry_backoff(monkeypatch):
    """Replace run_agent.jittered_backoff with a zero-delay stub for every test here."""
    try:
        import run_agent
    except ImportError:
        # run_agent is not importable in this environment — nothing to patch.
        return

    def _zero_backoff(*_args, **_kwargs):
        return 0.0

    monkeypatch.setattr(run_agent, "jittered_backoff", _zero_backoff)

View file

@ -19,6 +19,24 @@ import pytest
from agent.context_compressor import SUMMARY_PREFIX
from run_agent import AIAgent
import run_agent
# ---------------------------------------------------------------------------
# Fast backoff for compression retry tests
# ---------------------------------------------------------------------------
@pytest.fixture(autouse=True)
def _no_compression_sleep(monkeypatch):
    """Short-circuit the 2s time.sleep between compression retries.

    Production code has ``time.sleep(2)`` in multiple places after a 413/context
    compression, for rate-limit smoothing. Tests assert behavior, not timing.
    """
    import time as _time

    def _skip_sleep(*_args, **_kwargs):
        # Swallow any sleep request — the delay is irrelevant to the assertions.
        return None

    def _zero_backoff(*_args, **_kwargs):
        return 0.0

    monkeypatch.setattr(_time, "sleep", _skip_sleep)
    monkeypatch.setattr(run_agent, "jittered_backoff", _zero_backoff)
# ---------------------------------------------------------------------------

View file

@ -27,6 +27,39 @@ from gateway.config import Platform
from gateway.session import SessionSource
# ---------------------------------------------------------------------------
# Fast backoff for tests that exercise the retry loop
# ---------------------------------------------------------------------------
@pytest.fixture(autouse=True)
def _no_backoff_wait(monkeypatch):
    """Short-circuit retry backoff so tests don't block on real wall-clock waits.

    The production code uses jittered_backoff() with a 5s base delay plus a
    tight time.sleep(0.2) loop. Without this patch, each 429/500/529 retry
    test burns ~10s of real time on CI — across six tests that's ~60s for
    behavior we're not asserting against timing.

    Tests assert retry counts and final results, never wait durations.
    """
    import asyncio as _asyncio
    import time as _time

    monkeypatch.setattr(run_agent, "jittered_backoff", lambda *a, **k: 0.0)
    monkeypatch.setattr(_time, "sleep", lambda *_a, **_k: None)

    # Also fast-path asyncio.sleep — the gateway's _run_agent path has
    # several await asyncio.sleep(...) calls that add real wall-clock time.
    _real_asyncio_sleep = _asyncio.sleep

    async def _fast_sleep(delay=0, result=None, *args, **kwargs):
        # Yield to the event loop but skip the actual delay. Unlike a bare
        # ``return None`` stub, honor asyncio.sleep's contract of returning
        # ``result`` so code written as ``x = await asyncio.sleep(d, x)``
        # still gets its value back.
        await _real_asyncio_sleep(0)
        return result

    monkeypatch.setattr(_asyncio, "sleep", _fast_sleep)
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

View file

@ -11,6 +11,16 @@ from unittest.mock import MagicMock, patch
import pytest
from run_agent import AIAgent
import run_agent
@pytest.fixture(autouse=True)
def _no_fallback_wait(monkeypatch):
    """Short-circuit time.sleep in fallback/recovery paths so tests don't
    block on the ``min(3 + retry_count, 8)`` wait before a primary retry."""
    import time as _time

    def _skip_sleep(*_args, **_kwargs):
        # Ignore the requested delay entirely.
        return None

    def _zero_backoff(*_args, **_kwargs):
        return 0.0

    monkeypatch.setattr(_time, "sleep", _skip_sleep)
    monkeypatch.setattr(run_agent, "jittered_backoff", _zero_backoff)
def _make_tool_defs(*names: str) -> list:

View file

@ -12,6 +12,15 @@ sys.modules.setdefault("fal_client", types.SimpleNamespace())
import run_agent
@pytest.fixture(autouse=True)
def _no_codex_backoff(monkeypatch):
    """Short-circuit retry backoff so Codex retry tests don't block on real
    wall-clock waits (5s jittered_backoff base delay + tight time.sleep loop)."""
    import time as _time

    def _zero_backoff(*_args, **_kwargs):
        return 0.0

    def _skip_sleep(*_args, **_kwargs):
        return None

    monkeypatch.setattr(run_agent, "jittered_backoff", _zero_backoff)
    monkeypatch.setattr(_time, "sleep", _skip_sleep)
def _patch_agent_bootstrap(monkeypatch):
monkeypatch.setattr(
run_agent,