diff --git a/tests/run_agent/test_24996_fallback_exhaustion_cooldown.py b/tests/run_agent/test_24996_fallback_exhaustion_cooldown.py
index 76481154630..83991c2471d 100644
--- a/tests/run_agent/test_24996_fallback_exhaustion_cooldown.py
+++ b/tests/run_agent/test_24996_fallback_exhaustion_cooldown.py
@@ -15,9 +15,7 @@ restore stays gated (and does NOT reset the index) until the cooldown clears.
 Rate-limit / billing failures keep their own 60s cooldown and are unaffected.
 """
 
-import time
 from unittest.mock import MagicMock, patch
-
 from run_agent import AIAgent
 from agent.error_classifier import FailoverReason
 from agent.chat_completion_helpers import _FALLBACK_EXHAUSTED_COOLDOWN_S
@@ -51,31 +49,35 @@ def _mock_client(base_url="https://openrouter.ai/api/v1", api_key="fb-key"):
 class TestExhaustionArmsCooldown:
     def test_non_retryable_exhaustion_arms_cooldown(self):
         """Walking a non-empty chain to exhaustion on a non-rate-limit
-        failure arms a short ``_rate_limited_until`` cooldown."""
+        failure arms a short ``_rate_limited_until`` cooldown.
+
+        ``time.monotonic`` is frozen inside ``chat_completion_helpers`` so the
+        cooldown math is exact and independent of CI scheduling latency — the
+        previous wall-clock upper bound (``before + window + 1.0``) flaked on
+        loaded runners when the three activation calls took longer than 1s.
+        """
         fbs = [
             {"provider": "openai", "model": "gpt-4o"},
             {"provider": "zai", "model": "glm-4.7"},
         ]
         agent = _make_agent(fallback_model=fbs)
         agent._rate_limited_until = 0
-        before = time.monotonic()
-        with patch(
-            "agent.auxiliary_client.resolve_provider_client",
-            return_value=(_mock_client(), "resolved"),
+        frozen = 1_000.0
+        with (
+            patch("agent.chat_completion_helpers.time.monotonic", return_value=frozen),
+            patch(
+                "agent.auxiliary_client.resolve_provider_client",
+                return_value=(_mock_client(), "resolved"),
+            ),
         ):
             assert agent._try_activate_fallback() is True   # -> entry 0
             assert agent._try_activate_fallback() is True   # -> entry 1
             # Chain now exhausted; a non-rate-limit failure must arm cooldown.
             assert agent._try_activate_fallback() is False
-        cooldown = getattr(agent, "_rate_limited_until", 0)
-        assert cooldown > before
-        # Cooldown is the short exhaustion window, not the 60s rate-limit one.
-        # Use a generous upper slack: the only thing this must distinguish is
-        # the ~5s short window from the 60s rate-limit window, so any margin
-        # well below 60s proves it. A tight +1.0s false-fails on a loaded CI
-        # runner when wall-clock jitter between `before` and the cooldown
-        # computation exceeds a second (GC pause, swap, scheduler contention).
-        assert cooldown <= before + _FALLBACK_EXHAUSTED_COOLDOWN_S + 30.0
+            cooldown = getattr(agent, "_rate_limited_until", 0)
+        # Cooldown is exactly the short exhaustion window past the frozen clock,
+        # not the 60s rate-limit one.
+        assert cooldown == frozen + _FALLBACK_EXHAUSTED_COOLDOWN_S
 
     def test_no_chain_does_not_arm_cooldown(self):
         """An empty chain (no fallback configured) must not arm a cooldown —
@@ -92,30 +94,39 @@ class TestExhaustionArmsCooldown:
         fbs = [{"provider": "openai", "model": "gpt-4o"}]
         agent = _make_agent(fallback_model=fbs)
         agent._rate_limited_until = 0
-        before = time.monotonic()
-        with patch(
-            "agent.auxiliary_client.resolve_provider_client",
-            return_value=(_mock_client(), "resolved"),
+        frozen = 1_000.0
+        with (
+            patch("agent.chat_completion_helpers.time.monotonic", return_value=frozen),
+            patch(
+                "agent.auxiliary_client.resolve_provider_client",
+                return_value=(_mock_client(), "resolved"),
+            ),
         ):
             # First activation with rate_limit reason arms the 60s cooldown.
             assert agent._try_activate_fallback(reason=FailoverReason.rate_limit) is True
             # Chain exhausted on the next call (also rate_limit) -> still False,
             # and the 60s cooldown must survive (max(), not overwritten down).
             assert agent._try_activate_fallback(reason=FailoverReason.rate_limit) is False
-        cooldown = getattr(agent, "_rate_limited_until", 0)
-        assert cooldown > before + 50  # ~60s, far past the short window
+            cooldown = getattr(agent, "_rate_limited_until", 0)
+        # ~60s past the frozen clock, far past the short exhaustion window.
+        assert cooldown == frozen + 60
 
     def test_cooldown_never_shrinks_existing_window(self):
         """If a longer cooldown is already armed, exhaustion must not reduce
         it (we take the max)."""
         fbs = [{"provider": "openai", "model": "gpt-4o"}]
         agent = _make_agent(fallback_model=fbs)
-        far_future = time.monotonic() + 999
+        frozen = 1_000.0
+        far_future = frozen + 999
         agent._rate_limited_until = far_future
-        with patch(
-            "agent.auxiliary_client.resolve_provider_client",
-            return_value=(_mock_client(), "resolved"),
+        with (
+            patch("agent.chat_completion_helpers.time.monotonic", return_value=frozen),
+            patch(
+                "agent.auxiliary_client.resolve_provider_client",
+                return_value=(_mock_client(), "resolved"),
+            ),
         ):
             assert agent._try_activate_fallback() is True
             assert agent._try_activate_fallback() is False
-        assert getattr(agent, "_rate_limited_until", 0) >= far_future
+            cooldown = getattr(agent, "_rate_limited_until", 0)
+        assert cooldown == far_future