fix(agent): keep cooldown and lock refresh on one authority (#54465)

This commit is contained in:
Rod Boev 2026-06-28 19:54:03 -04:00 committed by kshitij
parent f2ccb2859f
commit 53ef954841
4 changed files with 31 additions and 9 deletions

View file

@ -1550,7 +1550,7 @@ Summary generation was unavailable, so this is a best-effort deterministic fallb
self._last_aux_model_failure_error = _err_text
self._last_aux_model_failure_model = self.summary_model
self.summary_model = "" # empty = use main model
self._summary_failure_cooldown_until = 0.0 # no cooldown — retry immediately
self._clear_compression_failure_cooldown() # no cooldown — retry immediately
def _generate_summary(
self,

View file

@ -521,13 +521,14 @@ def compress_context(
if not _existing_sp:
_existing_sp = agent._build_system_prompt(system_message)
return messages, _existing_sp
_lock_refresher = _CompressionLockLeaseRefresher(
_lock_db,
_lock_sid,
_lock_holder,
_lock_ttl,
_lock_refresh_interval,
).start()
if _lock_holder is not None:
_lock_refresher = _CompressionLockLeaseRefresher(
_lock_db,
_lock_sid,
_lock_holder,
_lock_ttl,
_lock_refresh_interval,
).start()
def _release_lock() -> None:
"""Release the lock keyed on the OLD session_id (before rotation)."""

View file

@ -258,7 +258,7 @@ class _NoLockSubsystemDB:
return getattr(self._real, name)
def test_missing_lock_subsystem_fails_open_not_infinite_loop(tmp_path: Path) -> None:
def test_missing_lock_subsystem_fails_open_not_infinite_loop(tmp_path: Path, monkeypatch) -> None:
"""Version skew (no lock methods) must fail OPEN, not raise into the loop.
Reproduces the "API call #47/#48/#49 ... has no attribute
@ -275,6 +275,12 @@ def test_missing_lock_subsystem_fails_open_not_infinite_loop(tmp_path: Path) ->
# Swap in the lock-less wrapper AFTER construction (the agent already
# holds a normal db reference; we only break the lock methods).
agent._session_db = _NoLockSubsystemDB(db)
monkeypatch.setattr(
"agent.conversation_compression._CompressionLockLeaseRefresher",
lambda *_a, **_k: (_ for _ in ()).throw(
AssertionError("lock refresher should not start on fail-open lock skew")
),
)
messages = [{"role": "user", "content": f"m{i}"} for i in range(20)]

View file

@ -1487,6 +1487,21 @@ class TestAbortOnSummaryFailure:
assert len(result) < len(msgs)
assert db.get_compression_failure_cooldown("s1") is None
def test_aux_fallback_clears_persisted_session_cooldown_before_retry(self, tmp_path):
    """Falling back off the aux summary model must also clear the stored cooldown.

    Seeds a compression-failure cooldown for session "s1" in the session
    DB, binds a compressor to that session, triggers the fallback, and
    then checks that the summary model is reset to "", the in-memory
    cooldown is 0.0, and the DB no longer reports a cooldown for "s1".
    """
    # Arrange: a session with a cooldown recorded in the DB.
    session_db = SessionDB(db_path=tmp_path / "state.db")
    session_db.create_session("s1", "cli")
    cooldown_deadline = time.time() + 999.0
    session_db.record_compression_failure_cooldown("s1", cooldown_deadline, "timeout")

    # Arrange: a compressor bound to that session, using an aux summary model.
    compressor = self._make_compressor()
    compressor.bind_session_state(session_db, "s1")
    compressor.summary_model = "aux/model"

    # Act: simulate the aux provider failing.
    provider_error = Exception("provider down")
    compressor._fallback_to_main_for_compression(provider_error, "failed")

    # Assert: model reset, and both in-memory and stored cooldowns cleared.
    assert compressor.summary_model == ""
    assert compressor._summary_failure_cooldown_until == 0.0
    assert session_db.get_compression_failure_cooldown("s1") is None
def test_success_clears_persisted_session_cooldown(self, tmp_path):
mock_response = MagicMock()
mock_response.choices = [MagicMock()]