From 53ef95484107bb6601a076ba71ac3e81600e9f40 Mon Sep 17 00:00:00 2001 From: Rod Boev Date: Sun, 28 Jun 2026 19:54:03 -0400 Subject: [PATCH] fix(agent): keep cooldown and lock refresh on one authority (#54465) --- agent/context_compressor.py | 2 +- agent/conversation_compression.py | 15 ++++++++------- tests/agent/test_compression_concurrent_fork.py | 8 +++++++- tests/agent/test_context_compressor.py | 15 +++++++++++++++ 4 files changed, 31 insertions(+), 9 deletions(-) diff --git a/agent/context_compressor.py b/agent/context_compressor.py index feaa1c2cc6d..f70fbd59abe 100644 --- a/agent/context_compressor.py +++ b/agent/context_compressor.py @@ -1550,7 +1550,7 @@ Summary generation was unavailable, so this is a best-effort deterministic fallb self._last_aux_model_failure_error = _err_text self._last_aux_model_failure_model = self.summary_model self.summary_model = "" # empty = use main model - self._summary_failure_cooldown_until = 0.0 # no cooldown — retry immediately + self._clear_compression_failure_cooldown() # no cooldown — retry immediately def _generate_summary( self, diff --git a/agent/conversation_compression.py b/agent/conversation_compression.py index 551bdcdee6f..dcc3eeffae5 100644 --- a/agent/conversation_compression.py +++ b/agent/conversation_compression.py @@ -521,13 +521,14 @@ def compress_context( if not _existing_sp: _existing_sp = agent._build_system_prompt(system_message) return messages, _existing_sp - _lock_refresher = _CompressionLockLeaseRefresher( - _lock_db, - _lock_sid, - _lock_holder, - _lock_ttl, - _lock_refresh_interval, - ).start() + if _lock_holder is not None: + _lock_refresher = _CompressionLockLeaseRefresher( + _lock_db, + _lock_sid, + _lock_holder, + _lock_ttl, + _lock_refresh_interval, + ).start() def _release_lock() -> None: """Release the lock keyed on the OLD session_id (before rotation).""" diff --git a/tests/agent/test_compression_concurrent_fork.py b/tests/agent/test_compression_concurrent_fork.py index a8e5ccf97e6..3356a399cfd 100644 --- a/tests/agent/test_compression_concurrent_fork.py +++ b/tests/agent/test_compression_concurrent_fork.py @@ -258,7 +258,7 @@ class _NoLockSubsystemDB: return getattr(self._real, name) -def test_missing_lock_subsystem_fails_open_not_infinite_loop(tmp_path: Path) -> None: +def test_missing_lock_subsystem_fails_open_not_infinite_loop(tmp_path: Path, monkeypatch) -> None: """Version skew (no lock methods) must fail OPEN, not raise into the loop. Reproduces the "API call #47/#48/#49 ... has no attribute @@ -275,6 +275,12 @@ def test_missing_lock_subsystem_fails_open_not_infinite_loop(tmp_path: Path) -> # Swap in the lock-less wrapper AFTER construction (the agent already # holds a normal db reference; we only break the lock methods). agent._session_db = _NoLockSubsystemDB(db) + monkeypatch.setattr( + "agent.conversation_compression._CompressionLockLeaseRefresher", + lambda *_a, **_k: (_ for _ in ()).throw( + AssertionError("lock refresher should not start on fail-open lock skew") + ), + ) messages = [{"role": "user", "content": f"m{i}"} for i in range(20)] diff --git a/tests/agent/test_context_compressor.py b/tests/agent/test_context_compressor.py index e3d03829e79..5edb412e507 100644 --- a/tests/agent/test_context_compressor.py +++ b/tests/agent/test_context_compressor.py @@ -1487,6 +1487,21 @@ class TestAbortOnSummaryFailure: assert len(result) < len(msgs) assert db.get_compression_failure_cooldown("s1") is None + def test_aux_fallback_clears_persisted_session_cooldown_before_retry(self, tmp_path): + db = SessionDB(db_path=tmp_path / "state.db") + db.create_session("s1", "cli") + db.record_compression_failure_cooldown("s1", time.time() + 999.0, "timeout") + + c = self._make_compressor() + c.bind_session_state(db, "s1") + c.summary_model = "aux/model" + + c._fallback_to_main_for_compression(Exception("provider down"), "failed") + + assert c.summary_model == "" + assert c._summary_failure_cooldown_until == 0.0 + assert db.get_compression_failure_cooldown("s1") is None + def test_success_clears_persisted_session_cooldown(self, tmp_path): mock_response = MagicMock() mock_response.choices = [MagicMock()]