From 5ce0067c08a81181c5b550a5bc8fcb0262ece2df Mon Sep 17 00:00:00 2001 From: Stephen Schoettler Date: Thu, 14 May 2026 14:28:14 -0700 Subject: [PATCH] fix(ci): stabilize shared test state after 21012 --- agent/context_compressor.py | 12 ++++++++++-- run_agent.py | 4 +++- .../test_context_compressor_summary_continuity.py | 2 ++ tests/conftest.py | 8 +++++--- tests/hermes_cli/test_update_autostash.py | 1 + tests/providers/test_plugin_discovery.py | 6 +++--- tests/run_agent/test_compression_feasibility.py | 14 ++++++++++++++ 7 files changed, 38 insertions(+), 9 deletions(-) diff --git a/agent/context_compressor.py b/agent/context_compressor.py index df75b8b88ce..e7a14faf51b 100644 --- a/agent/context_compressor.py +++ b/agent/context_compressor.py @@ -1429,15 +1429,23 @@ The user has requested that this compaction PRIORITISE preserving all informatio return messages turns_to_summarize = messages[compress_start:compress_end] + # A persisted handoff summary can sit in the protected head after a + # resume (commonly immediately after the system prompt). Search from + # the first non-system message through the compression window so we can + # rehydrate iterative-summary state without serializing that handoff as + # a new turn. Protected messages after the handoff remain live context, + # so only summarize messages that are both after the handoff and inside + # the current compression window. + summary_search_start = 1 if messages and messages[0].get("role") == "system" else 0 summary_idx, summary_body = self._find_latest_context_summary( messages, - compress_start, + summary_search_start, compress_end, ) if summary_idx is not None: if summary_body and not self._previous_summary: self._previous_summary = summary_body - turns_to_summarize = messages[summary_idx + 1:compress_end] + turns_to_summarize = messages[max(compress_start, summary_idx + 1):compress_end] if not self.quiet_mode: logger.info( diff --git a/run_agent.py b/run_agent.py index b60f6c43ce6..e2605ebee88 100644 --- a/run_agent.py +++ b/run_agent.py @@ -4268,6 +4268,7 @@ class AIAgent: except Exception: pass review_agent = None + review_messages = [] try: with open(os.devnull, "w", encoding="utf-8") as _devnull, \ contextlib.redirect_stdout(_devnull), \ @@ -4385,6 +4386,7 @@ class AIAgent: review_agent.close() except Exception: pass + review_messages = list(getattr(review_agent, "_session_messages", [])) review_agent = None # Scan the review agent's messages for successful tool actions @@ -4394,7 +4396,7 @@ class AIAgent: # re-surface stale "created"/"updated" messages from the prior # conversation as if they just happened (issue #14944). actions = self._summarize_background_review_actions( - getattr(review_agent, "_session_messages", []), + review_messages, messages_snapshot, ) diff --git a/tests/agent/test_context_compressor_summary_continuity.py b/tests/agent/test_context_compressor_summary_continuity.py index d9a27375834..d797b661f01 100644 --- a/tests/agent/test_context_compressor_summary_continuity.py +++ b/tests/agent/test_context_compressor_summary_continuity.py @@ -27,10 +27,12 @@ def _messages_with_handoff(summary_body: str): return [ {"role": "system", "content": "system prompt"}, {"role": "user", "content": f"{SUMMARY_PREFIX}\n{summary_body}"}, + {"role": "assistant", "content": "handoff acknowledged after resume"}, {"role": "user", "content": "new user turn after resume"}, {"role": "assistant", "content": "new assistant work after resume"}, {"role": "user", "content": "more new work after resume"}, {"role": "assistant", "content": "latest tail response"}, + {"role": "user", "content": "final active request stays in protected tail"}, ] diff --git a/tests/conftest.py b/tests/conftest.py index 5d7f197f195..d9ae0c86ea6 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -476,12 +476,14 @@ def _reset_module_state(): except Exception: pass - # --- agent.auxiliary_client — runtime main provider/model override --- - # Set per-turn by AIAgent.run_conversation; tests that import it must - # see a clean state so config.yaml fallback works as expected. + # --- agent.auxiliary_client — runtime main provider/model override and + # payment-error health cache. Both are process-global in production; + # reset them per test so one worker's fallback/402 test does not make + # later auxiliary-client tests skip otherwise-available providers. try: from agent import auxiliary_client as _aux_mod _aux_mod.clear_runtime_main() + _aux_mod._reset_aux_unhealthy_cache() except Exception: pass diff --git a/tests/hermes_cli/test_update_autostash.py b/tests/hermes_cli/test_update_autostash.py index 645b3b24ea4..f7d90245a81 100644 --- a/tests/hermes_cli/test_update_autostash.py +++ b/tests/hermes_cli/test_update_autostash.py @@ -305,6 +305,7 @@ def _setup_update_mocks(monkeypatch, tmp_path): monkeypatch.setattr(hermes_config, "get_missing_config_fields", lambda: []) monkeypatch.setattr(hermes_config, "check_config_version", lambda: (5, 5)) monkeypatch.setattr(hermes_config, "migrate_config", lambda **kw: {"env_added": [], "config_added": []}) + monkeypatch.setattr(hermes_main, "_refresh_active_lazy_features", lambda: None) def test_cmd_update_retries_optional_extras_individually_when_all_fails(monkeypatch, tmp_path, capsys): diff --git a/tests/providers/test_plugin_discovery.py b/tests/providers/test_plugin_discovery.py index 9ad6713e3ec..a7cbb7d9030 100644 --- a/tests/providers/test_plugin_discovery.py +++ b/tests/providers/test_plugin_discovery.py @@ -46,14 +46,14 @@ def test_bundled_plugins_discovered(): assert (child / "plugin.yaml").exists(), f"{child.name} missing plugin.yaml" -def test_all_33_profiles_register(): - """After discovery, the registry must contain exactly 33 distinct profiles.""" +def test_all_34_profiles_register(): + """After discovery, the registry must contain exactly 34 distinct profiles.""" _clear_provider_caches() from providers import list_providers profiles = list_providers() names = sorted(p.name for p in profiles) - assert len(names) == 33, f"Expected 33 profiles, got {len(names)}: {names}" + assert len(names) == 34, f"Expected 34 profiles, got {len(names)}: {names}" # Spot-check representative providers from different categories for required in ( diff --git a/tests/run_agent/test_compression_feasibility.py b/tests/run_agent/test_compression_feasibility.py index f935821ada9..3e23f3eb5d3 100644 --- a/tests/run_agent/test_compression_feasibility.py +++ b/tests/run_agent/test_compression_feasibility.py @@ -16,6 +16,16 @@ from run_agent import AIAgent from agent.context_compressor import ContextCompressor +@pytest.fixture(autouse=True) +def _stable_aux_provider_config(): + """Keep feasibility tests independent from the developer's config.yaml.""" + with patch( + "agent.auxiliary_client._resolve_task_provider_model", + return_value=("auto", None, None, None, None), + ): + yield + + def _make_agent( *, compression_enabled: bool = True, @@ -41,6 +51,7 @@ def _make_agent( agent.tool_progress_callback = None agent._compression_warning = None agent._aux_compression_context_length_config = None + agent._custom_providers = [] agent.tools = [] compressor = MagicMock(spec=ContextCompressor) @@ -182,6 +193,7 @@ def test_feasibility_check_passes_config_context_length(mock_get_client, mock_ct api_key="sk-custom", config_context_length=1_000_000, provider="openrouter", + custom_providers=[], ) @@ -205,6 +217,7 @@ def test_feasibility_check_ignores_invalid_context_length(mock_get_client, mock_ api_key="sk-test", config_context_length=None, provider="openrouter", + custom_providers=[], ) @@ -258,6 +271,7 @@ def test_init_feasibility_check_uses_aux_context_override_from_config(): api_key="sk-custom", config_context_length=1_000_000, provider="", + custom_providers=[], )