diff --git a/agent/context_compressor.py b/agent/context_compressor.py index 4b08f3c11d..edbc89b7dd 100644 --- a/agent/context_compressor.py +++ b/agent/context_compressor.py @@ -340,6 +340,8 @@ class ContextCompressor(ContextEngine): self._last_summary_error = None self._last_summary_dropped_count = 0 self._last_summary_fallback_used = False + self._last_aux_model_failure_error = None + self._last_aux_model_failure_model = None self._last_compression_savings_pct = 100.0 self._ineffective_compression_count = 0 @@ -448,6 +450,12 @@ class ContextCompressor(ContextEngine): # (gateway hygiene, /compress) can surface a visible warning. self._last_summary_dropped_count: int = 0 self._last_summary_fallback_used: bool = False + # When a user-configured summary model fails and we recover by + # retrying on the main model, record the failure so gateway / + # CLI callers can still warn the user even though compression + # succeeded. Silent recovery would hide the broken config. + self._last_aux_model_failure_error: Optional[str] = None + self._last_aux_model_failure_model: Optional[str] = None def update_from_response(self, usage: Dict[str, Any]): """Update tracked token usage from API response.""" @@ -907,6 +915,14 @@ The user has requested that this compaction PRIORITISE preserving all informatio "Falling back to main model '%s' for compression.", self.summary_model, e, self.model, ) + # Record the aux-model failure so callers can warn the user + # even if the retry-on-main succeeds — a misconfigured aux + # model is something the user needs to fix. + _err_text = str(e).strip() or e.__class__.__name__ + if len(_err_text) > 220: + _err_text = _err_text[:217].rstrip() + "..." + self._last_aux_model_failure_error = _err_text + self._last_aux_model_failure_model = self.summary_model self.summary_model = "" # empty = use main model self._summary_failure_cooldown_until = 0.0 # no cooldown return self._generate_summary(turns_to_summarize, focus_topic=focus_topic) # retry immediately @@ -931,6 +947,14 @@ The user has requested that this compaction PRIORITISE preserving all informatio "Retrying on main model '%s' before giving up.", self.summary_model, e, self.model, ) + # Record the aux-model failure (see 404 branch above) — user + # should know their configured model is broken even if main + # recovers the call. + _err_text = str(e).strip() or e.__class__.__name__ + if len(_err_text) > 220: + _err_text = _err_text[:217].rstrip() + "..." + self._last_aux_model_failure_error = _err_text + self._last_aux_model_failure_model = self.summary_model self.summary_model = "" # empty = use main model self._summary_failure_cooldown_until = 0.0 return self._generate_summary(turns_to_summarize, focus_topic=focus_topic) @@ -1232,6 +1256,8 @@ The user has requested that this compaction PRIORITISE preserving all informatio self._last_summary_dropped_count = 0 self._last_summary_fallback_used = False self._last_summary_error = None + self._last_aux_model_failure_error = None + self._last_aux_model_failure_model = None n_messages = len(messages) # Only need head + 3 tail messages minimum (token budget decides the real tail size) _min_for_compress = self.protect_first_n + 3 + 1 diff --git a/gateway/run.py b/gateway/run.py index 293daa9590..cd2a5ab114 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -4828,6 +4828,30 @@ class GatewayRunner: "Failed to deliver compression-failure warning to user: %s", _werr, ) + # Separately: if the user's CONFIGURED aux + # model failed and we recovered by falling + # back to the main model, tell them — a + # misconfigured auxiliary.compression.model + # is something only they can fix, and + # silent recovery would hide it. + elif _comp is not None and getattr(_comp, "_last_aux_model_failure_model", None): + _aux_model = getattr(_comp, "_last_aux_model_failure_model", "") + _aux_err = getattr(_comp, "_last_aux_model_failure_error", None) or "unknown error" + _aux_msg = ( + f"ℹ️ Configured compression model `{_aux_model}` " + f"failed ({_aux_err}). Recovered using your main " + "model — context is intact — but you may want to " + "check `auxiliary.compression.model` in config.yaml." + ) + try: + _adapter = self.adapters.get(source.platform) + if _adapter and source.chat_id: + await _adapter.send(source.chat_id, _aux_msg, metadata=_hyg_meta) + except Exception as _werr: + logger.warning( + "Failed to deliver aux-model-fallback notice to user: %s", + _werr, + ) finally: self._cleanup_agent_resources(_hyg_agent) @@ -7377,6 +7401,11 @@ class GatewayRunner: _summary_failed = bool(getattr(compressor, "_last_summary_fallback_used", False)) _dropped_count = int(getattr(compressor, "_last_summary_dropped_count", 0) or 0) _summary_err = getattr(compressor, "_last_summary_error", None) + # Separately: did the user's CONFIGURED aux model fail + # and we recovered via main? Surface that as an info + # note so they can fix their config. + _aux_fail_model = getattr(compressor, "_last_aux_model_failure_model", None) + _aux_fail_err = getattr(compressor, "_last_aux_model_failure_error", None) finally: self._cleanup_agent_resources(tmp_agent) lines = [f"🗜️ {summary['headline']}"] @@ -7392,6 +7421,13 @@ class GatewayRunner: "with a placeholder; earlier context is no longer recoverable. " "Consider checking your auxiliary.compression model configuration." ) + elif _aux_fail_model: + lines.append( + f"ℹ️ Configured compression model `{_aux_fail_model}` failed " + f"({_aux_fail_err or 'unknown error'}). Recovered using your main " + "model — context is intact — but you may want to check " + "`auxiliary.compression.model` in config.yaml." + ) return "\n".join(lines) except Exception as e: logger.warning("Manual compress failed: %s", e) diff --git a/run_agent.py b/run_agent.py index 42f1e6f9e5..3f2b783082 100644 --- a/run_agent.py +++ b/run_agent.py @@ -8460,6 +8460,23 @@ class AIAgent: f"⚠ Compression summary failed: {summary_error}. " "Inserted a fallback context marker." ) + else: + # No hard failure — but did the configured aux model error out + # and get recovered by retrying on main? Surface that so users + # know their auxiliary.compression.model setting is broken even + # though compression succeeded. + _aux_fail_model = getattr(self.context_compressor, "_last_aux_model_failure_model", None) + _aux_fail_err = getattr(self.context_compressor, "_last_aux_model_failure_error", None) + if _aux_fail_model: + # Dedup on (model, error) so we don't spam on every compaction + _aux_key = (_aux_fail_model, _aux_fail_err) + if getattr(self, "_last_aux_fallback_warning_key", None) != _aux_key: + self._last_aux_fallback_warning_key = _aux_key + self._emit_warning( + f"ℹ Configured compression model '{_aux_fail_model}' failed " + f"({_aux_fail_err or 'unknown error'}). Recovered using main model — " + "check auxiliary.compression.model in config.yaml." + ) todo_snapshot = self._todo_store.format_for_injection() if todo_snapshot: diff --git a/tests/agent/test_context_compressor.py b/tests/agent/test_context_compressor.py index 797c9edc3f..5225fa6eee 100644 --- a/tests/agent/test_context_compressor.py +++ b/tests/agent/test_context_compressor.py @@ -285,6 +285,12 @@ class TestSummaryFallbackToMainModel: assert "model" not in mock_call.call_args_list[1].kwargs assert result is not None assert "summary via main model" in result + # Aux-model failure is recorded even though retry succeeded — this is + # how callers (gateway /compress, CLI warning) know to tell the user + # their auxiliary.compression.model setting is broken. + assert c._last_aux_model_failure_model == "broken-aux-model" + assert c._last_aux_model_failure_error is not None + assert "404" in c._last_aux_model_failure_error def test_unknown_error_falls_back_to_main_and_succeeds(self): """Errors that don't match the 404/503/model_not_found fast-path @@ -317,6 +323,10 @@ class TestSummaryFallbackToMainModel: assert "model" not in mock_call.call_args_list[1].kwargs assert result is not None assert "summary via main model" in result + # Aux-model failure recorded despite successful recovery + assert c._last_aux_model_failure_model == "broken-aux-model" + assert c._last_aux_model_failure_error is not None + assert "400" in c._last_aux_model_failure_error def test_no_fallback_when_summary_model_equals_main_model(self): """If the aux model IS the main model, there's nowhere to fall back @@ -367,6 +377,97 @@ class TestSummaryFallbackToMainModel: assert c._summary_model_fallen_back is True +class TestAuxModelFallbackSurfacedToCallers: + """When summary_model fails but retry-on-main succeeds, compress() must + expose the aux-model failure via _last_aux_model_failure_{model,error} + so gateway /compress and CLI callers can warn the user about their + broken auxiliary.compression.model config — silent recovery would hide + a misconfiguration only the user can fix.""" + + def _make_msgs(self): + return [ + {"role": "system", "content": "sys"}, + {"role": "user", "content": "msg 1"}, + {"role": "assistant", "content": "msg 2"}, + {"role": "user", "content": "msg 3"}, + {"role": "assistant", "content": "msg 4"}, + {"role": "user", "content": "msg 5"}, + {"role": "assistant", "content": "msg 6"}, + {"role": "user", "content": "msg 7"}, + ] + + def test_compress_exposes_aux_failure_fields_after_successful_fallback(self): + mock_ok = MagicMock() + mock_ok.choices = [MagicMock()] + mock_ok.choices[0].message.content = "summary via main" + err_400 = Exception("400 provider rejected configured model") + err_400.status_code = 400 + + with patch("agent.context_compressor.get_model_context_length", return_value=100000): + c = ContextCompressor( + model="main-model", + summary_model_override="broken-aux-model", + quiet_mode=True, + protect_first_n=2, + protect_last_n=2, + ) + + with patch( + "agent.context_compressor.call_llm", + side_effect=[err_400, mock_ok], + ): + result = c.compress(self._make_msgs()) + + # Recovery succeeded → no fallback placeholder + assert c._last_summary_fallback_used is False + # But aux-model failure IS recorded for the gateway/CLI warning + assert c._last_aux_model_failure_model == "broken-aux-model" + assert c._last_aux_model_failure_error is not None + assert "400" in c._last_aux_model_failure_error + # Result is well-formed with a real summary, not a placeholder + assert any( + isinstance(m.get("content"), str) and "summary via main" in m["content"] + for m in result + ) + + def test_compress_clears_aux_failure_fields_at_start_of_next_call(self): + """A subsequent successful compression must clear the aux-failure + fields so the warning doesn't persist forever.""" + mock_ok = MagicMock() + mock_ok.choices = [MagicMock()] + mock_ok.choices[0].message.content = "summary via main" + err_400 = Exception("400 aux model busted") + err_400.status_code = 400 + + with patch("agent.context_compressor.get_model_context_length", return_value=100000): + c = ContextCompressor( + model="main-model", + summary_model_override="broken-aux-model", + quiet_mode=True, + protect_first_n=2, + protect_last_n=2, + ) + + # Call 1: aux fails, retry-on-main succeeds + with patch( + "agent.context_compressor.call_llm", + side_effect=[err_400, mock_ok], + ): + c.compress(self._make_msgs()) + assert c._last_aux_model_failure_model == "broken-aux-model" + + # Call 2: clean run on main (summary_model was cleared to "" after + # first fallback). Aux-failure fields MUST reset at compress() start + # so the old warning state doesn't leak into this call. + with patch( + "agent.context_compressor.call_llm", + return_value=mock_ok, + ): + c.compress(self._make_msgs()) + assert c._last_aux_model_failure_model is None + assert c._last_aux_model_failure_error is None + + class TestSummaryFailureTrackingForGatewayWarning: """When summary generation fails, the compressor must record dropped count + fallback flag so gateway hygiene & /compress can surface a visible diff --git a/tests/gateway/test_compress_command.py b/tests/gateway/test_compress_command.py index 91b3eeac42..21ff777f6a 100644 --- a/tests/gateway/test_compress_command.py +++ b/tests/gateway/test_compress_command.py @@ -181,3 +181,65 @@ async def test_compress_command_appends_warning_when_summary_generation_fails(): assert "historical message(s) were removed" in result agent_instance.shutdown_memory_provider.assert_called_once() agent_instance.close.assert_called_once() + + +@pytest.mark.asyncio +async def test_compress_command_surfaces_aux_model_failure_even_when_recovered(): + """When the user's configured ``auxiliary.compression.model`` errors out + but compression recovers by retrying on the main model, /compress must + STILL inform the user. Silent recovery hides broken config the user + needs to fix.""" + history = _make_history() + # Compressed transcript — normal successful compression, no placeholder. + compressed = [ + history[0], + {"role": "assistant", "content": "summary via main model"}, + history[-1], + ] + runner = _make_runner(history) + agent_instance = MagicMock() + agent_instance.shutdown_memory_provider = MagicMock() + agent_instance.close = MagicMock() + agent_instance.context_compressor.has_content_to_compress.return_value = True + # Fallback placeholder was NOT used — recovery succeeded. + agent_instance.context_compressor._last_summary_fallback_used = False + agent_instance.context_compressor._last_summary_dropped_count = 0 + agent_instance.context_compressor._last_summary_error = None + # But the configured aux model DID fail before the retry succeeded. + agent_instance.context_compressor._last_aux_model_failure_model = ( + "gemini-3-flash-preview" + ) + agent_instance.context_compressor._last_aux_model_failure_error = ( + "404 model not found: gemini-3-flash-preview" + ) + agent_instance.session_id = "sess-1" + agent_instance._compress_context.return_value = (compressed, "") + + def _estimate(messages): + if messages == history: + return 100 + if messages == compressed: + return 60 + raise AssertionError(f"unexpected transcript: {messages!r}") + + with ( + patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "***"}), + patch("gateway.run._resolve_gateway_model", return_value="test-model"), + patch("run_agent.AIAgent", return_value=agent_instance), + patch("agent.model_metadata.estimate_messages_tokens_rough", side_effect=_estimate), + ): + result = await runner._handle_compress_command(_make_event()) + + # Compression succeeded + assert "Compressed:" in result + # No ⚠️ warning (that's reserved for dropped-turns case) + assert "⚠️" not in result + # But there IS an info note about the broken aux model + assert "ℹ️" in result + assert "gemini-3-flash-preview" in result + assert "404" in result + assert "auxiliary.compression.model" in result + # The user's context is explicitly called out as intact + assert "intact" in result + agent_instance.shutdown_memory_provider.assert_called_once() + agent_instance.close.assert_called_once() diff --git a/tests/gateway/test_session_hygiene.py b/tests/gateway/test_session_hygiene.py index 555a8d7d4d..0932b1bbf0 100644 --- a/tests/gateway/test_session_hygiene.py +++ b/tests/gateway/test_session_hygiene.py @@ -508,4 +508,128 @@ async def test_session_hygiene_warns_user_when_summary_generation_fails(monkeypa assert warn["chat_id"] == "-1001" assert warn["metadata"] == {"thread_id": "17585"} - FakeCompressAgentWithSummaryFailure.last_instance.close.assert_called_once() \ No newline at end of file + FakeCompressAgentWithSummaryFailure.last_instance.close.assert_called_once() + + +@pytest.mark.asyncio +async def test_session_hygiene_informs_user_when_aux_model_fails_but_recovers(monkeypatch, tmp_path): + """When the user's configured ``auxiliary.compression.model`` errors out + and we recover via the main model, compression succeeds but the user's + config is still broken. Gateway hygiene must surface an ℹ note so the + user knows to fix ``auxiliary.compression.model`` — silent recovery + hides a misconfig only they can resolve.""" + fake_dotenv = types.ModuleType("dotenv") + fake_dotenv.load_dotenv = lambda *args, **kwargs: None + monkeypatch.setitem(sys.modules, "dotenv", fake_dotenv) + + class FakeCompressAgentWithAuxRecovery: + last_instance = None + + def __init__(self, **kwargs): + self.model = kwargs.get("model") + self.session_id = kwargs.get("session_id", "fake-session") + self._print_fn = None + self.shutdown_memory_provider = MagicMock() + self.close = MagicMock() + # Compression succeeded (no placeholder inserted) but the + # configured aux model errored and we fell back to main. + self.context_compressor = SimpleNamespace( + _last_summary_fallback_used=False, + _last_summary_dropped_count=0, + _last_summary_error=None, + _last_aux_model_failure_model="gemini-3-flash-preview", + _last_aux_model_failure_error="404 model not found", + ) + type(self).last_instance = self + + def _compress_context(self, messages, *_args, **_kwargs): + self.session_id = f"{self.session_id}_compressed" + return ([{"role": "assistant", "content": "real summary"}], None) + + fake_run_agent = types.ModuleType("run_agent") + fake_run_agent.AIAgent = FakeCompressAgentWithAuxRecovery + monkeypatch.setitem(sys.modules, "run_agent", fake_run_agent) + + gateway_run = importlib.import_module("gateway.run") + GatewayRunner = gateway_run.GatewayRunner + + adapter = HygieneCaptureAdapter() + runner = object.__new__(GatewayRunner) + runner.config = GatewayConfig( + platforms={Platform.TELEGRAM: PlatformConfig(enabled=True, token="fake-token")} + ) + runner.adapters = {Platform.TELEGRAM: adapter} + runner._voice_mode = {} + runner.hooks = SimpleNamespace(emit=AsyncMock(), loaded_hooks=False) + runner.session_store = MagicMock() + runner.session_store.get_or_create_session.return_value = SessionEntry( + session_key="agent:main:telegram:group:-1001:17585", + session_id="sess-1", + created_at=datetime.now(), + updated_at=datetime.now(), + platform=Platform.TELEGRAM, + chat_type="group", + ) + runner.session_store.load_transcript.return_value = _make_history(6, content_size=400) + runner.session_store.has_any_sessions.return_value = True + runner.session_store.rewrite_transcript = MagicMock() + runner.session_store.append_to_transcript = MagicMock() + runner._running_agents = {} + runner._pending_messages = {} + runner._pending_approvals = {} + runner._session_db = None + runner._is_user_authorized = lambda _source: True + runner._set_session_env = lambda _context: None + runner._run_agent = AsyncMock( + return_value={ + "final_response": "ok", + "messages": [], + "tools": [], + "history_offset": 0, + "last_prompt_tokens": 0, + } + ) + + monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path) + monkeypatch.setattr(gateway_run, "_resolve_runtime_agent_kwargs", lambda: {"api_key": "***"}) + monkeypatch.setattr( + "agent.model_metadata.get_model_context_length", + lambda *_args, **_kwargs: 100, + ) + monkeypatch.setenv("TELEGRAM_HOME_CHANNEL", "795544298") + + event = MessageEvent( + text="hello", + source=SessionSource( + platform=Platform.TELEGRAM, + chat_id="-1001", + chat_type="group", + thread_id="17585", + user_id="12345", + ), + message_id="1", + ) + + result = await runner._handle_message(event) + + assert result == "ok" + # No ⚠️ hard-failure warning (that's for dropped turns) + hard_warnings = [s for s in adapter.sent if "Context compression summary failed" in s["content"]] + assert len(hard_warnings) == 0, adapter.sent + # But an ℹ note about the configured aux model must be delivered. + aux_notes = [ + s for s in adapter.sent + if "Configured compression model" in s["content"] + ] + assert len(aux_notes) == 1, ( + f"Expected 1 aux-model fallback notice, got {len(aux_notes)}: {adapter.sent}" + ) + note = aux_notes[0] + assert "gemini-3-flash-preview" in note["content"] + assert "404" in note["content"] + assert "auxiliary.compression.model" in note["content"] + # Note must land in the originating topic/thread. + assert note["chat_id"] == "-1001" + assert note["metadata"] == {"thread_id": "17585"} + + FakeCompressAgentWithAuxRecovery.last_instance.close.assert_called_once() \ No newline at end of file