From 8a2506af43760a9dbecb384fc2b1fd7f5937ad0c Mon Sep 17 00:00:00 2001
From: helix4u <4317663+helix4u@users.noreply.github.com>
Date: Fri, 24 Apr 2026 14:22:25 -0600
Subject: [PATCH] fix(aux): surface auxiliary failures in UI

---
 agent/context_compressor.py                  |  8 ++
 run_agent.py                                 | 97 +++++++++++++++++---
 tests/run_agent/test_flush_memories_codex.py | 24 +++++
 3 files changed, 118 insertions(+), 11 deletions(-)

diff --git a/agent/context_compressor.py b/agent/context_compressor.py
index d4441a1c7..ef40cbfaf 100644
--- a/agent/context_compressor.py
+++ b/agent/context_compressor.py
@@ -294,6 +294,7 @@ class ContextCompressor(ContextEngine):
         self._context_probed = False
         self._context_probe_persistable = False
         self._previous_summary = None
+        self._last_summary_error = None
         self._last_compression_savings_pct = 100.0
         self._ineffective_compression_count = 0

@@ -389,6 +390,7 @@ class ContextCompressor(ContextEngine):
         self._last_compression_savings_pct: float = 100.0
         self._ineffective_compression_count: int = 0
         self._summary_failure_cooldown_until: float = 0.0
+        self._last_summary_error: Optional[str] = None

     def update_from_response(self, usage: Dict[str, Any]):
         """Update tracked token usage from API response."""
@@ -812,10 +814,12 @@ The user has requested that this compaction PRIORITISE preserving all informatio
             self._previous_summary = summary
             self._summary_failure_cooldown_until = 0.0
             self._summary_model_fallen_back = False
+            self._last_summary_error = None
             return self._with_summary_prefix(summary)
         except RuntimeError:
             # No provider configured — long cooldown, unlikely to self-resolve
             self._summary_failure_cooldown_until = time.monotonic() + _SUMMARY_FAILURE_COOLDOWN_SECONDS
+            self._last_summary_error = "no auxiliary LLM provider configured"
             logging.warning("Context compression: no provider available for "
                             "summary. Middle turns will be dropped without summary "
                             "for %d seconds.",
@@ -853,6 +857,10 @@
             # Transient errors (timeout, rate limit, network) — shorter cooldown
             _transient_cooldown = 60
             self._summary_failure_cooldown_until = time.monotonic() + _transient_cooldown
+            err_text = str(e).strip() or e.__class__.__name__
+            if len(err_text) > 220:
+                err_text = err_text[:217].rstrip() + "..."
+            self._last_summary_error = err_text
             logging.warning(
                 "Failed to generate context summary: %s. "
                 "Further summary attempts paused for %d seconds.",
diff --git a/run_agent.py b/run_agent.py
index 4911e4899..3daa05db6 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -2231,6 +2231,34 @@ class AIAgent:
             except Exception:
                 logger.debug("status_callback error in _emit_status", exc_info=True)

+    def _emit_warning(self, message: str) -> None:
+        """Emit a user-visible warning through the same status plumbing.
+
+        Unlike debug logs, these warnings are meant for degraded side paths
+        such as auxiliary compression or memory flushes where the main turn can
+        continue but the user needs to know something important failed.
+        """
+        try:
+            self._vprint(f"{self.log_prefix}{message}", force=True)
+        except Exception:
+            pass
+        if self.status_callback:
+            try:
+                self.status_callback("warn", message)
+            except Exception:
+                logger.debug("status_callback error in _emit_warning", exc_info=True)
+
+    def _emit_auxiliary_failure(self, task: str, exc: BaseException) -> None:
+        """Surface a compact warning for failed auxiliary work."""
+        try:
+            detail = self._summarize_api_error(exc)
+        except Exception:
+            detail = str(exc)
+        detail = (detail or exc.__class__.__name__).strip()
+        if len(detail) > 220:
+            detail = detail[:217].rstrip() + "..."
+        self._emit_warning(f"⚠ Auxiliary {task} failed: {detail}")
+
     def _current_main_runtime(self) -> Dict[str, str]:
         """Return the live main runtime for session-scoped auxiliary routing."""
         return {
@@ -3081,7 +3109,8 @@
                 pass

         except Exception as e:
-            logger.debug("Background memory/skill review failed: %s", e)
+            logger.warning("Background memory/skill review failed: %s", e)
+            self._emit_auxiliary_failure("background review", e)
         finally:
             # Close all resources (httpx client, subprocesses, etc.) so
             # GC doesn't try to clean them up on a dead asyncio event
@@ -7653,6 +7682,7 @@
             _flush_temperature = _fixed_temp
         else:
             _flush_temperature = 0.3
+        aux_error = None
         try:
             response = _call_llm(
                 task="flush_memories",
@@ -7662,14 +7692,19 @@
                 max_tokens=5120,
                 # timeout resolved from auxiliary.flush_memories.timeout config
             )
-        except RuntimeError:
+        except Exception as e:
+            aux_error = e
             _aux_available = False
             response = None

         if not _aux_available and self.api_mode == "codex_responses":
             # No auxiliary client -- use the Codex Responses path directly
             codex_kwargs = self._build_api_kwargs(api_messages)
-            codex_kwargs["tools"] = self._get_transport().convert_tools([memory_tool_def])
+            _ct_flush = self._get_transport()
+            if _ct_flush is not None:
+                codex_kwargs["tools"] = _ct_flush.convert_tools([memory_tool_def])
+            elif not codex_kwargs.get("tools"):
+                codex_kwargs["tools"] = [memory_tool_def]
             if _flush_temperature is not None:
                 codex_kwargs["temperature"] = _flush_temperature
             else:
@@ -7701,11 +7736,37 @@
                 **api_kwargs,
                 timeout=_get_task_timeout("flush_memories")
             )

+        if aux_error is not None:
+            logger.warning("Auxiliary memory flush failed; used fallback path: %s", aux_error)
+            self._emit_auxiliary_failure("memory flush", aux_error)
+
+        def _openai_tool_calls(resp):
+            if resp is not None and hasattr(resp, "choices") and resp.choices:
+                msg = getattr(resp.choices[0], "message", None)
+                calls = getattr(msg, "tool_calls", None)
+                if calls:
+                    return calls
+            return []
+
+        def _codex_output_tool_calls(resp):
+            calls = []
+            for item in getattr(resp, "output", []) or []:
+                if getattr(item, "type", None) == "function_call":
+                    calls.append(SimpleNamespace(
+                        id=getattr(item, "call_id", None),
+                        type="function",
+                        function=SimpleNamespace(
+                            name=getattr(item, "name", ""),
+                            arguments=getattr(item, "arguments", "{}"),
+                        ),
+                    ))
+            return calls
+
         # Extract tool calls from the response, handling all API formats
         tool_calls = []
         if self.api_mode == "codex_responses" and not _aux_available:
             _ct_flush = self._get_transport()
-            _cnr_flush = _ct_flush.normalize_response(response)
+            _cnr_flush = _ct_flush.normalize_response(response) if _ct_flush is not None else None
             if _cnr_flush and _cnr_flush.tool_calls:
                 tool_calls = [
                     SimpleNamespace(
                         id=tc.id,
                         function=SimpleNamespace(name=tc.name, arguments=tc.arguments),
                     ) for tc in _cnr_flush.tool_calls
                 ]
+            else:
+                tool_calls = _codex_output_tool_calls(response)
         elif self.api_mode == "anthropic_messages" and not _aux_available:
             _tfn = self._get_transport()
             _flush_result = _tfn.normalize_response(response, strip_tool_prefix=self._is_anthropic_oauth)
@@ -7725,15 +7788,16 @@
                 ]
         elif self.api_mode in ("chat_completions", "bedrock_converse"):
             # chat_completions / bedrock — normalize through transport
-            _flush_result = self._get_transport().normalize_response(response)
-            if _flush_result.tool_calls:
+            _tfn = self._get_transport()
+            _flush_result = _tfn.normalize_response(response) if _tfn is not None else None
+            if _flush_result and _flush_result.tool_calls:
                 tool_calls = _flush_result.tool_calls
+            else:
+                tool_calls = _openai_tool_calls(response)
         elif _aux_available and hasattr(response, "choices") and response.choices:
             # Auxiliary client returned OpenAI-shaped response while main
             # api_mode is codex/anthropic — extract tool_calls from .choices
-            _aux_msg = response.choices[0].message
-            if hasattr(_aux_msg, "tool_calls") and _aux_msg.tool_calls:
-                tool_calls = _aux_msg.tool_calls
+            tool_calls = _openai_tool_calls(response)

         for tc in tool_calls:
             if tc.function.name == "memory":
@@ -7751,9 +7815,11 @@
                     if not self.quiet_mode:
                         print(f" 🧠 Memory flush: saved to {args.get('target', 'memory')}")
                 except Exception as e:
-                    logger.debug("Memory flush tool call failed: %s", e)
+                    logger.warning("Memory flush tool call failed: %s", e)
+                    self._emit_auxiliary_failure("memory flush tool", e)
         except Exception as e:
-            logger.debug("Memory flush API call failed: %s", e)
+            logger.warning("Memory flush API call failed: %s", e)
+            self._emit_auxiliary_failure("memory flush", e)
         finally:
             # Strip flush artifacts: remove everything from the flush message onward.
             # Use sentinel marker instead of identity check for robustness.
@@ -7799,6 +7865,15 @@
                 # focus_topic — fall back to calling without it.
                 compressed = self.context_compressor.compress(messages, current_tokens=approx_tokens)

+            summary_error = getattr(self.context_compressor, "_last_summary_error", None)
+            if summary_error:
+                if getattr(self, "_last_compression_summary_warning", None) != summary_error:
+                    self._last_compression_summary_warning = summary_error
+                    self._emit_warning(
+                        f"⚠ Compression summary failed: {summary_error}. "
+                        "Inserted a fallback context marker."
+                    )
+
             todo_snapshot = self._todo_store.format_for_injection()
             if todo_snapshot:
                 compressed.append({"role": "user", "content": todo_snapshot})
diff --git a/tests/run_agent/test_flush_memories_codex.py b/tests/run_agent/test_flush_memories_codex.py
index 28fbf550d..04e20402f 100644
--- a/tests/run_agent/test_flush_memories_codex.py
+++ b/tests/run_agent/test_flush_memories_codex.py
@@ -188,6 +188,30 @@ class TestFlushMemoriesUsesAuxiliaryClient:

         agent.client.chat.completions.create.assert_called_once()

+    def test_auxiliary_provider_failure_surfaces_warning_and_falls_back(self, monkeypatch):
+        """Provider/API failures from auxiliary flush must be visible.
+
+        Exhausted keys and rate limits are not always RuntimeError. They used
+        to fall into the broad outer handler and disappear into debug logs.
+        """
+        agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter")
+        agent.client = MagicMock()
+        agent.client.chat.completions.create.return_value = _chat_response_with_memory_call()
+        events = []
+        agent.status_callback = lambda kind, text=None: events.append((kind, text))
+
+        with patch("agent.auxiliary_client.call_llm", side_effect=Exception("opencode-go key exhausted")), \
+             patch("tools.memory_tool.memory_tool", return_value="Saved."):
+            messages = [
+                {"role": "user", "content": "Hello"},
+                {"role": "assistant", "content": "Hi there"},
+                {"role": "user", "content": "Save this"},
+            ]
+            agent.flush_memories(messages)
+
+        agent.client.chat.completions.create.assert_called_once()
+        assert any(kind == "warn" and "Auxiliary memory flush failed" in text for kind, text in events)
+
     def test_flush_executes_memory_tool_calls(self, monkeypatch):
         """Verify that memory tool calls from the flush response actually get executed."""
         agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter")