mirror of https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
fix(aux): surface auxiliary failures in UI
parent e7590f92a2
commit 8a2506af43

3 changed files with 118 additions and 11 deletions
@@ -294,6 +294,7 @@ class ContextCompressor(ContextEngine):
         self._context_probed = False
         self._context_probe_persistable = False
         self._previous_summary = None
+        self._last_summary_error = None
         self._last_compression_savings_pct = 100.0
         self._ineffective_compression_count = 0
@@ -389,6 +390,7 @@ class ContextCompressor(ContextEngine):
         self._last_compression_savings_pct: float = 100.0
         self._ineffective_compression_count: int = 0
         self._summary_failure_cooldown_until: float = 0.0
+        self._last_summary_error: Optional[str] = None

     def update_from_response(self, usage: Dict[str, Any]):
         """Update tracked token usage from API response."""
@@ -812,10 +814,12 @@ The user has requested that this compaction PRIORITISE preserving all informatio
             self._previous_summary = summary
             self._summary_failure_cooldown_until = 0.0
             self._summary_model_fallen_back = False
+            self._last_summary_error = None
             return self._with_summary_prefix(summary)
         except RuntimeError:
             # No provider configured — long cooldown, unlikely to self-resolve
             self._summary_failure_cooldown_until = time.monotonic() + _SUMMARY_FAILURE_COOLDOWN_SECONDS
+            self._last_summary_error = "no auxiliary LLM provider configured"
             logging.warning("Context compression: no provider available for "
                             "summary. Middle turns will be dropped without summary "
                             "for %d seconds.",
@@ -853,6 +857,10 @@ The user has requested that this compaction PRIORITISE preserving all informatio
             # Transient errors (timeout, rate limit, network) — shorter cooldown
             _transient_cooldown = 60
             self._summary_failure_cooldown_until = time.monotonic() + _transient_cooldown
+            err_text = str(e).strip() or e.__class__.__name__
+            if len(err_text) > 220:
+                err_text = err_text[:217].rstrip() + "..."
+            self._last_summary_error = err_text
             logging.warning(
                 "Failed to generate context summary: %s. "
                 "Further summary attempts paused for %d seconds.",
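
These context-compressor hunks add a simple failure-tracking contract: on a failed summary the compressor stores a truncated _last_summary_error and a retry cooldown, and on success it clears both, so callers can read the last error back after compress(). A minimal standalone sketch of that contract follows; the SummaryErrorTracker class and the 600-second value for _SUMMARY_FAILURE_COOLDOWN_SECONDS are illustrative assumptions, not the real compressor.

import time

_SUMMARY_FAILURE_COOLDOWN_SECONDS = 600  # assumed value; the real constant lives in the compressor module


class SummaryErrorTracker:
    """Illustrative stand-in for the error/cooldown fields ContextCompressor now keeps."""

    def __init__(self) -> None:
        self._last_summary_error = None
        self._summary_failure_cooldown_until = 0.0

    def record_failure(self, exc: Exception, transient: bool = True) -> None:
        # Mirrors the diff: prefer the message, fall back to the class name, cap at 220 chars.
        err_text = str(exc).strip() or exc.__class__.__name__
        if len(err_text) > 220:
            err_text = err_text[:217].rstrip() + "..."
        self._last_summary_error = err_text
        cooldown = 60 if transient else _SUMMARY_FAILURE_COOLDOWN_SECONDS
        self._summary_failure_cooldown_until = time.monotonic() + cooldown

    def record_success(self) -> None:
        self._last_summary_error = None
        self._summary_failure_cooldown_until = 0.0

    def can_attempt_summary(self) -> bool:
        return time.monotonic() >= self._summary_failure_cooldown_until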
run_agent.py (97 changes)
@@ -2231,6 +2231,34 @@ class AIAgent:
             except Exception:
                 logger.debug("status_callback error in _emit_status", exc_info=True)

+    def _emit_warning(self, message: str) -> None:
+        """Emit a user-visible warning through the same status plumbing.
+
+        Unlike debug logs, these warnings are meant for degraded side paths
+        such as auxiliary compression or memory flushes where the main turn can
+        continue but the user needs to know something important failed.
+        """
+        try:
+            self._vprint(f"{self.log_prefix}{message}", force=True)
+        except Exception:
+            pass
+        if self.status_callback:
+            try:
+                self.status_callback("warn", message)
+            except Exception:
+                logger.debug("status_callback error in _emit_warning", exc_info=True)
+
+    def _emit_auxiliary_failure(self, task: str, exc: BaseException) -> None:
+        """Surface a compact warning for failed auxiliary work."""
+        try:
+            detail = self._summarize_api_error(exc)
+        except Exception:
+            detail = str(exc)
+        detail = (detail or exc.__class__.__name__).strip()
+        if len(detail) > 220:
+            detail = detail[:217].rstrip() + "..."
+        self._emit_warning(f"⚠ Auxiliary {task} failed: {detail}")
+
     def _current_main_runtime(self) -> Dict[str, str]:
         """Return the live main runtime for session-scoped auxiliary routing."""
         return {
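
For context on who consumes these events: _emit_warning pushes a ("warn", message) pair through status_callback, the same hook the new test listens on. A minimal host-side sketch follows; the on_status handler is hypothetical, and only the callback signature and the warning text format come from this diff.

from typing import Optional


# Hypothetical host-side handler; only the ("warn", message) event shape is
# taken from the diff (status_callback("warn", message) in _emit_warning and
# the (kind, text) tuples collected in the new test).
def on_status(kind: str, text: Optional[str] = None) -> None:
    if kind == "warn":
        print(f"[agent warning] {text}")


# Wiring (agent construction elided; AIAgent lives in run_agent.py):
#     agent.status_callback = on_status
# A failed auxiliary memory flush would then surface roughly as:
on_status("warn", "⚠ Auxiliary memory flush failed: opencode-go key exhausted")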
@@ -3081,7 +3109,8 @@ class AIAgent:
                 pass

         except Exception as e:
-            logger.debug("Background memory/skill review failed: %s", e)
+            logger.warning("Background memory/skill review failed: %s", e)
+            self._emit_auxiliary_failure("background review", e)
         finally:
             # Close all resources (httpx client, subprocesses, etc.) so
             # GC doesn't try to clean them up on a dead asyncio event
@@ -7653,6 +7682,7 @@ class AIAgent:
             _flush_temperature = _fixed_temp
         else:
             _flush_temperature = 0.3
+        aux_error = None
         try:
             response = _call_llm(
                 task="flush_memories",
@@ -7662,14 +7692,19 @@ class AIAgent:
                 max_tokens=5120,
                 # timeout resolved from auxiliary.flush_memories.timeout config
             )
-        except RuntimeError:
+        except Exception as e:
+            aux_error = e
             _aux_available = False
             response = None

         if not _aux_available and self.api_mode == "codex_responses":
             # No auxiliary client -- use the Codex Responses path directly
             codex_kwargs = self._build_api_kwargs(api_messages)
-            codex_kwargs["tools"] = self._get_transport().convert_tools([memory_tool_def])
+            _ct_flush = self._get_transport()
+            if _ct_flush is not None:
+                codex_kwargs["tools"] = _ct_flush.convert_tools([memory_tool_def])
+            elif not codex_kwargs.get("tools"):
+                codex_kwargs["tools"] = [memory_tool_def]
             if _flush_temperature is not None:
                 codex_kwargs["temperature"] = _flush_temperature
             else:
@@ -7701,11 +7736,37 @@ class AIAgent:
                     **api_kwargs, timeout=_get_task_timeout("flush_memories")
                 )

+        if aux_error is not None:
+            logger.warning("Auxiliary memory flush failed; used fallback path: %s", aux_error)
+            self._emit_auxiliary_failure("memory flush", aux_error)
+
+        def _openai_tool_calls(resp):
+            if resp is not None and hasattr(resp, "choices") and resp.choices:
+                msg = getattr(resp.choices[0], "message", None)
+                calls = getattr(msg, "tool_calls", None)
+                if calls:
+                    return calls
+            return []
+
+        def _codex_output_tool_calls(resp):
+            calls = []
+            for item in getattr(resp, "output", []) or []:
+                if getattr(item, "type", None) == "function_call":
+                    calls.append(SimpleNamespace(
+                        id=getattr(item, "call_id", None),
+                        type="function",
+                        function=SimpleNamespace(
+                            name=getattr(item, "name", ""),
+                            arguments=getattr(item, "arguments", "{}"),
+                        ),
+                    ))
+            return calls
+
         # Extract tool calls from the response, handling all API formats
         tool_calls = []
         if self.api_mode == "codex_responses" and not _aux_available:
             _ct_flush = self._get_transport()
-            _cnr_flush = _ct_flush.normalize_response(response)
+            _cnr_flush = _ct_flush.normalize_response(response) if _ct_flush is not None else None
             if _cnr_flush and _cnr_flush.tool_calls:
                 tool_calls = [
                     SimpleNamespace(
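
To make the Codex-path normalization concrete, here is a small self-contained run of the _codex_output_tool_calls helper added above against a faked Responses-style output item; fake_response and its field values are illustrative, not a real API payload.

from types import SimpleNamespace


def _codex_output_tool_calls(resp):
    # Same logic as the helper in the diff: convert Responses-API
    # "function_call" output items into OpenAI-shaped tool_call objects.
    calls = []
    for item in getattr(resp, "output", []) or []:
        if getattr(item, "type", None) == "function_call":
            calls.append(SimpleNamespace(
                id=getattr(item, "call_id", None),
                type="function",
                function=SimpleNamespace(
                    name=getattr(item, "name", ""),
                    arguments=getattr(item, "arguments", "{}"),
                ),
            ))
    return calls


# Fake Responses-style payload (illustrative only):
fake_response = SimpleNamespace(output=[
    SimpleNamespace(type="function_call", call_id="call_1",
                    name="memory", arguments='{"target": "memory"}'),
])
for tc in _codex_output_tool_calls(fake_response):
    print(tc.function.name, tc.function.arguments)  # -> memory {"target": "memory"}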
@@ -7713,6 +7774,8 @@ class AIAgent:
                         function=SimpleNamespace(name=tc.name, arguments=tc.arguments),
                     ) for tc in _cnr_flush.tool_calls
                 ]
+            else:
+                tool_calls = _codex_output_tool_calls(response)
         elif self.api_mode == "anthropic_messages" and not _aux_available:
             _tfn = self._get_transport()
             _flush_result = _tfn.normalize_response(response, strip_tool_prefix=self._is_anthropic_oauth)
@@ -7725,15 +7788,16 @@ class AIAgent:
                 ]
         elif self.api_mode in ("chat_completions", "bedrock_converse"):
             # chat_completions / bedrock — normalize through transport
-            _flush_result = self._get_transport().normalize_response(response)
-            if _flush_result.tool_calls:
+            _tfn = self._get_transport()
+            _flush_result = _tfn.normalize_response(response) if _tfn is not None else None
+            if _flush_result and _flush_result.tool_calls:
                 tool_calls = _flush_result.tool_calls
+            else:
+                tool_calls = _openai_tool_calls(response)
         elif _aux_available and hasattr(response, "choices") and response.choices:
             # Auxiliary client returned OpenAI-shaped response while main
             # api_mode is codex/anthropic — extract tool_calls from .choices
-            _aux_msg = response.choices[0].message
-            if hasattr(_aux_msg, "tool_calls") and _aux_msg.tool_calls:
-                tool_calls = _aux_msg.tool_calls
+            tool_calls = _openai_tool_calls(response)

         for tc in tool_calls:
             if tc.function.name == "memory":
@@ -7751,9 +7815,11 @@ class AIAgent:
                         if not self.quiet_mode:
                             print(f" 🧠 Memory flush: saved to {args.get('target', 'memory')}")
                     except Exception as e:
-                        logger.debug("Memory flush tool call failed: %s", e)
+                        logger.warning("Memory flush tool call failed: %s", e)
+                        self._emit_auxiliary_failure("memory flush tool", e)
         except Exception as e:
-            logger.debug("Memory flush API call failed: %s", e)
+            logger.warning("Memory flush API call failed: %s", e)
+            self._emit_auxiliary_failure("memory flush", e)
         finally:
             # Strip flush artifacts: remove everything from the flush message onward.
             # Use sentinel marker instead of identity check for robustness.
@@ -7799,6 +7865,15 @@ class AIAgent:
             # focus_topic — fall back to calling without it.
             compressed = self.context_compressor.compress(messages, current_tokens=approx_tokens)

+        summary_error = getattr(self.context_compressor, "_last_summary_error", None)
+        if summary_error:
+            if getattr(self, "_last_compression_summary_warning", None) != summary_error:
+                self._last_compression_summary_warning = summary_error
+                self._emit_warning(
+                    f"⚠ Compression summary failed: {summary_error}. "
+                    "Inserted a fallback context marker."
+                )
+
         todo_snapshot = self._todo_store.format_for_injection()
         if todo_snapshot:
             compressed.append({"role": "user", "content": todo_snapshot})
@@ -188,6 +188,30 @@ class TestFlushMemoriesUsesAuxiliaryClient:

         agent.client.chat.completions.create.assert_called_once()

+    def test_auxiliary_provider_failure_surfaces_warning_and_falls_back(self, monkeypatch):
+        """Provider/API failures from auxiliary flush must be visible.
+
+        Exhausted keys and rate limits are not always RuntimeError. They used
+        to fall into the broad outer handler and disappear into debug logs.
+        """
+        agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter")
+        agent.client = MagicMock()
+        agent.client.chat.completions.create.return_value = _chat_response_with_memory_call()
+        events = []
+        agent.status_callback = lambda kind, text=None: events.append((kind, text))
+
+        with patch("agent.auxiliary_client.call_llm", side_effect=Exception("opencode-go key exhausted")), \
+             patch("tools.memory_tool.memory_tool", return_value="Saved."):
+            messages = [
+                {"role": "user", "content": "Hello"},
+                {"role": "assistant", "content": "Hi there"},
+                {"role": "user", "content": "Save this"},
+            ]
+            agent.flush_memories(messages)
+
+        agent.client.chat.completions.create.assert_called_once()
+        assert any(kind == "warn" and "Auxiliary memory flush failed" in text for kind, text in events)
+
     def test_flush_executes_memory_tool_calls(self, monkeypatch):
         """Verify that memory tool calls from the flush response actually get executed."""
         agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter")