From 8a2506af43760a9dbecb384fc2b1fd7f5937ad0c Mon Sep 17 00:00:00 2001
From: helix4u <4317663+helix4u@users.noreply.github.com>
Date: Fri, 24 Apr 2026 14:22:25 -0600
Subject: [PATCH] fix(aux): surface auxiliary failures in UI

---
 agent/context_compressor.py                  |  8 ++
 run_agent.py                                 | 97 +++++++++++++++++---
 tests/run_agent/test_flush_memories_codex.py | 24 +++++
 3 files changed, 118 insertions(+), 11 deletions(-)

diff --git a/agent/context_compressor.py b/agent/context_compressor.py
index d4441a1c7..ef40cbfaf 100644
--- a/agent/context_compressor.py
+++ b/agent/context_compressor.py
@@ -294,6 +294,7 @@ class ContextCompressor(ContextEngine):
         self._context_probed = False
         self._context_probe_persistable = False
         self._previous_summary = None
+        self._last_summary_error = None
         self._last_compression_savings_pct = 100.0
         self._ineffective_compression_count = 0

@@ -389,6 +390,7 @@ class ContextCompressor(ContextEngine):
         self._last_compression_savings_pct: float = 100.0
         self._ineffective_compression_count: int = 0
         self._summary_failure_cooldown_until: float = 0.0
+        self._last_summary_error: Optional[str] = None

     def update_from_response(self, usage: Dict[str, Any]):
         """Update tracked token usage from API response."""
@@ -812,10 +814,12 @@ The user has requested that this compaction PRIORITISE preserving all informatio
             self._previous_summary = summary
             self._summary_failure_cooldown_until = 0.0
             self._summary_model_fallen_back = False
+            self._last_summary_error = None
             return self._with_summary_prefix(summary)
         except RuntimeError:
             # No provider configured — long cooldown, unlikely to self-resolve
             self._summary_failure_cooldown_until = time.monotonic() + _SUMMARY_FAILURE_COOLDOWN_SECONDS
+            self._last_summary_error = "no auxiliary LLM provider configured"
             logging.warning("Context compression: no provider available for "
                             "summary. Middle turns will be dropped without summary "
                             "for %d seconds.",
@@ -853,6 +857,10 @@
             # Transient errors (timeout, rate limit, network) — shorter cooldown
             _transient_cooldown = 60
             self._summary_failure_cooldown_until = time.monotonic() + _transient_cooldown
+            err_text = str(e).strip() or e.__class__.__name__
+            if len(err_text) > 220:
+                err_text = err_text[:217].rstrip() + "..."
+            self._last_summary_error = err_text
             logging.warning(
                 "Failed to generate context summary: %s. "
                 "Further summary attempts paused for %d seconds.",
diff --git a/run_agent.py b/run_agent.py
index 4911e4899..3daa05db6 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -2231,6 +2231,34 @@ class AIAgent:
             except Exception:
                 logger.debug("status_callback error in _emit_status", exc_info=True)

+    def _emit_warning(self, message: str) -> None:
+        """Emit a user-visible warning through the same status plumbing.
+
+        Unlike debug logs, these warnings are meant for degraded side paths
+        such as auxiliary compression or memory flushes where the main turn can
+        continue but the user needs to know something important failed.
+        """
+        try:
+            self._vprint(f"{self.log_prefix}{message}", force=True)
+        except Exception:
+            pass
+        if self.status_callback:
+            try:
+                self.status_callback("warn", message)
+            except Exception:
+                logger.debug("status_callback error in _emit_warning", exc_info=True)
+
+    def _emit_auxiliary_failure(self, task: str, exc: BaseException) -> None:
+        """Surface a compact warning for failed auxiliary work."""
+        try:
+            detail = self._summarize_api_error(exc)
+        except Exception:
+            detail = str(exc)
+        detail = (detail or exc.__class__.__name__).strip()
+        if len(detail) > 220:
+            detail = detail[:217].rstrip() + "..."
+        self._emit_warning(f"⚠ Auxiliary {task} failed: {detail}")
+
     def _current_main_runtime(self) -> Dict[str, str]:
         """Return the live main runtime for session-scoped auxiliary routing."""
         return {
@@ -3081,7 +3109,8 @@
                 pass

         except Exception as e:
-            logger.debug("Background memory/skill review failed: %s", e)
+            logger.warning("Background memory/skill review failed: %s", e)
+            self._emit_auxiliary_failure("background review", e)
         finally:
             # Close all resources (httpx client, subprocesses, etc.) so
             # GC doesn't try to clean them up on a dead asyncio event
@@ -7653,6 +7682,7 @@
             _flush_temperature = _fixed_temp
         else:
             _flush_temperature = 0.3
+        aux_error = None
         try:
             response = _call_llm(
                 task="flush_memories",
@@ -7662,14 +7692,19 @@
                 max_tokens=5120,
                 # timeout resolved from auxiliary.flush_memories.timeout config
             )
-        except RuntimeError:
+        except Exception as e:
+            aux_error = e
             _aux_available = False
             response = None

         if not _aux_available and self.api_mode == "codex_responses":
             # No auxiliary client -- use the Codex Responses path directly
             codex_kwargs = self._build_api_kwargs(api_messages)
-            codex_kwargs["tools"] = self._get_transport().convert_tools([memory_tool_def])
+            _ct_flush = self._get_transport()
+            if _ct_flush is not None:
+                codex_kwargs["tools"] = _ct_flush.convert_tools([memory_tool_def])
+            elif not codex_kwargs.get("tools"):
+                codex_kwargs["tools"] = [memory_tool_def]
             if _flush_temperature is not None:
                 codex_kwargs["temperature"] = _flush_temperature
             else:
@@ -7701,11 +7736,37 @@
                 **api_kwargs,
                 timeout=_get_task_timeout("flush_memories")
             )

+        if aux_error is not None:
+            logger.warning("Auxiliary memory flush failed; used fallback path: %s", aux_error)
+            self._emit_auxiliary_failure("memory flush", aux_error)
+
+        def _openai_tool_calls(resp):
+            if resp is not None and hasattr(resp, "choices") and resp.choices:
+                msg = getattr(resp.choices[0], "message", None)
+                calls = getattr(msg, "tool_calls", None)
+                if calls:
+                    return calls
+            return []
+
+        def _codex_output_tool_calls(resp):
+            calls = []
+            for item in getattr(resp, "output", []) or []:
+                if getattr(item, "type", None) == "function_call":
+                    calls.append(SimpleNamespace(
+                        id=getattr(item, "call_id", None),
+                        type="function",
+                        function=SimpleNamespace(
+                            name=getattr(item, "name", ""),
+                            arguments=getattr(item, "arguments", "{}"),
+                        ),
+                    ))
+            return calls
+
         # Extract tool calls from the response, handling all API formats
         tool_calls = []
         if self.api_mode == "codex_responses" and not _aux_available:
             _ct_flush = self._get_transport()
-            _cnr_flush = _ct_flush.normalize_response(response)
+            _cnr_flush = _ct_flush.normalize_response(response) if _ct_flush is not None else None
             if _cnr_flush and _cnr_flush.tool_calls:
                 tool_calls = [
                     SimpleNamespace(
                         id=tc.id,
                         function=SimpleNamespace(name=tc.name, arguments=tc.arguments),
                     ) for tc in _cnr_flush.tool_calls
                 ]
+            else:
+                tool_calls = _codex_output_tool_calls(response)
         elif self.api_mode == "anthropic_messages" and not _aux_available:
             _tfn = self._get_transport()
             _flush_result = _tfn.normalize_response(response, strip_tool_prefix=self._is_anthropic_oauth)
@@ -7725,15 +7788,16 @@
                 ]
         elif self.api_mode in ("chat_completions", "bedrock_converse"):
             # chat_completions / bedrock — normalize through transport
-            _flush_result = self._get_transport().normalize_response(response)
-            if _flush_result.tool_calls:
+            _tfn = self._get_transport()
+            _flush_result = _tfn.normalize_response(response) if _tfn is not None else None
+            if _flush_result and _flush_result.tool_calls:
                 tool_calls = _flush_result.tool_calls
+            else:
+                tool_calls = _openai_tool_calls(response)
         elif _aux_available and hasattr(response, "choices") and response.choices:
             # Auxiliary client returned OpenAI-shaped response while main
             # api_mode is codex/anthropic — extract tool_calls from .choices
-            _aux_msg = response.choices[0].message
-            if hasattr(_aux_msg, "tool_calls") and _aux_msg.tool_calls:
-                tool_calls = _aux_msg.tool_calls
+            tool_calls = _openai_tool_calls(response)

         for tc in tool_calls:
             if tc.function.name == "memory":
@@ -7751,9 +7815,11 @@
                     if not self.quiet_mode:
                         print(f" 🧠 Memory flush: saved to {args.get('target', 'memory')}")
                 except Exception as e:
-                    logger.debug("Memory flush tool call failed: %s", e)
+                    logger.warning("Memory flush tool call failed: %s", e)
+                    self._emit_auxiliary_failure("memory flush tool", e)
         except Exception as e:
-            logger.debug("Memory flush API call failed: %s", e)
+            logger.warning("Memory flush API call failed: %s", e)
+            self._emit_auxiliary_failure("memory flush", e)
         finally:
             # Strip flush artifacts: remove everything from the flush message onward.
             # Use sentinel marker instead of identity check for robustness.
@@ -7799,6 +7865,15 @@
                 # focus_topic — fall back to calling without it.
                 compressed = self.context_compressor.compress(messages, current_tokens=approx_tokens)

+            summary_error = getattr(self.context_compressor, "_last_summary_error", None)
+            if summary_error:
+                if getattr(self, "_last_compression_summary_warning", None) != summary_error:
+                    self._last_compression_summary_warning = summary_error
+                    self._emit_warning(
+                        f"⚠ Compression summary failed: {summary_error}. "
+                        "Inserted a fallback context marker."
+                    )
+
             todo_snapshot = self._todo_store.format_for_injection()
             if todo_snapshot:
                 compressed.append({"role": "user", "content": todo_snapshot})
diff --git a/tests/run_agent/test_flush_memories_codex.py b/tests/run_agent/test_flush_memories_codex.py
index 28fbf550d..04e20402f 100644
--- a/tests/run_agent/test_flush_memories_codex.py
+++ b/tests/run_agent/test_flush_memories_codex.py
@@ -188,6 +188,30 @@ class TestFlushMemoriesUsesAuxiliaryClient:

         agent.client.chat.completions.create.assert_called_once()

+    def test_auxiliary_provider_failure_surfaces_warning_and_falls_back(self, monkeypatch):
+        """Provider/API failures from auxiliary flush must be visible.
+
+        Exhausted keys and rate limits are not always RuntimeError. They used
+        to fall into the broad outer handler and disappear into debug logs.
+        """
+        agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter")
+        agent.client = MagicMock()
+        agent.client.chat.completions.create.return_value = _chat_response_with_memory_call()
+        events = []
+        agent.status_callback = lambda kind, text=None: events.append((kind, text))
+
+        with patch("agent.auxiliary_client.call_llm", side_effect=Exception("opencode-go key exhausted")), \
+             patch("tools.memory_tool.memory_tool", return_value="Saved."):
+            messages = [
+                {"role": "user", "content": "Hello"},
+                {"role": "assistant", "content": "Hi there"},
+                {"role": "user", "content": "Save this"},
+            ]
+            agent.flush_memories(messages)
+
+        agent.client.chat.completions.create.assert_called_once()
+        assert any(kind == "warn" and "Auxiliary memory flush failed" in text for kind, text in events)
+
     def test_flush_executes_memory_tool_calls(self, monkeypatch):
         """Verify that memory tool calls from the flush response actually get executed."""
         agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter")