fix(aux): surface auxiliary failures in UI

This commit is contained in:
helix4u 2026-04-24 14:22:25 -06:00 committed by Teknium
parent e7590f92a2
commit 8a2506af43
3 changed files with 118 additions and 11 deletions

View file

@ -294,6 +294,7 @@ class ContextCompressor(ContextEngine):
self._context_probed = False
self._context_probe_persistable = False
self._previous_summary = None
self._last_summary_error = None
self._last_compression_savings_pct = 100.0
self._ineffective_compression_count = 0
@ -389,6 +390,7 @@ class ContextCompressor(ContextEngine):
self._last_compression_savings_pct: float = 100.0
self._ineffective_compression_count: int = 0
self._summary_failure_cooldown_until: float = 0.0
self._last_summary_error: Optional[str] = None
def update_from_response(self, usage: Dict[str, Any]):
"""Update tracked token usage from API response."""
@ -812,10 +814,12 @@ The user has requested that this compaction PRIORITISE preserving all informatio
self._previous_summary = summary
self._summary_failure_cooldown_until = 0.0
self._summary_model_fallen_back = False
self._last_summary_error = None
return self._with_summary_prefix(summary)
except RuntimeError:
# No provider configured — long cooldown, unlikely to self-resolve
self._summary_failure_cooldown_until = time.monotonic() + _SUMMARY_FAILURE_COOLDOWN_SECONDS
self._last_summary_error = "no auxiliary LLM provider configured"
logging.warning("Context compression: no provider available for "
"summary. Middle turns will be dropped without summary "
"for %d seconds.",
@ -853,6 +857,10 @@ The user has requested that this compaction PRIORITISE preserving all informatio
# Transient errors (timeout, rate limit, network) — shorter cooldown
_transient_cooldown = 60
self._summary_failure_cooldown_until = time.monotonic() + _transient_cooldown
err_text = str(e).strip() or e.__class__.__name__
if len(err_text) > 220:
err_text = err_text[:217].rstrip() + "..."
self._last_summary_error = err_text
logging.warning(
"Failed to generate context summary: %s. "
"Further summary attempts paused for %d seconds.",

View file

@ -2231,6 +2231,34 @@ class AIAgent:
except Exception:
logger.debug("status_callback error in _emit_status", exc_info=True)
def _emit_warning(self, message: str) -> None:
"""Emit a user-visible warning through the same status plumbing.
Unlike debug logs, these warnings are meant for degraded side paths
such as auxiliary compression or memory flushes where the main turn can
continue but the user needs to know something important failed.
"""
try:
self._vprint(f"{self.log_prefix}{message}", force=True)
except Exception:
pass
if self.status_callback:
try:
self.status_callback("warn", message)
except Exception:
logger.debug("status_callback error in _emit_warning", exc_info=True)
def _emit_auxiliary_failure(self, task: str, exc: BaseException) -> None:
"""Surface a compact warning for failed auxiliary work."""
try:
detail = self._summarize_api_error(exc)
except Exception:
detail = str(exc)
detail = (detail or exc.__class__.__name__).strip()
if len(detail) > 220:
detail = detail[:217].rstrip() + "..."
self._emit_warning(f"⚠ Auxiliary {task} failed: {detail}")
def _current_main_runtime(self) -> Dict[str, str]:
"""Return the live main runtime for session-scoped auxiliary routing."""
return {
@ -3081,7 +3109,8 @@ class AIAgent:
pass
except Exception as e:
logger.debug("Background memory/skill review failed: %s", e) logger.warning("Background memory/skill review failed: %s", e)
self._emit_auxiliary_failure("background review", e)
finally:
# Close all resources (httpx client, subprocesses, etc.) so
# GC doesn't try to clean them up on a dead asyncio event
@ -7653,6 +7682,7 @@ class AIAgent:
_flush_temperature = _fixed_temp
else:
_flush_temperature = 0.3
aux_error = None
try:
response = _call_llm(
task="flush_memories",
@ -7662,14 +7692,19 @@ class AIAgent:
max_tokens=5120,
# timeout resolved from auxiliary.flush_memories.timeout config
)
except RuntimeError: except Exception as e:
aux_error = e
_aux_available = False
response = None
if not _aux_available and self.api_mode == "codex_responses":
# No auxiliary client -- use the Codex Responses path directly
codex_kwargs = self._build_api_kwargs(api_messages)
codex_kwargs["tools"] = self._get_transport().convert_tools([memory_tool_def]) _ct_flush = self._get_transport()
if _ct_flush is not None:
codex_kwargs["tools"] = _ct_flush.convert_tools([memory_tool_def])
elif not codex_kwargs.get("tools"):
codex_kwargs["tools"] = [memory_tool_def]
if _flush_temperature is not None:
codex_kwargs["temperature"] = _flush_temperature
else:
@ -7701,11 +7736,37 @@ class AIAgent:
**api_kwargs, timeout=_get_task_timeout("flush_memories")
)
if aux_error is not None:
logger.warning("Auxiliary memory flush failed; used fallback path: %s", aux_error)
self._emit_auxiliary_failure("memory flush", aux_error)
def _openai_tool_calls(resp):
if resp is not None and hasattr(resp, "choices") and resp.choices:
msg = getattr(resp.choices[0], "message", None)
calls = getattr(msg, "tool_calls", None)
if calls:
return calls
return []
def _codex_output_tool_calls(resp):
calls = []
for item in getattr(resp, "output", []) or []:
if getattr(item, "type", None) == "function_call":
calls.append(SimpleNamespace(
id=getattr(item, "call_id", None),
type="function",
function=SimpleNamespace(
name=getattr(item, "name", ""),
arguments=getattr(item, "arguments", "{}"),
),
))
return calls
# Extract tool calls from the response, handling all API formats
tool_calls = []
if self.api_mode == "codex_responses" and not _aux_available:
_ct_flush = self._get_transport()
_cnr_flush = _ct_flush.normalize_response(response) _cnr_flush = _ct_flush.normalize_response(response) if _ct_flush is not None else None
if _cnr_flush and _cnr_flush.tool_calls:
tool_calls = [
SimpleNamespace(
@ -7713,6 +7774,8 @@ class AIAgent:
function=SimpleNamespace(name=tc.name, arguments=tc.arguments),
) for tc in _cnr_flush.tool_calls
]
else:
tool_calls = _codex_output_tool_calls(response)
elif self.api_mode == "anthropic_messages" and not _aux_available:
_tfn = self._get_transport()
_flush_result = _tfn.normalize_response(response, strip_tool_prefix=self._is_anthropic_oauth)
@ -7725,15 +7788,16 @@ class AIAgent:
]
elif self.api_mode in ("chat_completions", "bedrock_converse"):
# chat_completions / bedrock — normalize through transport
_flush_result = self._get_transport().normalize_response(response) _tfn = self._get_transport()
if _flush_result.tool_calls: _flush_result = _tfn.normalize_response(response) if _tfn is not None else None
if _flush_result and _flush_result.tool_calls:
tool_calls = _flush_result.tool_calls
else:
tool_calls = _openai_tool_calls(response)
elif _aux_available and hasattr(response, "choices") and response.choices:
# Auxiliary client returned OpenAI-shaped response while main
# api_mode is codex/anthropic — extract tool_calls from .choices
_aux_msg = response.choices[0].message tool_calls = _openai_tool_calls(response)
if hasattr(_aux_msg, "tool_calls") and _aux_msg.tool_calls:
tool_calls = _aux_msg.tool_calls
for tc in tool_calls:
if tc.function.name == "memory":
@ -7751,9 +7815,11 @@ class AIAgent:
if not self.quiet_mode:
print(f" 🧠 Memory flush: saved to {args.get('target', 'memory')}")
except Exception as e:
logger.debug("Memory flush tool call failed: %s", e) logger.warning("Memory flush tool call failed: %s", e)
self._emit_auxiliary_failure("memory flush tool", e)
except Exception as e:
logger.debug("Memory flush API call failed: %s", e) logger.warning("Memory flush API call failed: %s", e)
self._emit_auxiliary_failure("memory flush", e)
finally:
# Strip flush artifacts: remove everything from the flush message onward.
# Use sentinel marker instead of identity check for robustness.
@ -7799,6 +7865,15 @@ class AIAgent:
# focus_topic — fall back to calling without it.
compressed = self.context_compressor.compress(messages, current_tokens=approx_tokens)
summary_error = getattr(self.context_compressor, "_last_summary_error", None)
if summary_error:
if getattr(self, "_last_compression_summary_warning", None) != summary_error:
self._last_compression_summary_warning = summary_error
self._emit_warning(
f"⚠ Compression summary failed: {summary_error}. "
"Inserted a fallback context marker."
)
todo_snapshot = self._todo_store.format_for_injection()
if todo_snapshot:
compressed.append({"role": "user", "content": todo_snapshot})

View file

@ -188,6 +188,30 @@ class TestFlushMemoriesUsesAuxiliaryClient:
agent.client.chat.completions.create.assert_called_once()
def test_auxiliary_provider_failure_surfaces_warning_and_falls_back(self, monkeypatch):
"""Provider/API failures from auxiliary flush must be visible.
Exhausted keys and rate limits are not always RuntimeError. They used
to fall into the broad outer handler and disappear into debug logs.
"""
agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter")
agent.client = MagicMock()
agent.client.chat.completions.create.return_value = _chat_response_with_memory_call()
events = []
agent.status_callback = lambda kind, text=None: events.append((kind, text))
with patch("agent.auxiliary_client.call_llm", side_effect=Exception("opencode-go key exhausted")), \
patch("tools.memory_tool.memory_tool", return_value="Saved."):
messages = [
{"role": "user", "content": "Hello"},
{"role": "assistant", "content": "Hi there"},
{"role": "user", "content": "Save this"},
]
agent.flush_memories(messages)
agent.client.chat.completions.create.assert_called_once()
assert any(kind == "warn" and "Auxiliary memory flush failed" in text for kind, text in events)
def test_flush_executes_memory_tool_calls(self, monkeypatch):
"""Verify that memory tool calls from the flush response actually get executed."""
agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter")