mirror of https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
fix(aux): surface auxiliary failures in UI
parent e7590f92a2
commit 8a2506af43

3 changed files with 118 additions and 11 deletions
@@ -294,6 +294,7 @@ class ContextCompressor(ContextEngine):
         self._context_probed = False
         self._context_probe_persistable = False
         self._previous_summary = None
+        self._last_summary_error = None
         self._last_compression_savings_pct = 100.0
         self._ineffective_compression_count = 0
@@ -389,6 +390,7 @@ class ContextCompressor(ContextEngine):
         self._last_compression_savings_pct: float = 100.0
         self._ineffective_compression_count: int = 0
         self._summary_failure_cooldown_until: float = 0.0
+        self._last_summary_error: Optional[str] = None

     def update_from_response(self, usage: Dict[str, Any]):
         """Update tracked token usage from API response."""
@@ -812,10 +814,12 @@ The user has requested that this compaction PRIORITISE preserving all informatio
             self._previous_summary = summary
             self._summary_failure_cooldown_until = 0.0
             self._summary_model_fallen_back = False
+            self._last_summary_error = None
             return self._with_summary_prefix(summary)
         except RuntimeError:
             # No provider configured — long cooldown, unlikely to self-resolve
             self._summary_failure_cooldown_until = time.monotonic() + _SUMMARY_FAILURE_COOLDOWN_SECONDS
+            self._last_summary_error = "no auxiliary LLM provider configured"
             logging.warning("Context compression: no provider available for "
                             "summary. Middle turns will be dropped without summary "
                             "for %d seconds.",
@@ -853,6 +857,10 @@ The user has requested that this compaction PRIORITISE preserving all informatio
             # Transient errors (timeout, rate limit, network) — shorter cooldown
             _transient_cooldown = 60
             self._summary_failure_cooldown_until = time.monotonic() + _transient_cooldown
+            err_text = str(e).strip() or e.__class__.__name__
+            if len(err_text) > 220:
+                err_text = err_text[:217].rstrip() + "..."
+            self._last_summary_error = err_text
             logging.warning(
                 "Failed to generate context summary: %s. "
                 "Further summary attempts paused for %d seconds.",
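
These context-compressor hunks add a simple failure-tracking contract: on a failed summary the compressor stores a truncated _last_summary_error and a retry cooldown, and on success it clears both, so callers can read the last error back after compress(). A minimal standalone sketch of that contract follows; the SummaryErrorTracker class and the 600-second value for _SUMMARY_FAILURE_COOLDOWN_SECONDS are illustrative assumptions, not the real compressor.

import time

_SUMMARY_FAILURE_COOLDOWN_SECONDS = 600  # assumed value; the real constant lives in the compressor module


class SummaryErrorTracker:
    """Illustrative stand-in for the error/cooldown fields ContextCompressor now keeps."""

    def __init__(self) -> None:
        self._last_summary_error = None
        self._summary_failure_cooldown_until = 0.0

    def record_failure(self, exc: Exception, transient: bool = True) -> None:
        # Mirrors the diff: prefer the message, fall back to the class name, cap at 220 chars.
        err_text = str(exc).strip() or exc.__class__.__name__
        if len(err_text) > 220:
            err_text = err_text[:217].rstrip() + "..."
        self._last_summary_error = err_text
        cooldown = 60 if transient else _SUMMARY_FAILURE_COOLDOWN_SECONDS
        self._summary_failure_cooldown_until = time.monotonic() + cooldown

    def record_success(self) -> None:
        self._last_summary_error = None
        self._summary_failure_cooldown_until = 0.0

    def can_attempt_summary(self) -> bool:
        return time.monotonic() >= self._summary_failure_cooldown_until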
run_agent.py (97 changes)
@@ -2231,6 +2231,34 @@ class AIAgent:
             except Exception:
                 logger.debug("status_callback error in _emit_status", exc_info=True)

+    def _emit_warning(self, message: str) -> None:
+        """Emit a user-visible warning through the same status plumbing.
+
+        Unlike debug logs, these warnings are meant for degraded side paths
+        such as auxiliary compression or memory flushes where the main turn can
+        continue but the user needs to know something important failed.
+        """
+        try:
+            self._vprint(f"{self.log_prefix}{message}", force=True)
+        except Exception:
+            pass
+        if self.status_callback:
+            try:
+                self.status_callback("warn", message)
+            except Exception:
+                logger.debug("status_callback error in _emit_warning", exc_info=True)
+
+    def _emit_auxiliary_failure(self, task: str, exc: BaseException) -> None:
+        """Surface a compact warning for failed auxiliary work."""
+        try:
+            detail = self._summarize_api_error(exc)
+        except Exception:
+            detail = str(exc)
+        detail = (detail or exc.__class__.__name__).strip()
+        if len(detail) > 220:
+            detail = detail[:217].rstrip() + "..."
+        self._emit_warning(f"⚠ Auxiliary {task} failed: {detail}")
+
     def _current_main_runtime(self) -> Dict[str, str]:
         """Return the live main runtime for session-scoped auxiliary routing."""
         return {
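
For context on who consumes these events: _emit_warning pushes a ("warn", message) pair through status_callback, the same hook the new test listens on. A minimal host-side sketch follows; the on_status handler is hypothetical, and only the callback signature and the warning text format come from this diff.

from typing import Optional


# Hypothetical host-side handler; only the ("warn", message) event shape is
# taken from the diff (status_callback("warn", message) in _emit_warning and
# the (kind, text) tuples collected in the new test).
def on_status(kind: str, text: Optional[str] = None) -> None:
    if kind == "warn":
        print(f"[agent warning] {text}")


# Wiring (agent construction elided; AIAgent lives in run_agent.py):
#     agent.status_callback = on_status
# A failed auxiliary memory flush would then surface roughly as:
on_status("warn", "⚠ Auxiliary memory flush failed: opencode-go key exhausted")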
@@ -3081,7 +3109,8 @@ class AIAgent:
                 pass

         except Exception as e:
-            logger.debug("Background memory/skill review failed: %s", e)
+            logger.warning("Background memory/skill review failed: %s", e)
+            self._emit_auxiliary_failure("background review", e)
         finally:
             # Close all resources (httpx client, subprocesses, etc.) so
             # GC doesn't try to clean them up on a dead asyncio event
@@ -7653,6 +7682,7 @@ class AIAgent:
             _flush_temperature = _fixed_temp
         else:
             _flush_temperature = 0.3
+        aux_error = None
         try:
             response = _call_llm(
                 task="flush_memories",
@@ -7662,14 +7692,19 @@ class AIAgent:
                 max_tokens=5120,
                 # timeout resolved from auxiliary.flush_memories.timeout config
             )
-        except RuntimeError:
+        except Exception as e:
+            aux_error = e
             _aux_available = False
             response = None

         if not _aux_available and self.api_mode == "codex_responses":
             # No auxiliary client -- use the Codex Responses path directly
             codex_kwargs = self._build_api_kwargs(api_messages)
-            codex_kwargs["tools"] = self._get_transport().convert_tools([memory_tool_def])
+            _ct_flush = self._get_transport()
+            if _ct_flush is not None:
+                codex_kwargs["tools"] = _ct_flush.convert_tools([memory_tool_def])
+            elif not codex_kwargs.get("tools"):
+                codex_kwargs["tools"] = [memory_tool_def]
             if _flush_temperature is not None:
                 codex_kwargs["temperature"] = _flush_temperature
             else:
@@ -7701,11 +7736,37 @@ class AIAgent:
                     **api_kwargs, timeout=_get_task_timeout("flush_memories")
                 )

+        if aux_error is not None:
+            logger.warning("Auxiliary memory flush failed; used fallback path: %s", aux_error)
+            self._emit_auxiliary_failure("memory flush", aux_error)
+
+        def _openai_tool_calls(resp):
+            if resp is not None and hasattr(resp, "choices") and resp.choices:
+                msg = getattr(resp.choices[0], "message", None)
+                calls = getattr(msg, "tool_calls", None)
+                if calls:
+                    return calls
+            return []
+
+        def _codex_output_tool_calls(resp):
+            calls = []
+            for item in getattr(resp, "output", []) or []:
+                if getattr(item, "type", None) == "function_call":
+                    calls.append(SimpleNamespace(
+                        id=getattr(item, "call_id", None),
+                        type="function",
+                        function=SimpleNamespace(
+                            name=getattr(item, "name", ""),
+                            arguments=getattr(item, "arguments", "{}"),
+                        ),
+                    ))
+            return calls
+
         # Extract tool calls from the response, handling all API formats
         tool_calls = []
         if self.api_mode == "codex_responses" and not _aux_available:
             _ct_flush = self._get_transport()
-            _cnr_flush = _ct_flush.normalize_response(response)
+            _cnr_flush = _ct_flush.normalize_response(response) if _ct_flush is not None else None
             if _cnr_flush and _cnr_flush.tool_calls:
                 tool_calls = [
                     SimpleNamespace(
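
To make the Codex-path normalization concrete, here is a small self-contained run of the _codex_output_tool_calls helper added above against a faked Responses-style output item; fake_response and its field values are illustrative, not a real API payload.

from types import SimpleNamespace


def _codex_output_tool_calls(resp):
    # Same logic as the helper in the diff: convert Responses-API
    # "function_call" output items into OpenAI-shaped tool_call objects.
    calls = []
    for item in getattr(resp, "output", []) or []:
        if getattr(item, "type", None) == "function_call":
            calls.append(SimpleNamespace(
                id=getattr(item, "call_id", None),
                type="function",
                function=SimpleNamespace(
                    name=getattr(item, "name", ""),
                    arguments=getattr(item, "arguments", "{}"),
                ),
            ))
    return calls


# Fake Responses-style payload (illustrative only):
fake_response = SimpleNamespace(output=[
    SimpleNamespace(type="function_call", call_id="call_1",
                    name="memory", arguments='{"target": "memory"}'),
])
for tc in _codex_output_tool_calls(fake_response):
    print(tc.function.name, tc.function.arguments)  # -> memory {"target": "memory"}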
@@ -7713,6 +7774,8 @@ class AIAgent:
                         function=SimpleNamespace(name=tc.name, arguments=tc.arguments),
                     ) for tc in _cnr_flush.tool_calls
                 ]
+            else:
+                tool_calls = _codex_output_tool_calls(response)
         elif self.api_mode == "anthropic_messages" and not _aux_available:
             _tfn = self._get_transport()
             _flush_result = _tfn.normalize_response(response, strip_tool_prefix=self._is_anthropic_oauth)
@@ -7725,15 +7788,16 @@ class AIAgent:
                 ]
         elif self.api_mode in ("chat_completions", "bedrock_converse"):
             # chat_completions / bedrock — normalize through transport
-            _flush_result = self._get_transport().normalize_response(response)
-            if _flush_result.tool_calls:
+            _tfn = self._get_transport()
+            _flush_result = _tfn.normalize_response(response) if _tfn is not None else None
+            if _flush_result and _flush_result.tool_calls:
                 tool_calls = _flush_result.tool_calls
+            else:
+                tool_calls = _openai_tool_calls(response)
         elif _aux_available and hasattr(response, "choices") and response.choices:
             # Auxiliary client returned OpenAI-shaped response while main
             # api_mode is codex/anthropic — extract tool_calls from .choices
-            _aux_msg = response.choices[0].message
-            if hasattr(_aux_msg, "tool_calls") and _aux_msg.tool_calls:
-                tool_calls = _aux_msg.tool_calls
+            tool_calls = _openai_tool_calls(response)

         for tc in tool_calls:
             if tc.function.name == "memory":
@@ -7751,9 +7815,11 @@ class AIAgent:
                         if not self.quiet_mode:
                             print(f" 🧠 Memory flush: saved to {args.get('target', 'memory')}")
                     except Exception as e:
-                        logger.debug("Memory flush tool call failed: %s", e)
+                        logger.warning("Memory flush tool call failed: %s", e)
+                        self._emit_auxiliary_failure("memory flush tool", e)
         except Exception as e:
-            logger.debug("Memory flush API call failed: %s", e)
+            logger.warning("Memory flush API call failed: %s", e)
+            self._emit_auxiliary_failure("memory flush", e)
         finally:
             # Strip flush artifacts: remove everything from the flush message onward.
             # Use sentinel marker instead of identity check for robustness.
@@ -7799,6 +7865,15 @@ class AIAgent:
             # focus_topic — fall back to calling without it.
             compressed = self.context_compressor.compress(messages, current_tokens=approx_tokens)

+        summary_error = getattr(self.context_compressor, "_last_summary_error", None)
+        if summary_error:
+            if getattr(self, "_last_compression_summary_warning", None) != summary_error:
+                self._last_compression_summary_warning = summary_error
+                self._emit_warning(
+                    f"⚠ Compression summary failed: {summary_error}. "
+                    "Inserted a fallback context marker."
+                )
+
         todo_snapshot = self._todo_store.format_for_injection()
         if todo_snapshot:
             compressed.append({"role": "user", "content": todo_snapshot})
@@ -188,6 +188,30 @@ class TestFlushMemoriesUsesAuxiliaryClient:

         agent.client.chat.completions.create.assert_called_once()

+    def test_auxiliary_provider_failure_surfaces_warning_and_falls_back(self, monkeypatch):
+        """Provider/API failures from auxiliary flush must be visible.
+
+        Exhausted keys and rate limits are not always RuntimeError. They used
+        to fall into the broad outer handler and disappear into debug logs.
+        """
+        agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter")
+        agent.client = MagicMock()
+        agent.client.chat.completions.create.return_value = _chat_response_with_memory_call()
+        events = []
+        agent.status_callback = lambda kind, text=None: events.append((kind, text))
+
+        with patch("agent.auxiliary_client.call_llm", side_effect=Exception("opencode-go key exhausted")), \
+             patch("tools.memory_tool.memory_tool", return_value="Saved."):
+            messages = [
+                {"role": "user", "content": "Hello"},
+                {"role": "assistant", "content": "Hi there"},
+                {"role": "user", "content": "Save this"},
+            ]
+            agent.flush_memories(messages)
+
+        agent.client.chat.completions.create.assert_called_once()
+        assert any(kind == "warn" and "Auxiliary memory flush failed" in text for kind, text in events)
+
     def test_flush_executes_memory_tool_calls(self, monkeypatch):
         """Verify that memory tool calls from the flush response actually get executed."""
         agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter")