mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
fix(aux): surface auxiliary failures in UI
This commit is contained in:
parent
e7590f92a2
commit
8a2506af43
3 changed files with 118 additions and 11 deletions
|
|
@ -294,6 +294,7 @@ class ContextCompressor(ContextEngine):
|
||||||
self._context_probed = False
|
self._context_probed = False
|
||||||
self._context_probe_persistable = False
|
self._context_probe_persistable = False
|
||||||
self._previous_summary = None
|
self._previous_summary = None
|
||||||
|
self._last_summary_error = None
|
||||||
self._last_compression_savings_pct = 100.0
|
self._last_compression_savings_pct = 100.0
|
||||||
self._ineffective_compression_count = 0
|
self._ineffective_compression_count = 0
|
||||||
|
|
||||||
|
|
@ -389,6 +390,7 @@ class ContextCompressor(ContextEngine):
|
||||||
self._last_compression_savings_pct: float = 100.0
|
self._last_compression_savings_pct: float = 100.0
|
||||||
self._ineffective_compression_count: int = 0
|
self._ineffective_compression_count: int = 0
|
||||||
self._summary_failure_cooldown_until: float = 0.0
|
self._summary_failure_cooldown_until: float = 0.0
|
||||||
|
self._last_summary_error: Optional[str] = None
|
||||||
|
|
||||||
def update_from_response(self, usage: Dict[str, Any]):
|
def update_from_response(self, usage: Dict[str, Any]):
|
||||||
"""Update tracked token usage from API response."""
|
"""Update tracked token usage from API response."""
|
||||||
|
|
@ -812,10 +814,12 @@ The user has requested that this compaction PRIORITISE preserving all informatio
|
||||||
self._previous_summary = summary
|
self._previous_summary = summary
|
||||||
self._summary_failure_cooldown_until = 0.0
|
self._summary_failure_cooldown_until = 0.0
|
||||||
self._summary_model_fallen_back = False
|
self._summary_model_fallen_back = False
|
||||||
|
self._last_summary_error = None
|
||||||
return self._with_summary_prefix(summary)
|
return self._with_summary_prefix(summary)
|
||||||
except RuntimeError:
|
except RuntimeError:
|
||||||
# No provider configured — long cooldown, unlikely to self-resolve
|
# No provider configured — long cooldown, unlikely to self-resolve
|
||||||
self._summary_failure_cooldown_until = time.monotonic() + _SUMMARY_FAILURE_COOLDOWN_SECONDS
|
self._summary_failure_cooldown_until = time.monotonic() + _SUMMARY_FAILURE_COOLDOWN_SECONDS
|
||||||
|
self._last_summary_error = "no auxiliary LLM provider configured"
|
||||||
logging.warning("Context compression: no provider available for "
|
logging.warning("Context compression: no provider available for "
|
||||||
"summary. Middle turns will be dropped without summary "
|
"summary. Middle turns will be dropped without summary "
|
||||||
"for %d seconds.",
|
"for %d seconds.",
|
||||||
|
|
@ -853,6 +857,10 @@ The user has requested that this compaction PRIORITISE preserving all informatio
|
||||||
# Transient errors (timeout, rate limit, network) — shorter cooldown
|
# Transient errors (timeout, rate limit, network) — shorter cooldown
|
||||||
_transient_cooldown = 60
|
_transient_cooldown = 60
|
||||||
self._summary_failure_cooldown_until = time.monotonic() + _transient_cooldown
|
self._summary_failure_cooldown_until = time.monotonic() + _transient_cooldown
|
||||||
|
err_text = str(e).strip() or e.__class__.__name__
|
||||||
|
if len(err_text) > 220:
|
||||||
|
err_text = err_text[:217].rstrip() + "..."
|
||||||
|
self._last_summary_error = err_text
|
||||||
logging.warning(
|
logging.warning(
|
||||||
"Failed to generate context summary: %s. "
|
"Failed to generate context summary: %s. "
|
||||||
"Further summary attempts paused for %d seconds.",
|
"Further summary attempts paused for %d seconds.",
|
||||||
|
|
|
||||||
97
run_agent.py
97
run_agent.py
|
|
@ -2231,6 +2231,34 @@ class AIAgent:
|
||||||
except Exception:
|
except Exception:
|
||||||
logger.debug("status_callback error in _emit_status", exc_info=True)
|
logger.debug("status_callback error in _emit_status", exc_info=True)
|
||||||
|
|
||||||
|
def _emit_warning(self, message: str) -> None:
    """Emit a user-visible warning through the same status plumbing.

    Unlike debug logs, these warnings are meant for degraded side paths
    such as auxiliary compression or memory flushes where the main turn can
    continue but the user needs to know something important failed.

    The message is printed through ``self._vprint`` (prefixed with
    ``self.log_prefix`` and with ``force=True``) and, when
    ``self.status_callback`` is set, forwarded as
    ``status_callback("warn", message)``.  Both steps are best-effort: an
    exception raised by either one is logged at debug level and never
    propagates to the caller.

    Args:
        message: Warning text to show to the user.
    """
    try:
        self._vprint(f"{self.log_prefix}{message}", force=True)
    except Exception:
        # Stay best-effort, but leave a trace instead of swallowing the
        # error silently -- same treatment as the status_callback path below.
        logger.debug("_vprint error in _emit_warning", exc_info=True)

    if self.status_callback:
        try:
            self.status_callback("warn", message)
        except Exception:
            logger.debug("status_callback error in _emit_warning", exc_info=True)
|
||||||
|
|
||||||
|
def _emit_auxiliary_failure(self, task: str, exc: BaseException) -> None:
    """Surface a compact warning for failed auxiliary work.

    The failure detail comes from ``self._summarize_api_error(exc)``; if that
    helper itself raises, ``str(exc)`` is used instead.  The detail is
    stripped, replaced by the exception class name when nothing printable
    remains, capped at 220 characters (ellipsis included) and handed to
    ``self._emit_warning``.

    Args:
        task: Short human-readable name of the auxiliary task that failed
            (for example ``"memory flush"``).
        exc: The exception that caused the failure.
    """
    try:
        detail = self._summarize_api_error(exc)
    except Exception:
        detail = str(exc)

    # Strip *before* applying the fallback so a whitespace-only summary does
    # not produce a warning with an empty reason.
    detail = str(detail or "").strip() or exc.__class__.__name__

    if len(detail) > 220:
        # 217 characters + "..." keeps the visible detail within 220.
        detail = detail[:217].rstrip() + "..."

    self._emit_warning(f"⚠ Auxiliary {task} failed: {detail}")
|
||||||
|
|
||||||
def _current_main_runtime(self) -> Dict[str, str]:
|
def _current_main_runtime(self) -> Dict[str, str]:
|
||||||
"""Return the live main runtime for session-scoped auxiliary routing."""
|
"""Return the live main runtime for session-scoped auxiliary routing."""
|
||||||
return {
|
return {
|
||||||
|
|
@ -3081,7 +3109,8 @@ class AIAgent:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.debug("Background memory/skill review failed: %s", e)
|
logger.warning("Background memory/skill review failed: %s", e)
|
||||||
|
self._emit_auxiliary_failure("background review", e)
|
||||||
finally:
|
finally:
|
||||||
# Close all resources (httpx client, subprocesses, etc.) so
|
# Close all resources (httpx client, subprocesses, etc.) so
|
||||||
# GC doesn't try to clean them up on a dead asyncio event
|
# GC doesn't try to clean them up on a dead asyncio event
|
||||||
|
|
@ -7653,6 +7682,7 @@ class AIAgent:
|
||||||
_flush_temperature = _fixed_temp
|
_flush_temperature = _fixed_temp
|
||||||
else:
|
else:
|
||||||
_flush_temperature = 0.3
|
_flush_temperature = 0.3
|
||||||
|
aux_error = None
|
||||||
try:
|
try:
|
||||||
response = _call_llm(
|
response = _call_llm(
|
||||||
task="flush_memories",
|
task="flush_memories",
|
||||||
|
|
@ -7662,14 +7692,19 @@ class AIAgent:
|
||||||
max_tokens=5120,
|
max_tokens=5120,
|
||||||
# timeout resolved from auxiliary.flush_memories.timeout config
|
# timeout resolved from auxiliary.flush_memories.timeout config
|
||||||
)
|
)
|
||||||
except RuntimeError:
|
except Exception as e:
|
||||||
|
aux_error = e
|
||||||
_aux_available = False
|
_aux_available = False
|
||||||
response = None
|
response = None
|
||||||
|
|
||||||
if not _aux_available and self.api_mode == "codex_responses":
|
if not _aux_available and self.api_mode == "codex_responses":
|
||||||
# No auxiliary client -- use the Codex Responses path directly
|
# No auxiliary client -- use the Codex Responses path directly
|
||||||
codex_kwargs = self._build_api_kwargs(api_messages)
|
codex_kwargs = self._build_api_kwargs(api_messages)
|
||||||
codex_kwargs["tools"] = self._get_transport().convert_tools([memory_tool_def])
|
_ct_flush = self._get_transport()
|
||||||
|
if _ct_flush is not None:
|
||||||
|
codex_kwargs["tools"] = _ct_flush.convert_tools([memory_tool_def])
|
||||||
|
elif not codex_kwargs.get("tools"):
|
||||||
|
codex_kwargs["tools"] = [memory_tool_def]
|
||||||
if _flush_temperature is not None:
|
if _flush_temperature is not None:
|
||||||
codex_kwargs["temperature"] = _flush_temperature
|
codex_kwargs["temperature"] = _flush_temperature
|
||||||
else:
|
else:
|
||||||
|
|
@ -7701,11 +7736,37 @@ class AIAgent:
|
||||||
**api_kwargs, timeout=_get_task_timeout("flush_memories")
|
**api_kwargs, timeout=_get_task_timeout("flush_memories")
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if aux_error is not None:
|
||||||
|
logger.warning("Auxiliary memory flush failed; used fallback path: %s", aux_error)
|
||||||
|
self._emit_auxiliary_failure("memory flush", aux_error)
|
||||||
|
|
||||||
|
def _openai_tool_calls(resp):
    """Return the tool calls of the first choice of an OpenAI-shaped response.

    Yields an empty list when ``resp`` is ``None``, has no (or empty)
    ``choices``, or when the first choice's message carries no tool calls.
    """
    if resp is None or not hasattr(resp, "choices") or not resp.choices:
        return []

    first_message = getattr(resp.choices[0], "message", None)
    found = getattr(first_message, "tool_calls", None)
    return found if found else []
|
||||||
|
|
||||||
|
def _codex_output_tool_calls(resp):
    """Collect ``function_call`` items from a response's ``output`` list.

    Each matching item is re-wrapped as an OpenAI-style tool-call namespace
    (``id``, ``type="function"``, ``function.name``, ``function.arguments``).
    Missing attributes default to ``None`` / ``""`` / ``"{}"`` respectively;
    a missing or empty ``output`` gives an empty list.
    """
    output_items = getattr(resp, "output", []) or []
    return [
        SimpleNamespace(
            id=getattr(entry, "call_id", None),
            type="function",
            function=SimpleNamespace(
                name=getattr(entry, "name", ""),
                arguments=getattr(entry, "arguments", "{}"),
            ),
        )
        for entry in output_items
        if getattr(entry, "type", None) == "function_call"
    ]
|
||||||
|
|
||||||
# Extract tool calls from the response, handling all API formats
|
# Extract tool calls from the response, handling all API formats
|
||||||
tool_calls = []
|
tool_calls = []
|
||||||
if self.api_mode == "codex_responses" and not _aux_available:
|
if self.api_mode == "codex_responses" and not _aux_available:
|
||||||
_ct_flush = self._get_transport()
|
_ct_flush = self._get_transport()
|
||||||
_cnr_flush = _ct_flush.normalize_response(response)
|
_cnr_flush = _ct_flush.normalize_response(response) if _ct_flush is not None else None
|
||||||
if _cnr_flush and _cnr_flush.tool_calls:
|
if _cnr_flush and _cnr_flush.tool_calls:
|
||||||
tool_calls = [
|
tool_calls = [
|
||||||
SimpleNamespace(
|
SimpleNamespace(
|
||||||
|
|
@ -7713,6 +7774,8 @@ class AIAgent:
|
||||||
function=SimpleNamespace(name=tc.name, arguments=tc.arguments),
|
function=SimpleNamespace(name=tc.name, arguments=tc.arguments),
|
||||||
) for tc in _cnr_flush.tool_calls
|
) for tc in _cnr_flush.tool_calls
|
||||||
]
|
]
|
||||||
|
else:
|
||||||
|
tool_calls = _codex_output_tool_calls(response)
|
||||||
elif self.api_mode == "anthropic_messages" and not _aux_available:
|
elif self.api_mode == "anthropic_messages" and not _aux_available:
|
||||||
_tfn = self._get_transport()
|
_tfn = self._get_transport()
|
||||||
_flush_result = _tfn.normalize_response(response, strip_tool_prefix=self._is_anthropic_oauth)
|
_flush_result = _tfn.normalize_response(response, strip_tool_prefix=self._is_anthropic_oauth)
|
||||||
|
|
@ -7725,15 +7788,16 @@ class AIAgent:
|
||||||
]
|
]
|
||||||
elif self.api_mode in ("chat_completions", "bedrock_converse"):
|
elif self.api_mode in ("chat_completions", "bedrock_converse"):
|
||||||
# chat_completions / bedrock — normalize through transport
|
# chat_completions / bedrock — normalize through transport
|
||||||
_flush_result = self._get_transport().normalize_response(response)
|
_tfn = self._get_transport()
|
||||||
if _flush_result.tool_calls:
|
_flush_result = _tfn.normalize_response(response) if _tfn is not None else None
|
||||||
|
if _flush_result and _flush_result.tool_calls:
|
||||||
tool_calls = _flush_result.tool_calls
|
tool_calls = _flush_result.tool_calls
|
||||||
|
else:
|
||||||
|
tool_calls = _openai_tool_calls(response)
|
||||||
elif _aux_available and hasattr(response, "choices") and response.choices:
|
elif _aux_available and hasattr(response, "choices") and response.choices:
|
||||||
# Auxiliary client returned OpenAI-shaped response while main
|
# Auxiliary client returned OpenAI-shaped response while main
|
||||||
# api_mode is codex/anthropic — extract tool_calls from .choices
|
# api_mode is codex/anthropic — extract tool_calls from .choices
|
||||||
_aux_msg = response.choices[0].message
|
tool_calls = _openai_tool_calls(response)
|
||||||
if hasattr(_aux_msg, "tool_calls") and _aux_msg.tool_calls:
|
|
||||||
tool_calls = _aux_msg.tool_calls
|
|
||||||
|
|
||||||
for tc in tool_calls:
|
for tc in tool_calls:
|
||||||
if tc.function.name == "memory":
|
if tc.function.name == "memory":
|
||||||
|
|
@ -7751,9 +7815,11 @@ class AIAgent:
|
||||||
if not self.quiet_mode:
|
if not self.quiet_mode:
|
||||||
print(f" 🧠 Memory flush: saved to {args.get('target', 'memory')}")
|
print(f" 🧠 Memory flush: saved to {args.get('target', 'memory')}")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.debug("Memory flush tool call failed: %s", e)
|
logger.warning("Memory flush tool call failed: %s", e)
|
||||||
|
self._emit_auxiliary_failure("memory flush tool", e)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.debug("Memory flush API call failed: %s", e)
|
logger.warning("Memory flush API call failed: %s", e)
|
||||||
|
self._emit_auxiliary_failure("memory flush", e)
|
||||||
finally:
|
finally:
|
||||||
# Strip flush artifacts: remove everything from the flush message onward.
|
# Strip flush artifacts: remove everything from the flush message onward.
|
||||||
# Use sentinel marker instead of identity check for robustness.
|
# Use sentinel marker instead of identity check for robustness.
|
||||||
|
|
@ -7799,6 +7865,15 @@ class AIAgent:
|
||||||
# focus_topic — fall back to calling without it.
|
# focus_topic — fall back to calling without it.
|
||||||
compressed = self.context_compressor.compress(messages, current_tokens=approx_tokens)
|
compressed = self.context_compressor.compress(messages, current_tokens=approx_tokens)
|
||||||
|
|
||||||
|
summary_error = getattr(self.context_compressor, "_last_summary_error", None)
|
||||||
|
if summary_error:
|
||||||
|
if getattr(self, "_last_compression_summary_warning", None) != summary_error:
|
||||||
|
self._last_compression_summary_warning = summary_error
|
||||||
|
self._emit_warning(
|
||||||
|
f"⚠ Compression summary failed: {summary_error}. "
|
||||||
|
"Inserted a fallback context marker."
|
||||||
|
)
|
||||||
|
|
||||||
todo_snapshot = self._todo_store.format_for_injection()
|
todo_snapshot = self._todo_store.format_for_injection()
|
||||||
if todo_snapshot:
|
if todo_snapshot:
|
||||||
compressed.append({"role": "user", "content": todo_snapshot})
|
compressed.append({"role": "user", "content": todo_snapshot})
|
||||||
|
|
|
||||||
|
|
@ -188,6 +188,30 @@ class TestFlushMemoriesUsesAuxiliaryClient:
|
||||||
|
|
||||||
agent.client.chat.completions.create.assert_called_once()
|
agent.client.chat.completions.create.assert_called_once()
|
||||||
|
|
||||||
|
def test_auxiliary_provider_failure_surfaces_warning_and_falls_back(self, monkeypatch):
    """A failing auxiliary flush must warn the user and use the main client.

    Exhausted keys and rate limits are not always ``RuntimeError``; the
    auxiliary call is therefore made to raise a plain ``Exception`` here.
    The test expects exactly one call on the main chat-completions client
    and a ``"warn"`` status event mentioning the failed auxiliary flush.
    """
    agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter")
    agent.client = MagicMock()
    agent.client.chat.completions.create.return_value = _chat_response_with_memory_call()

    events = []

    def _record(kind, text=None):
        # Capture every status event as a (kind, text) pair.
        events.append((kind, text))

    agent.status_callback = _record

    conversation = [
        {"role": "user", "content": "Hello"},
        {"role": "assistant", "content": "Hi there"},
        {"role": "user", "content": "Save this"},
    ]
    aux_failure = Exception("opencode-go key exhausted")

    with patch("agent.auxiliary_client.call_llm", side_effect=aux_failure), \
            patch("tools.memory_tool.memory_tool", return_value="Saved."):
        agent.flush_memories(conversation)

    agent.client.chat.completions.create.assert_called_once()
    warn_texts = [text for kind, text in events if kind == "warn"]
    assert any("Auxiliary memory flush failed" in text for text in warn_texts)
|
||||||
|
|
||||||
def test_flush_executes_memory_tool_calls(self, monkeypatch):
|
def test_flush_executes_memory_tool_calls(self, monkeypatch):
|
||||||
"""Verify that memory tool calls from the flush response actually get executed."""
|
"""Verify that memory tool calls from the flush response actually get executed."""
|
||||||
agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter")
|
agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter")
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue