fix: graceful return on max retries instead of crashing thread

run_conversation raised the raw exception after exhausting retries,
which crashed the background thread in cli.py (unhandled exception
in Thread). It now returns a proper error result dict with failed=True
and persists the session, matching the pattern used by other error
paths (invalid responses, empty content, etc.).

Also wraps the run_conversation call inside cli.py's run_agent thread
function in try/except as a safety net against any future unhandled
exceptions from run_conversation.

Made-with: Cursor
This commit is contained in:
Teknium 2026-03-25 19:00:33 -07:00
parent 156b50358b
commit 08d3be0412
No known key found for this signature in database
3 changed files with 35 additions and 12 deletions

12
cli.py
View file

@ -5508,6 +5508,7 @@ class HermesCLI:
def run_agent(): def run_agent():
nonlocal result nonlocal result
agent_message = _voice_prefix + message if _voice_prefix else message agent_message = _voice_prefix + message if _voice_prefix else message
try:
result = self.agent.run_conversation( result = self.agent.run_conversation(
user_message=agent_message, user_message=agent_message,
conversation_history=self.conversation_history[:-1], # Exclude the message we just added conversation_history=self.conversation_history[:-1], # Exclude the message we just added
@ -5515,6 +5516,17 @@ class HermesCLI:
task_id=self.session_id, task_id=self.session_id,
persist_user_message=message if _voice_prefix else None, persist_user_message=message if _voice_prefix else None,
) )
except Exception as exc:
logging.error("run_conversation raised: %s", exc, exc_info=True)
_summary = getattr(self.agent, '_summarize_api_error', lambda e: str(e)[:300])(exc)
result = {
"final_response": f"Error: {_summary}",
"messages": [],
"api_calls": 0,
"completed": False,
"failed": True,
"error": _summary,
}
# Start agent in background thread # Start agent in background thread
agent_thread = threading.Thread(target=run_agent) agent_thread = threading.Thread(target=run_agent)

View file

@ -6697,7 +6697,15 @@ class AIAgent:
self._dump_api_request_debug( self._dump_api_request_debug(
api_kwargs, reason="max_retries_exhausted", error=api_error, api_kwargs, reason="max_retries_exhausted", error=api_error,
) )
raise api_error self._persist_session(messages, conversation_history)
return {
"final_response": f"API call failed after {max_retries} retries: {_final_summary}",
"messages": messages,
"api_calls": api_call_count,
"completed": False,
"failed": True,
"error": _final_summary,
}
wait_time = min(2 ** retry_count, 60) # Exponential backoff: 2s, 4s, 8s, 16s, 32s, 60s, 60s wait_time = min(2 ** retry_count, 60) # Exponential backoff: 2s, 4s, 8s, 16s, 32s, 60s, 60s
logger.warning( logger.warning(

View file

@ -1425,8 +1425,8 @@ class TestRetryExhaustion:
assert "error" in result assert "error" in result
assert "Invalid API response" in result["error"] assert "Invalid API response" in result["error"]
def test_api_error_raises_after_retries(self, agent): def test_api_error_returns_gracefully_after_retries(self, agent):
"""Exhausted retries on API errors must raise, not fall through.""" """Exhausted retries on API errors must return error result, not crash."""
self._setup_agent(agent) self._setup_agent(agent)
agent.client.chat.completions.create.side_effect = RuntimeError("rate limited") agent.client.chat.completions.create.side_effect = RuntimeError("rate limited")
with ( with (
@ -1435,8 +1435,11 @@ class TestRetryExhaustion:
patch.object(agent, "_cleanup_task_resources"), patch.object(agent, "_cleanup_task_resources"),
patch("run_agent.time", self._make_fast_time_mock()), patch("run_agent.time", self._make_fast_time_mock()),
): ):
with pytest.raises(RuntimeError, match="rate limited"): result = agent.run_conversation("hello")
agent.run_conversation("hello") assert result.get("completed") is False
assert result.get("failed") is True
assert "error" in result
assert "rate limited" in result["error"]
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------