fix: graceful return on max retries instead of crashing thread

run_conversation re-raised the raw API exception after exhausting its retries, which crashed the background thread in cli.py (unhandled exception in Thread). It now persists the session and returns a proper error result dict with completed=False and failed=True, matching the pattern used by the other error paths (invalid responses, empty content, etc.). This commit also wraps cli.py's run_agent thread function in try/except as a safety net against any future unhandled exceptions from run_conversation. Made-with: Cursor
2026-04-25 00:51:20 +00:00 · 2026-03-25 19:00:33 -07:00 · 2026-03-25 19:00:33 -07:00 · 08d3be0412
commit 08d3be0412
parent 156b50358b
3 changed files with 35 additions and 12 deletions
--- a/cli.py
+++ b/cli.py
@ -5508,6 +5508,7 @@ class HermesCLI:
            def run_agent():
                nonlocal result
                agent_message = _voice_prefix + message if _voice_prefix else message
                try:
                    result = self.agent.run_conversation(
                        user_message=agent_message,
                        conversation_history=self.conversation_history[:-1],  # Exclude the message we just added
@ -5515,6 +5516,17 @@ class HermesCLI:
                        task_id=self.session_id,
                        persist_user_message=message if _voice_prefix else None,
                    )
                except Exception as exc:
                    logging.error("run_conversation raised: %s", exc, exc_info=True)
                    _summary = getattr(self.agent, '_summarize_api_error', lambda e: str(e)[:300])(exc)
                    result = {
                        "final_response": f"Error: {_summary}",
                        "messages": [],
                        "api_calls": 0,
                        "completed": False,
                        "failed": True,
                        "error": _summary,
                    }
            # Start agent in background thread
            agent_thread = threading.Thread(target=run_agent)
--- a/run_agent.py
+++ b/run_agent.py
@ -6697,7 +6697,15 @@ class AIAgent:
                        self._dump_api_request_debug(
                            api_kwargs, reason="max_retries_exhausted", error=api_error,
                        )
-                        raise api_error
+                        self._persist_session(messages, conversation_history)
                        return {
                            "final_response": f"API call failed after {max_retries} retries: {_final_summary}",
                            "messages": messages,
                            "api_calls": api_call_count,
                            "completed": False,
                            "failed": True,
                            "error": _final_summary,
                        }
                    wait_time = min(2 ** retry_count, 60)  # Exponential backoff: 2s, 4s, 8s, 16s, 32s, 60s, 60s
                    logger.warning(
--- a/tests/test_run_agent.py
+++ b/tests/test_run_agent.py
@ -1425,8 +1425,8 @@ class TestRetryExhaustion:
        assert "error" in result
        assert "Invalid API response" in result["error"]
-    def test_api_error_raises_after_retries(self, agent):
+    def test_api_error_returns_gracefully_after_retries(self, agent):
-        """Exhausted retries on API errors must raise, not fall through."""
+        """Exhausted retries on API errors must return error result, not crash."""
        self._setup_agent(agent)
        agent.client.chat.completions.create.side_effect = RuntimeError("rate limited")
        with (
@ -1435,8 +1435,11 @@ class TestRetryExhaustion:
            patch.object(agent, "_cleanup_task_resources"),
            patch("run_agent.time", self._make_fast_time_mock()),
        ):
-            with pytest.raises(RuntimeError, match="rate limited"):
+            result = agent.run_conversation("hello")
-                agent.run_conversation("hello")
+        assert result.get("completed") is False
        assert result.get("failed") is True
        assert "error" in result
        assert "rate limited" in result["error"]
 # ---------------------------------------------------------------------------