diff --git a/agent/conversation_loop.py b/agent/conversation_loop.py index 2e4b7ed7073..73bed6b0670 100644 --- a/agent/conversation_loop.py +++ b/agent/conversation_loop.py @@ -4196,383 +4196,26 @@ def run_conversation( messages.append({"role": "assistant", "content": final_response}) break - if final_response is None and ( - api_call_count >= agent.max_iterations - or agent.iteration_budget.remaining <= 0 - ): - # Budget exhausted — ask the model for a summary via one extra - # API call with tools stripped. _handle_max_iterations injects a - # user message and makes a single toolless request. - _turn_exit_reason = f"max_iterations_reached({api_call_count}/{agent.max_iterations})" - agent._emit_status( - f"⚠️ Iteration budget exhausted ({api_call_count}/{agent.max_iterations}) " - "— asking model to summarise" - ) - if not agent.quiet_mode: - agent._safe_print( - f"\n⚠️ Iteration budget exhausted ({api_call_count}/{agent.max_iterations}) " - "— requesting summary..." - ) - final_response = agent._handle_max_iterations(messages, api_call_count) - - # If running as a kanban worker, signal the dispatcher that the - # worker could not complete (rather than treating it as a - # protocol violation). The agent loop strips tools before calling - # _handle_max_iterations, so the model cannot call kanban_block - # itself — we must do it on its behalf. - # - # We route through ``_record_task_failure(outcome="timed_out")`` - # rather than ``kanban_block`` so this counts toward the - # ``consecutive_failures`` counter and the dispatcher's - # ``failure_limit`` circuit breaker (#29747 gap 2). Without this, - # a task whose worker keeps exhausting its budget would block - # silently each run, get auto-promoted by the operator (or never - # surface), and re-block in an endless loop with no signal. - _kanban_task = os.environ.get("HERMES_KANBAN_TASK") - if _kanban_task: - try: - from hermes_cli import kanban_db as _kb - _conn = _kb.connect() - try: - _kb._record_task_failure( - _conn, - _kanban_task, - error=( - f"Iteration budget exhausted " - f"({api_call_count}/{agent.max_iterations}) — " - "task could not complete within the allowed " - "iterations" - ), - outcome="timed_out", - release_claim=True, - end_run=True, - event_payload_extra={ - "budget_used": api_call_count, - "budget_max": agent.max_iterations, - }, - ) - logger.info( - "recorded budget-exhausted failure for task %s (%d/%d)", - _kanban_task, api_call_count, agent.max_iterations, - ) - finally: - try: - _conn.close() - except Exception: - pass - except Exception: - logger.warning( - "Failed to record budget-exhausted failure for task %s", - _kanban_task, - exc_info=True, - ) - - # Determine if conversation completed successfully - completed = ( - final_response is not None - and api_call_count < agent.max_iterations - and not failed - ) - - # Save trajectory if enabled. ``user_message`` may be a multimodal - # list of parts; the trajectory format wants a plain string. - agent._save_trajectory(messages, _summarize_user_message_for_log(user_message), completed) - - # Clean up VM and browser for this task after conversation completes - agent._cleanup_task_resources(effective_task_id) - - # Persist session to both JSON log and SQLite only after private retry - # scaffolding has been removed. Otherwise a later user "continue" turn - # can replay assistant("(empty)") / recovery nudges and fall into the - # same empty-response loop again. - agent._drop_trailing_empty_response_scaffolding(messages) - agent._persist_session(messages, conversation_history) - - # ── Turn-exit diagnostic log ───────────────────────────────────── - # Always logged at INFO so agent.log captures WHY every turn ended. - # When the last message is a tool result (agent was mid-work), log - # at WARNING — this is the "just stops" scenario users report. - _last_msg_role = messages[-1].get("role") if messages else None - _last_tool_name = None - if _last_msg_role == "tool": - # Walk back to find the assistant message with the tool call - for _m in reversed(messages): - if _m.get("role") == "assistant" and _m.get("tool_calls"): - _tcs = _m["tool_calls"] - if _tcs and isinstance(_tcs[0], dict): - _last_tool_name = _tcs[-1].get("function", {}).get("name") - break - - _turn_tool_count = sum( - 1 for m in messages - if isinstance(m, dict) and m.get("role") == "assistant" and m.get("tool_calls") - ) - _resp_len = len(final_response) if final_response else 0 - _budget_used = agent.iteration_budget.used if agent.iteration_budget else 0 - _budget_max = agent.iteration_budget.max_total if agent.iteration_budget else 0 - - _diag_msg = ( - "Turn ended: reason=%s model=%s api_calls=%d/%d budget=%d/%d " - "tool_turns=%d last_msg_role=%s response_len=%d session=%s" - ) - _diag_args = ( - _turn_exit_reason, agent.model, api_call_count, agent.max_iterations, - _budget_used, _budget_max, - _turn_tool_count, _last_msg_role, _resp_len, - agent.session_id or "none", - ) - - if _last_msg_role == "tool" and not interrupted: - # Agent was mid-work — this is the "just stops" case. - logger.warning( - "Turn ended with pending tool result (agent may appear stuck). " - + _diag_msg + " last_tool=%s", - *_diag_args, _last_tool_name, - ) - else: - logger.info(_diag_msg, *_diag_args) - - # File-mutation verifier footer. - # If one or more ``write_file`` / ``patch`` calls failed during this - # turn and were never superseded by a successful write to the same - # path, append an advisory footer to the assistant response. This - # catches the specific case — reported by Ben Eng (#15524-adjacent) - # — where a model issues a batch of parallel patches, half of them - # fail with "Could not find old_string", and the model summarises - # the turn claiming every file was edited. The user then has to - # manually run ``git status`` to catch the lie. With this footer - # the truth is surfaced on every turn, so over-claiming is - # structurally impossible past the model. - # - # Gate: only applied when a real text response exists for this - # turn and the user didn't interrupt. Empty/interrupted turns - # already have other surface text that shouldn't be augmented. - if final_response and not interrupted: - try: - _failed = getattr(agent, "_turn_failed_file_mutations", None) or {} - if _failed and agent._file_mutation_verifier_enabled(): - footer = agent._format_file_mutation_failure_footer(_failed) - if footer: - final_response = final_response.rstrip() + "\n\n" + footer - except Exception as _ver_err: - logger.debug("file-mutation verifier footer failed: %s", _ver_err) - - # Turn-completion explainer. - # When a turn ends abnormally after substantive work — empty content - # after retries, a partial/truncated stream, a still-pending tool - # result, or an iteration/budget limit — the user otherwise gets a - # blank or fragmentary response box with no consolidated reason why - # the agent stopped (#34452). Surface a single user-visible - # explanation derived from ``_turn_exit_reason``, mirroring the - # file-mutation verifier footer pattern above. - # - # Gate carefully so healthy turns stay quiet: - # - ``text_response(...)`` exits never produce an explanation - # (handled inside the formatter), so a terse ``Done.`` is silent. - # - We only ACT when there is no genuinely usable reply this turn: - # an empty response, the "(empty)" terminal sentinel, or a - # suspiciously short partial fragment with no terminating - # punctuation (e.g. "The"). A real short answer keeps its text. - if not interrupted: - try: - if agent._turn_completion_explainer_enabled(): - _stripped = (final_response or "").strip() - _is_empty_terminal = _stripped == "" or _stripped == "(empty)" - # A short fragment that is not a normal text_response exit - # and lacks sentence-ending punctuation is treated as a - # truncated partial (the "The" case from #34452). - _is_partial_fragment = ( - not _is_empty_terminal - and not str(_turn_exit_reason).startswith("text_response") - and len(_stripped) <= 24 - and _stripped[-1:] not in {".", "!", "?", "。", "!", "?", "`", ")"} - ) - if _is_empty_terminal or _is_partial_fragment: - _explanation = agent._format_turn_completion_explanation( - _turn_exit_reason - ) - if _explanation: - if _is_empty_terminal: - # Replace the bare "(empty)"/blank sentinel with - # the actionable explanation. - final_response = _explanation - else: - # Keep the partial fragment, append the reason so - # the user sees both what arrived and why it - # stopped. - final_response = ( - _stripped + "\n\n" + _explanation - ) - except Exception as _exp_err: - logger.debug("turn-completion explainer failed: %s", _exp_err) - - _response_transformed = False - - # Plugin hook: transform_llm_output - # Fired once per turn after the tool-calling loop completes. - # Plugins can transform the LLM's output text before it's returned. - # First hook to return a string wins; None/empty return leaves text unchanged. - if final_response and not interrupted: - try: - from hermes_cli.plugins import invoke_hook as _invoke_hook - _transform_results = _invoke_hook( - "transform_llm_output", - response_text=final_response, - session_id=agent.session_id or "", - model=agent.model, - platform=getattr(agent, "platform", None) or "", - ) - for _hook_result in _transform_results: - if isinstance(_hook_result, str) and _hook_result: - final_response = _hook_result - _response_transformed = True - break # First non-empty string wins - except Exception as exc: - logger.warning("transform_llm_output hook failed: %s", exc) - - # Plugin hook: post_llm_call - # Fired once per turn after the tool-calling loop completes. - # Plugins can use this to persist conversation data (e.g. sync - # to an external memory system). - if final_response and not interrupted: - try: - from hermes_cli.plugins import invoke_hook as _invoke_hook - _invoke_hook( - "post_llm_call", - session_id=agent.session_id, - task_id=effective_task_id, - turn_id=turn_id, - user_message=original_user_message, - assistant_response=final_response, - conversation_history=list(messages), - model=agent.model, - platform=getattr(agent, "platform", None) or "", - ) - except Exception as exc: - logger.warning("post_llm_call hook failed: %s", exc) - - # Extract reasoning from the CURRENT turn only. Walk backwards - # but stop at the user message that started this turn — anything - # earlier is from a prior turn and must not leak into the reasoning - # box (confusing stale display; #17055). Within the current turn - # we still want the *most recent* non-empty reasoning: many - # providers (Claude thinking, DeepSeek v4, Codex Responses) emit - # reasoning on the tool-call step and leave the final-answer step - # with reasoning=None, so picking only the last assistant would - # silently drop legitimate same-turn reasoning. - last_reasoning = None - for msg in reversed(messages): - if msg.get("role") == "user": - break # turn boundary — don't cross into prior turns - if msg.get("role") == "assistant" and msg.get("reasoning"): - last_reasoning = msg["reasoning"] - break - - # Build result with interrupt info if applicable - result = { - "final_response": final_response, - "last_reasoning": last_reasoning, - "messages": messages, - "api_calls": api_call_count, - "completed": completed, - "turn_exit_reason": _turn_exit_reason, - "failed": failed, - "partial": False, # True only when stopped due to invalid tool calls - "interrupted": interrupted, - "response_transformed": _response_transformed, - "response_previewed": getattr(agent, "_response_was_previewed", False), - "model": agent.model, - "provider": agent.provider, - "base_url": agent.base_url, - "input_tokens": agent.session_input_tokens, - "output_tokens": agent.session_output_tokens, - "cache_read_tokens": agent.session_cache_read_tokens, - "cache_write_tokens": agent.session_cache_write_tokens, - "reasoning_tokens": agent.session_reasoning_tokens, - "prompt_tokens": agent.session_prompt_tokens, - "completion_tokens": agent.session_completion_tokens, - "total_tokens": agent.session_total_tokens, - "last_prompt_tokens": getattr(agent.context_compressor, "last_prompt_tokens", 0) or 0, - "estimated_cost_usd": agent.session_estimated_cost_usd, - "cost_status": agent.session_cost_status, - "cost_source": agent.session_cost_source, - "session_id": agent.session_id, - } - if agent._tool_guardrail_halt_decision is not None: - result["guardrail"] = agent._tool_guardrail_halt_decision.to_metadata() - # If a /steer landed after the final assistant turn (no more tool - # batches to drain into), hand it back to the caller so it can be - # delivered as the next user turn instead of being silently lost. - _leftover_steer = agent._drain_pending_steer() - if _leftover_steer: - result["pending_steer"] = _leftover_steer - agent._response_was_previewed = False - - # Include interrupt message if one triggered the interrupt - if interrupted and agent._interrupt_message: - result["interrupt_message"] = agent._interrupt_message - - # Clear interrupt state after handling - agent.clear_interrupt() - - # Clear stream callback so it doesn't leak into future calls - agent._stream_callback = None - - # Check skill trigger NOW — based on how many tool iterations THIS turn used. - _should_review_skills = False - if (agent._skill_nudge_interval > 0 - and agent._iters_since_skill >= agent._skill_nudge_interval - and "skill_manage" in agent.valid_tool_names): - _should_review_skills = True - agent._iters_since_skill = 0 - - # External memory provider: sync the completed turn + queue next prefetch. - agent._sync_external_memory_for_turn( - original_user_message=original_user_message, + # Post-loop turn finalization extracted to agent/turn_finalizer.finalize_turn + # (god-file decomposition Phase 1 step 4). Behavior-neutral: the assembled + # result dict is returned exactly as before. + from agent.turn_finalizer import finalize_turn + return finalize_turn( + agent, final_response=final_response, + api_call_count=api_call_count, interrupted=interrupted, + failed=failed, messages=messages, + conversation_history=conversation_history, + effective_task_id=effective_task_id, + turn_id=turn_id, + user_message=user_message, + original_user_message=original_user_message, + _should_review_memory=_should_review_memory, + _turn_exit_reason=_turn_exit_reason, ) - # Background memory/skill review — runs AFTER the response is delivered - # so it never competes with the user's task for model attention. - if final_response and not interrupted and (_should_review_memory or _should_review_skills): - try: - agent._spawn_background_review( - messages_snapshot=list(messages), - review_memory=_should_review_memory, - review_skills=_should_review_skills, - ) - except Exception: - pass # Background review is best-effort - - # Note: Memory provider on_session_end() + shutdown_all() are NOT - # called here — run_conversation() is called once per user message in - # multi-turn sessions. Shutting down after every turn would kill the - # provider before the second message. Actual session-end cleanup is - # handled by the CLI (atexit / /reset) and gateway (session expiry / - # _reset_session). - - # Plugin hook: on_session_end - # Fired at the very end of every run_conversation call. - # Plugins can use this for cleanup, flushing buffers, etc. - try: - from hermes_cli.plugins import invoke_hook as _invoke_hook - _invoke_hook( - "on_session_end", - session_id=agent.session_id, - task_id=effective_task_id, - turn_id=turn_id, - completed=completed, - interrupted=interrupted, - model=agent.model, - platform=getattr(agent, "platform", None) or "", - ) - except Exception as exc: - logger.warning("on_session_end hook failed: %s", exc) - - return result - __all__ = ["run_conversation"] diff --git a/agent/turn_finalizer.py b/agent/turn_finalizer.py new file mode 100644 index 00000000000..20db3fcef9f --- /dev/null +++ b/agent/turn_finalizer.py @@ -0,0 +1,428 @@ +"""Post-loop turn finalization for ``run_conversation``. + +Extracted from ``agent/conversation_loop.py`` as part of the god-file +decomposition campaign (``~/.hermes/plans/god-file-decomposition.md``, Phase 1 +step 4 — the post-loop ``TurnFinalizer`` seam). ``run_conversation``'s tail +(everything after the main tool-calling ``while`` loop) is lifted here verbatim: +budget-exhaustion summary, trajectory save, session persist, turn diagnostics, +response transforms, result-dict assembly, steer drain, and the memory/skill +review trigger. + +Behavior-neutral: the body is moved unchanged. All ``agent.*`` side effects fire +exactly as before; only the post-loop *locals* are passed in as keyword args, and +the assembled ``result`` dict is returned to ``run_conversation`` which returns it +to the caller. The function is synchronous with a single return — mirroring the +region it replaces (no awaits, no early returns). + +Module ``logger`` is imported lazily inside the body (``from +agent.conversation_loop import logger``) so this module never imports +``agent.conversation_loop`` at import time -> no import cycle, and the log records +keep the exact logger name (``"agent.conversation_loop"``). +""" + +from __future__ import annotations + +import os + +from agent.codex_responses_adapter import _summarize_user_message_for_log + + +def finalize_turn( + agent, + *, + final_response, + api_call_count, + interrupted, + failed, + messages, + conversation_history, + effective_task_id, + turn_id, + user_message, + original_user_message, + _should_review_memory, + _turn_exit_reason, +): + """Run the post-loop finalization and return the turn ``result`` dict. + + Lifted verbatim from ``run_conversation`` (the region after the main agent + loop). See module docstring. + """ + from agent.conversation_loop import logger + + if final_response is None and ( + api_call_count >= agent.max_iterations + or agent.iteration_budget.remaining <= 0 + ): + # Budget exhausted — ask the model for a summary via one extra + # API call with tools stripped. _handle_max_iterations injects a + # user message and makes a single toolless request. + _turn_exit_reason = f"max_iterations_reached({api_call_count}/{agent.max_iterations})" + agent._emit_status( + f"⚠️ Iteration budget exhausted ({api_call_count}/{agent.max_iterations}) " + "— asking model to summarise" + ) + if not agent.quiet_mode: + agent._safe_print( + f"\n⚠️ Iteration budget exhausted ({api_call_count}/{agent.max_iterations}) " + "— requesting summary..." + ) + final_response = agent._handle_max_iterations(messages, api_call_count) + + # If running as a kanban worker, signal the dispatcher that the + # worker could not complete (rather than treating it as a + # protocol violation). The agent loop strips tools before calling + # _handle_max_iterations, so the model cannot call kanban_block + # itself — we must do it on its behalf. + # + # We route through ``_record_task_failure(outcome="timed_out")`` + # rather than ``kanban_block`` so this counts toward the + # ``consecutive_failures`` counter and the dispatcher's + # ``failure_limit`` circuit breaker (#29747 gap 2). Without this, + # a task whose worker keeps exhausting its budget would block + # silently each run, get auto-promoted by the operator (or never + # surface), and re-block in an endless loop with no signal. + _kanban_task = os.environ.get("HERMES_KANBAN_TASK") + if _kanban_task: + try: + from hermes_cli import kanban_db as _kb + _conn = _kb.connect() + try: + _kb._record_task_failure( + _conn, + _kanban_task, + error=( + f"Iteration budget exhausted " + f"({api_call_count}/{agent.max_iterations}) — " + "task could not complete within the allowed " + "iterations" + ), + outcome="timed_out", + release_claim=True, + end_run=True, + event_payload_extra={ + "budget_used": api_call_count, + "budget_max": agent.max_iterations, + }, + ) + logger.info( + "recorded budget-exhausted failure for task %s (%d/%d)", + _kanban_task, api_call_count, agent.max_iterations, + ) + finally: + try: + _conn.close() + except Exception: + pass + except Exception: + logger.warning( + "Failed to record budget-exhausted failure for task %s", + _kanban_task, + exc_info=True, + ) + + # Determine if conversation completed successfully + completed = ( + final_response is not None + and api_call_count < agent.max_iterations + and not failed + ) + + # Save trajectory if enabled. ``user_message`` may be a multimodal + # list of parts; the trajectory format wants a plain string. + agent._save_trajectory(messages, _summarize_user_message_for_log(user_message), completed) + + # Clean up VM and browser for this task after conversation completes + agent._cleanup_task_resources(effective_task_id) + + # Persist session to both JSON log and SQLite only after private retry + # scaffolding has been removed. Otherwise a later user "continue" turn + # can replay assistant("(empty)") / recovery nudges and fall into the + # same empty-response loop again. + agent._drop_trailing_empty_response_scaffolding(messages) + agent._persist_session(messages, conversation_history) + + # ── Turn-exit diagnostic log ───────────────────────────────────── + # Always logged at INFO so agent.log captures WHY every turn ended. + # When the last message is a tool result (agent was mid-work), log + # at WARNING — this is the "just stops" scenario users report. + _last_msg_role = messages[-1].get("role") if messages else None + _last_tool_name = None + if _last_msg_role == "tool": + # Walk back to find the assistant message with the tool call + for _m in reversed(messages): + if _m.get("role") == "assistant" and _m.get("tool_calls"): + _tcs = _m["tool_calls"] + if _tcs and isinstance(_tcs[0], dict): + _last_tool_name = _tcs[-1].get("function", {}).get("name") + break + + _turn_tool_count = sum( + 1 for m in messages + if isinstance(m, dict) and m.get("role") == "assistant" and m.get("tool_calls") + ) + _resp_len = len(final_response) if final_response else 0 + _budget_used = agent.iteration_budget.used if agent.iteration_budget else 0 + _budget_max = agent.iteration_budget.max_total if agent.iteration_budget else 0 + + _diag_msg = ( + "Turn ended: reason=%s model=%s api_calls=%d/%d budget=%d/%d " + "tool_turns=%d last_msg_role=%s response_len=%d session=%s" + ) + _diag_args = ( + _turn_exit_reason, agent.model, api_call_count, agent.max_iterations, + _budget_used, _budget_max, + _turn_tool_count, _last_msg_role, _resp_len, + agent.session_id or "none", + ) + + if _last_msg_role == "tool" and not interrupted: + # Agent was mid-work — this is the "just stops" case. + logger.warning( + "Turn ended with pending tool result (agent may appear stuck). " + + _diag_msg + " last_tool=%s", + *_diag_args, _last_tool_name, + ) + else: + logger.info(_diag_msg, *_diag_args) + + # File-mutation verifier footer. + # If one or more ``write_file`` / ``patch`` calls failed during this + # turn and were never superseded by a successful write to the same + # path, append an advisory footer to the assistant response. This + # catches the specific case — reported by Ben Eng (#15524-adjacent) + # — where a model issues a batch of parallel patches, half of them + # fail with "Could not find old_string", and the model summarises + # the turn claiming every file was edited. The user then has to + # manually run ``git status`` to catch the lie. With this footer + # the truth is surfaced on every turn, so over-claiming is + # structurally impossible past the model. + # + # Gate: only applied when a real text response exists for this + # turn and the user didn't interrupt. Empty/interrupted turns + # already have other surface text that shouldn't be augmented. + if final_response and not interrupted: + try: + _failed = getattr(agent, "_turn_failed_file_mutations", None) or {} + if _failed and agent._file_mutation_verifier_enabled(): + footer = agent._format_file_mutation_failure_footer(_failed) + if footer: + final_response = final_response.rstrip() + "\n\n" + footer + except Exception as _ver_err: + logger.debug("file-mutation verifier footer failed: %s", _ver_err) + + # Turn-completion explainer. + # When a turn ends abnormally after substantive work — empty content + # after retries, a partial/truncated stream, a still-pending tool + # result, or an iteration/budget limit — the user otherwise gets a + # blank or fragmentary response box with no consolidated reason why + # the agent stopped (#34452). Surface a single user-visible + # explanation derived from ``_turn_exit_reason``, mirroring the + # file-mutation verifier footer pattern above. + # + # Gate carefully so healthy turns stay quiet: + # - ``text_response(...)`` exits never produce an explanation + # (handled inside the formatter), so a terse ``Done.`` is silent. + # - We only ACT when there is no genuinely usable reply this turn: + # an empty response, the "(empty)" terminal sentinel, or a + # suspiciously short partial fragment with no terminating + # punctuation (e.g. "The"). A real short answer keeps its text. + if not interrupted: + try: + if agent._turn_completion_explainer_enabled(): + _stripped = (final_response or "").strip() + _is_empty_terminal = _stripped == "" or _stripped == "(empty)" + # A short fragment that is not a normal text_response exit + # and lacks sentence-ending punctuation is treated as a + # truncated partial (the "The" case from #34452). + _is_partial_fragment = ( + not _is_empty_terminal + and not str(_turn_exit_reason).startswith("text_response") + and len(_stripped) <= 24 + and _stripped[-1:] not in {".", "!", "?", "。", "!", "?", "`", ")"} + ) + if _is_empty_terminal or _is_partial_fragment: + _explanation = agent._format_turn_completion_explanation( + _turn_exit_reason + ) + if _explanation: + if _is_empty_terminal: + # Replace the bare "(empty)"/blank sentinel with + # the actionable explanation. + final_response = _explanation + else: + # Keep the partial fragment, append the reason so + # the user sees both what arrived and why it + # stopped. + final_response = ( + _stripped + "\n\n" + _explanation + ) + except Exception as _exp_err: + logger.debug("turn-completion explainer failed: %s", _exp_err) + + _response_transformed = False + + # Plugin hook: transform_llm_output + # Fired once per turn after the tool-calling loop completes. + # Plugins can transform the LLM's output text before it's returned. + # First hook to return a string wins; None/empty return leaves text unchanged. + if final_response and not interrupted: + try: + from hermes_cli.plugins import invoke_hook as _invoke_hook + _transform_results = _invoke_hook( + "transform_llm_output", + response_text=final_response, + session_id=agent.session_id or "", + model=agent.model, + platform=getattr(agent, "platform", None) or "", + ) + for _hook_result in _transform_results: + if isinstance(_hook_result, str) and _hook_result: + final_response = _hook_result + _response_transformed = True + break # First non-empty string wins + except Exception as exc: + logger.warning("transform_llm_output hook failed: %s", exc) + + # Plugin hook: post_llm_call + # Fired once per turn after the tool-calling loop completes. + # Plugins can use this to persist conversation data (e.g. sync + # to an external memory system). + if final_response and not interrupted: + try: + from hermes_cli.plugins import invoke_hook as _invoke_hook + _invoke_hook( + "post_llm_call", + session_id=agent.session_id, + task_id=effective_task_id, + turn_id=turn_id, + user_message=original_user_message, + assistant_response=final_response, + conversation_history=list(messages), + model=agent.model, + platform=getattr(agent, "platform", None) or "", + ) + except Exception as exc: + logger.warning("post_llm_call hook failed: %s", exc) + + # Extract reasoning from the CURRENT turn only. Walk backwards + # but stop at the user message that started this turn — anything + # earlier is from a prior turn and must not leak into the reasoning + # box (confusing stale display; #17055). Within the current turn + # we still want the *most recent* non-empty reasoning: many + # providers (Claude thinking, DeepSeek v4, Codex Responses) emit + # reasoning on the tool-call step and leave the final-answer step + # with reasoning=None, so picking only the last assistant would + # silently drop legitimate same-turn reasoning. + last_reasoning = None + for msg in reversed(messages): + if msg.get("role") == "user": + break # turn boundary — don't cross into prior turns + if msg.get("role") == "assistant" and msg.get("reasoning"): + last_reasoning = msg["reasoning"] + break + + # Build result with interrupt info if applicable + result = { + "final_response": final_response, + "last_reasoning": last_reasoning, + "messages": messages, + "api_calls": api_call_count, + "completed": completed, + "turn_exit_reason": _turn_exit_reason, + "failed": failed, + "partial": False, # True only when stopped due to invalid tool calls + "interrupted": interrupted, + "response_transformed": _response_transformed, + "response_previewed": getattr(agent, "_response_was_previewed", False), + "model": agent.model, + "provider": agent.provider, + "base_url": agent.base_url, + "input_tokens": agent.session_input_tokens, + "output_tokens": agent.session_output_tokens, + "cache_read_tokens": agent.session_cache_read_tokens, + "cache_write_tokens": agent.session_cache_write_tokens, + "reasoning_tokens": agent.session_reasoning_tokens, + "prompt_tokens": agent.session_prompt_tokens, + "completion_tokens": agent.session_completion_tokens, + "total_tokens": agent.session_total_tokens, + "last_prompt_tokens": getattr(agent.context_compressor, "last_prompt_tokens", 0) or 0, + "estimated_cost_usd": agent.session_estimated_cost_usd, + "cost_status": agent.session_cost_status, + "cost_source": agent.session_cost_source, + "session_id": agent.session_id, + } + if agent._tool_guardrail_halt_decision is not None: + result["guardrail"] = agent._tool_guardrail_halt_decision.to_metadata() + # If a /steer landed after the final assistant turn (no more tool + # batches to drain into), hand it back to the caller so it can be + # delivered as the next user turn instead of being silently lost. + _leftover_steer = agent._drain_pending_steer() + if _leftover_steer: + result["pending_steer"] = _leftover_steer + agent._response_was_previewed = False + + # Include interrupt message if one triggered the interrupt + if interrupted and agent._interrupt_message: + result["interrupt_message"] = agent._interrupt_message + + # Clear interrupt state after handling + agent.clear_interrupt() + + # Clear stream callback so it doesn't leak into future calls + agent._stream_callback = None + + # Check skill trigger NOW — based on how many tool iterations THIS turn used. + _should_review_skills = False + if (agent._skill_nudge_interval > 0 + and agent._iters_since_skill >= agent._skill_nudge_interval + and "skill_manage" in agent.valid_tool_names): + _should_review_skills = True + agent._iters_since_skill = 0 + + # External memory provider: sync the completed turn + queue next prefetch. + agent._sync_external_memory_for_turn( + original_user_message=original_user_message, + final_response=final_response, + interrupted=interrupted, + messages=messages, + ) + + # Background memory/skill review — runs AFTER the response is delivered + # so it never competes with the user's task for model attention. + if final_response and not interrupted and (_should_review_memory or _should_review_skills): + try: + agent._spawn_background_review( + messages_snapshot=list(messages), + review_memory=_should_review_memory, + review_skills=_should_review_skills, + ) + except Exception: + pass # Background review is best-effort + + # Note: Memory provider on_session_end() + shutdown_all() are NOT + # called here — run_conversation() is called once per user message in + # multi-turn sessions. Shutting down after every turn would kill the + # provider before the second message. Actual session-end cleanup is + # handled by the CLI (atexit / /reset) and gateway (session expiry / + # _reset_session). + + # Plugin hook: on_session_end + # Fired at the very end of every run_conversation call. + # Plugins can use this for cleanup, flushing buffers, etc. + try: + from hermes_cli.plugins import invoke_hook as _invoke_hook + _invoke_hook( + "on_session_end", + session_id=agent.session_id, + task_id=effective_task_id, + turn_id=turn_id, + completed=completed, + interrupted=interrupted, + model=agent.model, + platform=getattr(agent, "platform", None) or "", + ) + except Exception as exc: + logger.warning("on_session_end hook failed: %s", exc) + + return result