mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-09 08:21:50 +00:00
Lift the post-loop finalization tail out of run_conversation into agent/turn_finalizer.py:finalize_turn. Behavior-neutral; run_conversation 4204 -> 3846 LOC, conversation_loop.py 4578 -> 4220. The region (everything after the main tool-calling while loop): budget-exhaustion summary, trajectory save, session persist, turn diagnostics, response transforms, result-dict assembly, steer drain, and the memory/skill review trigger. Lifted verbatim into a synchronous single-return free function; the 12 post-loop locals it reads are passed as keyword args and the assembled result dict is returned to run_conversation (which returns it to the caller). All agent.* side effects fire exactly as before. Imports: os + _summarize_user_message_for_log at module top; logger is imported lazily from agent.conversation_loop (this preserves the 'agent.conversation_loop' logger name and avoids an import cycle). Validation: 1609/1609 tests/run_agent/ pass; live PTY agent turn PASS.
428 lines
19 KiB
Python
428 lines
19 KiB
Python
"""Post-loop turn finalization for ``run_conversation``.

Extracted from ``agent/conversation_loop.py`` as part of the god-file
decomposition campaign (``~/.hermes/plans/god-file-decomposition.md``, Phase 1
step 4 — the post-loop ``TurnFinalizer`` seam). ``run_conversation``'s tail
(everything after the main tool-calling ``while`` loop) is lifted here verbatim:
budget-exhaustion summary, trajectory save, session persist, turn diagnostics,
response transforms, result-dict assembly, steer drain, and the memory/skill
review trigger.

Behavior-neutral: the body is moved unchanged. All ``agent.*`` side effects fire
exactly as before; only the post-loop *locals* are passed in as keyword args, and
the assembled ``result`` dict is returned to ``run_conversation`` which returns it
to the caller. The function is synchronous with a single return — mirroring the
region it replaces (no awaits, no early returns).

Module ``logger`` is imported lazily inside the body (``from
agent.conversation_loop import logger``) so this module never imports
``agent.conversation_loop`` at import time -> no import cycle, and the log records
keep the exact logger name (``"agent.conversation_loop"``).
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
import os
|
||
|
||
from agent.codex_responses_adapter import _summarize_user_message_for_log
|
||
|
||
|
||
def _last_tool_call_name(messages):
    """Return the name of the last tool call on the newest assistant tool-call message.

    Walks ``messages`` backwards to the first assistant message that carries
    ``tool_calls`` and reads the function name of its final entry. Returns
    ``None`` when no such message exists or the entry is not shaped as
    ``{"function": {"name": ...}}``. Never raises on malformed entries: this
    feeds a diagnostic log line and must not abort turn finalization.
    """
    for _m in reversed(messages):
        if not isinstance(_m, dict):
            continue
        if _m.get("role") == "assistant" and _m.get("tool_calls"):
            _tcs = _m["tool_calls"]
            # Validate the element that is actually read (the last one).
            _last_call = _tcs[-1] if _tcs else None
            if isinstance(_last_call, dict):
                _fn = _last_call.get("function")
                if isinstance(_fn, dict):
                    return _fn.get("name")
            return None
    return None


def _current_turn_reasoning(messages):
    """Return the most recent non-empty assistant ``reasoning`` of the current turn.

    Walks backwards and stops at the first ``user`` message, which marks the
    start of the current turn, so reasoning from a prior turn never leaks into
    the result (#17055). Within the turn the *most recent* non-empty reasoning
    wins: many providers (Claude thinking, DeepSeek v4, Codex Responses) emit
    reasoning on the tool-call step and leave the final-answer step with
    ``reasoning=None``, so looking only at the last assistant message would
    silently drop legitimate same-turn reasoning. Returns ``None`` if there is
    none. Non-dict entries are skipped.
    """
    for _msg in reversed(messages):
        if not isinstance(_msg, dict):
            continue
        _role = _msg.get("role")
        if _role == "user":
            break  # turn boundary — don't cross into prior turns
        if _role == "assistant" and _msg.get("reasoning"):
            return _msg["reasoning"]
    return None


def finalize_turn(
    agent,
    *,
    final_response,
    api_call_count,
    interrupted,
    failed,
    messages,
    conversation_history,
    effective_task_id,
    turn_id,
    user_message,
    original_user_message,
    _should_review_memory,
    _turn_exit_reason,
):
    """Run the post-loop finalization and return the turn ``result`` dict.

    This is the tail of ``run_conversation`` (everything after the main agent
    loop). In order, it: requests a budget-exhaustion summary if needed, saves
    the trajectory, cleans up task resources, persists the session, logs a
    turn-exit diagnostic, applies the response footers/transforms and plugin
    hooks, assembles the result dict, drains any pending steer, clears
    interrupt/stream state, syncs external memory, optionally spawns the
    background memory/skill review, and fires the ``on_session_end`` hook.

    Args:
        agent: The agent whose turn is being finalized. All side effects go
            through its methods and attributes.
        final_response: Response text produced by the loop, or ``None``. When
            ``None`` and the iteration budget is exhausted, a summary is
            requested via ``agent._handle_max_iterations``.
        api_call_count: Number of API calls made this turn; compared against
            ``agent.max_iterations``.
        interrupted: Truthy when the turn was interrupted. Suppresses the
            footers, the transform/post hooks and the background review.
        failed: Truthy when the turn failed; forces ``completed`` to False.
        messages: The turn's message list. It is passed to
            ``agent._drop_trailing_empty_response_scaffolding`` before being
            persisted, and the same list object is returned under
            ``"messages"``.
        conversation_history: Passed through to ``agent._persist_session``.
        effective_task_id: Task id used for resource cleanup and plugin hooks.
        turn_id: Turn id forwarded to plugin hooks.
        user_message: The user message for this turn (may be a multimodal list
            of parts); summarized to a string for the trajectory.
        original_user_message: Forwarded to the ``post_llm_call`` hook and the
            external memory sync.
        _should_review_memory: Whether a background memory review was
            requested for this turn.
        _turn_exit_reason: Why the loop ended; overwritten with a
            ``max_iterations_reached(...)`` reason on budget exhaustion.

    Returns:
        The turn result dict (response, reasoning, messages, counters, token
        and cost accounting, plus optional ``guardrail``, ``pending_steer`` and
        ``interrupt_message`` keys).
    """
    # Lazy import: avoids an import cycle with agent.conversation_loop and
    # keeps log records under that module's logger.
    from agent.conversation_loop import logger

    if final_response is None and (
        api_call_count >= agent.max_iterations
        or agent.iteration_budget.remaining <= 0
    ):
        # Budget exhausted — ask the model for a summary via one extra
        # API call with tools stripped. _handle_max_iterations injects a
        # user message and makes a single toolless request.
        _turn_exit_reason = f"max_iterations_reached({api_call_count}/{agent.max_iterations})"
        agent._emit_status(
            f"⚠️ Iteration budget exhausted ({api_call_count}/{agent.max_iterations}) "
            "— asking model to summarise"
        )
        if not agent.quiet_mode:
            agent._safe_print(
                f"\n⚠️ Iteration budget exhausted ({api_call_count}/{agent.max_iterations}) "
                "— requesting summary..."
            )
        final_response = agent._handle_max_iterations(messages, api_call_count)

        # If running as a kanban worker, signal the dispatcher that the
        # worker could not complete (rather than treating it as a
        # protocol violation). The agent loop strips tools before calling
        # _handle_max_iterations, so the model cannot call kanban_block
        # itself — we must do it on its behalf.
        #
        # We route through ``_record_task_failure(outcome="timed_out")``
        # rather than ``kanban_block`` so this counts toward the
        # ``consecutive_failures`` counter and the dispatcher's
        # ``failure_limit`` circuit breaker (#29747 gap 2). Without this,
        # a task whose worker keeps exhausting its budget would block
        # silently each run, get auto-promoted by the operator (or never
        # surface), and re-block in an endless loop with no signal.
        _kanban_task = os.environ.get("HERMES_KANBAN_TASK")
        if _kanban_task:
            try:
                from hermes_cli import kanban_db as _kb

                _conn = _kb.connect()
                try:
                    _kb._record_task_failure(
                        _conn,
                        _kanban_task,
                        error=(
                            "Iteration budget exhausted "
                            f"({api_call_count}/{agent.max_iterations}) — "
                            "task could not complete within the allowed "
                            "iterations"
                        ),
                        outcome="timed_out",
                        release_claim=True,
                        end_run=True,
                        event_payload_extra={
                            "budget_used": api_call_count,
                            "budget_max": agent.max_iterations,
                        },
                    )
                    logger.info(
                        "recorded budget-exhausted failure for task %s (%d/%d)",
                        _kanban_task, api_call_count, agent.max_iterations,
                    )
                finally:
                    try:
                        _conn.close()
                    except Exception:
                        # Closing is best-effort, but leave a trace.
                        logger.debug(
                            "closing kanban connection failed", exc_info=True
                        )
            except Exception:
                logger.warning(
                    "Failed to record budget-exhausted failure for task %s",
                    _kanban_task,
                    exc_info=True,
                )

    # Determine if conversation completed successfully
    completed = (
        final_response is not None
        and api_call_count < agent.max_iterations
        and not failed
    )

    # Save trajectory if enabled. ``user_message`` may be a multimodal
    # list of parts; the trajectory format wants a plain string.
    agent._save_trajectory(messages, _summarize_user_message_for_log(user_message), completed)

    # Clean up VM and browser for this task after conversation completes
    agent._cleanup_task_resources(effective_task_id)

    # Persist session to both JSON log and SQLite only after private retry
    # scaffolding has been removed. Otherwise a later user "continue" turn
    # can replay assistant("(empty)") / recovery nudges and fall into the
    # same empty-response loop again.
    agent._drop_trailing_empty_response_scaffolding(messages)
    agent._persist_session(messages, conversation_history)

    # ── Turn-exit diagnostic log ─────────────────────────────────────
    # Always logged at INFO so agent.log captures WHY every turn ended.
    # When the last message is a tool result (agent was mid-work), log
    # at WARNING — this is the "just stops" scenario users report.
    #
    # Everything below runs after the session is persisted and outside any
    # try/except, so the message walks tolerate malformed entries rather
    # than letting a diagnostic abort the turn.
    _last_msg = messages[-1] if messages else None
    _last_msg_role = _last_msg.get("role") if isinstance(_last_msg, dict) else None
    _last_tool_name = None
    if _last_msg_role == "tool":
        # Walk back to find the assistant message with the tool call
        _last_tool_name = _last_tool_call_name(messages)

    _turn_tool_count = sum(
        1 for m in messages
        if isinstance(m, dict) and m.get("role") == "assistant" and m.get("tool_calls")
    )
    _resp_len = len(final_response) if final_response else 0
    _budget_used = agent.iteration_budget.used if agent.iteration_budget else 0
    _budget_max = agent.iteration_budget.max_total if agent.iteration_budget else 0

    _diag_msg = (
        "Turn ended: reason=%s model=%s api_calls=%d/%d budget=%d/%d "
        "tool_turns=%d last_msg_role=%s response_len=%d session=%s"
    )
    _diag_args = (
        _turn_exit_reason, agent.model, api_call_count, agent.max_iterations,
        _budget_used, _budget_max,
        _turn_tool_count, _last_msg_role, _resp_len,
        agent.session_id or "none",
    )

    if _last_msg_role == "tool" and not interrupted:
        # Agent was mid-work — this is the "just stops" case.
        logger.warning(
            "Turn ended with pending tool result (agent may appear stuck). "
            + _diag_msg + " last_tool=%s",
            *_diag_args, _last_tool_name,
        )
    else:
        logger.info(_diag_msg, *_diag_args)

    # File-mutation verifier footer.
    # If one or more ``write_file`` / ``patch`` calls failed during this
    # turn and were never superseded by a successful write to the same
    # path, append an advisory footer to the assistant response. This
    # catches the specific case — reported by Ben Eng (#15524-adjacent)
    # — where a model issues a batch of parallel patches, half of them
    # fail with "Could not find old_string", and the model summarises
    # the turn claiming every file was edited. The user then has to
    # manually run ``git status`` to catch the lie. With this footer
    # the truth is surfaced on every turn, so over-claiming is
    # structurally impossible past the model.
    #
    # Gate: only applied when a real text response exists for this
    # turn and the user didn't interrupt. Empty/interrupted turns
    # already have other surface text that shouldn't be augmented.
    if final_response and not interrupted:
        try:
            _failed = getattr(agent, "_turn_failed_file_mutations", None) or {}
            if _failed and agent._file_mutation_verifier_enabled():
                footer = agent._format_file_mutation_failure_footer(_failed)
                if footer:
                    final_response = final_response.rstrip() + "\n\n" + footer
        except Exception as _ver_err:
            logger.debug("file-mutation verifier footer failed: %s", _ver_err)

    # Turn-completion explainer.
    # When a turn ends abnormally after substantive work — empty content
    # after retries, a partial/truncated stream, a still-pending tool
    # result, or an iteration/budget limit — the user otherwise gets a
    # blank or fragmentary response box with no consolidated reason why
    # the agent stopped (#34452). Surface a single user-visible
    # explanation derived from ``_turn_exit_reason``, mirroring the
    # file-mutation verifier footer pattern above.
    #
    # Gate carefully so healthy turns stay quiet:
    #   - ``text_response(...)`` exits never produce an explanation
    #     (handled inside the formatter), so a terse ``Done.`` is silent.
    #   - We only ACT when there is no genuinely usable reply this turn:
    #     an empty response, the "(empty)" terminal sentinel, or a
    #     suspiciously short partial fragment with no terminating
    #     punctuation (e.g. "The"). A real short answer keeps its text.
    if not interrupted:
        try:
            if agent._turn_completion_explainer_enabled():
                _stripped = (final_response or "").strip()
                _is_empty_terminal = _stripped == "" or _stripped == "(empty)"
                # A short fragment that is not a normal text_response exit
                # and lacks sentence-ending punctuation is treated as a
                # truncated partial (the "The" case from #34452).
                #
                # Terminators: ASCII . ! ?, the CJK full stop (U+3002), the
                # fullwidth ! and ? (U+FF01, U+FF1F), a closing backtick and
                # a closing parenthesis. The non-ASCII ones are written as
                # escapes so they cannot be confused with, or normalised
                # into, their ASCII look-alikes.
                _is_partial_fragment = (
                    not _is_empty_terminal
                    and not str(_turn_exit_reason).startswith("text_response")
                    and len(_stripped) <= 24
                    and _stripped[-1:] not in {
                        ".", "!", "?", "\u3002", "\uff01", "\uff1f", "`", ")",
                    }
                )
                if _is_empty_terminal or _is_partial_fragment:
                    _explanation = agent._format_turn_completion_explanation(
                        _turn_exit_reason
                    )
                    if _explanation:
                        if _is_empty_terminal:
                            # Replace the bare "(empty)"/blank sentinel with
                            # the actionable explanation.
                            final_response = _explanation
                        else:
                            # Keep the partial fragment, append the reason so
                            # the user sees both what arrived and why it
                            # stopped.
                            final_response = (
                                _stripped + "\n\n" + _explanation
                            )
        except Exception as _exp_err:
            logger.debug("turn-completion explainer failed: %s", _exp_err)

    _response_transformed = False

    # Plugin hook: transform_llm_output
    # Fired once per turn after the tool-calling loop completes.
    # Plugins can transform the LLM's output text before it's returned.
    # First hook to return a string wins; None/empty return leaves text unchanged.
    if final_response and not interrupted:
        try:
            from hermes_cli.plugins import invoke_hook as _invoke_hook

            _transform_results = _invoke_hook(
                "transform_llm_output",
                response_text=final_response,
                session_id=agent.session_id or "",
                model=agent.model,
                platform=getattr(agent, "platform", None) or "",
            )
            for _hook_result in _transform_results:
                if isinstance(_hook_result, str) and _hook_result:
                    final_response = _hook_result
                    _response_transformed = True
                    break  # First non-empty string wins
        except Exception as exc:
            logger.warning("transform_llm_output hook failed: %s", exc)

    # Plugin hook: post_llm_call
    # Fired once per turn after the tool-calling loop completes.
    # Plugins can use this to persist conversation data (e.g. sync
    # to an external memory system).
    if final_response and not interrupted:
        try:
            from hermes_cli.plugins import invoke_hook as _invoke_hook

            _invoke_hook(
                "post_llm_call",
                session_id=agent.session_id,
                task_id=effective_task_id,
                turn_id=turn_id,
                user_message=original_user_message,
                assistant_response=final_response,
                conversation_history=list(messages),
                model=agent.model,
                platform=getattr(agent, "platform", None) or "",
            )
        except Exception as exc:
            logger.warning("post_llm_call hook failed: %s", exc)

    # Extract reasoning from the CURRENT turn only (most recent non-empty
    # assistant reasoning after the turn-starting user message).
    last_reasoning = _current_turn_reasoning(messages)

    # Build result with interrupt info if applicable
    result = {
        "final_response": final_response,
        "last_reasoning": last_reasoning,
        "messages": messages,
        "api_calls": api_call_count,
        "completed": completed,
        "turn_exit_reason": _turn_exit_reason,
        "failed": failed,
        "partial": False,  # True only when stopped due to invalid tool calls
        "interrupted": interrupted,
        "response_transformed": _response_transformed,
        "response_previewed": getattr(agent, "_response_was_previewed", False),
        "model": agent.model,
        "provider": agent.provider,
        "base_url": agent.base_url,
        "input_tokens": agent.session_input_tokens,
        "output_tokens": agent.session_output_tokens,
        "cache_read_tokens": agent.session_cache_read_tokens,
        "cache_write_tokens": agent.session_cache_write_tokens,
        "reasoning_tokens": agent.session_reasoning_tokens,
        "prompt_tokens": agent.session_prompt_tokens,
        "completion_tokens": agent.session_completion_tokens,
        "total_tokens": agent.session_total_tokens,
        "last_prompt_tokens": getattr(agent.context_compressor, "last_prompt_tokens", 0) or 0,
        "estimated_cost_usd": agent.session_estimated_cost_usd,
        "cost_status": agent.session_cost_status,
        "cost_source": agent.session_cost_source,
        "session_id": agent.session_id,
    }
    if agent._tool_guardrail_halt_decision is not None:
        result["guardrail"] = agent._tool_guardrail_halt_decision.to_metadata()
    # If a /steer landed after the final assistant turn (no more tool
    # batches to drain into), hand it back to the caller so it can be
    # delivered as the next user turn instead of being silently lost.
    _leftover_steer = agent._drain_pending_steer()
    if _leftover_steer:
        result["pending_steer"] = _leftover_steer
    # Reset only after the flag has been captured in ``result`` above.
    agent._response_was_previewed = False

    # Include interrupt message if one triggered the interrupt
    if interrupted and agent._interrupt_message:
        result["interrupt_message"] = agent._interrupt_message

    # Clear interrupt state after handling
    agent.clear_interrupt()

    # Clear stream callback so it doesn't leak into future calls
    agent._stream_callback = None

    # Check skill trigger NOW — based on how many tool iterations THIS turn used.
    _should_review_skills = False
    if (
        agent._skill_nudge_interval > 0
        and agent._iters_since_skill >= agent._skill_nudge_interval
        and "skill_manage" in agent.valid_tool_names
    ):
        _should_review_skills = True
        agent._iters_since_skill = 0

    # External memory provider: sync the completed turn + queue next prefetch.
    agent._sync_external_memory_for_turn(
        original_user_message=original_user_message,
        final_response=final_response,
        interrupted=interrupted,
        messages=messages,
    )

    # Background memory/skill review — runs AFTER the response is delivered
    # so it never competes with the user's task for model attention.
    if final_response and not interrupted and (_should_review_memory or _should_review_skills):
        try:
            agent._spawn_background_review(
                messages_snapshot=list(messages),
                review_memory=_should_review_memory,
                review_skills=_should_review_skills,
            )
        except Exception:
            # Background review is best-effort: never fail the turn over it,
            # but record the failure so it can be diagnosed.
            logger.debug("background review spawn failed", exc_info=True)

    # Note: Memory provider on_session_end() + shutdown_all() are NOT
    # called here — run_conversation() is called once per user message in
    # multi-turn sessions. Shutting down after every turn would kill the
    # provider before the second message. Actual session-end cleanup is
    # handled by the CLI (atexit / /reset) and gateway (session expiry /
    # _reset_session).

    # Plugin hook: on_session_end
    # Fired at the very end of every run_conversation call.
    # Plugins can use this for cleanup, flushing buffers, etc.
    try:
        from hermes_cli.plugins import invoke_hook as _invoke_hook

        _invoke_hook(
            "on_session_end",
            session_id=agent.session_id,
            task_id=effective_task_id,
            turn_id=turn_id,
            completed=completed,
            interrupted=interrupted,
            model=agent.model,
            platform=getattr(agent, "platform", None) or "",
        )
    except Exception as exc:
        logger.warning("on_session_end hook failed: %s", exc)

    return result
|