mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-09 08:21:50 +00:00
refactor(agent): extract run_conversation post-loop tail into finalize_turn (god-file Phase 1)
Lift the post-loop finalization tail out of run_conversation into agent/turn_finalizer.py:finalize_turn. Behavior-neutral; run_conversation 4204 -> 3846 LOC, conversation_loop.py 4578 -> 4220. The region (everything after the main tool-calling while loop): budget-exhaustion summary, trajectory save, session persist, turn diagnostics, response transforms, result-dict assembly, steer drain, and the memory/skill review trigger. Lifted verbatim into a synchronous single-return free function; the 12 post-loop locals it reads are passed as keyword args and the assembled result dict is returned to run_conversation (which returns it to the caller). All agent.* side effects fire exactly as before. Imports: os + _summarize_user_message_for_log at module top; logger lazy from agent.conversation_loop (preserves the gateway... err 'agent.conversation_loop' logger name, no import cycle). Validation: 1609/1609 tests/run_agent/ pass; live PTY agent turn PASS.
This commit is contained in:
parent
a706a349b5
commit
55b83c3d99
2 changed files with 443 additions and 372 deletions
|
|
@ -4196,383 +4196,26 @@ def run_conversation(
|
|||
messages.append({"role": "assistant", "content": final_response})
|
||||
break
|
||||
|
||||
if final_response is None and (
|
||||
api_call_count >= agent.max_iterations
|
||||
or agent.iteration_budget.remaining <= 0
|
||||
):
|
||||
# Budget exhausted — ask the model for a summary via one extra
|
||||
# API call with tools stripped. _handle_max_iterations injects a
|
||||
# user message and makes a single toolless request.
|
||||
_turn_exit_reason = f"max_iterations_reached({api_call_count}/{agent.max_iterations})"
|
||||
agent._emit_status(
|
||||
f"⚠️ Iteration budget exhausted ({api_call_count}/{agent.max_iterations}) "
|
||||
"— asking model to summarise"
|
||||
)
|
||||
if not agent.quiet_mode:
|
||||
agent._safe_print(
|
||||
f"\n⚠️ Iteration budget exhausted ({api_call_count}/{agent.max_iterations}) "
|
||||
"— requesting summary..."
|
||||
)
|
||||
final_response = agent._handle_max_iterations(messages, api_call_count)
|
||||
|
||||
# If running as a kanban worker, signal the dispatcher that the
|
||||
# worker could not complete (rather than treating it as a
|
||||
# protocol violation). The agent loop strips tools before calling
|
||||
# _handle_max_iterations, so the model cannot call kanban_block
|
||||
# itself — we must do it on its behalf.
|
||||
#
|
||||
# We route through ``_record_task_failure(outcome="timed_out")``
|
||||
# rather than ``kanban_block`` so this counts toward the
|
||||
# ``consecutive_failures`` counter and the dispatcher's
|
||||
# ``failure_limit`` circuit breaker (#29747 gap 2). Without this,
|
||||
# a task whose worker keeps exhausting its budget would block
|
||||
# silently each run, get auto-promoted by the operator (or never
|
||||
# surface), and re-block in an endless loop with no signal.
|
||||
_kanban_task = os.environ.get("HERMES_KANBAN_TASK")
|
||||
if _kanban_task:
|
||||
try:
|
||||
from hermes_cli import kanban_db as _kb
|
||||
_conn = _kb.connect()
|
||||
try:
|
||||
_kb._record_task_failure(
|
||||
_conn,
|
||||
_kanban_task,
|
||||
error=(
|
||||
f"Iteration budget exhausted "
|
||||
f"({api_call_count}/{agent.max_iterations}) — "
|
||||
"task could not complete within the allowed "
|
||||
"iterations"
|
||||
),
|
||||
outcome="timed_out",
|
||||
release_claim=True,
|
||||
end_run=True,
|
||||
event_payload_extra={
|
||||
"budget_used": api_call_count,
|
||||
"budget_max": agent.max_iterations,
|
||||
},
|
||||
)
|
||||
logger.info(
|
||||
"recorded budget-exhausted failure for task %s (%d/%d)",
|
||||
_kanban_task, api_call_count, agent.max_iterations,
|
||||
)
|
||||
finally:
|
||||
try:
|
||||
_conn.close()
|
||||
except Exception:
|
||||
pass
|
||||
except Exception:
|
||||
logger.warning(
|
||||
"Failed to record budget-exhausted failure for task %s",
|
||||
_kanban_task,
|
||||
exc_info=True,
|
||||
)
|
||||
|
||||
# Determine if conversation completed successfully
|
||||
completed = (
|
||||
final_response is not None
|
||||
and api_call_count < agent.max_iterations
|
||||
and not failed
|
||||
)
|
||||
|
||||
# Save trajectory if enabled. ``user_message`` may be a multimodal
|
||||
# list of parts; the trajectory format wants a plain string.
|
||||
agent._save_trajectory(messages, _summarize_user_message_for_log(user_message), completed)
|
||||
|
||||
# Clean up VM and browser for this task after conversation completes
|
||||
agent._cleanup_task_resources(effective_task_id)
|
||||
|
||||
# Persist session to both JSON log and SQLite only after private retry
|
||||
# scaffolding has been removed. Otherwise a later user "continue" turn
|
||||
# can replay assistant("(empty)") / recovery nudges and fall into the
|
||||
# same empty-response loop again.
|
||||
agent._drop_trailing_empty_response_scaffolding(messages)
|
||||
agent._persist_session(messages, conversation_history)
|
||||
|
||||
# ── Turn-exit diagnostic log ─────────────────────────────────────
|
||||
# Always logged at INFO so agent.log captures WHY every turn ended.
|
||||
# When the last message is a tool result (agent was mid-work), log
|
||||
# at WARNING — this is the "just stops" scenario users report.
|
||||
_last_msg_role = messages[-1].get("role") if messages else None
|
||||
_last_tool_name = None
|
||||
if _last_msg_role == "tool":
|
||||
# Walk back to find the assistant message with the tool call
|
||||
for _m in reversed(messages):
|
||||
if _m.get("role") == "assistant" and _m.get("tool_calls"):
|
||||
_tcs = _m["tool_calls"]
|
||||
if _tcs and isinstance(_tcs[0], dict):
|
||||
_last_tool_name = _tcs[-1].get("function", {}).get("name")
|
||||
break
|
||||
|
||||
_turn_tool_count = sum(
|
||||
1 for m in messages
|
||||
if isinstance(m, dict) and m.get("role") == "assistant" and m.get("tool_calls")
|
||||
)
|
||||
_resp_len = len(final_response) if final_response else 0
|
||||
_budget_used = agent.iteration_budget.used if agent.iteration_budget else 0
|
||||
_budget_max = agent.iteration_budget.max_total if agent.iteration_budget else 0
|
||||
|
||||
_diag_msg = (
|
||||
"Turn ended: reason=%s model=%s api_calls=%d/%d budget=%d/%d "
|
||||
"tool_turns=%d last_msg_role=%s response_len=%d session=%s"
|
||||
)
|
||||
_diag_args = (
|
||||
_turn_exit_reason, agent.model, api_call_count, agent.max_iterations,
|
||||
_budget_used, _budget_max,
|
||||
_turn_tool_count, _last_msg_role, _resp_len,
|
||||
agent.session_id or "none",
|
||||
)
|
||||
|
||||
if _last_msg_role == "tool" and not interrupted:
|
||||
# Agent was mid-work — this is the "just stops" case.
|
||||
logger.warning(
|
||||
"Turn ended with pending tool result (agent may appear stuck). "
|
||||
+ _diag_msg + " last_tool=%s",
|
||||
*_diag_args, _last_tool_name,
|
||||
)
|
||||
else:
|
||||
logger.info(_diag_msg, *_diag_args)
|
||||
|
||||
# File-mutation verifier footer.
|
||||
# If one or more ``write_file`` / ``patch`` calls failed during this
|
||||
# turn and were never superseded by a successful write to the same
|
||||
# path, append an advisory footer to the assistant response. This
|
||||
# catches the specific case — reported by Ben Eng (#15524-adjacent)
|
||||
# — where a model issues a batch of parallel patches, half of them
|
||||
# fail with "Could not find old_string", and the model summarises
|
||||
# the turn claiming every file was edited. The user then has to
|
||||
# manually run ``git status`` to catch the lie. With this footer
|
||||
# the truth is surfaced on every turn, so over-claiming is
|
||||
# structurally impossible past the model.
|
||||
#
|
||||
# Gate: only applied when a real text response exists for this
|
||||
# turn and the user didn't interrupt. Empty/interrupted turns
|
||||
# already have other surface text that shouldn't be augmented.
|
||||
if final_response and not interrupted:
|
||||
try:
|
||||
_failed = getattr(agent, "_turn_failed_file_mutations", None) or {}
|
||||
if _failed and agent._file_mutation_verifier_enabled():
|
||||
footer = agent._format_file_mutation_failure_footer(_failed)
|
||||
if footer:
|
||||
final_response = final_response.rstrip() + "\n\n" + footer
|
||||
except Exception as _ver_err:
|
||||
logger.debug("file-mutation verifier footer failed: %s", _ver_err)
|
||||
|
||||
# Turn-completion explainer.
|
||||
# When a turn ends abnormally after substantive work — empty content
|
||||
# after retries, a partial/truncated stream, a still-pending tool
|
||||
# result, or an iteration/budget limit — the user otherwise gets a
|
||||
# blank or fragmentary response box with no consolidated reason why
|
||||
# the agent stopped (#34452). Surface a single user-visible
|
||||
# explanation derived from ``_turn_exit_reason``, mirroring the
|
||||
# file-mutation verifier footer pattern above.
|
||||
#
|
||||
# Gate carefully so healthy turns stay quiet:
|
||||
# - ``text_response(...)`` exits never produce an explanation
|
||||
# (handled inside the formatter), so a terse ``Done.`` is silent.
|
||||
# - We only ACT when there is no genuinely usable reply this turn:
|
||||
# an empty response, the "(empty)" terminal sentinel, or a
|
||||
# suspiciously short partial fragment with no terminating
|
||||
# punctuation (e.g. "The"). A real short answer keeps its text.
|
||||
if not interrupted:
|
||||
try:
|
||||
if agent._turn_completion_explainer_enabled():
|
||||
_stripped = (final_response or "").strip()
|
||||
_is_empty_terminal = _stripped == "" or _stripped == "(empty)"
|
||||
# A short fragment that is not a normal text_response exit
|
||||
# and lacks sentence-ending punctuation is treated as a
|
||||
# truncated partial (the "The" case from #34452).
|
||||
_is_partial_fragment = (
|
||||
not _is_empty_terminal
|
||||
and not str(_turn_exit_reason).startswith("text_response")
|
||||
and len(_stripped) <= 24
|
||||
and _stripped[-1:] not in {".", "!", "?", "。", "!", "?", "`", ")"}
|
||||
)
|
||||
if _is_empty_terminal or _is_partial_fragment:
|
||||
_explanation = agent._format_turn_completion_explanation(
|
||||
_turn_exit_reason
|
||||
)
|
||||
if _explanation:
|
||||
if _is_empty_terminal:
|
||||
# Replace the bare "(empty)"/blank sentinel with
|
||||
# the actionable explanation.
|
||||
final_response = _explanation
|
||||
else:
|
||||
# Keep the partial fragment, append the reason so
|
||||
# the user sees both what arrived and why it
|
||||
# stopped.
|
||||
final_response = (
|
||||
_stripped + "\n\n" + _explanation
|
||||
)
|
||||
except Exception as _exp_err:
|
||||
logger.debug("turn-completion explainer failed: %s", _exp_err)
|
||||
|
||||
_response_transformed = False
|
||||
|
||||
# Plugin hook: transform_llm_output
|
||||
# Fired once per turn after the tool-calling loop completes.
|
||||
# Plugins can transform the LLM's output text before it's returned.
|
||||
# First hook to return a string wins; None/empty return leaves text unchanged.
|
||||
if final_response and not interrupted:
|
||||
try:
|
||||
from hermes_cli.plugins import invoke_hook as _invoke_hook
|
||||
_transform_results = _invoke_hook(
|
||||
"transform_llm_output",
|
||||
response_text=final_response,
|
||||
session_id=agent.session_id or "",
|
||||
model=agent.model,
|
||||
platform=getattr(agent, "platform", None) or "",
|
||||
)
|
||||
for _hook_result in _transform_results:
|
||||
if isinstance(_hook_result, str) and _hook_result:
|
||||
final_response = _hook_result
|
||||
_response_transformed = True
|
||||
break # First non-empty string wins
|
||||
except Exception as exc:
|
||||
logger.warning("transform_llm_output hook failed: %s", exc)
|
||||
|
||||
# Plugin hook: post_llm_call
|
||||
# Fired once per turn after the tool-calling loop completes.
|
||||
# Plugins can use this to persist conversation data (e.g. sync
|
||||
# to an external memory system).
|
||||
if final_response and not interrupted:
|
||||
try:
|
||||
from hermes_cli.plugins import invoke_hook as _invoke_hook
|
||||
_invoke_hook(
|
||||
"post_llm_call",
|
||||
session_id=agent.session_id,
|
||||
task_id=effective_task_id,
|
||||
turn_id=turn_id,
|
||||
user_message=original_user_message,
|
||||
assistant_response=final_response,
|
||||
conversation_history=list(messages),
|
||||
model=agent.model,
|
||||
platform=getattr(agent, "platform", None) or "",
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.warning("post_llm_call hook failed: %s", exc)
|
||||
|
||||
# Extract reasoning from the CURRENT turn only. Walk backwards
|
||||
# but stop at the user message that started this turn — anything
|
||||
# earlier is from a prior turn and must not leak into the reasoning
|
||||
# box (confusing stale display; #17055). Within the current turn
|
||||
# we still want the *most recent* non-empty reasoning: many
|
||||
# providers (Claude thinking, DeepSeek v4, Codex Responses) emit
|
||||
# reasoning on the tool-call step and leave the final-answer step
|
||||
# with reasoning=None, so picking only the last assistant would
|
||||
# silently drop legitimate same-turn reasoning.
|
||||
last_reasoning = None
|
||||
for msg in reversed(messages):
|
||||
if msg.get("role") == "user":
|
||||
break # turn boundary — don't cross into prior turns
|
||||
if msg.get("role") == "assistant" and msg.get("reasoning"):
|
||||
last_reasoning = msg["reasoning"]
|
||||
break
|
||||
|
||||
# Build result with interrupt info if applicable
|
||||
result = {
|
||||
"final_response": final_response,
|
||||
"last_reasoning": last_reasoning,
|
||||
"messages": messages,
|
||||
"api_calls": api_call_count,
|
||||
"completed": completed,
|
||||
"turn_exit_reason": _turn_exit_reason,
|
||||
"failed": failed,
|
||||
"partial": False, # True only when stopped due to invalid tool calls
|
||||
"interrupted": interrupted,
|
||||
"response_transformed": _response_transformed,
|
||||
"response_previewed": getattr(agent, "_response_was_previewed", False),
|
||||
"model": agent.model,
|
||||
"provider": agent.provider,
|
||||
"base_url": agent.base_url,
|
||||
"input_tokens": agent.session_input_tokens,
|
||||
"output_tokens": agent.session_output_tokens,
|
||||
"cache_read_tokens": agent.session_cache_read_tokens,
|
||||
"cache_write_tokens": agent.session_cache_write_tokens,
|
||||
"reasoning_tokens": agent.session_reasoning_tokens,
|
||||
"prompt_tokens": agent.session_prompt_tokens,
|
||||
"completion_tokens": agent.session_completion_tokens,
|
||||
"total_tokens": agent.session_total_tokens,
|
||||
"last_prompt_tokens": getattr(agent.context_compressor, "last_prompt_tokens", 0) or 0,
|
||||
"estimated_cost_usd": agent.session_estimated_cost_usd,
|
||||
"cost_status": agent.session_cost_status,
|
||||
"cost_source": agent.session_cost_source,
|
||||
"session_id": agent.session_id,
|
||||
}
|
||||
if agent._tool_guardrail_halt_decision is not None:
|
||||
result["guardrail"] = agent._tool_guardrail_halt_decision.to_metadata()
|
||||
# If a /steer landed after the final assistant turn (no more tool
|
||||
# batches to drain into), hand it back to the caller so it can be
|
||||
# delivered as the next user turn instead of being silently lost.
|
||||
_leftover_steer = agent._drain_pending_steer()
|
||||
if _leftover_steer:
|
||||
result["pending_steer"] = _leftover_steer
|
||||
agent._response_was_previewed = False
|
||||
|
||||
# Include interrupt message if one triggered the interrupt
|
||||
if interrupted and agent._interrupt_message:
|
||||
result["interrupt_message"] = agent._interrupt_message
|
||||
|
||||
# Clear interrupt state after handling
|
||||
agent.clear_interrupt()
|
||||
|
||||
# Clear stream callback so it doesn't leak into future calls
|
||||
agent._stream_callback = None
|
||||
|
||||
# Check skill trigger NOW — based on how many tool iterations THIS turn used.
|
||||
_should_review_skills = False
|
||||
if (agent._skill_nudge_interval > 0
|
||||
and agent._iters_since_skill >= agent._skill_nudge_interval
|
||||
and "skill_manage" in agent.valid_tool_names):
|
||||
_should_review_skills = True
|
||||
agent._iters_since_skill = 0
|
||||
|
||||
# External memory provider: sync the completed turn + queue next prefetch.
|
||||
agent._sync_external_memory_for_turn(
|
||||
original_user_message=original_user_message,
|
||||
# Post-loop turn finalization extracted to agent/turn_finalizer.finalize_turn
|
||||
# (god-file decomposition Phase 1 step 4). Behavior-neutral: the assembled
|
||||
# result dict is returned exactly as before.
|
||||
from agent.turn_finalizer import finalize_turn
|
||||
return finalize_turn(
|
||||
agent,
|
||||
final_response=final_response,
|
||||
api_call_count=api_call_count,
|
||||
interrupted=interrupted,
|
||||
failed=failed,
|
||||
messages=messages,
|
||||
conversation_history=conversation_history,
|
||||
effective_task_id=effective_task_id,
|
||||
turn_id=turn_id,
|
||||
user_message=user_message,
|
||||
original_user_message=original_user_message,
|
||||
_should_review_memory=_should_review_memory,
|
||||
_turn_exit_reason=_turn_exit_reason,
|
||||
)
|
||||
|
||||
# Background memory/skill review — runs AFTER the response is delivered
|
||||
# so it never competes with the user's task for model attention.
|
||||
if final_response and not interrupted and (_should_review_memory or _should_review_skills):
|
||||
try:
|
||||
agent._spawn_background_review(
|
||||
messages_snapshot=list(messages),
|
||||
review_memory=_should_review_memory,
|
||||
review_skills=_should_review_skills,
|
||||
)
|
||||
except Exception:
|
||||
pass # Background review is best-effort
|
||||
|
||||
# Note: Memory provider on_session_end() + shutdown_all() are NOT
|
||||
# called here — run_conversation() is called once per user message in
|
||||
# multi-turn sessions. Shutting down after every turn would kill the
|
||||
# provider before the second message. Actual session-end cleanup is
|
||||
# handled by the CLI (atexit / /reset) and gateway (session expiry /
|
||||
# _reset_session).
|
||||
|
||||
# Plugin hook: on_session_end
|
||||
# Fired at the very end of every run_conversation call.
|
||||
# Plugins can use this for cleanup, flushing buffers, etc.
|
||||
try:
|
||||
from hermes_cli.plugins import invoke_hook as _invoke_hook
|
||||
_invoke_hook(
|
||||
"on_session_end",
|
||||
session_id=agent.session_id,
|
||||
task_id=effective_task_id,
|
||||
turn_id=turn_id,
|
||||
completed=completed,
|
||||
interrupted=interrupted,
|
||||
model=agent.model,
|
||||
platform=getattr(agent, "platform", None) or "",
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.warning("on_session_end hook failed: %s", exc)
|
||||
|
||||
return result
|
||||
|
||||
|
||||
|
||||
__all__ = ["run_conversation"]
|
||||
|
|
|
|||
428
agent/turn_finalizer.py
Normal file
428
agent/turn_finalizer.py
Normal file
|
|
@ -0,0 +1,428 @@
|
|||
"""Post-loop turn finalization for ``run_conversation``.
|
||||
|
||||
Extracted from ``agent/conversation_loop.py`` as part of the god-file
|
||||
decomposition campaign (``~/.hermes/plans/god-file-decomposition.md``, Phase 1
|
||||
step 4 — the post-loop ``TurnFinalizer`` seam). ``run_conversation``'s tail
|
||||
(everything after the main tool-calling ``while`` loop) is lifted here verbatim:
|
||||
budget-exhaustion summary, trajectory save, session persist, turn diagnostics,
|
||||
response transforms, result-dict assembly, steer drain, and the memory/skill
|
||||
review trigger.
|
||||
|
||||
Behavior-neutral: the body is moved unchanged. All ``agent.*`` side effects fire
|
||||
exactly as before; only the post-loop *locals* are passed in as keyword args, and
|
||||
the assembled ``result`` dict is returned to ``run_conversation`` which returns it
|
||||
to the caller. The function is synchronous with a single return — mirroring the
|
||||
region it replaces (no awaits, no early returns).
|
||||
|
||||
Module ``logger`` is imported lazily inside the body (``from
|
||||
agent.conversation_loop import logger``) so this module never imports
|
||||
``agent.conversation_loop`` at import time -> no import cycle, and the log records
|
||||
keep the exact logger name (``"agent.conversation_loop"``).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
|
||||
from agent.codex_responses_adapter import _summarize_user_message_for_log
|
||||
|
||||
|
||||
def finalize_turn(
|
||||
agent,
|
||||
*,
|
||||
final_response,
|
||||
api_call_count,
|
||||
interrupted,
|
||||
failed,
|
||||
messages,
|
||||
conversation_history,
|
||||
effective_task_id,
|
||||
turn_id,
|
||||
user_message,
|
||||
original_user_message,
|
||||
_should_review_memory,
|
||||
_turn_exit_reason,
|
||||
):
|
||||
"""Run the post-loop finalization and return the turn ``result`` dict.
|
||||
|
||||
Lifted verbatim from ``run_conversation`` (the region after the main agent
|
||||
loop). See module docstring.
|
||||
"""
|
||||
from agent.conversation_loop import logger
|
||||
|
||||
if final_response is None and (
|
||||
api_call_count >= agent.max_iterations
|
||||
or agent.iteration_budget.remaining <= 0
|
||||
):
|
||||
# Budget exhausted — ask the model for a summary via one extra
|
||||
# API call with tools stripped. _handle_max_iterations injects a
|
||||
# user message and makes a single toolless request.
|
||||
_turn_exit_reason = f"max_iterations_reached({api_call_count}/{agent.max_iterations})"
|
||||
agent._emit_status(
|
||||
f"⚠️ Iteration budget exhausted ({api_call_count}/{agent.max_iterations}) "
|
||||
"— asking model to summarise"
|
||||
)
|
||||
if not agent.quiet_mode:
|
||||
agent._safe_print(
|
||||
f"\n⚠️ Iteration budget exhausted ({api_call_count}/{agent.max_iterations}) "
|
||||
"— requesting summary..."
|
||||
)
|
||||
final_response = agent._handle_max_iterations(messages, api_call_count)
|
||||
|
||||
# If running as a kanban worker, signal the dispatcher that the
|
||||
# worker could not complete (rather than treating it as a
|
||||
# protocol violation). The agent loop strips tools before calling
|
||||
# _handle_max_iterations, so the model cannot call kanban_block
|
||||
# itself — we must do it on its behalf.
|
||||
#
|
||||
# We route through ``_record_task_failure(outcome="timed_out")``
|
||||
# rather than ``kanban_block`` so this counts toward the
|
||||
# ``consecutive_failures`` counter and the dispatcher's
|
||||
# ``failure_limit`` circuit breaker (#29747 gap 2). Without this,
|
||||
# a task whose worker keeps exhausting its budget would block
|
||||
# silently each run, get auto-promoted by the operator (or never
|
||||
# surface), and re-block in an endless loop with no signal.
|
||||
_kanban_task = os.environ.get("HERMES_KANBAN_TASK")
|
||||
if _kanban_task:
|
||||
try:
|
||||
from hermes_cli import kanban_db as _kb
|
||||
_conn = _kb.connect()
|
||||
try:
|
||||
_kb._record_task_failure(
|
||||
_conn,
|
||||
_kanban_task,
|
||||
error=(
|
||||
f"Iteration budget exhausted "
|
||||
f"({api_call_count}/{agent.max_iterations}) — "
|
||||
"task could not complete within the allowed "
|
||||
"iterations"
|
||||
),
|
||||
outcome="timed_out",
|
||||
release_claim=True,
|
||||
end_run=True,
|
||||
event_payload_extra={
|
||||
"budget_used": api_call_count,
|
||||
"budget_max": agent.max_iterations,
|
||||
},
|
||||
)
|
||||
logger.info(
|
||||
"recorded budget-exhausted failure for task %s (%d/%d)",
|
||||
_kanban_task, api_call_count, agent.max_iterations,
|
||||
)
|
||||
finally:
|
||||
try:
|
||||
_conn.close()
|
||||
except Exception:
|
||||
pass
|
||||
except Exception:
|
||||
logger.warning(
|
||||
"Failed to record budget-exhausted failure for task %s",
|
||||
_kanban_task,
|
||||
exc_info=True,
|
||||
)
|
||||
|
||||
# Determine if conversation completed successfully
|
||||
completed = (
|
||||
final_response is not None
|
||||
and api_call_count < agent.max_iterations
|
||||
and not failed
|
||||
)
|
||||
|
||||
# Save trajectory if enabled. ``user_message`` may be a multimodal
|
||||
# list of parts; the trajectory format wants a plain string.
|
||||
agent._save_trajectory(messages, _summarize_user_message_for_log(user_message), completed)
|
||||
|
||||
# Clean up VM and browser for this task after conversation completes
|
||||
agent._cleanup_task_resources(effective_task_id)
|
||||
|
||||
# Persist session to both JSON log and SQLite only after private retry
|
||||
# scaffolding has been removed. Otherwise a later user "continue" turn
|
||||
# can replay assistant("(empty)") / recovery nudges and fall into the
|
||||
# same empty-response loop again.
|
||||
agent._drop_trailing_empty_response_scaffolding(messages)
|
||||
agent._persist_session(messages, conversation_history)
|
||||
|
||||
# ── Turn-exit diagnostic log ─────────────────────────────────────
|
||||
# Always logged at INFO so agent.log captures WHY every turn ended.
|
||||
# When the last message is a tool result (agent was mid-work), log
|
||||
# at WARNING — this is the "just stops" scenario users report.
|
||||
_last_msg_role = messages[-1].get("role") if messages else None
|
||||
_last_tool_name = None
|
||||
if _last_msg_role == "tool":
|
||||
# Walk back to find the assistant message with the tool call
|
||||
for _m in reversed(messages):
|
||||
if _m.get("role") == "assistant" and _m.get("tool_calls"):
|
||||
_tcs = _m["tool_calls"]
|
||||
if _tcs and isinstance(_tcs[0], dict):
|
||||
_last_tool_name = _tcs[-1].get("function", {}).get("name")
|
||||
break
|
||||
|
||||
_turn_tool_count = sum(
|
||||
1 for m in messages
|
||||
if isinstance(m, dict) and m.get("role") == "assistant" and m.get("tool_calls")
|
||||
)
|
||||
_resp_len = len(final_response) if final_response else 0
|
||||
_budget_used = agent.iteration_budget.used if agent.iteration_budget else 0
|
||||
_budget_max = agent.iteration_budget.max_total if agent.iteration_budget else 0
|
||||
|
||||
_diag_msg = (
|
||||
"Turn ended: reason=%s model=%s api_calls=%d/%d budget=%d/%d "
|
||||
"tool_turns=%d last_msg_role=%s response_len=%d session=%s"
|
||||
)
|
||||
_diag_args = (
|
||||
_turn_exit_reason, agent.model, api_call_count, agent.max_iterations,
|
||||
_budget_used, _budget_max,
|
||||
_turn_tool_count, _last_msg_role, _resp_len,
|
||||
agent.session_id or "none",
|
||||
)
|
||||
|
||||
if _last_msg_role == "tool" and not interrupted:
|
||||
# Agent was mid-work — this is the "just stops" case.
|
||||
logger.warning(
|
||||
"Turn ended with pending tool result (agent may appear stuck). "
|
||||
+ _diag_msg + " last_tool=%s",
|
||||
*_diag_args, _last_tool_name,
|
||||
)
|
||||
else:
|
||||
logger.info(_diag_msg, *_diag_args)
|
||||
|
||||
# File-mutation verifier footer.
|
||||
# If one or more ``write_file`` / ``patch`` calls failed during this
|
||||
# turn and were never superseded by a successful write to the same
|
||||
# path, append an advisory footer to the assistant response. This
|
||||
# catches the specific case — reported by Ben Eng (#15524-adjacent)
|
||||
# — where a model issues a batch of parallel patches, half of them
|
||||
# fail with "Could not find old_string", and the model summarises
|
||||
# the turn claiming every file was edited. The user then has to
|
||||
# manually run ``git status`` to catch the lie. With this footer
|
||||
# the truth is surfaced on every turn, so over-claiming is
|
||||
# structurally impossible past the model.
|
||||
#
|
||||
# Gate: only applied when a real text response exists for this
|
||||
# turn and the user didn't interrupt. Empty/interrupted turns
|
||||
# already have other surface text that shouldn't be augmented.
|
||||
if final_response and not interrupted:
|
||||
try:
|
||||
_failed = getattr(agent, "_turn_failed_file_mutations", None) or {}
|
||||
if _failed and agent._file_mutation_verifier_enabled():
|
||||
footer = agent._format_file_mutation_failure_footer(_failed)
|
||||
if footer:
|
||||
final_response = final_response.rstrip() + "\n\n" + footer
|
||||
except Exception as _ver_err:
|
||||
logger.debug("file-mutation verifier footer failed: %s", _ver_err)
|
||||
|
||||
# Turn-completion explainer.
|
||||
# When a turn ends abnormally after substantive work — empty content
|
||||
# after retries, a partial/truncated stream, a still-pending tool
|
||||
# result, or an iteration/budget limit — the user otherwise gets a
|
||||
# blank or fragmentary response box with no consolidated reason why
|
||||
# the agent stopped (#34452). Surface a single user-visible
|
||||
# explanation derived from ``_turn_exit_reason``, mirroring the
|
||||
# file-mutation verifier footer pattern above.
|
||||
#
|
||||
# Gate carefully so healthy turns stay quiet:
|
||||
# - ``text_response(...)`` exits never produce an explanation
|
||||
# (handled inside the formatter), so a terse ``Done.`` is silent.
|
||||
# - We only ACT when there is no genuinely usable reply this turn:
|
||||
# an empty response, the "(empty)" terminal sentinel, or a
|
||||
# suspiciously short partial fragment with no terminating
|
||||
# punctuation (e.g. "The"). A real short answer keeps its text.
|
||||
if not interrupted:
|
||||
try:
|
||||
if agent._turn_completion_explainer_enabled():
|
||||
_stripped = (final_response or "").strip()
|
||||
_is_empty_terminal = _stripped == "" or _stripped == "(empty)"
|
||||
# A short fragment that is not a normal text_response exit
|
||||
# and lacks sentence-ending punctuation is treated as a
|
||||
# truncated partial (the "The" case from #34452).
|
||||
_is_partial_fragment = (
|
||||
not _is_empty_terminal
|
||||
and not str(_turn_exit_reason).startswith("text_response")
|
||||
and len(_stripped) <= 24
|
||||
and _stripped[-1:] not in {".", "!", "?", "。", "!", "?", "`", ")"}
|
||||
)
|
||||
if _is_empty_terminal or _is_partial_fragment:
|
||||
_explanation = agent._format_turn_completion_explanation(
|
||||
_turn_exit_reason
|
||||
)
|
||||
if _explanation:
|
||||
if _is_empty_terminal:
|
||||
# Replace the bare "(empty)"/blank sentinel with
|
||||
# the actionable explanation.
|
||||
final_response = _explanation
|
||||
else:
|
||||
# Keep the partial fragment, append the reason so
|
||||
# the user sees both what arrived and why it
|
||||
# stopped.
|
||||
final_response = (
|
||||
_stripped + "\n\n" + _explanation
|
||||
)
|
||||
except Exception as _exp_err:
|
||||
logger.debug("turn-completion explainer failed: %s", _exp_err)
|
||||
|
||||
_response_transformed = False
|
||||
|
||||
# Plugin hook: transform_llm_output
|
||||
# Fired once per turn after the tool-calling loop completes.
|
||||
# Plugins can transform the LLM's output text before it's returned.
|
||||
# First hook to return a string wins; None/empty return leaves text unchanged.
|
||||
if final_response and not interrupted:
|
||||
try:
|
||||
from hermes_cli.plugins import invoke_hook as _invoke_hook
|
||||
_transform_results = _invoke_hook(
|
||||
"transform_llm_output",
|
||||
response_text=final_response,
|
||||
session_id=agent.session_id or "",
|
||||
model=agent.model,
|
||||
platform=getattr(agent, "platform", None) or "",
|
||||
)
|
||||
for _hook_result in _transform_results:
|
||||
if isinstance(_hook_result, str) and _hook_result:
|
||||
final_response = _hook_result
|
||||
_response_transformed = True
|
||||
break # First non-empty string wins
|
||||
except Exception as exc:
|
||||
logger.warning("transform_llm_output hook failed: %s", exc)
|
||||
|
||||
# Plugin hook: post_llm_call
|
||||
# Fired once per turn after the tool-calling loop completes.
|
||||
# Plugins can use this to persist conversation data (e.g. sync
|
||||
# to an external memory system).
|
||||
if final_response and not interrupted:
|
||||
try:
|
||||
from hermes_cli.plugins import invoke_hook as _invoke_hook
|
||||
_invoke_hook(
|
||||
"post_llm_call",
|
||||
session_id=agent.session_id,
|
||||
task_id=effective_task_id,
|
||||
turn_id=turn_id,
|
||||
user_message=original_user_message,
|
||||
assistant_response=final_response,
|
||||
conversation_history=list(messages),
|
||||
model=agent.model,
|
||||
platform=getattr(agent, "platform", None) or "",
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.warning("post_llm_call hook failed: %s", exc)
|
||||
|
||||
# Extract reasoning from the CURRENT turn only. Walk backwards
|
||||
# but stop at the user message that started this turn — anything
|
||||
# earlier is from a prior turn and must not leak into the reasoning
|
||||
# box (confusing stale display; #17055). Within the current turn
|
||||
# we still want the *most recent* non-empty reasoning: many
|
||||
# providers (Claude thinking, DeepSeek v4, Codex Responses) emit
|
||||
# reasoning on the tool-call step and leave the final-answer step
|
||||
# with reasoning=None, so picking only the last assistant would
|
||||
# silently drop legitimate same-turn reasoning.
|
||||
last_reasoning = None
|
||||
for msg in reversed(messages):
|
||||
if msg.get("role") == "user":
|
||||
break # turn boundary — don't cross into prior turns
|
||||
if msg.get("role") == "assistant" and msg.get("reasoning"):
|
||||
last_reasoning = msg["reasoning"]
|
||||
break
|
||||
|
||||
# Build result with interrupt info if applicable
|
||||
result = {
|
||||
"final_response": final_response,
|
||||
"last_reasoning": last_reasoning,
|
||||
"messages": messages,
|
||||
"api_calls": api_call_count,
|
||||
"completed": completed,
|
||||
"turn_exit_reason": _turn_exit_reason,
|
||||
"failed": failed,
|
||||
"partial": False, # True only when stopped due to invalid tool calls
|
||||
"interrupted": interrupted,
|
||||
"response_transformed": _response_transformed,
|
||||
"response_previewed": getattr(agent, "_response_was_previewed", False),
|
||||
"model": agent.model,
|
||||
"provider": agent.provider,
|
||||
"base_url": agent.base_url,
|
||||
"input_tokens": agent.session_input_tokens,
|
||||
"output_tokens": agent.session_output_tokens,
|
||||
"cache_read_tokens": agent.session_cache_read_tokens,
|
||||
"cache_write_tokens": agent.session_cache_write_tokens,
|
||||
"reasoning_tokens": agent.session_reasoning_tokens,
|
||||
"prompt_tokens": agent.session_prompt_tokens,
|
||||
"completion_tokens": agent.session_completion_tokens,
|
||||
"total_tokens": agent.session_total_tokens,
|
||||
"last_prompt_tokens": getattr(agent.context_compressor, "last_prompt_tokens", 0) or 0,
|
||||
"estimated_cost_usd": agent.session_estimated_cost_usd,
|
||||
"cost_status": agent.session_cost_status,
|
||||
"cost_source": agent.session_cost_source,
|
||||
"session_id": agent.session_id,
|
||||
}
|
||||
if agent._tool_guardrail_halt_decision is not None:
|
||||
result["guardrail"] = agent._tool_guardrail_halt_decision.to_metadata()
|
||||
# If a /steer landed after the final assistant turn (no more tool
|
||||
# batches to drain into), hand it back to the caller so it can be
|
||||
# delivered as the next user turn instead of being silently lost.
|
||||
_leftover_steer = agent._drain_pending_steer()
|
||||
if _leftover_steer:
|
||||
result["pending_steer"] = _leftover_steer
|
||||
agent._response_was_previewed = False
|
||||
|
||||
# Include interrupt message if one triggered the interrupt
|
||||
if interrupted and agent._interrupt_message:
|
||||
result["interrupt_message"] = agent._interrupt_message
|
||||
|
||||
# Clear interrupt state after handling
|
||||
agent.clear_interrupt()
|
||||
|
||||
# Clear stream callback so it doesn't leak into future calls
|
||||
agent._stream_callback = None
|
||||
|
||||
# Check skill trigger NOW — based on how many tool iterations THIS turn used.
|
||||
_should_review_skills = False
|
||||
if (agent._skill_nudge_interval > 0
|
||||
and agent._iters_since_skill >= agent._skill_nudge_interval
|
||||
and "skill_manage" in agent.valid_tool_names):
|
||||
_should_review_skills = True
|
||||
agent._iters_since_skill = 0
|
||||
|
||||
# External memory provider: sync the completed turn + queue next prefetch.
|
||||
agent._sync_external_memory_for_turn(
|
||||
original_user_message=original_user_message,
|
||||
final_response=final_response,
|
||||
interrupted=interrupted,
|
||||
messages=messages,
|
||||
)
|
||||
|
||||
# Background memory/skill review — runs AFTER the response is delivered
|
||||
# so it never competes with the user's task for model attention.
|
||||
if final_response and not interrupted and (_should_review_memory or _should_review_skills):
|
||||
try:
|
||||
agent._spawn_background_review(
|
||||
messages_snapshot=list(messages),
|
||||
review_memory=_should_review_memory,
|
||||
review_skills=_should_review_skills,
|
||||
)
|
||||
except Exception:
|
||||
pass # Background review is best-effort
|
||||
|
||||
# Note: Memory provider on_session_end() + shutdown_all() are NOT
|
||||
# called here — run_conversation() is called once per user message in
|
||||
# multi-turn sessions. Shutting down after every turn would kill the
|
||||
# provider before the second message. Actual session-end cleanup is
|
||||
# handled by the CLI (atexit / /reset) and gateway (session expiry /
|
||||
# _reset_session).
|
||||
|
||||
# Plugin hook: on_session_end
|
||||
# Fired at the very end of every run_conversation call.
|
||||
# Plugins can use this for cleanup, flushing buffers, etc.
|
||||
try:
|
||||
from hermes_cli.plugins import invoke_hook as _invoke_hook
|
||||
_invoke_hook(
|
||||
"on_session_end",
|
||||
session_id=agent.session_id,
|
||||
task_id=effective_task_id,
|
||||
turn_id=turn_id,
|
||||
completed=completed,
|
||||
interrupted=interrupted,
|
||||
model=agent.model,
|
||||
platform=getattr(agent, "platform", None) or "",
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.warning("on_session_end hook failed: %s", exc)
|
||||
|
||||
return result
|
||||
Loading…
Add table
Add a link
Reference in a new issue