mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-23 10:42:00 +00:00
fix(agent): guard finalize_turn cleanup chain so it never drops the response (#50009)
When a turn hit max_iterations, finalize_turn ran three unguarded cleanup steps after the model's summary — _save_trajectory (file I/O), _cleanup_task_resources (remote VM/browser teardown), and _persist_session (SQLite write). Any raise there propagated out of run_conversation, discarding the partial final_response the caller was waiting for; subprocess wrappers saw an empty stdout with no traceback (#8049). Each step is now guarded independently so one failure can't skip the others. Failures log at ERROR with a traceback and are surfaced on the result dict via cleanup_errors; the partial response is always returned. Closes #8049.
This commit is contained in:
parent
796f618f99
commit
9f67ba1b01
2 changed files with 199 additions and 4 deletions
|
|
@ -128,19 +128,44 @@ def finalize_turn(
|
|||
and not failed
|
||||
)
|
||||
|
||||
# Post-loop cleanup must never lose the response. Trajectory save,
|
||||
# resource teardown, and session persistence all touch fallible
|
||||
# surfaces — file I/O / JSON serialization (_save_trajectory), remote
|
||||
# VM/browser teardown over the network (_cleanup_task_resources), and
|
||||
# SQLite writes (_persist_session). A raise from any of them used to
|
||||
# propagate straight out of run_conversation, discarding the partial
|
||||
# final_response the caller is waiting for (subprocess wrappers saw an
|
||||
# empty stdout with no traceback — #8049). Each step is now guarded
|
||||
# independently so one failure can't skip the others, and any errors
|
||||
# are surfaced on the result dict via ``cleanup_errors`` rather than
|
||||
# killing the turn.
|
||||
_cleanup_errors = []
|
||||
|
||||
# Save trajectory if enabled. ``user_message`` may be a multimodal
|
||||
# list of parts; the trajectory format wants a plain string.
|
||||
agent._save_trajectory(messages, _summarize_user_message_for_log(user_message), completed)
|
||||
try:
|
||||
agent._save_trajectory(messages, _summarize_user_message_for_log(user_message), completed)
|
||||
except Exception as _save_err:
|
||||
_cleanup_errors.append(f"save_trajectory: {_save_err}")
|
||||
logger.error("finalize_turn: _save_trajectory failed: %s", _save_err, exc_info=True)
|
||||
|
||||
# Clean up VM and browser for this task after conversation completes
|
||||
agent._cleanup_task_resources(effective_task_id)
|
||||
try:
|
||||
agent._cleanup_task_resources(effective_task_id)
|
||||
except Exception as _cleanup_err:
|
||||
_cleanup_errors.append(f"cleanup_task_resources: {_cleanup_err}")
|
||||
logger.error("finalize_turn: _cleanup_task_resources failed: %s", _cleanup_err, exc_info=True)
|
||||
|
||||
# Persist session to both JSON log and SQLite only after private retry
|
||||
# scaffolding has been removed. Otherwise a later user "continue" turn
|
||||
# can replay assistant("(empty)") / recovery nudges and fall into the
|
||||
# same empty-response loop again.
|
||||
agent._drop_trailing_empty_response_scaffolding(messages)
|
||||
agent._persist_session(messages, conversation_history)
|
||||
try:
|
||||
agent._drop_trailing_empty_response_scaffolding(messages)
|
||||
agent._persist_session(messages, conversation_history)
|
||||
except Exception as _persist_err:
|
||||
_cleanup_errors.append(f"persist_session: {_persist_err}")
|
||||
logger.error("finalize_turn: _persist_session failed: %s", _persist_err, exc_info=True)
|
||||
|
||||
# ── Turn-exit diagnostic log ─────────────────────────────────────
|
||||
# Always logged at INFO so agent.log captures WHY every turn ended.
|
||||
|
|
@ -354,6 +379,11 @@ def finalize_turn(
|
|||
}
|
||||
if agent._tool_guardrail_halt_decision is not None:
|
||||
result["guardrail"] = agent._tool_guardrail_halt_decision.to_metadata()
|
||||
# Surface any post-loop cleanup failures so the caller can distinguish a
|
||||
# clean turn from one whose trajectory/session/resource teardown raised
|
||||
# (the response is still returned either way — #8049).
|
||||
if _cleanup_errors:
|
||||
result["cleanup_errors"] = _cleanup_errors
|
||||
# If a /steer landed after the final assistant turn (no more tool
|
||||
# batches to drain into), hand it back to the caller so it can be
|
||||
# delivered as the next user turn instead of being silently lost.
|
||||
|
|
|
|||
165
tests/agent/test_turn_finalizer_cleanup_guard.py
Normal file
165
tests/agent/test_turn_finalizer_cleanup_guard.py
Normal file
|
|
@ -0,0 +1,165 @@
|
|||
"""Regression test for #8049.
|
||||
|
||||
When the post-loop cleanup chain in ``finalize_turn`` raises — trajectory
|
||||
save (file I/O), resource teardown (remote VM/browser), or session
|
||||
persistence (SQLite) — the partial ``final_response`` the caller is waiting
|
||||
for must still be returned. Previously any of those raised straight out of
|
||||
``run_conversation``, so a subprocess wrapper saw an empty stdout with no
|
||||
traceback and lost the whole turn.
|
||||
"""
|
||||
|
||||
import pytest
|
||||
|
||||
from agent.turn_finalizer import finalize_turn
|
||||
|
||||
|
||||
class _StubBudget:
|
||||
used = 5
|
||||
max_total = 3
|
||||
remaining = 0
|
||||
|
||||
|
||||
class _StubCompressor:
|
||||
last_prompt_tokens = 0
|
||||
|
||||
|
||||
class _StubAgent:
|
||||
"""Minimal agent surface that ``finalize_turn`` reads from."""
|
||||
|
||||
def __init__(self, *, raise_in):
|
||||
self._raise_in = set(raise_in)
|
||||
self.max_iterations = 3
|
||||
self.iteration_budget = _StubBudget()
|
||||
self.context_compressor = _StubCompressor()
|
||||
self.model = "stub/model"
|
||||
self.provider = "stub"
|
||||
self.base_url = "http://stub"
|
||||
self.session_id = "sess-1"
|
||||
self.quiet_mode = True
|
||||
self.platform = "cli"
|
||||
self._interrupt_requested = False
|
||||
self._interrupt_message = None
|
||||
self._tool_guardrail_halt_decision = None
|
||||
self._response_was_previewed = False
|
||||
self._skill_nudge_interval = 0
|
||||
self._iters_since_skill = 0
|
||||
for attr in (
|
||||
"session_input_tokens",
|
||||
"session_output_tokens",
|
||||
"session_cache_read_tokens",
|
||||
"session_cache_write_tokens",
|
||||
"session_reasoning_tokens",
|
||||
"session_prompt_tokens",
|
||||
"session_completion_tokens",
|
||||
"session_total_tokens",
|
||||
"session_estimated_cost_usd",
|
||||
):
|
||||
setattr(self, attr, 0)
|
||||
self.session_cost_status = "ok"
|
||||
self.session_cost_source = "stub"
|
||||
|
||||
# --- fallible cleanup surfaces -------------------------------------
|
||||
def _save_trajectory(self, *a, **k):
|
||||
if "save_trajectory" in self._raise_in:
|
||||
raise RuntimeError("trajectory disk full")
|
||||
|
||||
def _cleanup_task_resources(self, *a, **k):
|
||||
if "cleanup_task_resources" in self._raise_in:
|
||||
raise RuntimeError("docker teardown EOF")
|
||||
|
||||
def _drop_trailing_empty_response_scaffolding(self, *a, **k):
|
||||
pass
|
||||
|
||||
def _persist_session(self, *a, **k):
|
||||
if "persist_session" in self._raise_in:
|
||||
raise RuntimeError("sqlite database is locked")
|
||||
|
||||
# --- harmless no-ops ------------------------------------------------
|
||||
def _emit_status(self, *a, **k):
|
||||
pass
|
||||
|
||||
def _safe_print(self, *a, **k):
|
||||
pass
|
||||
|
||||
def _handle_max_iterations(self, messages, n):
|
||||
return "PARTIAL SUMMARY FROM MODEL"
|
||||
|
||||
def _file_mutation_verifier_enabled(self):
|
||||
return False
|
||||
|
||||
def _turn_completion_explainer_enabled(self):
|
||||
return False
|
||||
|
||||
def _drain_pending_steer(self):
|
||||
return None
|
||||
|
||||
def clear_interrupt(self):
|
||||
pass
|
||||
|
||||
def _sync_external_memory_for_turn(self, **k):
|
||||
pass
|
||||
|
||||
|
||||
def _run(agent):
|
||||
messages = [
|
||||
{"role": "user", "content": "do a thing"},
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": "",
|
||||
"tool_calls": [
|
||||
{"id": "c1", "function": {"name": "read_file", "arguments": "{}"}}
|
||||
],
|
||||
},
|
||||
{"role": "tool", "tool_call_id": "c1", "content": "file contents"},
|
||||
]
|
||||
return finalize_turn(
|
||||
agent,
|
||||
final_response=None, # forces the max-iterations summary path
|
||||
api_call_count=3,
|
||||
interrupted=False,
|
||||
failed=False,
|
||||
messages=messages,
|
||||
conversation_history=None,
|
||||
effective_task_id="task-1",
|
||||
turn_id="turn-1",
|
||||
user_message="do a thing",
|
||||
original_user_message="do a thing",
|
||||
_should_review_memory=False,
|
||||
_turn_exit_reason="unknown",
|
||||
)
|
||||
|
||||
|
||||
def test_all_cleanup_steps_raise_response_still_returned():
|
||||
agent = _StubAgent(
|
||||
raise_in=("save_trajectory", "cleanup_task_resources", "persist_session")
|
||||
)
|
||||
result = _run(agent)
|
||||
assert result["final_response"] == "PARTIAL SUMMARY FROM MODEL"
|
||||
labels = [e.split(":")[0] for e in result["cleanup_errors"]]
|
||||
assert labels == ["save_trajectory", "cleanup_task_resources", "persist_session"]
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"step", ["save_trajectory", "cleanup_task_resources", "persist_session"]
|
||||
)
|
||||
def test_single_cleanup_step_raises_does_not_skip_others(step):
|
||||
agent = _StubAgent(raise_in=(step,))
|
||||
result = _run(agent)
|
||||
# Response survives.
|
||||
assert result["final_response"] == "PARTIAL SUMMARY FROM MODEL"
|
||||
# Exactly the failing step is recorded; the others ran without error.
|
||||
assert result["cleanup_errors"] == [
|
||||
next(
|
||||
e
|
||||
for e in result["cleanup_errors"]
|
||||
if e.startswith(step)
|
||||
)
|
||||
]
|
||||
assert len(result["cleanup_errors"]) == 1
|
||||
|
||||
|
||||
def test_clean_turn_has_no_cleanup_errors_key():
|
||||
agent = _StubAgent(raise_in=())
|
||||
result = _run(agent)
|
||||
assert result["final_response"] == "PARTIAL SUMMARY FROM MODEL"
|
||||
assert "cleanup_errors" not in result
|
||||
Loading…
Add table
Add a link
Reference in a new issue